generic_fns.pcs

1 ############################################################################### 2 # # 3 # Copyright (C) 2003-2012 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax. # 6 # # 7 # relax is free software; you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation; either version 2 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # relax is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with relax; if not, write to the Free Software # 19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # 20 # # 21 ############################################################################### 22 23 # Module docstring. 24 """Module for the manipulation of pseudocontact shift data.""" 25 26 # Python module imports. 27 from copy import deepcopy 28 from math import pi, sqrt 29 from numpy import array, float64, ones, zeros 30 from numpy.linalg import norm 31 import sys 32 from warnings import warn 33 34 # relax module imports. 35 from generic_fns import grace, pipes 36 from generic_fns.align_tensor import get_tensor_index 37 from generic_fns.mol_res_spin import exists_mol_res_spin_data, return_spin, spin_loop 38 from maths_fns.pcs import ave_pcs_tensor 39 from physical_constants import g1H, pcs_constant 40 from relax_errors import RelaxError, RelaxNoPdbError, RelaxNoSequenceError 41 from relax_io import open_write_file, read_spin_data, write_spin_data 42 from relax_warnings import RelaxWarning, RelaxNoSpinWarning 43 44

45 -def back_calc(align_id=None):

46 """Back calculate the PCS from the alignment tensor. 47 48 @keyword align_id: The alignment tensor ID string. 49 @type align_id: str 50 """ 51 52 # Arg check. 53 if align_id and align_id not in cdp.align_ids: 54 raise RelaxError, "The alignment ID '%s' is not in the alignment ID list %s." % (align_id, cdp.align_ids) 55 56 # Convert the align IDs to an array, or take all IDs. 57 if align_id: 58 align_ids = [align_id] 59 else: 60 align_ids = cdp.align_ids 61 62 # Add the ID to the PCS IDs, if needed. 63 for align_id in align_ids: 64 # Init. 65 if not hasattr(cdp, 'pcs_ids'): 66 cdp.pcs_ids = [] 67 68 # Add the ID. 69 if align_id not in cdp.pcs_ids: 70 cdp.pcs_ids.append(align_id) 71 72 # The weights. 73 weights = ones(cdp.N, float64) / cdp.N 74 75 # Unit vector data structure init. 76 unit_vect = zeros((cdp.N, 3), float64) 77 78 # Loop over the spins. 79 for spin in spin_loop(): 80 # Skip spins with no position. 81 if not hasattr(spin, 'pos'): 82 continue 83 84 # Atom positions. 85 pos = spin.pos 86 if type(pos[0]) in [float, float64]: 87 pos = [pos] * cdp.N 88 89 # Loop over the alignments. 90 for id in align_ids: 91 # Vectors. 92 vect = zeros((cdp.N, 3), float64) 93 r = zeros(cdp.N, float64) 94 dj = zeros(cdp.N, float64) 95 for c in range(cdp.N): 96 # The vector. 97 vect[c] = pos[c] - cdp.paramagnetic_centre 98 99 # The length. 100 r[c] = norm(vect[c]) 101 102 # Normalise (only if the vector has length). 103 if r[c]: 104 vect[c] = vect[c] / r[c] 105 106 # Calculate the PCS constant. 107 dj[c] = pcs_constant(cdp.temperature[id], cdp.frq[id] * 2.0 * pi / g1H, r[c]/1e10) 108 109 # Initialise if necessary. 110 if not hasattr(spin, 'pcs_bc'): 111 spin.pcs_bc = {} 112 113 # Calculate the PCSs (in ppm). 114 spin.pcs_bc[id] = ave_pcs_tensor(dj, vect, cdp.N, cdp.align_tensors[get_tensor_index(id)].A, weights=weights) * 1e6

115 116

117 -def centre(pos=None, atom_id=None, pipe=None, verbosity=1, ave_pos=False, force=False):

118 """Specify the atom in the loaded structure corresponding to the paramagnetic centre. 119 120 @keyword pos: The atomic position. If set, the atom_id string will be ignored. 121 @type pos: list of float 122 @keyword atom_id: The atom identification string. 123 @type atom_id: str 124 @keyword pipe: An alternative data pipe to extract the paramagnetic centre from. 125 @type pipe: None or str 126 @keyword verbosity: The amount of information to print out. The bigger the number, the more information. 127 @type verbosity: int 128 @keyword ave_pos: A flag which if True causes the atomic positions from multiple models to be averaged. 129 @type ave_pos: bool 130 @keyword force: A flag which if True will cause the current PCS centre to be overwritten. 131 """ 132 133 # The data pipe. 134 if pipe == None: 135 pipe = pipes.cdp_name() 136 137 # Test the data pipe. 138 pipes.test(pipe) 139 140 # Get the data pipes. 141 source_dp = pipes.get_pipe(pipe) 142 143 # Test if the structure has been loaded. 144 if not hasattr(source_dp, 'structure'): 145 raise RelaxNoPdbError 146 147 # Test the centre has already been set. 148 if not force and hasattr(cdp, 'paramagnetic_centre'): 149 raise RelaxError("The paramagnetic centre has already been set to the coordinates " + repr(cdp.paramagnetic_centre) + ".") 150 151 # Position is supplied. 152 if pos != None: 153 centre = array(pos) 154 num_pos = 1 155 full_pos_list = [] 156 157 # Position from a loaded structure. 158 else: 159 # Get the positions. 160 centre = zeros(3, float64) 161 full_pos_list = [] 162 num_pos = 0 163 for spin, spin_id in spin_loop(atom_id, pipe=pipe, return_id=True): 164 # No atomic positions. 165 if not hasattr(spin, 'pos'): 166 continue 167 168 # Spin position list. 169 if isinstance(spin.pos[0], float) or isinstance(spin.pos[0], float64): 170 pos_list = [spin.pos] 171 else: 172 pos_list = spin.pos 173 174 # Loop over the model positions. 175 for pos in pos_list: 176 full_pos_list.append(pos) 177 centre = centre + array(pos) 178 num_pos = num_pos + 1 179 180 # No positional information! 181 if not num_pos: 182 raise RelaxError("No positional information could be found for the spin '%s'." % atom_id) 183 184 # Averaging. 185 centre = centre / float(num_pos) 186 187 # Print out. 188 if verbosity: 189 print("Paramagnetic centres located at:") 190 for pos in full_pos_list: 191 print((" [%8.3f, %8.3f, %8.3f]" % (pos[0], pos[1], pos[2]))) 192 print("\nAverage paramagnetic centre located at:") 193 print((" [%8.3f, %8.3f, %8.3f]" % (centre[0], centre[1], centre[2]))) 194 195 # Set the centre (place it into the current data pipe). 196 if ave_pos: 197 if verbosity: 198 print("\nUsing the average paramagnetic position.") 199 cdp.paramagnetic_centre = centre 200 else: 201 if verbosity: 202 print("\nUsing all paramagnetic positions.") 203 cdp.paramagnetic_centre = full_pos_list

204 205

206 -def corr_plot(format=None, file=None, dir=None, force=False):

207 """Generate a correlation plot of the measured vs. back-calculated PCSs. 208 209 @keyword format: The format for the plot file. The following values are accepted: 'grace', a Grace plot; None, a plain text file. 210 @type format: str or None 211 @keyword file: The file name or object to write to. 212 @type file: str or file object 213 @keyword dir: The name of the directory to place the file into (defaults to the current directory). 214 @type dir: str 215 @keyword force: A flag which if True will cause any pre-existing file to be overwritten. 216 @type force: bool 217 """ 218 219 # Test if the current pipe exists. 220 pipes.test() 221 222 # Test if the sequence data is loaded. 223 if not exists_mol_res_spin_data(): 224 raise RelaxNoSequenceError 225 226 # Does PCS data exist? 227 if not hasattr(cdp, 'pcs_ids') or not cdp.pcs_ids: 228 warn(RelaxWarning("No PCS data exists, skipping file creation.")) 229 return 230 231 # Open the file for writing. 232 file = open_write_file(file, dir, force) 233 234 # Init. 235 data = [] 236 237 # The diagonal. 238 data.append([[-100, -100, 0], [100, 100, 0]]) 239 240 # The spin types. 241 types = [] 242 for spin in spin_loop(): 243 if spin.element not in types: 244 types.append(spin.element) 245 246 # Loop over the PCS data. 247 for align_id in cdp.pcs_ids: 248 # Loop over the spin types. 249 for i in range(len(types)): 250 # Append a new list for this alignment. 251 data.append([]) 252 253 # Errors present? 254 err_flag = False 255 for spin in spin_loop(): 256 # Skip deselected spins. 257 if not spin.select: 258 continue 259 260 # Error present. 261 if hasattr(spin, 'pcs_err') and align_id in spin.pcs_err.keys(): 262 err_flag = True 263 break 264 265 # Loop over the spins. 266 for spin, spin_id in spin_loop(return_id=True): 267 # Skip deselected spins. 268 if not spin.select: 269 continue 270 271 # Incorrect spin type. 272 if spin.element != types[i]: 273 continue 274 275 # Skip if data is missing. 276 if not hasattr(spin, 'pcs') or not hasattr(spin, 'pcs_bc') or not align_id in spin.pcs.keys() or not align_id in spin.pcs_bc.keys(): 277 continue 278 279 # Append the data. 280 data[-1].append([spin.pcs_bc[align_id], spin.pcs[align_id]]) 281 282 # Errors. 283 if err_flag: 284 if hasattr(spin, 'pcs_err') and align_id in spin.pcs_err.keys(): 285 data[-1][-1].append(spin.pcs_err[align_id]) 286 else: 287 data[-1][-1].append(None) 288 289 # Label. 290 data[-1][-1].append(spin_id) 291 292 # The data size. 293 size = len(data) 294 295 # Only one data set. 296 data = [data] 297 298 # Graph type. 299 if err_flag: 300 graph_type = 'xydy' 301 else: 302 graph_type = 'xy' 303 304 # Grace file. 305 if format == 'grace': 306 # The set names. 307 set_names = [None] 308 for i in range(len(cdp.pcs_ids)): 309 for j in range(len(types)): 310 set_names.append("%s (%s)" % (cdp.pcs_ids[i], types[j])) 311 312 # The header. 313 grace.write_xy_header(file=file, title="PCS correlation plot", sets=size, set_names=set_names, linestyle=[2]+[0]*size, data_type=['pcs_bc', 'pcs'], axis_min=[-0.5, -0.5], axis_max=[0.5, 0.5], legend_pos=[1, 0.5]) 314 315 # The main data. 316 grace.write_xy_data(data=data, file=file, graph_type=graph_type)

317 318

319 -def delete(align_id=None):

320 """Delete the PCS data corresponding to the alignment ID. 321 322 @keyword align_id: The alignment tensor ID string. If not specified, all data will be deleted. 323 @type align_id: str or None 324 """ 325 326 # Test if the current data pipe exists. 327 pipes.test() 328 329 # Test if sequence data exists. 330 if not exists_mol_res_spin_data(): 331 raise RelaxNoSequenceError 332 333 # Check that the ID exists. 334 if align_id and not align_id in cdp.pcs_ids: 335 raise RelaxError("The PCS alignment id '%s' does not exist" % align_id) 336 337 # The IDs. 338 if not align_id: 339 ids = deepcopy(cdp.pcs_ids) 340 else: 341 ids = [align_id] 342 343 # Loop over the alignments, removing all the corresponding data. 344 for id in ids: 345 # The PCS ID. 346 cdp.pcs_ids.pop(cdp.pcs_ids.index(id)) 347 348 # The data type. 349 if hasattr(cdp, 'pcs_data_types') and cdp.pcs_data_types.has_key(id): 350 cdp.pcs_data_types.pop(id) 351 352 # The spin data. 353 for spin in spin_loop(): 354 # The data. 355 if hasattr(spin, 'pcs') and spin.pcs.has_key(id): 356 spin.pcs.pop(id) 357 358 # The error. 359 if hasattr(spin, 'pcs_err') and spin.pcs_err.has_key(id): 360 spin.pcs_err.pop(id) 361 362 # Clean the global data. 363 if not hasattr(cdp, 'rdc_ids') or id not in cdp.rdc_ids: 364 cdp.align_ids.pop(id)

365 366

367 -def display(align_id=None, bc=False):

368 """Display the PCS data corresponding to the alignment ID. 369 370 @keyword align_id: The alignment tensor ID string. 371 @type align_id: str 372 @keyword bc: The back-calculation flag which if True will cause the back-calculated rather than measured data to be displayed. 373 @type bc: bool 374 """ 375 376 # Call the write method with sys.stdout as the file. 377 write(align_id=align_id, file=sys.stdout, bc=bc)

378 379

380 -def q_factors(spin_id=None):

381 """Calculate the Q-factors for the PCS data. 382 383 @keyword spin_id: The spin ID string used to restrict the Q-factor calculation to a subset of all spins. 384 @type spin_id: None or str 385 """ 386 387 # No PCSs, so no Q factors can be calculated. 388 if not hasattr(cdp, 'pcs_ids') or not len(cdp.pcs_ids): 389 warn(RelaxWarning("No PCS data exists, Q factors cannot be calculated.")) 390 return 391 392 # Q-factor dictionary. 393 cdp.q_factors_pcs = {} 394 395 # Loop over the alignments. 396 for align_id in cdp.pcs_ids: 397 # Init. 398 pcs2_sum = 0.0 399 sse = 0.0 400 401 # Spin loop. 402 spin_count = 0 403 pcs_data = False 404 pcs_bc_data = False 405 for spin in spin_loop(spin_id): 406 # Skip deselected spins. 407 if not spin.select: 408 continue 409 410 # Increment the spin counter. 411 spin_count += 1 412 413 # Data checks. 414 if hasattr(spin, 'pcs') and spin.pcs.has_key(align_id): 415 pcs_data = True 416 if hasattr(spin, 'pcs_bc') and spin.pcs_bc.has_key(align_id): 417 pcs_bc_data = True 418 419 # Skip spins without PCS data. 420 if not hasattr(spin, 'pcs') or not hasattr(spin, 'pcs_bc') or not spin.pcs.has_key(align_id) or spin.pcs[align_id] == None or not spin.pcs_bc.has_key(align_id) or spin.pcs_bc[align_id] == None: 421 continue 422 423 # Sum of squares. 424 sse = sse + (spin.pcs[align_id] - spin.pcs_bc[align_id])**2 425 426 # Sum the PCSs squared (for normalisation). 427 pcs2_sum = pcs2_sum + spin.pcs[align_id]**2 428 429 # The Q-factor for the alignment. 430 if pcs2_sum: 431 Q = sqrt(sse / pcs2_sum) 432 cdp.q_factors_pcs[align_id] = Q 433 434 # Warnings (and then exit). 435 if not spin_count: 436 warn(RelaxWarning("No spins have been used in the calculation.")) 437 return 438 if not pcs_data: 439 warn(RelaxWarning("No PCS data can be found.")) 440 return 441 if not pcs_bc_data: 442 warn(RelaxWarning("No back-calculated PCS data can be found.")) 443 return 444 445 # The total Q-factor. 446 cdp.q_pcs = 0.0 447 for id in cdp.q_factors_pcs: 448 cdp.q_pcs = cdp.q_pcs + cdp.q_factors_pcs[id]**2 449 cdp.q_pcs = cdp.q_pcs / len(cdp.q_factors_pcs) 450 cdp.q_pcs = sqrt(cdp.q_pcs)

451 452

453 -def read(align_id=None, file=None, dir=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None, sep=None, spin_id=None):

454 """Read the PCS data from file. 455 456 @param align_id: The alignment tensor ID string. 457 @type align_id: str 458 @param file: The name of the file to open. 459 @type file: str 460 @param dir: The directory containing the file (defaults to the current directory if None). 461 @type dir: str or None 462 @param file_data: An alternative opening a file, if the data already exists in the correct format. The format is a list of lists where the first index corresponds to the row and the second the column. 463 @type file_data: list of lists 464 @keyword spin_id_col: The column containing the spin ID strings. If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none. 465 @type spin_id_col: int or None 466 @keyword mol_name_col: The column containing the molecule name information. If supplied, spin_id_col must be None. 467 @type mol_name_col: int or None 468 @keyword res_name_col: The column containing the residue name information. If supplied, spin_id_col must be None. 469 @type res_name_col: int or None 470 @keyword res_num_col: The column containing the residue number information. If supplied, spin_id_col must be None. 471 @type res_num_col: int or None 472 @keyword spin_name_col: The column containing the spin name information. If supplied, spin_id_col must be None. 473 @type spin_name_col: int or None 474 @keyword spin_num_col: The column containing the spin number information. If supplied, spin_id_col must be None. 475 @type spin_num_col: int or None 476 @keyword data_col: The column containing the PCS data in Hz. 477 @type data_col: int or None 478 @keyword error_col: The column containing the PCS errors. 479 @type error_col: int or None 480 @keyword sep: The column separator which, if None, defaults to whitespace. 481 @type sep: str or None 482 @keyword spin_id: The spin ID string used to restrict data loading to a subset of all spins. 483 @type spin_id: None or str 484 """ 485 486 # Test if the current data pipe exists. 487 pipes.test() 488 489 # Test if sequence data exists. 490 if not exists_mol_res_spin_data(): 491 raise RelaxNoSequenceError 492 493 # Either the data or error column must be supplied. 494 if data_col == None and error_col == None: 495 raise RelaxError("One of either the data or error column must be supplied.") 496 497 498 # Spin specific data. 499 ##################### 500 501 # Loop over the PCS data. 502 spin_ids = [] 503 values = [] 504 errors = [] 505 for data in read_spin_data(file=file, dir=dir, file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, error_col=error_col, sep=sep, spin_id=spin_id): 506 # Unpack. 507 if data_col and error_col: 508 id, value, error = data 509 elif data_col: 510 id, value = data 511 error = None 512 else: 513 id, error = data 514 value = None 515 516 # Test the error value (cannot be 0.0). 517 if error == 0.0: 518 raise RelaxError("An invalid error value of zero has been encountered.") 519 520 # Get the corresponding spin container. 521 spin = return_spin(id) 522 if spin == None: 523 warn(RelaxNoSpinWarning(id)) 524 continue 525 526 # Add the data. 527 if data_col: 528 # Initialise. 529 if not hasattr(spin, 'pcs'): 530 spin.pcs = {} 531 532 # Append the value. 533 spin.pcs[align_id] = value 534 535 # Add the error. 536 if error_col: 537 # Initialise. 538 if not hasattr(spin, 'pcs_err'): 539 spin.pcs_err = {} 540 541 # Append the error. 542 spin.pcs_err[align_id] = error 543 544 # Append the data for print out. 545 spin_ids.append(id) 546 values.append(value) 547 errors.append(error) 548 549 # Print out. 550 write_spin_data(file=sys.stdout, spin_ids=spin_ids, data=values, data_name='PCSs', error=errors, error_name='PCS_error') 551 552 553 # Global (non-spin specific) data. 554 ################################## 555 556 # No data, so return. 557 if not len(values): 558 return 559 560 # Initialise. 561 if not hasattr(cdp, 'align_ids'): 562 cdp.align_ids = [] 563 if not hasattr(cdp, 'pcs_ids'): 564 cdp.pcs_ids = [] 565 566 # Add the PCS id string. 567 if align_id not in cdp.align_ids: 568 cdp.align_ids.append(align_id) 569 if align_id not in cdp.pcs_ids: 570 cdp.pcs_ids.append(align_id)

571 572

573 -def weight(align_id=None, spin_id=None, weight=1.0):

574 """Set optimisation weights on the PCS data. 575 576 @keyword align_id: The alignment tensor ID string. 577 @type align_id: str 578 @keyword spin_id: The spin ID string. 579 @type spin_id: None or str 580 @keyword weight: The optimisation weight. The higher the value, the more importance the PCS will have. 581 @type weight: float or int. 582 """ 583 584 # Test if sequence data exists. 585 if not exists_mol_res_spin_data(): 586 raise RelaxNoSequenceError 587 588 # Test if data corresponding to 'align_id' exists. 589 if not hasattr(cdp, 'pcs_ids') or align_id not in cdp.pcs_ids: 590 raise RelaxNoPCSError(align_id) 591 592 # Loop over the spins. 593 for spin in spin_loop(spin_id): 594 # No data structure. 595 if not hasattr(spin, 'pcs_weight'): 596 spin.pcs_weight = {} 597 598 # Set the weight. 599 spin.pcs_weight[align_id] = weight

600 601

602 -def write(align_id=None, file=None, dir=None, bc=False, force=False):

603 """Display the PCS data corresponding to the alignment ID. 604 605 @keyword align_id: The alignment tensor ID string. 606 @type align_id: str 607 @keyword file: The file name or object to write to. 608 @type file: str or file object 609 @keyword dir: The name of the directory to place the file into (defaults to the current directory). 610 @type dir: str 611 @keyword bc: The back-calculation flag which if True will cause the back-calculated rather than measured data to be written. 612 @type bc: bool 613 @keyword force: A flag which if True will cause any pre-existing file to be overwritten. 614 @type force: bool 615 """ 616 617 # Test if the current pipe exists. 618 pipes.test() 619 620 # Test if the sequence data is loaded. 621 if not exists_mol_res_spin_data(): 622 raise RelaxNoSequenceError 623 624 # Test if data corresponding to 'align_id' exists. 625 if not hasattr(cdp, 'pcs_ids') or align_id not in cdp.pcs_ids: 626 raise RelaxNoPCSError(align_id) 627 628 # Open the file for writing. 629 file = open_write_file(file, dir, force) 630 631 # Loop over the spins and collect the data. 632 mol_names = [] 633 res_nums = [] 634 res_names = [] 635 spin_nums = [] 636 spin_names = [] 637 values = [] 638 errors = [] 639 for spin, mol_name, res_num, res_name in spin_loop(full_info=True): 640 # Skip spins with no PCSs. 641 if not bc and (not hasattr(spin, 'pcs') or not align_id in spin.pcs.keys()): 642 continue 643 elif bc and (not hasattr(spin, 'pcs_bc') or align_id not in spin.pcs_bc.keys()): 644 continue 645 646 # Append the spin data. 647 mol_names.append(mol_name) 648 res_nums.append(res_num) 649 res_names.append(res_name) 650 spin_nums.append(spin.num) 651 spin_names.append(spin.name) 652 653 # The value. 654 if bc: 655 values.append(spin.pcs_bc[align_id]) 656 else: 657 values.append(spin.pcs[align_id]) 658 659 # The error. 660 if hasattr(spin, 'pcs_err') and align_id in spin.pcs_err.keys(): 661 errors.append(spin.pcs_err[align_id]) 662 else: 663 errors.append(None) 664 665 # Write out. 666 write_spin_data(file=file, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names, data=values, data_name='PCSs', error=errors, error_name='PCS_error')

667

Source Code for Module generic_fns.pcs