generic_fns.pcs

1 ############################################################################### 2 # # 3 # Copyright (C) 2003-2012 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax (http://www.nmr-relax.com). # 6 # # 7 # This program is free software: you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation, either version 3 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # This program is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 19 # # 20 ############################################################################### 21 22 # Module docstring. 23 """Module for the manipulation of pseudocontact shift data.""" 24 25 # Python module imports. 26 from copy import deepcopy 27 from math import pi, sqrt 28 from numpy import array, float64, ones, zeros 29 from numpy.linalg import norm 30 import sys 31 from warnings import warn 32 33 # relax module imports. 34 from generic_fns import grace, pipes 35 from generic_fns.align_tensor import get_tensor_index 36 from generic_fns.mol_res_spin import exists_mol_res_spin_data, generate_spin_id, return_spin, spin_loop 37 from maths_fns.pcs import ave_pcs_tensor 38 from physical_constants import g1H, pcs_constant 39 from relax_errors import RelaxError, RelaxNoPdbError, RelaxNoSequenceError 40 from relax_io import open_write_file, read_spin_data, write_spin_data 41 from relax_warnings import RelaxWarning, RelaxNoSpinWarning 42 43

44 -def back_calc(align_id=None):

45 """Back calculate the PCS from the alignment tensor. 46 47 @keyword align_id: The alignment tensor ID string. 48 @type align_id: str 49 """ 50 51 # Arg check. 52 if align_id and align_id not in cdp.align_ids: 53 raise RelaxError("The alignment ID '%s' is not in the alignment ID list %s." % (align_id, cdp.align_ids)) 54 55 # Convert the align IDs to an array, or take all IDs. 56 if align_id: 57 align_ids = [align_id] 58 else: 59 align_ids = cdp.align_ids 60 61 # Add the ID to the PCS IDs, if needed. 62 for align_id in align_ids: 63 # Init. 64 if not hasattr(cdp, 'pcs_ids'): 65 cdp.pcs_ids = [] 66 67 # Add the ID. 68 if align_id not in cdp.pcs_ids: 69 cdp.pcs_ids.append(align_id) 70 71 # The weights. 72 weights = ones(cdp.N, float64) / cdp.N 73 74 # Unit vector data structure init. 75 unit_vect = zeros((cdp.N, 3), float64) 76 77 # Loop over the spins. 78 for spin in spin_loop(): 79 # Skip spins with no position. 80 if not hasattr(spin, 'pos'): 81 continue 82 83 # Atom positions. 84 pos = spin.pos 85 if type(pos[0]) in [float, float64]: 86 pos = [pos] * cdp.N 87 88 # Loop over the alignments. 89 for id in align_ids: 90 # Vectors. 91 vect = zeros((cdp.N, 3), float64) 92 r = zeros(cdp.N, float64) 93 dj = zeros(cdp.N, float64) 94 for c in range(cdp.N): 95 # The vector. 96 vect[c] = pos[c] - cdp.paramagnetic_centre 97 98 # The length. 99 r[c] = norm(vect[c]) 100 101 # Normalise (only if the vector has length). 102 if r[c]: 103 vect[c] = vect[c] / r[c] 104 105 # Calculate the PCS constant. 106 dj[c] = pcs_constant(cdp.temperature[id], cdp.frq[id] * 2.0 * pi / g1H, r[c]/1e10) 107 108 # Initialise if necessary. 109 if not hasattr(spin, 'pcs_bc'): 110 spin.pcs_bc = {} 111 112 # Calculate the PCSs (in ppm). 113 spin.pcs_bc[id] = ave_pcs_tensor(dj, vect, cdp.N, cdp.align_tensors[get_tensor_index(id)].A, weights=weights) * 1e6

114 115

116 -def centre(pos=None, atom_id=None, pipe=None, verbosity=1, ave_pos=False, force=False):

117 """Specify the atom in the loaded structure corresponding to the paramagnetic centre. 118 119 @keyword pos: The atomic position. If set, the atom_id string will be ignored. 120 @type pos: list of float 121 @keyword atom_id: The atom identification string. 122 @type atom_id: str 123 @keyword pipe: An alternative data pipe to extract the paramagnetic centre from. 124 @type pipe: None or str 125 @keyword verbosity: The amount of information to print out. The bigger the number, the more information. 126 @type verbosity: int 127 @keyword ave_pos: A flag which if True causes the atomic positions from multiple models to be averaged. 128 @type ave_pos: bool 129 @keyword force: A flag which if True will cause the current PCS centre to be overwritten. 130 """ 131 132 # The data pipe. 133 if pipe == None: 134 pipe = pipes.cdp_name() 135 136 # Test the data pipe. 137 pipes.test(pipe) 138 139 # Get the data pipes. 140 source_dp = pipes.get_pipe(pipe) 141 142 # Test if the structure has been loaded. 143 if not hasattr(source_dp, 'structure'): 144 raise RelaxNoPdbError 145 146 # Test the centre has already been set. 147 if not force and hasattr(cdp, 'paramagnetic_centre'): 148 raise RelaxError("The paramagnetic centre has already been set to the coordinates " + repr(cdp.paramagnetic_centre) + ".") 149 150 # Position is supplied. 151 if pos != None: 152 centre = array(pos) 153 num_pos = 1 154 full_pos_list = [] 155 156 # Position from a loaded structure. 157 else: 158 # Get the positions. 159 centre = zeros(3, float64) 160 full_pos_list = [] 161 num_pos = 0 162 for spin, spin_id in spin_loop(atom_id, pipe=pipe, return_id=True): 163 # No atomic positions. 164 if not hasattr(spin, 'pos'): 165 continue 166 167 # Spin position list. 168 if isinstance(spin.pos[0], float) or isinstance(spin.pos[0], float64): 169 pos_list = [spin.pos] 170 else: 171 pos_list = spin.pos 172 173 # Loop over the model positions. 174 for pos in pos_list: 175 full_pos_list.append(pos) 176 centre = centre + array(pos) 177 num_pos = num_pos + 1 178 179 # No positional information! 180 if not num_pos: 181 raise RelaxError("No positional information could be found for the spin '%s'." % atom_id) 182 183 # Averaging. 184 centre = centre / float(num_pos) 185 186 # Print out. 187 if verbosity: 188 print("Paramagnetic centres located at:") 189 for pos in full_pos_list: 190 print(" [%8.3f, %8.3f, %8.3f]" % (pos[0], pos[1], pos[2])) 191 print("\nAverage paramagnetic centre located at:") 192 print(" [%8.3f, %8.3f, %8.3f]" % (centre[0], centre[1], centre[2])) 193 194 # Set the centre (place it into the current data pipe). 195 if ave_pos: 196 if verbosity: 197 print("\nUsing the average paramagnetic position.") 198 cdp.paramagnetic_centre = centre 199 else: 200 if verbosity: 201 print("\nUsing all paramagnetic positions.") 202 cdp.paramagnetic_centre = full_pos_list

203 204

205 -def corr_plot(format=None, file=None, dir=None, force=False):

206 """Generate a correlation plot of the measured vs. back-calculated PCSs. 207 208 @keyword format: The format for the plot file. The following values are accepted: 'grace', a Grace plot; None, a plain text file. 209 @type format: str or None 210 @keyword file: The file name or object to write to. 211 @type file: str or file object 212 @keyword dir: The name of the directory to place the file into (defaults to the current directory). 213 @type dir: str 214 @keyword force: A flag which if True will cause any pre-existing file to be overwritten. 215 @type force: bool 216 """ 217 218 # Test if the current pipe exists. 219 pipes.test() 220 221 # Test if the sequence data is loaded. 222 if not exists_mol_res_spin_data(): 223 raise RelaxNoSequenceError 224 225 # Does PCS data exist? 226 if not hasattr(cdp, 'pcs_ids') or not cdp.pcs_ids: 227 warn(RelaxWarning("No PCS data exists, skipping file creation.")) 228 return 229 230 # Open the file for writing. 231 file = open_write_file(file, dir, force) 232 233 # Init. 234 data = [] 235 236 # The diagonal. 237 data.append([[-100, -100, 0], [100, 100, 0]]) 238 239 # The spin types. 240 types = [] 241 for spin in spin_loop(): 242 if spin.element not in types: 243 types.append(spin.element) 244 245 # Loop over the PCS data. 246 for align_id in cdp.pcs_ids: 247 # Loop over the spin types. 248 for i in range(len(types)): 249 # Append a new list for this alignment. 250 data.append([]) 251 252 # Errors present? 253 err_flag = False 254 for spin in spin_loop(): 255 # Skip deselected spins. 256 if not spin.select: 257 continue 258 259 # Error present. 260 if hasattr(spin, 'pcs_err') and align_id in spin.pcs_err.keys(): 261 err_flag = True 262 break 263 264 # Loop over the spins. 265 for spin, spin_id in spin_loop(return_id=True): 266 # Skip deselected spins. 267 if not spin.select: 268 continue 269 270 # Incorrect spin type. 271 if spin.element != types[i]: 272 continue 273 274 # Skip if data is missing. 275 if not hasattr(spin, 'pcs') or not hasattr(spin, 'pcs_bc') or not align_id in spin.pcs.keys() or not align_id in spin.pcs_bc.keys(): 276 continue 277 278 # Append the data. 279 data[-1].append([spin.pcs_bc[align_id], spin.pcs[align_id]]) 280 281 # Errors. 282 if err_flag: 283 if hasattr(spin, 'pcs_err') and align_id in spin.pcs_err.keys(): 284 data[-1][-1].append(spin.pcs_err[align_id]) 285 else: 286 data[-1][-1].append(None) 287 288 # Label. 289 data[-1][-1].append(spin_id) 290 291 # The data size. 292 size = len(data) 293 294 # Only one data set. 295 data = [data] 296 297 # Graph type. 298 if err_flag: 299 graph_type = 'xydy' 300 else: 301 graph_type = 'xy' 302 303 # Grace file. 304 if format == 'grace': 305 # The set names. 306 set_names = [None] 307 for i in range(len(cdp.pcs_ids)): 308 for j in range(len(types)): 309 set_names.append("%s (%s)" % (cdp.pcs_ids[i], types[j])) 310 311 # The header. 312 grace.write_xy_header(file=file, title="PCS correlation plot", sets=size, set_names=set_names, linestyle=[2]+[0]*size, data_type=['pcs_bc', 'pcs'], axis_min=[-0.5, -0.5], axis_max=[0.5, 0.5], legend_pos=[1, 0.5]) 313 314 # The main data. 315 grace.write_xy_data(data=data, file=file, graph_type=graph_type)

316 317

318 -def delete(align_id=None):

319 """Delete the PCS data corresponding to the alignment ID. 320 321 @keyword align_id: The alignment tensor ID string. If not specified, all data will be deleted. 322 @type align_id: str or None 323 """ 324 325 # Test if the current data pipe exists. 326 pipes.test() 327 328 # Test if sequence data exists. 329 if not exists_mol_res_spin_data(): 330 raise RelaxNoSequenceError 331 332 # Check that the ID exists. 333 if align_id and not align_id in cdp.pcs_ids: 334 raise RelaxError("The PCS alignment id '%s' does not exist" % align_id) 335 336 # The IDs. 337 if not align_id: 338 ids = deepcopy(cdp.pcs_ids) 339 else: 340 ids = [align_id] 341 342 # Loop over the alignments, removing all the corresponding data. 343 for id in ids: 344 # The PCS ID. 345 cdp.pcs_ids.pop(cdp.pcs_ids.index(id)) 346 347 # The data type. 348 if hasattr(cdp, 'pcs_data_types') and id in cdp.pcs_data_types: 349 cdp.pcs_data_types.pop(id) 350 351 # The spin data. 352 for spin in spin_loop(): 353 # The data. 354 if hasattr(spin, 'pcs') and id in spin.pcs: 355 spin.pcs.pop(id) 356 357 # The error. 358 if hasattr(spin, 'pcs_err') and id in spin.pcs_err: 359 spin.pcs_err.pop(id) 360 361 # Clean the global data. 362 if not hasattr(cdp, 'rdc_ids') or id not in cdp.rdc_ids: 363 cdp.align_ids.pop(id)

364 365

366 -def display(align_id=None, bc=False):

367 """Display the PCS data corresponding to the alignment ID. 368 369 @keyword align_id: The alignment tensor ID string. 370 @type align_id: str 371 @keyword bc: The back-calculation flag which if True will cause the back-calculated rather than measured data to be displayed. 372 @type bc: bool 373 """ 374 375 # Call the write method with sys.stdout as the file. 376 write(align_id=align_id, file=sys.stdout, bc=bc)

377 378

379 -def q_factors(spin_id=None):

380 """Calculate the Q-factors for the PCS data. 381 382 @keyword spin_id: The spin ID string used to restrict the Q-factor calculation to a subset of all spins. 383 @type spin_id: None or str 384 """ 385 386 # No PCSs, so no Q factors can be calculated. 387 if not hasattr(cdp, 'pcs_ids') or not len(cdp.pcs_ids): 388 warn(RelaxWarning("No PCS data exists, Q factors cannot be calculated.")) 389 return 390 391 # Q-factor dictionary. 392 cdp.q_factors_pcs = {} 393 394 # Loop over the alignments. 395 for align_id in cdp.pcs_ids: 396 # Init. 397 pcs2_sum = 0.0 398 sse = 0.0 399 400 # Spin loop. 401 spin_count = 0 402 pcs_data = False 403 pcs_bc_data = False 404 for spin in spin_loop(spin_id): 405 # Skip deselected spins. 406 if not spin.select: 407 continue 408 409 # Increment the spin counter. 410 spin_count += 1 411 412 # Data checks. 413 if hasattr(spin, 'pcs') and align_id in spin.pcs: 414 pcs_data = True 415 if hasattr(spin, 'pcs_bc') and align_id in spin.pcs_bc: 416 pcs_bc_data = True 417 418 # Skip spins without PCS data. 419 if not hasattr(spin, 'pcs') or not hasattr(spin, 'pcs_bc') or not align_id in spin.pcs or spin.pcs[align_id] == None or not align_id in spin.pcs_bc or spin.pcs_bc[align_id] == None: 420 continue 421 422 # Sum of squares. 423 sse = sse + (spin.pcs[align_id] - spin.pcs_bc[align_id])**2 424 425 # Sum the PCSs squared (for normalisation). 426 pcs2_sum = pcs2_sum + spin.pcs[align_id]**2 427 428 # The Q-factor for the alignment. 429 if pcs2_sum: 430 Q = sqrt(sse / pcs2_sum) 431 cdp.q_factors_pcs[align_id] = Q 432 433 # Warnings (and then exit). 434 if not spin_count: 435 warn(RelaxWarning("No spins have been used in the calculation.")) 436 return 437 if not pcs_data: 438 warn(RelaxWarning("No PCS data can be found.")) 439 return 440 if not pcs_bc_data: 441 warn(RelaxWarning("No back-calculated PCS data can be found.")) 442 return 443 444 # The total Q-factor. 445 cdp.q_pcs = 0.0 446 for id in cdp.q_factors_pcs: 447 cdp.q_pcs = cdp.q_pcs + cdp.q_factors_pcs[id]**2 448 cdp.q_pcs = cdp.q_pcs / len(cdp.q_factors_pcs) 449 cdp.q_pcs = sqrt(cdp.q_pcs)

450 451

452 -def read(align_id=None, file=None, dir=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None, sep=None, spin_id=None):

453 """Read the PCS data from file. 454 455 @param align_id: The alignment tensor ID string. 456 @type align_id: str 457 @param file: The name of the file to open. 458 @type file: str 459 @param dir: The directory containing the file (defaults to the current directory if None). 460 @type dir: str or None 461 @param file_data: An alternative opening a file, if the data already exists in the correct format. The format is a list of lists where the first index corresponds to the row and the second the column. 462 @type file_data: list of lists 463 @keyword spin_id_col: The column containing the spin ID strings. If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none. 464 @type spin_id_col: int or None 465 @keyword mol_name_col: The column containing the molecule name information. If supplied, spin_id_col must be None. 466 @type mol_name_col: int or None 467 @keyword res_name_col: The column containing the residue name information. If supplied, spin_id_col must be None. 468 @type res_name_col: int or None 469 @keyword res_num_col: The column containing the residue number information. If supplied, spin_id_col must be None. 470 @type res_num_col: int or None 471 @keyword spin_name_col: The column containing the spin name information. If supplied, spin_id_col must be None. 472 @type spin_name_col: int or None 473 @keyword spin_num_col: The column containing the spin number information. If supplied, spin_id_col must be None. 474 @type spin_num_col: int or None 475 @keyword data_col: The column containing the PCS data in Hz. 476 @type data_col: int or None 477 @keyword error_col: The column containing the PCS errors. 478 @type error_col: int or None 479 @keyword sep: The column separator which, if None, defaults to whitespace. 480 @type sep: str or None 481 @keyword spin_id: The spin ID string used to restrict data loading to a subset of all spins. 482 @type spin_id: None or str 483 """ 484 485 # Test if the current data pipe exists. 486 pipes.test() 487 488 # Test if sequence data exists. 489 if not exists_mol_res_spin_data(): 490 raise RelaxNoSequenceError 491 492 # Either the data or error column must be supplied. 493 if data_col == None and error_col == None: 494 raise RelaxError("One of either the data or error column must be supplied.") 495 496 497 # Spin specific data. 498 ##################### 499 500 # Loop over the PCS data. 501 mol_names = [] 502 res_nums = [] 503 res_names = [] 504 spin_nums = [] 505 spin_names = [] 506 values = [] 507 errors = [] 508 for data in read_spin_data(file=file, dir=dir, file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, error_col=error_col, sep=sep, spin_id=spin_id): 509 # Unpack. 510 if data_col and error_col: 511 mol_name, res_num, res_name, spin_num, spin_name, value, error = data 512 elif data_col: 513 mol_name, res_num, res_name, spin_num, spin_name, value = data 514 error = None 515 else: 516 mol_name, res_num, res_name, spin_num, spin_name, error = data 517 value = None 518 519 # Test the error value (cannot be 0.0). 520 if error == 0.0: 521 raise RelaxError("An invalid error value of zero has been encountered.") 522 523 # Get the corresponding spin container. 524 id = generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name) 525 spin = return_spin(id) 526 if spin == None: 527 warn(RelaxNoSpinWarning(id)) 528 continue 529 530 # Add the data. 531 if data_col: 532 # Initialise. 533 if not hasattr(spin, 'pcs'): 534 spin.pcs = {} 535 536 # Append the value. 537 spin.pcs[align_id] = value 538 539 # Add the error. 540 if error_col: 541 # Initialise. 542 if not hasattr(spin, 'pcs_err'): 543 spin.pcs_err = {} 544 545 # Append the error. 546 spin.pcs_err[align_id] = error 547 548 # Append the data for printout. 549 mol_names.append(mol_name) 550 res_nums.append(res_num) 551 res_names.append(res_name) 552 spin_nums.append(spin_num) 553 spin_names.append(spin_name) 554 values.append(value) 555 errors.append(error) 556 557 # Print out. 558 write_spin_data(file=sys.stdout, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names, data=values, data_name='PCSs', error=errors, error_name='PCS_error') 559 560 561 # Global (non-spin specific) data. 562 ################################## 563 564 # No data, so return. 565 if not len(values): 566 return 567 568 # Initialise. 569 if not hasattr(cdp, 'align_ids'): 570 cdp.align_ids = [] 571 if not hasattr(cdp, 'pcs_ids'): 572 cdp.pcs_ids = [] 573 574 # Add the PCS id string. 575 if align_id not in cdp.align_ids: 576 cdp.align_ids.append(align_id) 577 if align_id not in cdp.pcs_ids: 578 cdp.pcs_ids.append(align_id)

579 580

581 -def weight(align_id=None, spin_id=None, weight=1.0):

582 """Set optimisation weights on the PCS data. 583 584 @keyword align_id: The alignment tensor ID string. 585 @type align_id: str 586 @keyword spin_id: The spin ID string. 587 @type spin_id: None or str 588 @keyword weight: The optimisation weight. The higher the value, the more importance the PCS will have. 589 @type weight: float or int. 590 """ 591 592 # Test if sequence data exists. 593 if not exists_mol_res_spin_data(): 594 raise RelaxNoSequenceError 595 596 # Test if data corresponding to 'align_id' exists. 597 if not hasattr(cdp, 'pcs_ids') or align_id not in cdp.pcs_ids: 598 raise RelaxNoPCSError(align_id) 599 600 # Loop over the spins. 601 for spin in spin_loop(spin_id): 602 # No data structure. 603 if not hasattr(spin, 'pcs_weight'): 604 spin.pcs_weight = {} 605 606 # Set the weight. 607 spin.pcs_weight[align_id] = weight

608 609

610 -def write(align_id=None, file=None, dir=None, bc=False, force=False):

611 """Display the PCS data corresponding to the alignment ID. 612 613 @keyword align_id: The alignment tensor ID string. 614 @type align_id: str 615 @keyword file: The file name or object to write to. 616 @type file: str or file object 617 @keyword dir: The name of the directory to place the file into (defaults to the current directory). 618 @type dir: str 619 @keyword bc: The back-calculation flag which if True will cause the back-calculated rather than measured data to be written. 620 @type bc: bool 621 @keyword force: A flag which if True will cause any pre-existing file to be overwritten. 622 @type force: bool 623 """ 624 625 # Test if the current pipe exists. 626 pipes.test() 627 628 # Test if the sequence data is loaded. 629 if not exists_mol_res_spin_data(): 630 raise RelaxNoSequenceError 631 632 # Test if data corresponding to 'align_id' exists. 633 if not hasattr(cdp, 'pcs_ids') or align_id not in cdp.pcs_ids: 634 raise RelaxNoPCSError(align_id) 635 636 # Open the file for writing. 637 file = open_write_file(file, dir, force) 638 639 # Loop over the spins and collect the data. 640 mol_names = [] 641 res_nums = [] 642 res_names = [] 643 spin_nums = [] 644 spin_names = [] 645 values = [] 646 errors = [] 647 for spin, mol_name, res_num, res_name in spin_loop(full_info=True): 648 # Skip spins with no PCSs. 649 if not bc and (not hasattr(spin, 'pcs') or not align_id in spin.pcs.keys()): 650 continue 651 elif bc and (not hasattr(spin, 'pcs_bc') or align_id not in spin.pcs_bc.keys()): 652 continue 653 654 # Append the spin data. 655 mol_names.append(mol_name) 656 res_nums.append(res_num) 657 res_names.append(res_name) 658 spin_nums.append(spin.num) 659 spin_names.append(spin.name) 660 661 # The value. 662 if bc: 663 values.append(spin.pcs_bc[align_id]) 664 else: 665 values.append(spin.pcs[align_id]) 666 667 # The error. 668 if hasattr(spin, 'pcs_err') and align_id in spin.pcs_err.keys(): 669 errors.append(spin.pcs_err[align_id]) 670 else: 671 errors.append(None) 672 673 # Write out. 674 write_spin_data(file=file, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names, data=values, data_name='PCSs', error=errors, error_name='PCS_error')

675

Source Code for Module generic_fns.pcs