generic_fns.pcs

1 ############################################################################### 2 # # 3 # Copyright (C) 2003-2012 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax. # 6 # # 7 # relax is free software; you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation; either version 2 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # relax is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with relax; if not, write to the Free Software # 19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # 20 # # 21 ############################################################################### 22 23 # Module docstring. 24 """Module for the manipulation of pseudocontact shift data.""" 25 26 # Python module imports. 27 from copy import deepcopy 28 from math import pi, sqrt 29 from numpy import array, float64, ones, zeros 30 from numpy.linalg import norm 31 import sys 32 from warnings import warn 33 34 # relax module imports. 35 from generic_fns import grace, pipes 36 from generic_fns.align_tensor import get_tensor_index 37 from generic_fns.mol_res_spin import exists_mol_res_spin_data, generate_spin_id, return_spin, spin_loop 38 from maths_fns.pcs import ave_pcs_tensor 39 from physical_constants import g1H, pcs_constant 40 from relax_errors import RelaxError, RelaxNoPdbError, RelaxNoSequenceError 41 from relax_io import open_write_file, read_spin_data, write_spin_data 42 from relax_warnings import RelaxWarning, RelaxNoSpinWarning 43 44

45 -def back_calc(align_id=None):

46 """Back calculate the PCS from the alignment tensor. 47 48 @keyword align_id: The alignment tensor ID string. 49 @type align_id: str 50 """ 51 52 # Arg check. 53 if align_id and align_id not in cdp.align_ids: 54 raise RelaxError, "The alignment ID '%s' is not in the alignment ID list %s." % (align_id, cdp.align_ids) 55 56 # Convert the align IDs to an array, or take all IDs. 57 if align_id: 58 align_ids = [align_id] 59 else: 60 align_ids = cdp.align_ids 61 62 # Add the ID to the PCS IDs, if needed. 63 for align_id in align_ids: 64 # Init. 65 if not hasattr(cdp, 'pcs_ids'): 66 cdp.pcs_ids = [] 67 68 # Add the ID. 69 if align_id not in cdp.pcs_ids: 70 cdp.pcs_ids.append(align_id) 71 72 # The weights. 73 weights = ones(cdp.N, float64) / cdp.N 74 75 # Unit vector data structure init. 76 unit_vect = zeros((cdp.N, 3), float64) 77 78 # Loop over the spins. 79 for spin in spin_loop(): 80 # Skip spins with no position. 81 if not hasattr(spin, 'pos'): 82 continue 83 84 # Atom positions. 85 pos = spin.pos 86 if type(pos[0]) in [float, float64]: 87 pos = [pos] * cdp.N 88 89 # Loop over the alignments. 90 for id in align_ids: 91 # Vectors. 92 vect = zeros((cdp.N, 3), float64) 93 r = zeros(cdp.N, float64) 94 dj = zeros(cdp.N, float64) 95 for c in range(cdp.N): 96 # The vector. 97 vect[c] = pos[c] - cdp.paramagnetic_centre 98 99 # The length. 100 r[c] = norm(vect[c]) 101 102 # Normalise (only if the vector has length). 103 if r[c]: 104 vect[c] = vect[c] / r[c] 105 106 # Calculate the PCS constant. 107 dj[c] = pcs_constant(cdp.temperature[id], cdp.frq[id] * 2.0 * pi / g1H, r[c]/1e10) 108 109 # Initialise if necessary. 110 if not hasattr(spin, 'pcs_bc'): 111 spin.pcs_bc = {} 112 113 # Calculate the PCSs (in ppm). 114 spin.pcs_bc[id] = ave_pcs_tensor(dj, vect, cdp.N, cdp.align_tensors[get_tensor_index(id)].A, weights=weights) * 1e6

115 116

117 -def centre(pos=None, atom_id=None, pipe=None, verbosity=1, ave_pos=False, force=False):

118 """Specify the atom in the loaded structure corresponding to the paramagnetic centre. 119 120 @keyword pos: The atomic position. If set, the atom_id string will be ignored. 121 @type pos: list of float 122 @keyword atom_id: The atom identification string. 123 @type atom_id: str 124 @keyword pipe: An alternative data pipe to extract the paramagnetic centre from. 125 @type pipe: None or str 126 @keyword verbosity: The amount of information to print out. The bigger the number, the more information. 127 @type verbosity: int 128 @keyword ave_pos: A flag which if True causes the atomic positions from multiple models to be averaged. 129 @type ave_pos: bool 130 @keyword force: A flag which if True will cause the current PCS centre to be overwritten. 131 """ 132 133 # The data pipe. 134 if pipe == None: 135 pipe = pipes.cdp_name() 136 137 # Test the data pipe. 138 pipes.test(pipe) 139 140 # Get the data pipes. 141 source_dp = pipes.get_pipe(pipe) 142 143 # Test if the structure has been loaded. 144 if not hasattr(source_dp, 'structure'): 145 raise RelaxNoPdbError 146 147 # Test the centre has already been set. 148 if not force and hasattr(cdp, 'paramagnetic_centre'): 149 raise RelaxError("The paramagnetic centre has already been set to the coordinates " + repr(cdp.paramagnetic_centre) + ".") 150 151 # Position is supplied. 152 if pos != None: 153 centre = array(pos) 154 num_pos = 1 155 full_pos_list = [] 156 157 # Position from a loaded structure. 158 else: 159 # Get the positions. 160 centre = zeros(3, float64) 161 full_pos_list = [] 162 num_pos = 0 163 for spin, spin_id in spin_loop(atom_id, pipe=pipe, return_id=True): 164 # No atomic positions. 165 if not hasattr(spin, 'pos'): 166 continue 167 168 # Spin position list. 169 if isinstance(spin.pos[0], float) or isinstance(spin.pos[0], float64): 170 pos_list = [spin.pos] 171 else: 172 pos_list = spin.pos 173 174 # Loop over the model positions. 175 for pos in pos_list: 176 full_pos_list.append(pos) 177 centre = centre + array(pos) 178 num_pos = num_pos + 1 179 180 # No positional information! 181 if not num_pos: 182 raise RelaxError("No positional information could be found for the spin '%s'." % atom_id) 183 184 # Averaging. 185 centre = centre / float(num_pos) 186 187 # Print out. 188 if verbosity: 189 print("Paramagnetic centres located at:") 190 for pos in full_pos_list: 191 print((" [%8.3f, %8.3f, %8.3f]" % (pos[0], pos[1], pos[2]))) 192 print("\nAverage paramagnetic centre located at:") 193 print((" [%8.3f, %8.3f, %8.3f]" % (centre[0], centre[1], centre[2]))) 194 195 # Set the centre (place it into the current data pipe). 196 if ave_pos: 197 if verbosity: 198 print("\nUsing the average paramagnetic position.") 199 cdp.paramagnetic_centre = centre 200 else: 201 if verbosity: 202 print("\nUsing all paramagnetic positions.") 203 cdp.paramagnetic_centre = full_pos_list

204 205

206 -def corr_plot(format=None, file=None, dir=None, force=False):

207 """Generate a correlation plot of the measured vs. back-calculated PCSs. 208 209 @keyword format: The format for the plot file. The following values are accepted: 'grace', a Grace plot; None, a plain text file. 210 @type format: str or None 211 @keyword file: The file name or object to write to. 212 @type file: str or file object 213 @keyword dir: The name of the directory to place the file into (defaults to the current directory). 214 @type dir: str 215 @keyword force: A flag which if True will cause any pre-existing file to be overwritten. 216 @type force: bool 217 """ 218 219 # Test if the current pipe exists. 220 pipes.test() 221 222 # Test if the sequence data is loaded. 223 if not exists_mol_res_spin_data(): 224 raise RelaxNoSequenceError 225 226 # Does PCS data exist? 227 if not hasattr(cdp, 'pcs_ids') or not cdp.pcs_ids: 228 warn(RelaxWarning("No PCS data exists, skipping file creation.")) 229 return 230 231 # Open the file for writing. 232 file = open_write_file(file, dir, force) 233 234 # Init. 235 data = [] 236 237 # The diagonal. 238 data.append([[-100, -100, 0], [100, 100, 0]]) 239 240 # The spin types. 241 types = [] 242 for spin in spin_loop(): 243 if spin.element not in types: 244 types.append(spin.element) 245 246 # Loop over the PCS data. 247 for align_id in cdp.pcs_ids: 248 # Loop over the spin types. 249 for i in range(len(types)): 250 # Append a new list for this alignment. 251 data.append([]) 252 253 # Errors present? 254 err_flag = False 255 for spin in spin_loop(): 256 # Skip deselected spins. 257 if not spin.select: 258 continue 259 260 # Error present. 261 if hasattr(spin, 'pcs_err') and align_id in spin.pcs_err.keys(): 262 err_flag = True 263 break 264 265 # Loop over the spins. 266 for spin, spin_id in spin_loop(return_id=True): 267 # Skip deselected spins. 268 if not spin.select: 269 continue 270 271 # Incorrect spin type. 272 if spin.element != types[i]: 273 continue 274 275 # Skip if data is missing. 276 if not hasattr(spin, 'pcs') or not hasattr(spin, 'pcs_bc') or not align_id in spin.pcs.keys() or not align_id in spin.pcs_bc.keys(): 277 continue 278 279 # Append the data. 280 data[-1].append([spin.pcs_bc[align_id], spin.pcs[align_id]]) 281 282 # Errors. 283 if err_flag: 284 if hasattr(spin, 'pcs_err') and align_id in spin.pcs_err.keys(): 285 data[-1][-1].append(spin.pcs_err[align_id]) 286 else: 287 data[-1][-1].append(None) 288 289 # Label. 290 data[-1][-1].append(spin_id) 291 292 # The data size. 293 size = len(data) 294 295 # Only one data set. 296 data = [data] 297 298 # Graph type. 299 if err_flag: 300 graph_type = 'xydy' 301 else: 302 graph_type = 'xy' 303 304 # Grace file. 305 if format == 'grace': 306 # The set names. 307 set_names = [None] 308 for i in range(len(cdp.pcs_ids)): 309 for j in range(len(types)): 310 set_names.append("%s (%s)" % (cdp.pcs_ids[i], types[j])) 311 312 # The header. 313 grace.write_xy_header(file=file, title="PCS correlation plot", sets=size, set_names=set_names, linestyle=[2]+[0]*size, data_type=['pcs_bc', 'pcs'], axis_min=[-0.5, -0.5], axis_max=[0.5, 0.5], legend_pos=[1, 0.5]) 314 315 # The main data. 316 grace.write_xy_data(data=data, file=file, graph_type=graph_type)

317 318

319 -def delete(align_id=None):

320 """Delete the PCS data corresponding to the alignment ID. 321 322 @keyword align_id: The alignment tensor ID string. If not specified, all data will be deleted. 323 @type align_id: str or None 324 """ 325 326 # Test if the current data pipe exists. 327 pipes.test() 328 329 # Test if sequence data exists. 330 if not exists_mol_res_spin_data(): 331 raise RelaxNoSequenceError 332 333 # Check that the ID exists. 334 if align_id and not align_id in cdp.pcs_ids: 335 raise RelaxError("The PCS alignment id '%s' does not exist" % align_id) 336 337 # The IDs. 338 if not align_id: 339 ids = deepcopy(cdp.pcs_ids) 340 else: 341 ids = [align_id] 342 343 # Loop over the alignments, removing all the corresponding data. 344 for id in ids: 345 # The PCS ID. 346 cdp.pcs_ids.pop(cdp.pcs_ids.index(id)) 347 348 # The data type. 349 if hasattr(cdp, 'pcs_data_types') and cdp.pcs_data_types.has_key(id): 350 cdp.pcs_data_types.pop(id) 351 352 # The spin data. 353 for spin in spin_loop(): 354 # The data. 355 if hasattr(spin, 'pcs') and spin.pcs.has_key(id): 356 spin.pcs.pop(id) 357 358 # The error. 359 if hasattr(spin, 'pcs_err') and spin.pcs_err.has_key(id): 360 spin.pcs_err.pop(id) 361 362 # Clean the global data. 363 if not hasattr(cdp, 'rdc_ids') or id not in cdp.rdc_ids: 364 cdp.align_ids.pop(id)

365 366

367 -def display(align_id=None, bc=False):

368 """Display the PCS data corresponding to the alignment ID. 369 370 @keyword align_id: The alignment tensor ID string. 371 @type align_id: str 372 @keyword bc: The back-calculation flag which if True will cause the back-calculated rather than measured data to be displayed. 373 @type bc: bool 374 """ 375 376 # Call the write method with sys.stdout as the file. 377 write(align_id=align_id, file=sys.stdout, bc=bc)

378 379

380 -def q_factors(spin_id=None):

381 """Calculate the Q-factors for the PCS data. 382 383 @keyword spin_id: The spin ID string used to restrict the Q-factor calculation to a subset of all spins. 384 @type spin_id: None or str 385 """ 386 387 # No PCSs, so no Q factors can be calculated. 388 if not hasattr(cdp, 'pcs_ids') or not len(cdp.pcs_ids): 389 warn(RelaxWarning("No PCS data exists, Q factors cannot be calculated.")) 390 return 391 392 # Q-factor dictionary. 393 cdp.q_factors_pcs = {} 394 395 # Loop over the alignments. 396 for align_id in cdp.pcs_ids: 397 # Init. 398 pcs2_sum = 0.0 399 sse = 0.0 400 401 # Spin loop. 402 spin_count = 0 403 pcs_data = False 404 pcs_bc_data = False 405 for spin in spin_loop(spin_id): 406 # Skip deselected spins. 407 if not spin.select: 408 continue 409 410 # Increment the spin counter. 411 spin_count += 1 412 413 # Data checks. 414 if hasattr(spin, 'pcs') and spin.pcs.has_key(align_id): 415 pcs_data = True 416 if hasattr(spin, 'pcs_bc') and spin.pcs_bc.has_key(align_id): 417 pcs_bc_data = True 418 419 # Skip spins without PCS data. 420 if not hasattr(spin, 'pcs') or not hasattr(spin, 'pcs_bc') or not spin.pcs.has_key(align_id) or spin.pcs[align_id] == None or not spin.pcs_bc.has_key(align_id) or spin.pcs_bc[align_id] == None: 421 continue 422 423 # Sum of squares. 424 sse = sse + (spin.pcs[align_id] - spin.pcs_bc[align_id])**2 425 426 # Sum the PCSs squared (for normalisation). 427 pcs2_sum = pcs2_sum + spin.pcs[align_id]**2 428 429 # The Q-factor for the alignment. 430 if pcs2_sum: 431 Q = sqrt(sse / pcs2_sum) 432 cdp.q_factors_pcs[align_id] = Q 433 434 # Warnings (and then exit). 435 if not spin_count: 436 warn(RelaxWarning("No spins have been used in the calculation.")) 437 return 438 if not pcs_data: 439 warn(RelaxWarning("No PCS data can be found.")) 440 return 441 if not pcs_bc_data: 442 warn(RelaxWarning("No back-calculated PCS data can be found.")) 443 return 444 445 # The total Q-factor. 446 cdp.q_pcs = 0.0 447 for id in cdp.q_factors_pcs: 448 cdp.q_pcs = cdp.q_pcs + cdp.q_factors_pcs[id]**2 449 cdp.q_pcs = cdp.q_pcs / len(cdp.q_factors_pcs) 450 cdp.q_pcs = sqrt(cdp.q_pcs)

451 452

453 -def read(align_id=None, file=None, dir=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None, sep=None, spin_id=None):

454 """Read the PCS data from file. 455 456 @param align_id: The alignment tensor ID string. 457 @type align_id: str 458 @param file: The name of the file to open. 459 @type file: str 460 @param dir: The directory containing the file (defaults to the current directory if None). 461 @type dir: str or None 462 @param file_data: An alternative opening a file, if the data already exists in the correct format. The format is a list of lists where the first index corresponds to the row and the second the column. 463 @type file_data: list of lists 464 @keyword spin_id_col: The column containing the spin ID strings. If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none. 465 @type spin_id_col: int or None 466 @keyword mol_name_col: The column containing the molecule name information. If supplied, spin_id_col must be None. 467 @type mol_name_col: int or None 468 @keyword res_name_col: The column containing the residue name information. If supplied, spin_id_col must be None. 469 @type res_name_col: int or None 470 @keyword res_num_col: The column containing the residue number information. If supplied, spin_id_col must be None. 471 @type res_num_col: int or None 472 @keyword spin_name_col: The column containing the spin name information. If supplied, spin_id_col must be None. 473 @type spin_name_col: int or None 474 @keyword spin_num_col: The column containing the spin number information. If supplied, spin_id_col must be None. 475 @type spin_num_col: int or None 476 @keyword data_col: The column containing the PCS data in Hz. 477 @type data_col: int or None 478 @keyword error_col: The column containing the PCS errors. 479 @type error_col: int or None 480 @keyword sep: The column separator which, if None, defaults to whitespace. 481 @type sep: str or None 482 @keyword spin_id: The spin ID string used to restrict data loading to a subset of all spins. 483 @type spin_id: None or str 484 """ 485 486 # Test if the current data pipe exists. 487 pipes.test() 488 489 # Test if sequence data exists. 490 if not exists_mol_res_spin_data(): 491 raise RelaxNoSequenceError 492 493 # Either the data or error column must be supplied. 494 if data_col == None and error_col == None: 495 raise RelaxError("One of either the data or error column must be supplied.") 496 497 498 # Spin specific data. 499 ##################### 500 501 # Loop over the PCS data. 502 mol_names = [] 503 res_nums = [] 504 res_names = [] 505 spin_nums = [] 506 spin_names = [] 507 values = [] 508 errors = [] 509 for data in read_spin_data(file=file, dir=dir, file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, error_col=error_col, sep=sep, spin_id=spin_id): 510 # Unpack. 511 if data_col and error_col: 512 mol_name, res_num, res_name, spin_num, spin_name, value, error = data 513 elif data_col: 514 mol_name, res_num, res_name, spin_num, spin_name, value = data 515 error = None 516 else: 517 mol_name, res_num, res_name, spin_num, spin_name, error = data 518 value = None 519 520 # Test the error value (cannot be 0.0). 521 if error == 0.0: 522 raise RelaxError("An invalid error value of zero has been encountered.") 523 524 # Get the corresponding spin container. 525 id = generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name) 526 spin = return_spin(id) 527 if spin == None: 528 warn(RelaxNoSpinWarning(id)) 529 continue 530 531 # Add the data. 532 if data_col: 533 # Initialise. 534 if not hasattr(spin, 'pcs'): 535 spin.pcs = {} 536 537 # Append the value. 538 spin.pcs[align_id] = value 539 540 # Add the error. 541 if error_col: 542 # Initialise. 543 if not hasattr(spin, 'pcs_err'): 544 spin.pcs_err = {} 545 546 # Append the error. 547 spin.pcs_err[align_id] = error 548 549 # Append the data for printout. 550 mol_names.append(mol_name) 551 res_nums.append(res_num) 552 res_names.append(res_name) 553 spin_nums.append(spin_num) 554 spin_names.append(spin_name) 555 values.append(value) 556 errors.append(error) 557 558 # Print out. 559 write_spin_data(file=sys.stdout, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names, data=values, data_name='PCSs', error=errors, error_name='PCS_error') 560 561 562 # Global (non-spin specific) data. 563 ################################## 564 565 # No data, so return. 566 if not len(values): 567 return 568 569 # Initialise. 570 if not hasattr(cdp, 'align_ids'): 571 cdp.align_ids = [] 572 if not hasattr(cdp, 'pcs_ids'): 573 cdp.pcs_ids = [] 574 575 # Add the PCS id string. 576 if align_id not in cdp.align_ids: 577 cdp.align_ids.append(align_id) 578 if align_id not in cdp.pcs_ids: 579 cdp.pcs_ids.append(align_id)

580 581

582 -def weight(align_id=None, spin_id=None, weight=1.0):

583 """Set optimisation weights on the PCS data. 584 585 @keyword align_id: The alignment tensor ID string. 586 @type align_id: str 587 @keyword spin_id: The spin ID string. 588 @type spin_id: None or str 589 @keyword weight: The optimisation weight. The higher the value, the more importance the PCS will have. 590 @type weight: float or int. 591 """ 592 593 # Test if sequence data exists. 594 if not exists_mol_res_spin_data(): 595 raise RelaxNoSequenceError 596 597 # Test if data corresponding to 'align_id' exists. 598 if not hasattr(cdp, 'pcs_ids') or align_id not in cdp.pcs_ids: 599 raise RelaxNoPCSError(align_id) 600 601 # Loop over the spins. 602 for spin in spin_loop(spin_id): 603 # No data structure. 604 if not hasattr(spin, 'pcs_weight'): 605 spin.pcs_weight = {} 606 607 # Set the weight. 608 spin.pcs_weight[align_id] = weight

609 610

611 -def write(align_id=None, file=None, dir=None, bc=False, force=False):

612 """Display the PCS data corresponding to the alignment ID. 613 614 @keyword align_id: The alignment tensor ID string. 615 @type align_id: str 616 @keyword file: The file name or object to write to. 617 @type file: str or file object 618 @keyword dir: The name of the directory to place the file into (defaults to the current directory). 619 @type dir: str 620 @keyword bc: The back-calculation flag which if True will cause the back-calculated rather than measured data to be written. 621 @type bc: bool 622 @keyword force: A flag which if True will cause any pre-existing file to be overwritten. 623 @type force: bool 624 """ 625 626 # Test if the current pipe exists. 627 pipes.test() 628 629 # Test if the sequence data is loaded. 630 if not exists_mol_res_spin_data(): 631 raise RelaxNoSequenceError 632 633 # Test if data corresponding to 'align_id' exists. 634 if not hasattr(cdp, 'pcs_ids') or align_id not in cdp.pcs_ids: 635 raise RelaxNoPCSError(align_id) 636 637 # Open the file for writing. 638 file = open_write_file(file, dir, force) 639 640 # Loop over the spins and collect the data. 641 mol_names = [] 642 res_nums = [] 643 res_names = [] 644 spin_nums = [] 645 spin_names = [] 646 values = [] 647 errors = [] 648 for spin, mol_name, res_num, res_name in spin_loop(full_info=True): 649 # Skip spins with no PCSs. 650 if not bc and (not hasattr(spin, 'pcs') or not align_id in spin.pcs.keys()): 651 continue 652 elif bc and (not hasattr(spin, 'pcs_bc') or align_id not in spin.pcs_bc.keys()): 653 continue 654 655 # Append the spin data. 656 mol_names.append(mol_name) 657 res_nums.append(res_num) 658 res_names.append(res_name) 659 spin_nums.append(spin.num) 660 spin_names.append(spin.name) 661 662 # The value. 663 if bc: 664 values.append(spin.pcs_bc[align_id]) 665 else: 666 values.append(spin.pcs[align_id]) 667 668 # The error. 669 if hasattr(spin, 'pcs_err') and align_id in spin.pcs_err.keys(): 670 errors.append(spin.pcs_err[align_id]) 671 else: 672 errors.append(None) 673 674 # Write out. 675 write_spin_data(file=file, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names, data=values, data_name='PCSs', error=errors, error_name='PCS_error')

676

Source Code for Module generic_fns.pcs