pipe_control.pcs

1 ############################################################################### 2 # # 3 # Copyright (C) 2003-2014 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax (http://www.nmr-relax.com). # 6 # # 7 # This program is free software: you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation, either version 3 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # This program is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 19 # # 20 ############################################################################### 21 22 # Module docstring. 23 """Module for the manipulation of pseudo-contact shift data.""" 24 25 # Python module imports. 26 from copy import deepcopy 27 from math import pi, sqrt 28 from numpy import array, float64, int32, ones, std, zeros 29 from numpy.linalg import norm 30 from random import gauss 31 import sys 32 from warnings import warn 33 34 # relax module imports. 35 from lib.alignment.pcs import ave_pcs_tensor, pcs_tensor 36 from lib.errors import RelaxError, RelaxNoAlignError, RelaxNoPdbError, RelaxNoPCSError, RelaxNoSequenceError 37 from lib.geometry.vectors import random_unit_vector 38 from lib.io import open_write_file 39 from lib.physical_constants import g1H, pcs_constant 40 from lib.sequence import read_spin_data, write_spin_data 41 from lib.warnings import RelaxWarning, RelaxNoSpinWarning 42 from pipe_control import grace, pipes 43 from pipe_control.align_tensor import get_tensor_index, get_tensor_object, opt_uses_align_data, opt_uses_tensor 44 from pipe_control.mol_res_spin import exists_mol_res_spin_data, generate_spin_id_unique, is_pseudoatom, return_spin, spin_index_loop, spin_loop 45 46

47 -def back_calc(align_id=None):

48 """Back calculate the PCS from the alignment tensor. 49 50 @keyword align_id: The alignment tensor ID string. 51 @type align_id: str 52 """ 53 54 # Check the pipe setup. 55 check_pipe_setup(pcs_id=align_id, sequence=True, N=True, tensors=True, paramag_centre=True) 56 57 # Convert the align IDs to an array, or take all IDs. 58 if align_id: 59 align_ids = [align_id] 60 else: 61 align_ids = cdp.align_ids 62 63 # Add the ID to the PCS IDs, if needed. 64 for align_id in align_ids: 65 # Init. 66 if not hasattr(cdp, 'pcs_ids'): 67 cdp.pcs_ids = [] 68 69 # Add the ID. 70 if align_id not in cdp.pcs_ids: 71 cdp.pcs_ids.append(align_id) 72 73 # The weights. 74 weights = ones(cdp.N, float64) / cdp.N 75 76 # Unit vector data structure init. 77 unit_vect = zeros((cdp.N, 3), float64) 78 79 # Loop over the spins. 80 count = 0 81 for spin in spin_loop(): 82 # Skip spins with no position. 83 if not hasattr(spin, 'pos'): 84 continue 85 86 # Atom positions. 87 pos = spin.pos 88 if type(pos[0]) in [float, float64]: 89 pos = [pos] * cdp.N 90 91 # Loop over the alignments. 92 for id in align_ids: 93 # Vectors. 94 vect = zeros((cdp.N, 3), float64) 95 r = zeros(cdp.N, float64) 96 dj = zeros(cdp.N, float64) 97 for c in range(cdp.N): 98 # The vector. 99 vect[c] = pos[c] - cdp.paramagnetic_centre 100 101 # The length. 102 r[c] = norm(vect[c]) 103 104 # Normalise (only if the vector has length). 105 if r[c]: 106 vect[c] = vect[c] / r[c] 107 108 # Calculate the PCS constant. 109 dj[c] = pcs_constant(cdp.temperature[id], cdp.spectrometer_frq[id] * 2.0 * pi / g1H, r[c]/1e10) 110 111 # Initialise if necessary. 112 if not hasattr(spin, 'pcs_bc'): 113 spin.pcs_bc = {} 114 115 # Calculate the PCSs (in ppm). 116 spin.pcs_bc[id] = ave_pcs_tensor(dj, vect, cdp.N, cdp.align_tensors[get_tensor_index(align_id=id)].A, weights=weights) * 1e6 117 118 # Increment the counter. 119 count += 1 120 121 # No PCSs calculated. 122 if not count: 123 warn(RelaxWarning("No PCSs have been back calculated, probably due to missing spin position information."))

124 125

126 -def centre(pos=None, atom_id=None, pipe=None, verbosity=1, ave_pos=False, force=False):

127 """Specify the atom in the loaded structure corresponding to the paramagnetic centre. 128 129 @keyword pos: The atomic position. If set, the atom_id string will be ignored. 130 @type pos: list of float 131 @keyword atom_id: The atom identification string. 132 @type atom_id: str 133 @keyword pipe: An alternative data pipe to extract the paramagnetic centre from. 134 @type pipe: None or str 135 @keyword verbosity: The amount of information to print out. The bigger the number, the more information. 136 @type verbosity: int 137 @keyword ave_pos: A flag which if True causes the atomic positions from multiple models to be averaged. 138 @type ave_pos: bool 139 @keyword force: A flag which if True will cause the current PCS centre to be overwritten. 140 """ 141 142 # The data pipe. 143 if pipe == None: 144 pipe = pipes.cdp_name() 145 146 # Check the pipe setup. 147 check_pipe_setup(pipe=pipe) 148 149 # Get the data pipes. 150 source_dp = pipes.get_pipe(pipe) 151 152 # Test if the structure has been loaded. 153 if not hasattr(source_dp, 'structure'): 154 raise RelaxNoPdbError 155 156 # Test the centre has already been set. 157 if not force and hasattr(cdp, 'paramagnetic_centre'): 158 raise RelaxError("The paramagnetic centre has already been set to the coordinates " + repr(cdp.paramagnetic_centre) + ".") 159 160 # Position is supplied. 161 if pos != None: 162 centre = array(pos) 163 num_pos = 1 164 full_pos_list = [] 165 166 # Position from a loaded structure. 167 else: 168 # Get the positions. 169 centre = zeros(3, float64) 170 full_pos_list = [] 171 num_pos = 0 172 for spin, spin_id in spin_loop(atom_id, pipe=pipe, return_id=True): 173 # No atomic positions. 174 if not hasattr(spin, 'pos'): 175 continue 176 177 # Spin position list. 178 if isinstance(spin.pos[0], float) or isinstance(spin.pos[0], float64): 179 pos_list = [spin.pos] 180 else: 181 pos_list = spin.pos 182 183 # Loop over the model positions. 184 for pos in pos_list: 185 full_pos_list.append(pos) 186 centre = centre + array(pos) 187 num_pos = num_pos + 1 188 189 # No positional information! 190 if not num_pos: 191 raise RelaxError("No positional information could be found for the spin '%s'." % atom_id) 192 193 # Averaging. 194 centre = centre / float(num_pos) 195 196 # Print out. 197 if verbosity: 198 print("Paramagnetic centres located at:") 199 for pos in full_pos_list: 200 print(" [%8.3f, %8.3f, %8.3f]" % (pos[0], pos[1], pos[2])) 201 print("\nAverage paramagnetic centre located at:") 202 print(" [%8.3f, %8.3f, %8.3f]" % (centre[0], centre[1], centre[2])) 203 204 # Set the centre (place it into the current data pipe). 205 if ave_pos: 206 if verbosity: 207 print("\nUsing the average paramagnetic position.") 208 cdp.paramagnetic_centre = centre 209 else: 210 if verbosity: 211 print("\nUsing all paramagnetic positions.") 212 cdp.paramagnetic_centre = full_pos_list

213 214

215 -def check_pipe_setup(pipe=None, pcs_id=None, sequence=False, N=False, tensors=False, pcs=False, paramag_centre=False):

216 """Check that the current data pipe has been setup sufficiently. 217 218 @keyword pipe: The data pipe to check, defaulting to the current pipe. 219 @type pipe: None or str 220 @keyword pcs_id: The PCS ID string to check for in cdp.pcs_ids. 221 @type pcs_id: None or str 222 @keyword sequence: A flag which when True will invoke the sequence data check. 223 @type sequence: bool 224 @keyword N: A flag which if True will check that cdp.N is set. 225 @type N: bool 226 @keyword tensors: A flag which if True will check that alignment tensors exist. 227 @type tensors: bool 228 @keyword pcs: A flag which if True will check that PCSs exist. 229 @type pcs: bool 230 @keyword paramag_centre: A flag which if True will check that the paramagnetic centre has been set. 231 @type paramag_centre: bool 232 """ 233 234 # The data pipe. 235 if pipe == None: 236 pipe = pipes.cdp_name() 237 238 # Get the data pipe. 239 dp = pipes.get_pipe(pipe) 240 241 # Test if the current data pipe exists. 242 pipes.test(pipe) 243 244 # Test if sequence data exists. 245 if sequence and not exists_mol_res_spin_data(pipe): 246 raise RelaxNoSequenceError(pipe) 247 248 # Check for dp.N. 249 if N and not hasattr(dp, 'N'): 250 raise RelaxError("The number of states N has not been set.") 251 252 # Check that alignment tensors are present. 253 if tensors and (not hasattr(dp, 'align_tensors') or len(dp.align_tensors) == 0): 254 raise RelaxNoTensorError('alignment') 255 256 # Test for the alignment ID. 257 if pcs_id and (not hasattr(dp, 'align_ids') or pcs_id not in dp.align_ids): 258 raise RelaxNoAlignError(pcs_id, pipe) 259 260 # Test if PCS data exists. 261 if pcs and not hasattr(dp, 'align_ids'): 262 raise RelaxNoAlignError() 263 if pcs and not hasattr(dp, 'pcs_ids'): 264 raise RelaxNoPCSError() 265 elif pcs and pcs_id and pcs_id not in dp.pcs_ids: 266 raise RelaxNoPCSError(pcs_id) 267 268 # Test if the paramagnetic centre is set. 269 if paramag_centre and not hasattr(cdp, 'paramagnetic_centre'): 270 raise RelaxError("The paramagnetic centre has not been defined.")

271 272

273 -def copy(pipe_from=None, pipe_to=None, align_id=None):

274 """Copy the PCS data from one data pipe to another. 275 276 @keyword pipe_from: The data pipe to copy the PCS data from. This defaults to the current data pipe. 277 @type pipe_from: str 278 @keyword pipe_to: The data pipe to copy the PCS data to. This defaults to the current data pipe. 279 @type pipe_to: str 280 @param align_id: The alignment ID string. 281 @type align_id: str 282 """ 283 284 # Defaults. 285 if pipe_from == None and pipe_to == None: 286 raise RelaxError("The pipe_from and pipe_to arguments cannot both be set to None.") 287 elif pipe_from == None: 288 pipe_from = pipes.cdp_name() 289 elif pipe_to == None: 290 pipe_to = pipes.cdp_name() 291 292 # Check the pipe setup. 293 check_pipe_setup(pipe=pipe_from, pcs_id=align_id, sequence=True, pcs=True) 294 check_pipe_setup(pipe=pipe_to, sequence=True) 295 296 # Get the data pipes. 297 dp_from = pipes.get_pipe(pipe_from) 298 dp_to = pipes.get_pipe(pipe_to) 299 300 # The IDs. 301 if align_id == None: 302 align_ids = dp_from.align_ids 303 else: 304 align_ids = [align_id] 305 306 # Init target pipe global structures. 307 if not hasattr(dp_to, 'align_ids'): 308 dp_to.align_ids = [] 309 if not hasattr(dp_to, 'pcs_ids'): 310 dp_to.pcs_ids = [] 311 312 # Loop over the align IDs. 313 for align_id in align_ids: 314 # Copy the global data. 315 if align_id not in dp_to.align_ids and align_id not in dp_to.align_ids: 316 dp_to.align_ids.append(align_id) 317 if align_id in dp_from.pcs_ids and align_id not in dp_to.pcs_ids: 318 dp_to.pcs_ids.append(align_id) 319 320 # Spin loop. 321 for mol_index, res_index, spin_index in spin_index_loop(): 322 # Alias the spin containers. 323 spin_from = dp_from.mol[mol_index].res[res_index].spin[spin_index] 324 spin_to = dp_to.mol[mol_index].res[res_index].spin[spin_index] 325 326 # No data or errors. 327 if (not hasattr(spin_from, 'pcs') or not align_id in spin_from.pcs) and (not hasattr(spin_from, 'pcs_err') or not align_id in spin_from.pcs_err): 328 continue 329 330 # Initialise the spin data if necessary. 331 if hasattr(spin_from, 'pcs') and not hasattr(spin_to, 'pcs'): 332 spin_to.pcs = {} 333 if hasattr(spin_from, 'pcs_err') and not hasattr(spin_to, 'pcs_err'): 334 spin_to.pcs_err = {} 335 336 # Copy the value and error from pipe_from. 337 if hasattr(spin_from, 'pcs'): 338 spin_to.pcs[align_id] = spin_from.pcs[align_id] 339 if hasattr(spin_from, 'pcs_err'): 340 spin_to.pcs_err[align_id] = spin_from.pcs_err[align_id]

341 342

343 -def corr_plot(format=None, file=None, dir=None, force=False):

344 """Generate a correlation plot of the measured vs. back-calculated PCSs. 345 346 @keyword format: The format for the plot file. The following values are accepted: 'grace', a Grace plot; None, a plain text file. 347 @type format: str or None 348 @keyword file: The file name or object to write to. 349 @type file: str or file object 350 @keyword dir: The name of the directory to place the file into (defaults to the current directory). 351 @type dir: str 352 @keyword force: A flag which if True will cause any pre-existing file to be overwritten. 353 @type force: bool 354 """ 355 356 # Check the pipe setup. 357 check_pipe_setup(sequence=True) 358 359 # Does PCS data exist? 360 if not hasattr(cdp, 'pcs_ids') or not cdp.pcs_ids: 361 warn(RelaxWarning("No PCS data exists, skipping file creation.")) 362 return 363 364 # Open the file for writing. 365 file = open_write_file(file, dir, force) 366 367 # Init. 368 data = [] 369 370 # The diagonal. 371 data.append([[-100, -100, 0], [100, 100, 0]]) 372 373 # The spin types. 374 types = [] 375 for spin in spin_loop(): 376 if spin.element not in types: 377 types.append(spin.element) 378 379 # Loop over the PCS data. 380 for align_id in cdp.pcs_ids: 381 # Loop over the spin types. 382 for i in range(len(types)): 383 # Append a new list for this alignment. 384 data.append([]) 385 386 # Errors present? 387 err_flag = False 388 for spin in spin_loop(): 389 # Skip deselected spins. 390 if not spin.select: 391 continue 392 393 # Error present. 394 if hasattr(spin, 'pcs_err') and align_id in spin.pcs_err.keys(): 395 err_flag = True 396 break 397 398 # Loop over the spins. 399 for spin, spin_id in spin_loop(return_id=True): 400 # Skip deselected spins. 401 if not spin.select: 402 continue 403 404 # Incorrect spin type. 405 if spin.element != types[i]: 406 continue 407 408 # Skip if data is missing. 409 if not hasattr(spin, 'pcs') or not hasattr(spin, 'pcs_bc') or not align_id in spin.pcs.keys() or not align_id in spin.pcs_bc.keys(): 410 continue 411 412 # Append the data. 413 data[-1].append([spin.pcs_bc[align_id], spin.pcs[align_id]]) 414 415 # Errors. 416 if err_flag: 417 if hasattr(spin, 'pcs_err') and align_id in spin.pcs_err.keys(): 418 data[-1][-1].append(spin.pcs_err[align_id]) 419 else: 420 data[-1][-1].append(None) 421 422 # Label. 423 data[-1][-1].append(spin_id) 424 425 # The data size. 426 size = len(data) 427 428 # Only one data set. 429 data = [data] 430 431 # Graph type. 432 if err_flag: 433 graph_type = 'xydy' 434 else: 435 graph_type = 'xy' 436 437 # Grace file. 438 if format == 'grace': 439 # The set names. 440 set_names = [None] 441 for i in range(len(cdp.pcs_ids)): 442 for j in range(len(types)): 443 set_names.append("%s (%s)" % (cdp.pcs_ids[i], types[j])) 444 445 # The header. 446 grace.write_xy_header(file=file, title="PCS correlation plot", data_type=['pcs_bc', 'pcs'], sets=[size], set_names=[set_names], linestyle=[[2]+[0]*size], legend_pos=[[1, 0.5]]) 447 448 # The main data. 449 grace.write_xy_data(data=data, file=file, graph_type=graph_type)

450 451

452 -def delete(align_id=None):

453 """Delete the PCS data corresponding to the alignment ID. 454 455 @keyword align_id: The alignment tensor ID string. If not specified, all data will be deleted. 456 @type align_id: str or None 457 """ 458 459 # Check the pipe setup. 460 check_pipe_setup(sequence=True, pcs_id=align_id, pcs=True) 461 462 # The IDs. 463 if not align_id: 464 ids = deepcopy(cdp.pcs_ids) 465 else: 466 ids = [align_id] 467 468 # Loop over the alignments, removing all the corresponding data. 469 for id in ids: 470 # The PCS ID. 471 cdp.pcs_ids.pop(cdp.pcs_ids.index(id)) 472 473 # The data type. 474 if hasattr(cdp, 'pcs_data_types') and id in cdp.pcs_data_types: 475 cdp.pcs_data_types.pop(id) 476 477 # The spin data. 478 for spin in spin_loop(): 479 # The data. 480 if hasattr(spin, 'pcs') and id in spin.pcs: 481 spin.pcs.pop(id) 482 483 # The error. 484 if hasattr(spin, 'pcs_err') and id in spin.pcs_err: 485 spin.pcs_err.pop(id) 486 487 # Clean the global data. 488 if not hasattr(cdp, 'rdc_ids') or id not in cdp.rdc_ids: 489 cdp.align_ids.pop(cdp.align_ids.index(id))

490 491

492 -def display(align_id=None, bc=False):

493 """Display the PCS data corresponding to the alignment ID. 494 495 @keyword align_id: The alignment tensor ID string. 496 @type align_id: str 497 @keyword bc: The back-calculation flag which if True will cause the back-calculated rather than measured data to be displayed. 498 @type bc: bool 499 """ 500 501 # Check the pipe setup. 502 check_pipe_setup(sequence=True, pcs_id=align_id, pcs=True) 503 504 # Call the write method with sys.stdout as the file. 505 write(align_id=align_id, file=sys.stdout, bc=bc)

506 507

508 -def opt_uses_pcs(align_id):

509 """Determine if the PCS data for the given alignment ID is needed for optimisation. 510 511 @param align_id: The alignment ID string. 512 @type align_id: str 513 @return: True if the PCS data is to be used for optimisation, False otherwise. 514 @rtype: bool 515 """ 516 517 # No alignment IDs. 518 if not hasattr(cdp, 'pcs_ids'): 519 return False 520 521 # No PCS data for the alignment. 522 if align_id not in cdp.pcs_ids: 523 return False 524 525 # Is the tensor optimised? 526 tensor_flag = opt_uses_tensor(get_tensor_object(align_id)) 527 528 # Is the paramagnetic position optimised? 529 pos_flag = False 530 if hasattr(cdp, 'paramag_centre_fixed') and not cdp.paramag_centre_fixed: 531 pos_flag = True 532 533 # Are the populations optimised? 534 prob_flag = False 535 if cdp.model == 'population': 536 prob_flag = True 537 538 # Not used. 539 if not tensor_flag and not pos_flag and not prob_flag: 540 return False 541 542 # The PCS data is to be used for optimisation. 543 return True

544 545

546 -def q_factors(spin_id=None):

547 """Calculate the Q factors for the PCS data. 548 549 @keyword spin_id: The spin ID string used to restrict the Q factor calculation to a subset of all spins. 550 @type spin_id: None or str 551 """ 552 553 # Check the pipe setup. 554 check_pipe_setup(sequence=True) 555 556 # No PCSs, so no Q factors can be calculated. 557 if not hasattr(cdp, 'pcs_ids') or not len(cdp.pcs_ids): 558 warn(RelaxWarning("No PCS data exists, Q factors cannot be calculated.")) 559 return 560 561 # Q factor dictionary. 562 cdp.q_factors_pcs = {} 563 564 # Loop over the alignments. 565 for align_id in cdp.pcs_ids: 566 # Init. 567 pcs2_sum = 0.0 568 sse = 0.0 569 570 # Spin loop. 571 spin_count = 0 572 pcs_data = False 573 pcs_bc_data = False 574 for spin in spin_loop(spin_id): 575 # Skip deselected spins. 576 if not spin.select: 577 continue 578 579 # Increment the spin counter. 580 spin_count += 1 581 582 # Data checks. 583 if hasattr(spin, 'pcs') and align_id in spin.pcs: 584 pcs_data = True 585 if hasattr(spin, 'pcs_bc') and align_id in spin.pcs_bc: 586 pcs_bc_data = True 587 588 # Skip spins without PCS data. 589 if not hasattr(spin, 'pcs') or not hasattr(spin, 'pcs_bc') or not align_id in spin.pcs or spin.pcs[align_id] == None or not align_id in spin.pcs_bc or spin.pcs_bc[align_id] == None: 590 continue 591 592 # Sum of squares. 593 sse = sse + (spin.pcs[align_id] - spin.pcs_bc[align_id])**2 594 595 # Sum the PCSs squared (for normalisation). 596 pcs2_sum = pcs2_sum + spin.pcs[align_id]**2 597 598 # The Q factor for the alignment. 599 if pcs2_sum: 600 Q = sqrt(sse / pcs2_sum) 601 cdp.q_factors_pcs[align_id] = Q 602 603 # Warnings (and then exit). 604 if not spin_count: 605 warn(RelaxWarning("No spins have been used in the calculation, skipping the PCS Q factor calculation.")) 606 return 607 if not pcs_data: 608 warn(RelaxWarning("No PCS data can be found for the alignment ID '%s', skipping the PCS Q factor calculation for this alignment." % align_id)) 609 continue 610 if not pcs_bc_data: 611 warn(RelaxWarning("No back-calculated PCS data can be found for the alignment ID '%s', skipping the PCS Q factor calculation for this alignment." % align_id)) 612 continue 613 614 # The total Q factor. 615 cdp.q_pcs = 0.0 616 for id in cdp.q_factors_pcs: 617 cdp.q_pcs = cdp.q_pcs + cdp.q_factors_pcs[id]**2 618 cdp.q_pcs = cdp.q_pcs / len(cdp.q_factors_pcs) 619 cdp.q_pcs = sqrt(cdp.q_pcs)

620 621

622 -def read(align_id=None, file=None, dir=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None, sep=None, spin_id=None):

623 """Read the PCS data from file. 624 625 @param align_id: The alignment tensor ID string. 626 @type align_id: str 627 @param file: The name of the file to open. 628 @type file: str 629 @param dir: The directory containing the file (defaults to the current directory if None). 630 @type dir: str or None 631 @param file_data: An alternative opening a file, if the data already exists in the correct format. The format is a list of lists where the first index corresponds to the row and the second the column. 632 @type file_data: list of lists 633 @keyword spin_id_col: The column containing the spin ID strings. If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none. 634 @type spin_id_col: int or None 635 @keyword mol_name_col: The column containing the molecule name information. If supplied, spin_id_col must be None. 636 @type mol_name_col: int or None 637 @keyword res_name_col: The column containing the residue name information. If supplied, spin_id_col must be None. 638 @type res_name_col: int or None 639 @keyword res_num_col: The column containing the residue number information. If supplied, spin_id_col must be None. 640 @type res_num_col: int or None 641 @keyword spin_name_col: The column containing the spin name information. If supplied, spin_id_col must be None. 642 @type spin_name_col: int or None 643 @keyword spin_num_col: The column containing the spin number information. If supplied, spin_id_col must be None. 644 @type spin_num_col: int or None 645 @keyword data_col: The column containing the PCS data in Hz. 646 @type data_col: int or None 647 @keyword error_col: The column containing the PCS errors. 648 @type error_col: int or None 649 @keyword sep: The column separator which, if None, defaults to whitespace. 650 @type sep: str or None 651 @keyword spin_id: The spin ID string used to restrict data loading to a subset of all spins. 652 @type spin_id: None or str 653 """ 654 655 # Check the pipe setup. 656 check_pipe_setup(sequence=True) 657 658 # Test if sequence data exists. 659 if not exists_mol_res_spin_data(): 660 raise RelaxNoSequenceError 661 662 # Either the data or error column must be supplied. 663 if data_col == None and error_col == None: 664 raise RelaxError("One of either the data or error column must be supplied.") 665 666 667 # Spin specific data. 668 ##################### 669 670 # Loop over the PCS data. 671 mol_names = [] 672 res_nums = [] 673 res_names = [] 674 spin_nums = [] 675 spin_names = [] 676 values = [] 677 errors = [] 678 for data in read_spin_data(file=file, dir=dir, file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, error_col=error_col, sep=sep, spin_id=spin_id): 679 # Unpack. 680 if data_col and error_col: 681 mol_name, res_num, res_name, spin_num, spin_name, value, error = data 682 elif data_col: 683 mol_name, res_num, res_name, spin_num, spin_name, value = data 684 error = None 685 else: 686 mol_name, res_num, res_name, spin_num, spin_name, error = data 687 value = None 688 689 # Test the error value (cannot be 0.0). 690 if error == 0.0: 691 raise RelaxError("An invalid error value of zero has been encountered.") 692 693 # Get the corresponding spin container. 694 id = generate_spin_id_unique(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name) 695 spin = return_spin(id) 696 if spin == None and spin_id[0] == '@': # Allow spin IDs of atom names to be used to specify multi column data. 697 spin = return_spin(id+spin_id) 698 if spin == None: 699 warn(RelaxNoSpinWarning(id)) 700 continue 701 702 # Add the data. 703 if data_col: 704 # Initialise. 705 if not hasattr(spin, 'pcs'): 706 spin.pcs = {} 707 708 # Append the value. 709 spin.pcs[align_id] = value 710 711 # Add the error. 712 if error_col: 713 # Initialise. 714 if not hasattr(spin, 'pcs_err'): 715 spin.pcs_err = {} 716 717 # Append the error. 718 spin.pcs_err[align_id] = error 719 720 # Append the data for printout. 721 mol_names.append(mol_name) 722 res_nums.append(res_num) 723 res_names.append(res_name) 724 spin_nums.append(spin_num) 725 spin_names.append(spin_name) 726 values.append(value) 727 errors.append(error) 728 729 # Print out. 730 write_spin_data(file=sys.stdout, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names, data=values, data_name='PCSs', error=errors, error_name='PCS_error') 731 732 733 # Global (non-spin specific) data. 734 ################################## 735 736 # No data, so return. 737 if not len(values): 738 return 739 740 # Initialise. 741 if not hasattr(cdp, 'align_ids'): 742 cdp.align_ids = [] 743 if not hasattr(cdp, 'pcs_ids'): 744 cdp.pcs_ids = [] 745 746 # Add the PCS id string. 747 if align_id not in cdp.align_ids: 748 cdp.align_ids.append(align_id) 749 if align_id not in cdp.pcs_ids: 750 cdp.pcs_ids.append(align_id)

751 752

753 -def return_pcs_data(sim_index=None):

754 """Set up the data structures for optimisation using PCSs as base data sets. 755 756 @keyword sim_index: The index of the simulation to optimise. This should be None if normal optimisation is desired. 757 @type sim_index: None or int 758 @return: The assembled data structures for using PCSs as the base data for optimisation. These include: 759 - the PCS values. 760 - the unit vectors connecting the paramagnetic centre (the electron spin) to the spin. 761 - the PCS weight. 762 - the experimental temperatures. 763 - the spectrometer frequencies. 764 - pseudo_flags, the list of flags indicating if the interatomic data contains a pseudo-atom (as 1's and 0's). 765 @rtype: tuple of (numpy rank-2 float64 array, numpy rank-2 float64 array, numpy rank-2 float64 array, list of float, list of float, numpy rank-1 int32 array) 766 """ 767 768 # Data setup tests. 769 if not hasattr(cdp, 'paramagnetic_centre') and (hasattr(cdp, 'paramag_centre_fixed') and cdp.paramag_centre_fixed): 770 raise RelaxError("The paramagnetic centre has not yet been specified.") 771 if not hasattr(cdp, 'temperature'): 772 raise RelaxError("The experimental temperatures have not been set.") 773 if not hasattr(cdp, 'spectrometer_frq'): 774 raise RelaxError("The spectrometer frequencies of the experiments have not been set.") 775 776 # Sort out pseudo-atoms first. This only needs to be called once. 777 setup_pseudoatom_pcs() 778 779 # Initialise. 780 pcs = [] 781 pcs_err = [] 782 pcs_weight = [] 783 temp = [] 784 frq = [] 785 pseudo_flags = [] 786 787 # The PCS data. 788 for i in range(len(cdp.align_ids)): 789 # Alias the ID. 790 align_id = cdp.align_ids[i] 791 792 # Skip non-optimised data. 793 if not opt_uses_align_data(align_id): 794 continue 795 796 # Append empty arrays to the PCS structures. 797 pcs.append([]) 798 pcs_err.append([]) 799 pcs_weight.append([]) 800 801 # Get the temperature for the PCS constant. 802 if align_id in cdp.temperature: 803 temp.append(cdp.temperature[align_id]) 804 805 # The temperature must be given! 806 else: 807 raise RelaxError("The experimental temperature for the alignment ID '%s' has not been set." % align_id) 808 809 # Get the spectrometer frequency in Tesla units for the PCS constant. 810 if align_id in cdp.spectrometer_frq: 811 frq.append(cdp.spectrometer_frq[align_id] * 2.0 * pi / g1H) 812 813 # The frequency must be given! 814 else: 815 raise RelaxError("The spectrometer frequency for the alignment ID '%s' has not been set." % align_id) 816 817 # Spin loop. 818 j = 0 819 for spin in spin_loop(): 820 # Skip deselected spins. 821 if not spin.select: 822 continue 823 824 # Skip spins without PCS data. 825 if not hasattr(spin, 'pcs'): 826 continue 827 828 # Append the PCSs to the list. 829 if align_id in spin.pcs.keys(): 830 if sim_index != None: 831 pcs[-1].append(spin.pcs_sim[align_id][sim_index]) 832 else: 833 pcs[-1].append(spin.pcs[align_id]) 834 else: 835 pcs[-1].append(None) 836 837 # Append the PCS errors. 838 if hasattr(spin, 'pcs_err') and align_id in spin.pcs_err.keys(): 839 pcs_err[-1].append(spin.pcs_err[align_id]) 840 else: 841 pcs_err[-1].append(None) 842 843 # Append the weight. 844 if hasattr(spin, 'pcs_weight') and align_id in spin.pcs_weight.keys(): 845 pcs_weight[-1].append(spin.pcs_weight[align_id]) 846 else: 847 pcs_weight[-1].append(1.0) 848 849 # Spin index. 850 j = j + 1 851 852 # Pseudo-atom. 853 for spin in spin_loop(): 854 if is_pseudoatom(spin): 855 pseudo_flags.append(1) 856 else: 857 pseudo_flags.append(0) 858 859 # Convert to numpy objects. 860 pcs = array(pcs, float64) 861 pcs_err = array(pcs_err, float64) 862 pcs_weight = array(pcs_weight, float64) 863 pseudo_flags = array(pseudo_flags, int32) 864 865 # Convert the PCS from ppm to no units. 866 pcs = pcs * 1e-6 867 pcs_err = pcs_err * 1e-6 868 869 # Return the data structures. 870 return pcs, pcs_err, pcs_weight, temp, frq, pseudo_flags

871 872

873 -def set_errors(align_id=None, spin_id=None, sd=None):

874 """Set the PCS errors if not already present. 875 876 @keyword align_id: The optional alignment tensor ID string. 877 @type align_id: str 878 @keyword spin_id: The optional spin ID string. 879 @type spin_id: None or str 880 @keyword sd: The PCS standard deviation in ppm. 881 @type sd: float or int. 882 """ 883 884 # Check the pipe setup. 885 check_pipe_setup(sequence=True, pcs_id=align_id, pcs=True) 886 887 # Convert the align IDs to an array, or take all IDs. 888 if align_id: 889 align_ids = [align_id] 890 else: 891 align_ids = cdp.pcs_ids 892 893 # Loop over the spins. 894 for spin in spin_loop(spin_id): 895 # Skip deselected spins. 896 if not spin.select: 897 continue 898 899 # Skip spins with no PCSs. 900 if not hasattr(spin, 'pcs') or (align_id and not align_id in spin.pcs): 901 continue 902 903 # No data structure. 904 if not hasattr(spin, 'pcs_err'): 905 spin.pcs_err = {} 906 907 # Set the error. 908 for id in align_ids: 909 spin.pcs_err[id] = sd

910 911

912 -def setup_pseudoatom_pcs():

913 """Make sure that the spin systems are properly set up for pseudo-atoms and PCSs. 914 915 All spin data containers which are a member of a pseudo-atom will be deselected. 916 """ 917 918 # Loop over all spin data containers. 919 for pseudospin, pseudospin_id in spin_loop(return_id=True): 920 # No pseudo-atom, so do nothing. 921 if not is_pseudoatom(pseudospin): 922 return 923 924 # Loop over the atoms of the pseudo-atom. 925 for spin, spin_id in pseudoatom_loop(pseudospin, return_id=True): 926 # Deselect if needed. 927 if spin.select: 928 warn(RelaxWarning("Deselecting the '%s' spin as it is a member of the '%s' pseudo-atom system." % (spin_id, pseudospin_id))) 929 spin.select = False

930 931

932 -def structural_noise(align_id=None, rmsd=0.2, sim_num=1000, file=None, dir=None, force=False):

933 """Determine the PCS error due to structural noise via simulation. 934 935 For the simulation the following must already be set up in the current data pipe: 936 937 - The position of the paramagnetic centre. 938 - The alignment and magnetic susceptibility tensor. 939 940 The protocol for the simulation is as follows: 941 942 - The lanthanide or paramagnetic centre position will be fixed. Its motion is assumed to be on the femto- to pico- and nanosecond timescales. Hence the motion is averaged over the evolution of the PCS and can be ignored. 943 - The positions of the nuclear spins will be randomised N times. For each simulation a random unit vector will be generated. Then a random distance along the unit vector will be generated by sampling from a Gaussian distribution centered at zero, the original spin position, with a standard deviation set to the given RMSD. Both positive and negative displacements will be used. 944 - The PCS for the randomised position will be back calculated. 945 - The PCS standard deviation will be calculated from the N randomised PCS values. 946 947 The standard deviation will both be stored in the spin container data structure in the relax data store as well as being added to the already present PCS error (using variance addition). This will then be used in any optimisations involving the PCS. 948 949 If the alignment ID string is not supplied, the procedure will be applied to the PCS data from all alignments. 950 951 952 @keyword align_id: The alignment tensor ID string. 953 @type align_id: str 954 @keyword rmsd: The atomic position RMSD, in Angstrom, to randomise the spin positions with for the simulations. 955 @type rmsd: float 956 @keyword sim_num: The number of simulations, N, to perform to determine the structural noise component of the PCS errors. 957 @type sim_num: int 958 @keyword file: The optional name of the Grace file to plot the structural errors verses the paramagnetic centre to spin distances. 959 @type file: None or str 960 @keyword dir: The directory name to place the Grace file into. 961 @type dir: None or str 962 @keyword force: A flag which if True will cause any pre-existing file to be overwritten. 963 @type force: bool 964 """ 965 966 # Check the pipe setup. 967 check_pipe_setup(sequence=True, pcs_id=align_id, pcs=True, paramag_centre=True) 968 969 # Convert the align IDs to an array, or take all IDs. 970 if align_id: 971 align_ids = [align_id] 972 else: 973 align_ids = cdp.align_ids 974 975 # Initialise some numpy data structures for use in the simulations. 976 grace_data = [] 977 unit_vect = zeros(3, float64) 978 pcs = {} 979 for id in align_ids: 980 grace_data.append([]) 981 pcs[id] = zeros(sim_num, float64) 982 983 # Print out. 984 print("Executing %i simulations for each spin system." % sim_num) 985 986 # Loop over the spins. 987 for spin, spin_id in spin_loop(return_id=True): 988 # Deselected spins. 989 if not spin.select: 990 continue 991 992 # Skip spins with no PCS or position. 993 if not hasattr(spin, 'pcs'): 994 continue 995 if not hasattr(spin, 'pos'): 996 continue 997 998 # Print out. 999 print(spin_id) 1000 1001 # Average the atom position. 1002 if type(spin.pos[0]) in [float, float64]: 1003 pos = spin.pos 1004 else: 1005 pos = zeros(3, float64) 1006 for i in range(len(spin.pos)): 1007 pos += spin.pos[i] 1008 pos = pos / len(spin.pos) 1009 1010 # The original vector length (for the Grace plot). 1011 orig_vect = pos - cdp.paramagnetic_centre 1012 orig_r = norm(orig_vect) 1013 1014 # Loop over the N randomisations. 1015 for i in range(sim_num): 1016 # The random unit vector. 1017 random_unit_vector(unit_vect) 1018 1019 # The random displacement (in Angstrom). 1020 disp = gauss(0, rmsd) 1021 1022 # Move the atom. 1023 new_pos = pos + disp*unit_vect 1024 1025 # The vector and length. 1026 vect = new_pos - cdp.paramagnetic_centre 1027 r = norm(vect) 1028 vect = vect / r 1029 1030 # Loop over the alignments. 1031 for id in align_ids: 1032 # No PCS value, so skip. 1033 if id not in spin.pcs: 1034 continue 1035 1036 # Calculate the PCS constant. 1037 dj = pcs_constant(cdp.temperature[id], cdp.spectrometer_frq[id] * 2.0 * pi / g1H, r/1e10) 1038 1039 # Calculate the PCS value (in ppm). 1040 pcs[id][i] = pcs_tensor(dj, vect, cdp.align_tensors[get_tensor_index(id)].A) * 1e6 1041 1042 # Initialise if necessary. 1043 if not hasattr(spin, 'pcs_struct_err'): 1044 spin.pcs_struct_err = {} 1045 1046 # Loop over the alignments. 1047 align_index = 0 1048 for id in align_ids: 1049 # No PCS value, so skip. 1050 if id not in spin.pcs: 1051 align_index += 1 1052 continue 1053 1054 # The PCS standard deviation. 1055 sd = std(pcs[id]) 1056 1057 # Remove the previous error. 1058 if id in spin.pcs_struct_err: 1059 warn(RelaxWarning("Removing the previous structural error value from the PCS error of the spin '%s' for the alignment ID '%s'." % (spin_id, id))) 1060 spin.pcs_err[id] = sqrt(spin.pcs_err[id]**2 - spin.pcs_struct_err[id]**2) 1061 1062 # Store the structural error. 1063 spin.pcs_struct_err[id] = sd 1064 1065 # Add it to the PCS error (with variance addition). 1066 spin.pcs_err[id] = sqrt(spin.pcs_err[id]**2 + sd**2) 1067 1068 # Store the data for the Grace plot. 1069 grace_data[align_index].append([orig_r, sd]) 1070 1071 # Increment the alignment index. 1072 align_index += 1 1073 1074 # The Grace output. 1075 if file: 1076 # Open the Grace file for writing. 1077 file = open_write_file(file, dir, force) 1078 1079 # The header. 1080 grace.write_xy_header(file=file, title="PCS structural noise", subtitle="%s Angstrom structural noise"%rmsd, data_type=['pcs_bc', 'pcs'], sets=[len(align_ids)], set_names=[align_ids], symbol_sizes=[[0.5]*len(align_ids)], linetype=[[0]*len(align_ids)], axis_labels=[["Ln\\S3+\\N to spin distance (Angstrom)", "PCS standard deviation (ppm)"]]) 1081 1082 # The main data. 1083 grace.write_xy_data(data=[grace_data], file=file, graph_type='xy')

1084 1085

1086 -def weight(align_id=None, spin_id=None, weight=1.0):

1087 """Set optimisation weights on the PCS data. 1088 1089 @keyword align_id: The alignment tensor ID string. 1090 @type align_id: str 1091 @keyword spin_id: The spin ID string. 1092 @type spin_id: None or str 1093 @keyword weight: The optimisation weight. The higher the value, the more importance the PCS will have. 1094 @type weight: float or int. 1095 """ 1096 1097 # Check the pipe setup. 1098 check_pipe_setup(sequence=True, pcs_id=align_id, pcs=True) 1099 1100 # Loop over the spins. 1101 for spin in spin_loop(spin_id): 1102 # No data structure. 1103 if not hasattr(spin, 'pcs_weight'): 1104 spin.pcs_weight = {} 1105 1106 # Set the weight. 1107 spin.pcs_weight[align_id] = weight

1108 1109

1110 -def write(align_id=None, file=None, dir=None, bc=False, force=False):

1111 """Display the PCS data corresponding to the alignment ID. 1112 1113 @keyword align_id: The alignment tensor ID string. 1114 @type align_id: str 1115 @keyword file: The file name or object to write to. 1116 @type file: str or file object 1117 @keyword dir: The name of the directory to place the file into (defaults to the current directory). 1118 @type dir: str 1119 @keyword bc: The back-calculation flag which if True will cause the back-calculated rather than measured data to be written. 1120 @type bc: bool 1121 @keyword force: A flag which if True will cause any pre-existing file to be overwritten. 1122 @type force: bool 1123 """ 1124 1125 # Check the pipe setup. 1126 check_pipe_setup(sequence=True, pcs_id=align_id, pcs=True) 1127 1128 # Open the file for writing. 1129 file = open_write_file(file, dir, force) 1130 1131 # Loop over the spins and collect the data. 1132 mol_names = [] 1133 res_nums = [] 1134 res_names = [] 1135 spin_nums = [] 1136 spin_names = [] 1137 values = [] 1138 errors = [] 1139 for spin, mol_name, res_num, res_name in spin_loop(full_info=True): 1140 # Skip deselected spins. 1141 if not spin.select: 1142 continue 1143 1144 # Skip spins with no PCSs. 1145 if not bc and (not hasattr(spin, 'pcs') or not align_id in spin.pcs.keys()): 1146 continue 1147 elif bc and (not hasattr(spin, 'pcs_bc') or align_id not in spin.pcs_bc.keys()): 1148 continue 1149 1150 # Append the spin data. 1151 mol_names.append(mol_name) 1152 res_nums.append(res_num) 1153 res_names.append(res_name) 1154 spin_nums.append(spin.num) 1155 spin_names.append(spin.name) 1156 1157 # The value. 1158 if bc: 1159 values.append(spin.pcs_bc[align_id]) 1160 else: 1161 values.append(spin.pcs[align_id]) 1162 1163 # The error. 1164 if hasattr(spin, 'pcs_err') and align_id in spin.pcs_err.keys(): 1165 errors.append(spin.pcs_err[align_id]) 1166 else: 1167 errors.append(None) 1168 1169 # Write out. 1170 write_spin_data(file=file, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names, data=values, data_name='PCSs', error=errors, error_name='PCS_error')

1171

Source Code for Module pipe_control.pcs