Package pipe_control :: Module spectrum
[hide private]
[frames] | [no frames]

Source Code for Module pipe_control.spectrum

   1  ############################################################################### 
   2  #                                                                             # 
   3  # Copyright (C) 2003-2004,2006-2014 Edward d'Auvergne                         # 
   4  # Copyright (C) 2008 Sebastien Morin                                          # 
   5  # Copyright (C) 2010 Michael Bieri                                            # 
   6  # Copyright (C) 2013-2014 Troels E. Linnet                                    # 
   7  #                                                                             # 
   8  # This file is part of the program relax (http://www.nmr-relax.com).          # 
   9  #                                                                             # 
  10  # This program is free software: you can redistribute it and/or modify        # 
  11  # it under the terms of the GNU General Public License as published by        # 
  12  # the Free Software Foundation, either version 3 of the License, or           # 
  13  # (at your option) any later version.                                         # 
  14  #                                                                             # 
  15  # This program is distributed in the hope that it will be useful,             # 
  16  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
  17  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
  18  # GNU General Public License for more details.                                # 
  19  #                                                                             # 
  20  # You should have received a copy of the GNU General Public License           # 
  21  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
  22  #                                                                             # 
  23  ############################################################################### 
  24   
  25  # Module docstring. 
  26  """Module containing functions for the handling of peak intensities.""" 
  27   
  28   
  29  # Python module imports. 
  30  from math import sqrt 
  31  from numpy import asarray 
  32  import operator 
  33  import sys 
  34  from warnings import warn 
  35   
  36  # relax module imports. 
  37  from lib.errors import RelaxError, RelaxImplementError, RelaxNoSpectraError 
  38  from lib.io import sort_filenames, write_data 
  39  from lib.text.sectioning import section, subsection 
  40  from lib.spectrum.peak_list import read_peak_list 
  41  from lib.statistics import std 
  42  from lib.warnings import RelaxWarning, RelaxNoSpinWarning 
  43  from pipe_control.mol_res_spin import check_mol_res_spin_data, create_spin, generate_spin_id_unique, return_spin, spin_loop 
  44  from pipe_control.pipes import check_pipe 
  45  from pipe_control.selection import desel_spin, sel_spin 
  46   
  47   
def __errors_height_no_repl():
    """Set the peak height errors to the base plane RMSD when no spectra are replicated."""

    # Walk through every spin, assigning the base plane noise RMSD as the intensity error.
    for current_spin, current_id in spin_loop(return_id=True):
        # Only selected spins carrying intensity data are processed.
        if not current_spin.select or not hasattr(current_spin, 'peak_intensity'):
            continue

        # The RMSD must have been supplied beforehand (via the baseplane_rmsd user function).
        if not hasattr(current_spin, 'baseplane_rmsd'):
            raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % current_id)

        # The error is simply the RMSD value.
        current_spin.peak_intensity_err = current_spin.baseplane_rmsd
67 68
def __errors_repl(subset=None, verbosity=0):
    """Calculate the errors for peak intensities from replicated spectra.

    The variance for each replicate group is the average of the squared standard deviations
    over all spins, and is shared by every spectrum ID in the group.  If some spectra are not
    replicated, a single averaged variance is assigned to all spectra in the subset.

    @keyword subset:        The list of spectrum ID strings to restrict the analysis to.
    @type subset:           list of str
    @keyword verbosity:     The amount of information to print.  The higher the value, the greater the verbosity.
    @type verbosity:        int
    """

    # Per-spectrum flags of whether each spectrum belongs to a replicate group.
    repl = replicated_flags()

    # Are all spectra replicated?
    if False in list(repl.values()):
        all_repl = False
        print("All spectra replicated: No.")
    else:
        all_repl = True
        print("All spectra replicated: Yes.")

    # Initialise the error structures on the current data pipe.
    if not hasattr(cdp, 'sigma_I'):
        cdp.sigma_I = {}
    if not hasattr(cdp, 'var_I'):
        cdp.var_I = {}

    # The subset - default to all loaded spectrum IDs when none is given.
    subset_flag = False
    if not subset:
        subset_flag = True
        subset = cdp.spectrum_ids

    # Loop over the spectra.
    for id in subset:
        # Skip non-replicated spectra.
        if not repl[id]:
            continue

        # Skip replicated spectra which already have been used.
        if id in cdp.var_I and cdp.var_I[id] != 0.0:
            continue

        # Find the replicate group containing this ID (the last matching group wins).
        for j in range(len(cdp.replicates)):
            if id in cdp.replicates[j]:
                spectra = cdp.replicates[j]

        # Number of spectra in the group.
        num_spectra = len(spectra)

        # Printout.
        print("\nReplicated spectra: " + repr(spectra))
        if verbosity:
            print("%-20s%-20s" % ("Spin_ID", "SD"))

        # Accumulate the per-spin variances for this group.
        count = 0
        for spin, spin_id in spin_loop(return_id=True):
            # Skip deselected spins.
            if not spin.select:
                continue

            # Skip and deselect spins which have no data.
            if not hasattr(spin, 'peak_intensity'):
                spin.select = False
                continue

            # Skip spins missing any of the group's intensities.
            missing = False
            for j in range(num_spectra):
                if not spectra[j] in spin.peak_intensity:
                    missing = True
            if missing:
                continue

            # The peak intensities for this spin, one per replicate.
            values = []
            for j in range(num_spectra):
                values.append(spin.peak_intensity[spectra[j]])

            # The standard deviation (dof=1, i.e. the sample standard deviation).
            sd = std(values=values, dof=1)

            # Printout.
            if verbosity:
                print("%-20s%-20s" % (spin_id, sd))

            # Sum of variances (for averaging over spins below).
            if not id in cdp.var_I:
                cdp.var_I[id] = 0.0
            cdp.var_I[id] = cdp.var_I[id] + sd**2
            count = count + 1

        # No data catch.
        if not count:
            raise RelaxError("No data is present, unable to calculate errors from replicated spectra.")

        # Average variance over the spins.
        cdp.var_I[id] = cdp.var_I[id] / float(count)

        # Share the group's variance with every spectrum in the group.
        for j in range(num_spectra):
            cdp.var_I[spectra[j]] = cdp.var_I[id]

        # Print out.
        print("Standard deviation: %s" % sqrt(cdp.var_I[id]))

    # Average across all spectra if there are time points with a single spectrum.
    if not all_repl:
        # Print out.
        # NOTE(review): subset_flag is True when NO subset was supplied, so these two
        # messages appear swapped relative to their text - confirm the intended wording.
        if subset_flag:
            print("\nVariance averaging over the spectra subset.")
        else:
            print("\nVariance averaging over all spectra.")

        # Initialise the accumulators.
        var_I = 0.0
        num_dups = 0

        # Loop over all time points.
        for id in cdp.var_I:
            # Only use id's defined in subset
            if id not in subset:
                continue

            # Single spectrum (or extraordinarily accurate NMR spectra!).
            if cdp.var_I[id] == 0.0:
                continue

            # Sum and count.
            var_I = var_I + cdp.var_I[id]
            num_dups = num_dups + 1

        # Average value.  NOTE(review): num_dups can be zero here if no replicated
        # variance exists within the subset, giving a ZeroDivisionError - confirm.
        var_I = var_I / float(num_dups)

        # Assign the average value to all time points.
        for id in subset:
            cdp.var_I[id] = var_I

        # Print out.
        print("Standard deviation for all spins: " + repr(sqrt(var_I)))

    # Convert all variances to standard deviations.
    for id in cdp.var_I:
        # Create the standard deviation data structure.
        cdp.sigma_I[id] = sqrt(cdp.var_I[id])

    # Set the spin specific errors.
    for spin in spin_loop():
        # Skip deselected spins.
        if not spin.select:
            continue

        # Set the error.  Note that the shared sigma_I dictionary object is assigned
        # to every spin (all spins reference the same dict).
        spin.peak_intensity_err = cdp.sigma_I
226 227
def __errors_volume_no_repl(subset=None):
    """Calculate the errors for peak volumes when no spectra are replicated.

    The error is set to the base plane RMSD multiplied by the square root of the total
    number of integration points N.

    @keyword subset:    The list of spectrum ID strings to restrict the analysis to.
                        Currently unused by this function - TODO confirm whether
                        restriction to the subset is intended.
    @type subset:       list of str or None
    """

    # Loop over the spins and set the error to the RMSD of the base plane noise.
    for spin, spin_id in spin_loop(return_id=True):
        # Skip deselected spins.
        if not spin.select:
            continue

        # Skip spins missing intensity data.
        if not hasattr(spin, 'peak_intensity'):
            continue

        # Test if the RMSD has been set.
        if not hasattr(spin, 'baseplane_rmsd'):
            raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id)

        # Test that the total number of points have been set.
        if not hasattr(spin, 'N'):
            raise RelaxError("The total number of points used in the volume integration has not been specified for spin '%s'." % spin_id)

        # Bug fix: initialise the error structure if needed - the original code indexed
        # spin.peak_intensity_err without creating it, raising AttributeError.
        if not hasattr(spin, 'peak_intensity_err'):
            spin.peak_intensity_err = {}

        # Set the error to the RMSD multiplied by the square root of the total number of points.
        for key in spin.peak_intensity:
            spin.peak_intensity_err[key] = spin.baseplane_rmsd[key] * sqrt(spin.N)
252 253
def add_spectrum_id(spectrum_id=None):
    """Store a new spectrum ID in the current data pipe.

    @keyword spectrum_id:   The spectrum ID string.
    @type spectrum_id:      str
    """

    # Create the ID list on first use.
    if not hasattr(cdp, 'spectrum_ids'):
        cdp.spectrum_ids = []

    # Append only unseen IDs, preserving insertion order.
    if spectrum_id not in cdp.spectrum_ids:
        cdp.spectrum_ids.append(spectrum_id)
271 272
def baseplane_rmsd(error=0.0, spectrum_id=None, spin_id=None):
    """Set the peak intensity errors, as defined as the baseplane RMSD.

    @param error:           The peak intensity error value defined as the RMSD of the base plane noise.
    @type error:            float
    @keyword spectrum_id:   The spectrum id.
    @type spectrum_id:      str
    @param spin_id:         The spin identification string.
    @type spin_id:          str
    """

    # Validate the pipe, the spin data, and the spectrum ID.
    check_pipe()
    check_mol_res_spin_data()
    check_spectrum_id(spectrum_id)

    # The Bruker NC_proc intensity scaling factor, defaulting to no scaling.
    scale = 1.0
    if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
        scale = 1.0 / 2**cdp.ncproc[spectrum_id]

    # Store the scaled error for each selected spin matching the spin ID.
    for current_spin in spin_loop(spin_id):
        # Deselected spins are untouched.
        if not current_spin.select:
            continue

        # Each spin keeps a per-spectrum dictionary of RMSD values.
        if not hasattr(current_spin, 'baseplane_rmsd'):
            current_spin.baseplane_rmsd = {}

        # Record the scaled error under the spectrum ID.
        current_spin.baseplane_rmsd[spectrum_id] = float(error) * scale
308 309
def check_spectrum_id(id):
    """Check that the given spectrum ID exists.

    @param id:                      The spectrum ID to check for.
    @type id:                       str
    @raises RelaxNoSpectraError:    If the ID does not exist.
    """

    # A missing ID container or an unknown ID both mean the spectrum is absent.
    if not hasattr(cdp, 'spectrum_ids') or id not in cdp.spectrum_ids:
        raise RelaxNoSpectraError(id)
325 326
def delete(spectrum_id=None):
    """Delete spectral data corresponding to the spectrum ID.

    This removes the ID from the spectrum ID list, from the ncproc, replicates, sigma_I
    and var_I structures of the current data pipe, and deletes the corresponding peak
    intensity from every spin.

    @keyword spectrum_id:   The spectrum ID string.
    @type spectrum_id:      str
    """

    # Data checks.
    check_pipe()
    check_mol_res_spin_data()
    check_spectrum_id(spectrum_id)

    # Remove the ID from the spectrum ID list.
    cdp.spectrum_ids.pop(cdp.spectrum_ids.index(spectrum_id))

    # The ncproc parameter.
    if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
        del cdp.ncproc[spectrum_id]

    # Replicates.
    if hasattr(cdp, 'replicates'):
        # Loop over the replicates.  Popping during this loop is safe because the
        # loop terminates via the break immediately afterwards.
        for i in range(len(cdp.replicates)):
            # The spectrum is replicated.
            if spectrum_id in cdp.replicates[i]:
                # Duplicate - remove the whole two-element group as one ID alone is not a replicate.
                if len(cdp.replicates[i]) == 2:
                    cdp.replicates.pop(i)

                # More than two spectra: remove just this ID from the group.
                else:
                    cdp.replicates[i].pop(cdp.replicates[i].index(spectrum_id))

                # No need to check further.
                break

    # Errors.
    if hasattr(cdp, 'sigma_I') and spectrum_id in cdp.sigma_I:
        del cdp.sigma_I[spectrum_id]
    if hasattr(cdp, 'var_I') and spectrum_id in cdp.var_I:
        del cdp.var_I[spectrum_id]

    # Loop over the spins.
    # NOTE(review): per-spin peak_intensity_err and sn_ratio entries for this ID are
    # not removed here - confirm whether that is intentional.
    for spin in spin_loop():
        # Intensity data.
        if hasattr(spin, 'peak_intensity') and spectrum_id in spin.peak_intensity:
            del spin.peak_intensity[spectrum_id]
374 375
def error_analysis(subset=None):
    """Determine the peak intensity standard deviation.

    Dispatches to the replicate-based or RMSD-based error calculation depending on the
    intensity measure (cdp.int_method) and the presence of replicated spectra.

    @keyword subset:    The list of spectrum ID strings to restrict the analysis to.
    @type subset:       list of str
    """

    # Tests.
    check_pipe()
    check_mol_res_spin_data()

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("Error analysis is not possible, no spectra have been loaded.")

    # Check the IDs.
    if subset:
        for id in subset:
            if id not in cdp.spectrum_ids:
                raise RelaxError("The spectrum ID '%s' has not been loaded into relax." % id)

    # Peak height category.
    if cdp.int_method == 'height':
        # Print out.
        print("Intensity measure: Peak heights.")

        # Do we have replicated spectra?
        if hasattr(cdp, 'replicates'):
            # Print out.
            print("Replicated spectra: Yes.")

            # Set the errors.
            __errors_repl(subset=subset)

        # No replicated spectra.
        else:
            # Print out.
            print("Replicated spectra: No.")
            if subset:
                print("Spectra ID subset ignored.")

            # Set the errors.
            __errors_height_no_repl()

    # Peak volume category.
    if cdp.int_method == 'point sum':
        # Print out.
        print("Intensity measure: Peak volumes.")

        # Do we have replicated spectra?
        if hasattr(cdp, 'replicates'):
            # Print out.
            print("Replicated spectra: Yes.")

            # Set the errors.
            __errors_repl(subset=subset)

        # No replicated spectra.
        else:
            # Print out.
            print("Replicated spectra: No.")

            # Not implemented.
            raise RelaxImplementError

            # Set the errors.  Bug fix: the original called the undefined name
            # __errors_vol_no_repl() - the function is __errors_volume_no_repl().
            # This call is unreachable until the raise above is removed.
            __errors_volume_no_repl()
443 444
def error_analysis_per_field():
    """Perform an error analysis of the peak intensities for each field strength separately."""

    # Printout.
    section(file=sys.stdout, text="Automatic Error analysis per field strength", prespace=2)

    # Handle missing frequency data: a single None entry means "all spectra, no field info".
    frqs = [None]
    if hasattr(cdp, 'spectrometer_frq_list'):
        frqs = cdp.spectrometer_frq_list

    # Loop over the spectrometer frequencies.
    for frq in frqs:
        # Generate a list of spectrum IDs matching the frequency.
        ids = []
        for id in cdp.spectrum_ids:
            # Check that the spectrometer frequency matches.
            match_frq = True
            if frq != None and cdp.spectrometer_frq[id] != frq:
                match_frq = False

            # Add the ID.
            if match_frq:
                ids.append(id)

        # Printout.  Bug fix: the original always computed frq/1e6, raising a
        # TypeError when no frequency information was set (frq is None).
        if frq is None:
            print("No field strength information, subset ids for spectrum.error_analysis is: %s" % ids)
        else:
            print("For field strength %.8f MHz, subset ids for spectrum.error_analysis is: %s" % (frq/1e6, ids))

        # Run the error analysis on the subset.
        error_analysis(subset=ids)
473 474
def get_ids():
    """Return a list of all spectrum IDs.

    @return:    The list of spectrum IDs.
    @rtype:     list of str
    """

    # The stored ID list, or an empty list when no spectra have been loaded yet.
    return getattr(cdp, 'spectrum_ids', [])
488 489
def integration_points(N=0, spectrum_id=None, spin_id=None):
    """Set the number of integration points for the given spectrum.

    @param N:                       The number of integration points.
    @type N:                        int
    @keyword spectrum_id:           The spectrum ID string.
    @type spectrum_id:              str
    @keyword spin_id:               The spin ID string used to restrict the value to.
    @type spin_id:                  None or str
    @raises RelaxImplementError:    Always - this back end has not yet been implemented.
    """

    # Placeholder: the functionality is not yet implemented.
    raise RelaxImplementError
502 503
def read(file=None, dir=None, spectrum_id=None, dim=1, int_col=None, int_method=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, ncproc=None, verbose=True):
    """Read the peak intensity data.

    @keyword file:          The name of the file(s) containing the peak intensities.
    @type file:             str or list of str
    @keyword dir:           The directory where the file is located.
    @type dir:              str
    @keyword spectrum_id:   The spectrum identification string.
    @type spectrum_id:      str or list of str
    @keyword dim:           The dimension of the peak list to associate the data with.
    @type dim:              int
    @keyword int_col:       The column containing the peak intensity data (used by the generic intensity file format).
    @type int_col:          int or list of int
    @keyword int_method:    The integration method, one of 'height', 'point sum' or 'other'.
    @type int_method:       str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.  If 'auto' is provided for a NMRPipe seriesTab formatted file, the ID's are auto generated in form of Z_Ai.
    @type spin_id:          None or str
    @keyword ncproc:        The Bruker ncproc binary intensity scaling factor.
    @type ncproc:           int or None
    @keyword verbose:       A flag which if True will cause all relaxation data loaded to be printed out.
    @type verbose:          bool
    """

    # Data checks.
    check_pipe()
    check_mol_res_spin_data()

    # Check the file name.
    if file == None:
        raise RelaxError("The file name must be supplied.")

    # Test that the intensity measures are identical.
    if hasattr(cdp, 'int_method') and cdp.int_method != int_method:
        raise RelaxError("The '%s' measure of peak intensities does not match '%s' of the previously loaded spectra." % (int_method, cdp.int_method))

    # Multiple ID flags - which of the spectrum_id, file and int_col arguments are lists.
    flag_multi = False
    flag_multi_file = False
    flag_multi_col = False
    if isinstance(spectrum_id, list) or spectrum_id == 'auto':
        flag_multi = True
    if isinstance(file, list):
        flag_multi_file = True
    if isinstance(int_col, list) or spectrum_id == 'auto':
        flag_multi_col = True

    # List argument checks.
    if flag_multi:
        # Too many lists.
        if flag_multi_file and flag_multi_col:
            raise RelaxError("If a list of spectrum IDs is supplied, the file names and intensity column arguments cannot both be lists.")

        # Not enough lists.
        if not flag_multi_file and not flag_multi_col:
            raise RelaxError("If a list of spectrum IDs is supplied, either the file name or intensity column arguments must be a list of equal length.")

        # List lengths for multiple files.
        if flag_multi_file and len(spectrum_id) != len(file):
            raise RelaxError("The file list %s and spectrum ID list %s do not have the same number of elements." % (file, spectrum_id))

        # List lengths for multiple intensity columns.
        if flag_multi_col and spectrum_id != 'auto' and len(spectrum_id) != len(int_col):
            raise RelaxError("The spectrum ID list %s and intensity column list %s do not have the same number of elements." % (spectrum_id, int_col))

    # More list argument checks (when only one spectrum ID is supplied).
    else:
        # Multiple files.
        if flag_multi_file:
            raise RelaxError("If multiple files are supplied, then multiple spectrum IDs must also be supplied.")

        # Multiple intensity columns.
        if flag_multi_col:
            raise RelaxError("If multiple intensity columns are supplied, then multiple spectrum IDs must also be supplied.")

    # Intensity column checks.
    if spectrum_id != 'auto' and not flag_multi and flag_multi_col:
        raise RelaxError("If a list of intensity columns is supplied, the spectrum ID argument must also be a list of equal length.")

    # Check the intensity measure.
    if not int_method in ['height', 'point sum', 'other']:
        raise RelaxError("The intensity measure '%s' is not one of 'height', 'point sum', 'other'." % int_method)

    # Set the peak intensity measure.
    cdp.int_method = int_method

    # Convert the file argument to a list if necessary.
    if not isinstance(file, list):
        file = [file]

    # Loop over all files.
    for file_index in range(len(file)):
        # Read the peak list data.
        peak_list = read_peak_list(file=file[file_index], dir=dir, int_col=int_col, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)

        # Automatic spectrum IDs - take the intensity name from the first peak.
        if spectrum_id == 'auto':
            spectrum_id = peak_list[0].intensity_name

        # Loop over the assignments.
        data = []
        data_flag = False
        for assign in peak_list:
            # Generate the spin_id.  Note that this rebinds the spin_id argument.
            spin_id = generate_spin_id_unique(res_num=assign.res_nums[dim-1], spin_name=assign.spin_names[dim-1])

            # Convert the intensity data to a list if needed.
            intensity = assign.intensity
            if not isinstance(intensity, list):
                intensity = [intensity]

            # Loop over the intensity data.
            for int_index in range(len(intensity)):
                # Sanity check.
                if intensity[int_index] == 0.0:
                    warn(RelaxWarning("A peak intensity of zero has been encountered for the spin '%s' - this could be fatal later on." % spin_id))

                # Get the spin container.
                spin = return_spin(spin_id=spin_id)
                if not spin:
                    warn(RelaxNoSpinWarning(spin_id))
                    continue

                # Skip deselected spins.
                if not spin.select:
                    continue

                # Initialise the per-spin intensity dictionary.
                if not hasattr(spin, 'peak_intensity'):
                    spin.peak_intensity = {}

                # Intensity scaling by the Bruker ncproc factor.
                if ncproc != None:
                    intensity[int_index] = intensity[int_index] / float(2**ncproc)

                # Determine the spectrum ID for this value and store the intensity.
                if flag_multi_file:
                    id = spectrum_id[file_index]
                elif flag_multi_col:
                    id = spectrum_id[int_index]
                else:
                    id = spectrum_id
                spin.peak_intensity[id] = intensity[int_index]

                # Switch the flag.
                data_flag = True

                # Append the data for printing out.
                data.append([spin_id, repr(intensity[int_index])])

        # Add the spectrum id (and ncproc) to the relax data store.
        spectrum_ids = spectrum_id
        if isinstance(spectrum_id, str):
            spectrum_ids = [spectrum_id]
        if ncproc != None and not hasattr(cdp, 'ncproc'):
            cdp.ncproc = {}
        for i in range(len(spectrum_ids)):
            add_spectrum_id(spectrum_ids[i])
            if ncproc != None:
                cdp.ncproc[spectrum_ids[i]] = ncproc

        # No data.
        if not data_flag:
            # Delete all the data.
            delete(spectrum_id)

            # Raise the error.
            raise RelaxError("No data could be loaded from the peak list")

        # Printout.
        if verbose:
            print("\nThe following intensities have been loaded into the relax data store:\n")
            write_data(out=sys.stdout, headings=["Spin_ID", "Intensity"], data=data)
            print('')
691 692
def read_spins(file=None, dir=None, dim=1, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, verbose=True):
    """Read the peak intensity data, creating any spins not already present.

    @keyword file:          The name of the file containing the peak intensities.
    @type file:             str
    @keyword dir:           The directory where the file is located.
    @type dir:              str
    @keyword dim:           The dimension of the peak list to associate the data with.
    @type dim:              int
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.  If 'auto' is provided for a NMRPipe seriesTab formatted file, the ID's are auto generated in form of Z_Ai.
    @type spin_id:          None or str
    @keyword verbose:       A flag which if True will cause all relaxation data loaded to be printed out.
    @type verbose:          bool
    """

    # Data checks.
    check_pipe()

    # Check the file name.
    if file is None:
        raise RelaxError("The file name must be supplied.")

    # Read the peak list data.
    peak_list = read_peak_list(file=file, dir=dir, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)

    # Loop over the peak_list, creating missing spins.
    # (The unused 'created_spins' accumulator of the original has been removed.)
    for assign in peak_list:
        # Extract the assignment information for the requested dimension.
        mol_name = assign.mol_names[dim-1]
        res_num = assign.res_nums[dim-1]
        res_name = assign.res_names[dim-1]
        spin_num = assign.spin_nums[dim-1]
        spin_name = assign.spin_names[dim-1]

        # Generate the spin_id.
        spin_id = generate_spin_id_unique(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_name=spin_name)

        # Check if the spin already exists.
        if return_spin(spin_id=spin_id) == None:
            # Create the spin if it does not exist.
            create_spin(spin_num=spin_num, spin_name=spin_name, res_num=res_num, res_name=res_name, mol_name=mol_name)

    # Test that data now exists.
    check_mol_res_spin_data()
751
def replicated(spectrum_ids=None):
    """Set which spectra are replicates.

    @keyword spectrum_ids:  A list of spectrum ids corresponding to replicated spectra.
    @type spectrum_ids:     list of str
    """

    # Test if the current pipe exists.
    check_pipe()

    # Test for None.
    if spectrum_ids == None:
        warn(RelaxWarning("The spectrum ID list cannot be None."))
        return

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("No spectra have been loaded therefore replicates cannot be specified.")

    # Test the spectrum id strings.
    for spectrum_id in spectrum_ids:
        check_spectrum_id(spectrum_id)

    # Test for more than one element.
    if len(spectrum_ids) == 1:
        warn(RelaxWarning("The number of spectrum IDs in the list %s must be greater than one." % spectrum_ids))
        return

    # Initialise the replicate group list.
    if not hasattr(cdp, 'replicates'):
        cdp.replicates = []

    # Check if the spectrum IDs are already in the list.
    found = False
    for i in range(len(cdp.replicates)):
        # Loop over all elements of the first.
        for j in range(len(spectrum_ids)):
            if spectrum_ids[j] in cdp.replicates[i]:
                found = True

        # One of the spectrum IDs already have a replicate specified.
        # Note the order dependence: the check sits inside the outer loop so the
        # IDs are merged into the first matching group, then the function returns.
        if found:
            # Add the remaining replicates to the list and quit this function.
            for j in range(len(spectrum_ids)):
                if spectrum_ids[j] not in cdp.replicates[i]:
                    cdp.replicates[i].append(spectrum_ids[j])

            # Nothing more to do.
            return

    # A new set of replicates.
    cdp.replicates.append(spectrum_ids)
804 805
def replicated_flags():
    """Create and return a dictionary of flags of whether the spectrum is replicated or not.

    @return:    The dictionary of flags of whether the spectrum is replicated or not.
    @rtype:     dict of bool
    """

    # Start with every loaded spectrum marked as non-replicated.
    flags = dict.fromkeys(cdp.spectrum_ids, False)

    # Flip the flag for every ID appearing in any replicate group.
    for group in cdp.replicates:
        for spectrum_id in group:
            flags[spectrum_id] = True

    # Return the dictionary.
    return flags
825 826
def replicated_ids(spectrum_id):
    """Create and return a list of spectra ID which are replicates of the given ID.

    @param spectrum_id: The spectrum ID to find all the replicates of.
    @type spectrum_id:  str
    @return:            The list of spectrum IDs which are replicates of spectrum_id.
    @rtype:             list of str
    """

    # No replicate information is stored at all.
    if not hasattr(cdp, 'replicates'):
        return []

    # Gather every ID sharing a replicate group with the given ID.
    partners = []
    for group in cdp.replicates:
        # Skip groups not containing the ID.
        if spectrum_id not in group:
            continue

        # Collect everything in the group except the ID itself.
        for other_id in group:
            if other_id != spectrum_id:
                partners.append(other_id)

    # Return the unique partner IDs in sorted order.
    return sorted(set(partners))
876 877
def signal_noise_ratio(verbose=True):
    """Calculate the signal to noise ratio per spin.

    The ratio intensity/error is computed for every spectrum ID of every selected spin
    and stored in the per-spin sn_ratio dictionary.

    @keyword verbose:   A flag which if True will print additional information out.
    @type verbose:      bool
    """

    # Tests.
    check_pipe()
    check_mol_res_spin_data()

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("No spectra have been loaded.")

    # Possible print.
    if verbose:
        print("\nThe following signal to noise ratios has been calculated:\n")

    # Set the spin specific signal to noise ratio.
    for spin, spin_id in spin_loop(return_id=True):
        # Skip deselected spins.
        if not spin.select:
            continue

        # Skip spins missing intensity data.
        if not hasattr(spin, 'peak_intensity'):
            continue

        # Test if error analysis has been performed.
        if not hasattr(spin, 'peak_intensity_err'):
            raise RelaxError("Intensity error analysis has not been performed. Please see spectrum.error_analysis().")

        # If necessary, create the dictionary.
        if not hasattr(spin, 'sn_ratio'):
            spin.sn_ratio = {}

        # Loop over the IDs, calculating each ratio.
        ids = []
        for id in spin.peak_intensity:
            # Append the ID to the list.
            ids.append(id)

            # Calculate the sn_ratio.
            # NOTE(review): a zero error would raise ZeroDivisionError here - confirm
            # that upstream error analysis guarantees non-zero errors.
            pint = float(spin.peak_intensity[id])
            pint_err = float(spin.peak_intensity_err[id])
            sn_ratio = pint / pint_err

            # Assign the sn_ratio.
            spin.sn_ratio[id] = sn_ratio

        # Sort the ids alphanumeric.
        ids = sort_filenames(filenames=ids, rev=False)

        # Collect the data under sorted ids for the printout table.
        data_i = []
        for id in ids:
            # Get the values.
            pint = spin.peak_intensity[id]
            pint_err = spin.peak_intensity_err[id]
            sn_ratio = spin.sn_ratio[id]

            # Store the data.
            data_i.append([id, repr(pint), repr(pint_err), repr(sn_ratio)])

        # Per-spin table output.
        if verbose:
            section(file=sys.stdout, text="Signal to noise ratio for spin ID '%s'"%spin_id, prespace=1)
            write_data(out=sys.stdout, headings=["Spectrum ID", "Signal", "Noise", "S/N"], data=data_i)
946 947
def sn_ratio_deselection(ratio=10.0, operation='<', all_sn=False, select=False, verbose=True):
    """Use user function deselect.spin on spins with signal to noise ratio higher or lower than ratio.  The operation determines the selection operation.

    @keyword ratio:     The ratio to compare to.
    @type ratio:        float
    @keyword operation: The comparison operation by which to select the spins.  Of the operation(sn_ratio, ratio), where operation can either be: '<', '<=', '>', '>=', '==', '!='.
    @type operation:    str
    @keyword all_sn:    A flag specifying if all the signal to noise ratios per spin should match the comparison operator, of if just a single comparison match is enough.
    @type all_sn:       bool
    @keyword select:    A flag specifying if the user function select.spin should be used instead.
    @type select:       bool
    @keyword verbose:   A flag which if True will print additional information out.
    @type verbose:      bool
    """

    # Tests.
    check_pipe()
    check_mol_res_spin_data()

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("No spectra have been loaded.")

    # Map the comparison string to its operator function (dispatch table instead of an if/elif chain).
    operators = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '==': operator.eq,
        '!=': operator.ne,
    }
    if operation not in operators:
        raise RelaxError("The compare operation does not belong to the allowed list of methods: ['<', '<=', '>', '>=', '==', '!=']")
    op = operators[operation]

    # Text describing how the per-spectrum comparisons are combined.
    if all_sn:
        text_all_sn = "all"
    else:
        text_all_sn = "any"

    # The selection/deselection function and the matching text.
    if select:
        text_sel = "selected"
        sel_func = sel_spin
    else:
        text_sel = "deselected"
        sel_func = desel_spin

    # Print the header.
    section(file=sys.stdout, text="Signal to noise ratio comparison selection", prespace=1, postspace=0)
    print("For the comparison test: S/N %s %1.1f"%(operation, ratio))

    # Loop over the spins.
    spin_ids = []
    for spin, spin_id in spin_loop(return_id=True):
        # Skip spins missing sn_ratio.
        if not hasattr(spin, 'sn_ratio'):
            # Only warn for spins which are still selected.
            if spin.select:
                warn(RelaxWarning("Spin '%s' does not contain Signal to Noise calculations. Perform the user function 'spectrum.sn_ratio'. This spin is skipped." % spin_id))
            continue

        # Collect and alphanumerically sort the spectrum IDs.
        ids = sort_filenames(filenames=list(spin.peak_intensity), rev=False)

        # The signal to noise ratios, in sorted ID order.
        sn_val = asarray([spin.sn_ratio[spectrum_id] for spectrum_id in ids])

        # Element-wise comparison of the ratios against the threshold.
        test_arr = op(sn_val, ratio)

        # Combine the per-spectrum results into a single verdict.
        if all_sn:
            test = test_arr.all()
        else:
            test = test_arr.any()

        # Split the IDs into those passing and those failing the comparison.
        ids_arr = asarray(ids)
        ids_test_arr = ids_arr[test_arr]
        ids_test_arr_inv = ids_arr[~test_arr]

        # Per-spin printout.
        if verbose:
            subsection(file=sys.stdout, text="Signal to noise ratio comparison for spin ID '%s'"%spin_id, prespace=1, postspace=0)
            print("Following spectra ID evaluated to True: %s"%ids_test_arr)
            print("Following spectra ID evaluated to False: %s"%ids_test_arr_inv)
            print("'%s' comparisons have been used for evaluation, which evaluated to: %s"%(text_all_sn, test))
            if test:
                print("The spin ID '%s' is %s"%(spin_id, text_sel))
            else:
                print("The spin ID '%s' is skipped"%spin_id)

        # If the test evaluates to True, then do the selection action.
        if test:
            # Select/Deselect the spin.
            sel_func(spin_id=spin_id)

            # Store the spin ID for the final summary printout.
            spin_ids.append(spin_id)

    # Summary printout.
    if verbose:
        if len(spin_ids) > 0:
            subsection(file=sys.stdout, text="For all of the S/N comparison tests, the following spin IDs were %s"%text_sel, prespace=1, postspace=0)
            print(spin_ids)
def sn_ratio_selection(ratio=10.0, operation='>', all_sn=True, verbose=True):
    """Use user function select.spin on spins with signal to noise ratio higher or lower than ratio.  The operation determines the selection operation.

    This simply delegates to sn_ratio_deselection() with the select flag set, so the spins matching the comparison are selected rather than deselected.

    @keyword ratio:     The ratio to compare to.
    @type ratio:        float
    @keyword operation: The comparison operation by which to select the spins.  Of the operation(sn_ratio, ratio), where operation can either be: '<', '<=', '>', '>=', '==', '!='.
    @type operation:    str
    @keyword all_sn:    A flag specifying if all the signal to noise ratios per spin should match the comparison operator, of if just a single comparison match is enough.
    @type all_sn:       bool
    @keyword verbose:   A flag which if True will print additional information out.
    @type verbose:      bool
    """

    # Delegate to the deselection function, but with spin selection switched on.
    sn_ratio_deselection(operation=operation, ratio=ratio, all_sn=all_sn, select=True, verbose=verbose)