
Source Code for Module pipe_control.spectrum

###############################################################################
#                                                                             #
# Copyright (C) 2004-2014 Edward d'Auvergne                                   #
# Copyright (C) 2008 Sebastien Morin                                          #
# Copyright (C) 2013-2014 Troels E. Linnet                                    #
#                                                                             #
# This file is part of the program relax (http://www.nmr-relax.com).          #
#                                                                             #
# This program is free software: you can redistribute it and/or modify        #
# it under the terms of the GNU General Public License as published by        #
# the Free Software Foundation, either version 3 of the License, or           #
# (at your option) any later version.                                         #
#                                                                             #
# This program is distributed in the hope that it will be useful,             #
# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
# GNU General Public License for more details.                                #
#                                                                             #
# You should have received a copy of the GNU General Public License           #
# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
#                                                                             #
###############################################################################

# Module docstring.
"""Module containing functions for the handling of peak intensities."""


# Python module imports.
from math import sqrt
from numpy import asarray
import operator
import sys
from warnings import warn

# relax module imports.
from lib.errors import RelaxError, RelaxImplementError, RelaxNoSpectraError
from lib.io import sort_filenames, write_data
from lib.text.sectioning import section, subsection
from lib.spectrum.peak_list import read_peak_list
from lib.statistics import std
from lib.warnings import RelaxWarning, RelaxNoSpinWarning
from pipe_control.mol_res_spin import check_mol_res_spin_data, create_spin, generate_spin_id_unique, return_spin, spin_loop
from pipe_control.pipes import check_pipe
from pipe_control.selection import desel_spin, sel_spin


def __errors_height_no_repl():
    """Calculate the errors for peak heights when no spectra are replicated."""

    # Loop over the spins and set the error to the RMSD of the base plane noise.
    for spin, spin_id in spin_loop(return_id=True):
        # Skip deselected spins.
        if not spin.select:
            continue

        # Skip spins missing intensity data.
        if not hasattr(spin, 'peak_intensity'):
            continue

        # Test if the RMSD has been set.
        if not hasattr(spin, 'baseplane_rmsd'):
            raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id)

        # Set the error to the RMSD.
        spin.peak_intensity_err = spin.baseplane_rmsd


def __errors_repl(subset=None, verbosity=0):
    """Calculate the errors for peak intensities from replicated spectra.

    @keyword subset:        The list of spectrum ID strings to restrict the analysis to.
    @type subset:           list of str
    @keyword verbosity:     The amount of information to print.  The higher the value, the greater the verbosity.
    @type verbosity:        int
    """

    # Replicated spectra.
    repl = replicated_flags()

    # Are all spectra replicated?
    if False in list(repl.values()):
        all_repl = False
        print("All spectra replicated: No.")
    else:
        all_repl = True
        print("All spectra replicated: Yes.")

    # Initialise.
    if not hasattr(cdp, 'sigma_I'):
        cdp.sigma_I = {}
    if not hasattr(cdp, 'var_I'):
        cdp.var_I = {}

    # The subset.
    subset_flag = False
    if not subset:
        subset_flag = True
        subset = cdp.spectrum_ids

    # Loop over the spectra.
    for id in subset:
        # Skip non-replicated spectra.
        if not repl[id]:
            continue

        # Skip replicated spectra which have already been used.
        if id in cdp.var_I and cdp.var_I[id] != 0.0:
            continue

        # The replicated spectra.
        for j in range(len(cdp.replicates)):
            if id in cdp.replicates[j]:
                spectra = cdp.replicates[j]

        # Number of spectra.
        num_spectra = len(spectra)

        # Printout.
        print("\nReplicated spectra: " + repr(spectra))
        if verbosity:
            print("%-20s%-20s" % ("Spin_ID", "SD"))

        # Calculate the mean value.
        count = 0
        for spin, spin_id in spin_loop(return_id=True):
            # Skip deselected spins.
            if not spin.select:
                continue

            # Skip and deselect spins which have no data.
            if not hasattr(spin, 'peak_intensity'):
                spin.select = False
                continue

            # Missing data.
            missing = False
            for j in range(num_spectra):
                if not spectra[j] in spin.peak_intensity:
                    missing = True
            if missing:
                continue

            # The peak intensities.
            values = []
            for j in range(num_spectra):
                values.append(spin.peak_intensity[spectra[j]])

            # The standard deviation.
            sd = std(values=values, dof=1)

            # Printout.
            if verbosity:
                print("%-20s%-20s" % (spin_id, sd))

            # Sum of variances (for average).
            if not id in cdp.var_I:
                cdp.var_I[id] = 0.0
            cdp.var_I[id] = cdp.var_I[id] + sd**2
            count = count + 1

        # No data catch.
        if not count:
            raise RelaxError("No data is present, unable to calculate errors from replicated spectra.")

        # Average variance.
        cdp.var_I[id] = cdp.var_I[id] / float(count)

        # Set all spectra variances.
        for j in range(num_spectra):
            cdp.var_I[spectra[j]] = cdp.var_I[id]

        # Print out.
        print("Standard deviation: %s" % sqrt(cdp.var_I[id]))


    # Average across all spectra if there are time points with a single spectrum.
    if not all_repl:
        # Print out.
        if subset_flag:
            print("\nVariance averaging over all spectra.")
        else:
            print("\nVariance averaging over the spectra subset.")

        # Initialise.
        var_I = 0.0
        num_dups = 0

        # Loop over all time points.
        for id in cdp.var_I:
            # Only use IDs defined in the subset.
            if id not in subset:
                continue

            # Single spectrum (or extraordinarily accurate NMR spectra!).
            if cdp.var_I[id] == 0.0:
                continue

            # Sum and count.
            var_I = var_I + cdp.var_I[id]
            num_dups = num_dups + 1

        # Average value.
        var_I = var_I / float(num_dups)

        # Assign the average value to all time points.
        for id in subset:
            cdp.var_I[id] = var_I

        # Print out.
        print("Standard deviation for all spins: " + repr(sqrt(var_I)))

    # Loop over the spectra.
    for id in cdp.var_I:
        # Create the standard deviation data structure.
        cdp.sigma_I[id] = sqrt(cdp.var_I[id])

    # Set the spin specific errors.
    for spin in spin_loop():
        # Skip deselected spins.
        if not spin.select:
            continue

        # Set the error.
        spin.peak_intensity_err = cdp.sigma_I


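# Editorial illustration, not part of the original module:  a minimal sketch of
# the statistic computed by __errors_repl() for a single spin, using the same
# std(values, dof=1) call from lib.statistics.  The two intensities below are
# hypothetical values from a pair of replicate spectra.
def _example_replicate_sd():
    """Return the sample standard deviation of two hypothetical replicate intensities."""

    # Two hypothetical peak intensities measured in replicate spectra.
    values = [1.02e6, 0.98e6]

    # The sample standard deviation (dof=1).  __errors_repl() squares this to a
    # variance, averages the variances over all spins, and stores the square
    # root in cdp.sigma_I for each spectrum ID of the replicate set.
    return std(values=values, dof=1)

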
def __errors_volume_no_repl(subset=None):
    """Calculate the errors for peak volumes when no spectra are replicated."""

    # Loop over the spins and set the error to the RMSD of the base plane noise.
    for spin, spin_id in spin_loop(return_id=True):
        # Skip deselected spins.
        if not spin.select:
            continue

        # Skip spins missing intensity data.
        if not hasattr(spin, 'peak_intensity'):
            continue

        # Test if the RMSD has been set.
        if not hasattr(spin, 'baseplane_rmsd'):
            raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id)

        # Test that the total number of points has been set.
        if not hasattr(spin, 'N'):
            raise RelaxError("The total number of points used in the volume integration has not been specified for spin '%s'." % spin_id)

        # Set the error to the RMSD multiplied by the square root of the total number of points.
        for key in spin.peak_intensity:
            spin.peak_intensity_err[key] = spin.baseplane_rmsd[key] * sqrt(spin.N)


def add_spectrum_id(spectrum_id=None):
    """Add the spectrum ID to the data store.

    @keyword spectrum_id:   The spectrum ID string.
    @type spectrum_id:      str
    """

    # Initialise the structure, if needed.
    if not hasattr(cdp, 'spectrum_ids'):
        cdp.spectrum_ids = []

    # The ID already exists.
    if spectrum_id in cdp.spectrum_ids:
        return

    # Add the ID.
    cdp.spectrum_ids.append(spectrum_id)


def baseplane_rmsd(error=0.0, spectrum_id=None, spin_id=None):
    """Set the peak intensity errors, defined as the baseplane RMSD.

    @keyword error:         The peak intensity error value defined as the RMSD of the base plane noise.
    @type error:            float
    @keyword spectrum_id:   The spectrum ID.
    @type spectrum_id:      str
    @keyword spin_id:       The spin identification string.
    @type spin_id:          str
    """

    # Data checks.
    check_pipe()
    check_mol_res_spin_data()
    check_spectrum_id(spectrum_id)

    # The scaling by NC_proc.
    if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
        scale = 1.0 / 2**cdp.ncproc[spectrum_id]
    else:
        scale = 1.0

    # Loop over the spins.
    for spin in spin_loop(spin_id):
        # Skip deselected spins.
        if not spin.select:
            continue

        # Initialise or update the baseplane_rmsd data structure as necessary.
        if not hasattr(spin, 'baseplane_rmsd'):
            spin.baseplane_rmsd = {}

        # Set the error.
        spin.baseplane_rmsd[spectrum_id] = float(error) * scale


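# Editorial illustration, not part of the original module:  a minimal sketch of
# how the base plane RMSD is normally supplied before an error analysis.  The
# spectrum IDs 'ref' and 'sat' and the RMSD value are hypothetical.
def _example_baseplane_rmsd():
    """Set a hypothetical base plane RMSD for two previously loaded spectra."""

    # One call per spectrum ID, assuming 'ref' and 'sat' have been read in.
    baseplane_rmsd(error=3600.0, spectrum_id='ref')
    baseplane_rmsd(error=3600.0, spectrum_id='sat')

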
def check_spectrum_id(id):
    """Check that the given spectrum ID exists.

    @param id:                      The spectrum ID to check for.
    @type id:                       str
    @raises RelaxNoSpectraError:    If the ID does not exist.
    """

    # Check that the spectrum ID structure exists.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxNoSpectraError(id)

    # Test if the spectrum ID exists.
    if id not in cdp.spectrum_ids:
        raise RelaxNoSpectraError(id)


def delete(spectrum_id=None):
    """Delete spectral data corresponding to the spectrum ID.

    @keyword spectrum_id:   The spectrum ID string.
    @type spectrum_id:      str
    """

    # Data checks.
    check_pipe()
    check_mol_res_spin_data()
    check_spectrum_id(spectrum_id)

    # Remove the ID.
    cdp.spectrum_ids.pop(cdp.spectrum_ids.index(spectrum_id))

    # The ncproc parameter.
    if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
        del cdp.ncproc[spectrum_id]

    # Replicates.
    if hasattr(cdp, 'replicates'):
        # Loop over the replicates.
        for i in range(len(cdp.replicates)):
            # The spectrum is replicated.
            if spectrum_id in cdp.replicates[i]:
                # Duplicate.
                if len(cdp.replicates[i]) == 2:
                    cdp.replicates.pop(i)

                # More than two spectra.
                else:
                    cdp.replicates[i].pop(cdp.replicates[i].index(spectrum_id))

                # No need to check further.
                break

    # Errors.
    if hasattr(cdp, 'sigma_I') and spectrum_id in cdp.sigma_I:
        del cdp.sigma_I[spectrum_id]
    if hasattr(cdp, 'var_I') and spectrum_id in cdp.var_I:
        del cdp.var_I[spectrum_id]

    # Loop over the spins.
    for spin in spin_loop():
        # Intensity data.
        if hasattr(spin, 'peak_intensity') and spectrum_id in spin.peak_intensity:
            del spin.peak_intensity[spectrum_id]


def error_analysis(subset=None):
    """Determine the peak intensity standard deviation.

    @keyword subset:    The list of spectrum ID strings to restrict the analysis to.
    @type subset:       list of str
    """

    # Tests.
    check_pipe()
    check_mol_res_spin_data()

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("Error analysis is not possible, no spectra have been loaded.")

    # Check the IDs.
    if subset:
        for id in subset:
            if id not in cdp.spectrum_ids:
                raise RelaxError("The spectrum ID '%s' has not been loaded into relax." % id)

    # Peak height category.
    if cdp.int_method == 'height':
        # Print out.
        print("Intensity measure: Peak heights.")

        # Do we have replicated spectra?
        if hasattr(cdp, 'replicates'):
            # Print out.
            print("Replicated spectra: Yes.")

            # Set the errors.
            __errors_repl(subset=subset)

        # No replicated spectra.
        else:
            # Print out.
            print("Replicated spectra: No.")
            if subset:
                print("Spectrum ID subset ignored.")

            # Set the errors.
            __errors_height_no_repl()

    # Peak volume category.
    if cdp.int_method == 'point sum':
        # Print out.
        print("Intensity measure: Peak volumes.")

        # Do we have replicated spectra?
        if hasattr(cdp, 'replicates'):
            # Print out.
            print("Replicated spectra: Yes.")

            # Set the errors.
            __errors_repl(subset=subset)

        # No replicated spectra.
        else:
            # Print out.
            print("Replicated spectra: No.")

            # Not implemented.
            raise RelaxImplementError

            # Set the errors.
            __errors_volume_no_repl()


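# Editorial illustration, not part of the original module:  a minimal sketch of
# a typical error analysis set-up, assuming a data pipe in which the
# hypothetical spectra 'sat' and 'sat_repeat' have already been loaded as peak
# heights.
def _example_error_analysis():
    """Define replicates and determine the peak intensity errors."""

    # Flag the two hypothetical spectra as replicates of each other.
    replicated(spectrum_ids=['sat', 'sat_repeat'])

    # Average the replicate variances and store the errors per spectrum and spin.
    error_analysis()

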
def error_analysis_per_field():
    """Perform an error analysis of the peak intensities for each field strength separately."""

    # Printout.
    section(file=sys.stdout, text="Automatic error analysis per field strength", prespace=2)

    # Handle missing frequency data.
    frqs = [None]
    if hasattr(cdp, 'spectrometer_frq_list'):
        frqs = cdp.spectrometer_frq_list

    # Loop over the spectrometer frequencies.
    for frq in frqs:
        # Generate a list of spectrum IDs matching the frequency.
        ids = []
        for id in cdp.spectrum_ids:
            # Check that the spectrometer frequency matches.
            match_frq = True
            if frq != None and cdp.spectrometer_frq[id] != frq:
                match_frq = False

            # Add the ID.
            if match_frq:
                ids.append(id)

        # Run the error analysis on the subset.
        print("For the field strength %.8f MHz, the subset of spectrum IDs for spectrum.error_analysis is: %s" % (frq/1e6, ids))
        error_analysis(subset=ids)


def get_ids():
    """Return a list of all spectrum IDs.

    @return:    The list of spectrum IDs.
    @rtype:     list of str
    """

    # No IDs.
    if not hasattr(cdp, 'spectrum_ids'):
        return []

    # Return the IDs.
    return cdp.spectrum_ids


def integration_points(N=0, spectrum_id=None, spin_id=None):
    """Set the number of integration points for the given spectrum.

    @param N:               The number of integration points.
    @type N:                int
    @keyword spectrum_id:   The spectrum ID string.
    @type spectrum_id:      str
    @keyword spin_id:       The spin ID string used to restrict the value to.
    @type spin_id:          None or str
    """

    raise RelaxImplementError


def read(file=None, dir=None, spectrum_id=None, dim=1, int_col=None, int_method=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, ncproc=None, verbose=True):
    """Read the peak intensity data.

    @keyword file:          The name of the file(s) containing the peak intensities.
    @type file:             str or list of str
    @keyword dir:           The directory where the file is located.
    @type dir:              str
    @keyword spectrum_id:   The spectrum identification string.
    @type spectrum_id:      str or list of str
    @keyword dim:           The dimension of the peak list to associate the data with.
    @type dim:              int
    @keyword int_col:       The column containing the peak intensity data (used by the generic intensity file format).
    @type int_col:          int or list of int
    @keyword int_method:    The integration method, one of 'height', 'point sum' or 'other'.
    @type int_method:       str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be None.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.  If 'auto' is provided for an NMRPipe seriesTab formatted file, the IDs are auto-generated in the form Z_Ai.
    @type spin_id:          None or str
    @keyword ncproc:        The Bruker ncproc binary intensity scaling factor.
    @type ncproc:           int or None
    @keyword verbose:       A flag which if True will cause all relaxation data loaded to be printed out.
    @type verbose:          bool
    """

    # Data checks.
    check_pipe()
    check_mol_res_spin_data()

    # Check the file name.
    if file == None:
        raise RelaxError("The file name must be supplied.")

    # Test that the intensity measures are identical.
    if hasattr(cdp, 'int_method') and cdp.int_method != int_method:
        raise RelaxError("The '%s' measure of peak intensities does not match '%s' of the previously loaded spectra." % (int_method, cdp.int_method))

    # Multiple ID flags.
    flag_multi = False
    flag_multi_file = False
    flag_multi_col = False
    if isinstance(spectrum_id, list) or spectrum_id == 'auto':
        flag_multi = True
    if isinstance(file, list):
        flag_multi_file = True
    if isinstance(int_col, list) or spectrum_id == 'auto':
        flag_multi_col = True

    # List argument checks.
    if flag_multi:
        # Too many lists.
        if flag_multi_file and flag_multi_col:
            raise RelaxError("If a list of spectrum IDs is supplied, the file names and intensity column arguments cannot both be lists.")

        # Not enough lists.
        if not flag_multi_file and not flag_multi_col:
            raise RelaxError("If a list of spectrum IDs is supplied, either the file name or intensity column arguments must be a list of equal length.")

        # List lengths for multiple files.
        if flag_multi_file and len(spectrum_id) != len(file):
            raise RelaxError("The file list %s and spectrum ID list %s do not have the same number of elements." % (file, spectrum_id))

        # List lengths for multiple intensity columns.
        if flag_multi_col and spectrum_id != 'auto' and len(spectrum_id) != len(int_col):
            raise RelaxError("The spectrum ID list %s and intensity column list %s do not have the same number of elements." % (spectrum_id, int_col))

    # More list argument checks (when only one spectrum ID is supplied).
    else:
        # Multiple files.
        if flag_multi_file:
            raise RelaxError("If multiple files are supplied, then multiple spectrum IDs must also be supplied.")

        # Multiple intensity columns.
        if flag_multi_col:
            raise RelaxError("If multiple intensity columns are supplied, then multiple spectrum IDs must also be supplied.")

    # Intensity column checks.
    if spectrum_id != 'auto' and not flag_multi and flag_multi_col:
        raise RelaxError("If a list of intensity columns is supplied, the spectrum ID argument must also be a list of equal length.")

    # Check the intensity measure.
    if not int_method in ['height', 'point sum', 'other']:
        raise RelaxError("The intensity measure '%s' is not one of 'height', 'point sum', 'other'." % int_method)

    # Set the peak intensity measure.
    cdp.int_method = int_method

    # Convert the file argument to a list if necessary.
    if not isinstance(file, list):
        file = [file]

    # Loop over all files.
    for file_index in range(len(file)):
        # Read the peak list data.
        peak_list = read_peak_list(file=file[file_index], dir=dir, int_col=int_col, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)

        # Automatic spectrum IDs.
        if spectrum_id == 'auto':
            spectrum_id = peak_list[0].intensity_name

        # Loop over the assignments.
        data = []
        data_flag = False
        for assign in peak_list:
            # Generate the spin_id.
            spin_id = generate_spin_id_unique(res_num=assign.res_nums[dim-1], spin_name=assign.spin_names[dim-1])

            # Convert the intensity data to a list if needed.
            intensity = assign.intensity
            if not isinstance(intensity, list):
                intensity = [intensity]

            # Loop over the intensity data.
            for int_index in range(len(intensity)):
                # Sanity check.
                if intensity[int_index] == 0.0:
                    warn(RelaxWarning("A peak intensity of zero has been encountered for the spin '%s' - this could be fatal later on." % spin_id))

                # Get the spin container.
                spin = return_spin(spin_id)
                if not spin:
                    warn(RelaxNoSpinWarning(spin_id))
                    continue

                # Skip deselected spins.
                if not spin.select:
                    continue

                # Initialise.
                if not hasattr(spin, 'peak_intensity'):
                    spin.peak_intensity = {}

                # Intensity scaling.
                if ncproc != None:
                    intensity[int_index] = intensity[int_index] / float(2**ncproc)

                # Add the data.
                if flag_multi_file:
                    id = spectrum_id[file_index]
                elif flag_multi_col:
                    id = spectrum_id[int_index]
                else:
                    id = spectrum_id
                spin.peak_intensity[id] = intensity[int_index]

                # Switch the flag.
                data_flag = True

                # Append the data for printing out.
                data.append([spin_id, repr(intensity[int_index])])

        # Add the spectrum ID (and ncproc) to the relax data store.
        spectrum_ids = spectrum_id
        if isinstance(spectrum_id, str):
            spectrum_ids = [spectrum_id]
        if ncproc != None and not hasattr(cdp, 'ncproc'):
            cdp.ncproc = {}
        for i in range(len(spectrum_ids)):
            add_spectrum_id(spectrum_ids[i])
            if ncproc != None:
                cdp.ncproc[spectrum_ids[i]] = ncproc

        # No data.
        if not data_flag:
            # Delete all the data.
            delete(spectrum_id)

            # Raise the error.
            raise RelaxError("No data could be loaded from the peak list.")

        # Printout.
        if verbose:
            print("\nThe following intensities have been loaded into the relax data store:\n")
            write_data(out=sys.stdout, headings=["Spin_ID", "Intensity"], data=data)
            print('')


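# Editorial illustration, not part of the original module:  a minimal sketch of
# loading peak intensities, assuming hypothetical peak list files and spectrum
# IDs.  The peak list format is determined by read_peak_list().
def _example_read():
    """Load peak heights from hypothetical peak list files."""

    # One spectrum ID per file.
    read(file='ref.list', spectrum_id='ref', int_method='height')
    read(file='sat.list', spectrum_id='sat', int_method='height')

    # Alternatively, a single generic-format file with several intensity
    # columns can be loaded by pairing a list of spectrum IDs with a list of
    # columns (the column numbers here are hypothetical).
    read(file='heights.txt', spectrum_id=['ref', 'sat'], int_method='height', int_col=[3, 4], res_num_col=1, spin_name_col=2)

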
def read_spins(file=None, dir=None, dim=1, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, verbose=True):
    """Read the spin assignments from a peak list file and create the corresponding spins.

    @keyword file:          The name of the file containing the peak intensities.
    @type file:             str
    @keyword dir:           The directory where the file is located.
    @type dir:              str
    @keyword dim:           The dimension of the peak list to associate the data with.
    @type dim:              int
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be None.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.  If 'auto' is provided for an NMRPipe seriesTab formatted file, the IDs are auto-generated in the form Z_Ai.
    @type spin_id:          None or str
    @keyword verbose:       A flag which if True will cause all relaxation data loaded to be printed out.
    @type verbose:          bool
    """

    # Data checks.
    check_pipe()

    # Check the file name.
    if file == None:
        raise RelaxError("The file name must be supplied.")

    # Read the peak list data.
    peak_list = read_peak_list(file=file, dir=dir, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)

    # Loop over the peak list.
    created_spins = []
    for assign in peak_list:
        mol_name = assign.mol_names[dim-1]
        res_num = assign.res_nums[dim-1]
        res_name = assign.res_names[dim-1]
        spin_num = assign.spin_nums[dim-1]
        spin_name = assign.spin_names[dim-1]

        # Generate the spin_id.
        spin_id = generate_spin_id_unique(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_name=spin_name)

        # Check if the spin already exists.
        if return_spin(spin_id=spin_id) == None:
            # Create the spin if it does not exist.
            create_spin(spin_num=spin_num, spin_name=spin_name, res_num=res_num, res_name=res_name, mol_name=mol_name)

    # Test that data exists.
    check_mol_res_spin_data()


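# Editorial illustration, not part of the original module:  a minimal sketch of
# creating the spin containers from a hypothetical peak list before the
# intensities themselves are read.
def _example_read_spins():
    """Create spins from a hypothetical peak list, then load its intensities."""

    # Generate the molecule, residue and spin containers from the assignments.
    read_spins(file='ref.list')

    # The intensities can then be loaded onto the newly created spins.
    read(file='ref.list', spectrum_id='ref', int_method='height')

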
def replicated(spectrum_ids=None):
    """Set which spectra are replicates.

    @keyword spectrum_ids:  A list of spectrum IDs corresponding to replicated spectra.
    @type spectrum_ids:     list of str
    """

    # Test if the current pipe exists.
    check_pipe()

    # Test for None.
    if spectrum_ids == None:
        warn(RelaxWarning("The spectrum ID list cannot be None."))
        return

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("No spectra have been loaded, therefore replicates cannot be specified.")

    # Test the spectrum ID strings.
    for spectrum_id in spectrum_ids:
        check_spectrum_id(spectrum_id)

    # Test for more than one element.
    if len(spectrum_ids) == 1:
        warn(RelaxWarning("The number of spectrum IDs in the list %s must be greater than one." % spectrum_ids))
        return

    # Initialise.
    if not hasattr(cdp, 'replicates'):
        cdp.replicates = []

    # Check if the spectrum IDs are already in the list.
    found = False
    for i in range(len(cdp.replicates)):
        # Loop over all elements of the supplied list.
        for j in range(len(spectrum_ids)):
            if spectrum_ids[j] in cdp.replicates[i]:
                found = True

        # One of the spectrum IDs already has a replicate specified.
        if found:
            # Add the remaining replicates to the list and quit this function.
            for j in range(len(spectrum_ids)):
                if spectrum_ids[j] not in cdp.replicates[i]:
                    cdp.replicates[i].append(spectrum_ids[j])

            # Nothing more to do.
            return

    # A new set of replicates.
    cdp.replicates.append(spectrum_ids)


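# Editorial illustration, not part of the original module:  a minimal sketch of
# the replicate bookkeeping, assuming two hypothetical spectrum IDs which are
# duplicate measurements of the same experiment.
def _example_replicated():
    """Flag two hypothetical spectra as replicates and inspect the result."""

    # Group the duplicates; this appends ['sat', 'sat_repeat'] to cdp.replicates.
    replicated(spectrum_ids=['sat', 'sat_repeat'])

    # The per-spectrum flags and the replicate partners can then be queried.
    flags = replicated_flags()
    partners = replicated_ids('sat')
    return flags, partners

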
def replicated_flags():
    """Create and return a dictionary of flags of whether the spectrum is replicated or not.

    @return:    The dictionary of flags of whether the spectrum is replicated or not.
    @rtype:     dict of bool
    """

    # Initialise all IDs to False.
    repl = {}
    for id in cdp.spectrum_ids:
        repl[id] = False

    # Loop over the replicates.
    for i in range(len(cdp.replicates)):
        for j in range(len(cdp.replicates[i])):
            repl[cdp.replicates[i][j]] = True

    # Return the dictionary.
    return repl


def replicated_ids(spectrum_id):
    """Create and return a list of spectrum IDs which are replicates of the given ID.

    @param spectrum_id: The spectrum ID to find all the replicates of.
    @type spectrum_id:  str
    @return:            The list of spectrum IDs which are replicates of spectrum_id.
    @rtype:             list of str
    """

    # Initialise the ID list.
    repl = []

    # Loop over the replicate lists.
    for i in range(len(cdp.replicates)):
        # The spectrum ID is in the list.
        if spectrum_id in cdp.replicates[i]:
            # Loop over the inner list.
            for j in range(len(cdp.replicates[i])):
                # Spectrum ID match.
                if spectrum_id == cdp.replicates[i][j]:
                    continue

                # Append the replicated ID.
                repl.append(cdp.replicates[i][j])

    # Nothing.
    if repl == []:
        return repl

    # Sort the list.
    repl.sort()

    # Remove duplicates (backwards).
    id = repl[-1]
    for i in range(len(repl)-2, -1, -1):
        # Duplicate.
        if id == repl[i]:
            del repl[i]

        # Unique.
        else:
            id = repl[i]

    # Return the list.
    return repl


def signal_noise_ratio(verbose=True):
    """Calculate the signal to noise ratio per spin.

    @keyword verbose:   A flag which if True will print additional information out.
    @type verbose:      bool
    """

    # Tests.
    check_pipe()
    check_mol_res_spin_data()

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("No spectra have been loaded.")

    # Possible print out.
    if verbose:
        print("\nThe following signal to noise ratios have been calculated:\n")

    # Set the spin specific signal to noise ratio.
    for spin, spin_id in spin_loop(return_id=True):
        # Skip deselected spins.
        if not spin.select:
            continue

        # Skip spins missing intensity data.
        if not hasattr(spin, 'peak_intensity'):
            continue

        # Test if error analysis has been performed.
        if not hasattr(spin, 'peak_intensity_err'):
            raise RelaxError("Intensity error analysis has not been performed.  Please see spectrum.error_analysis().")

        # If necessary, create the dictionary.
        if not hasattr(spin, 'sn_ratio'):
            spin.sn_ratio = {}

        # Loop over the IDs.
        ids = []
        for id in spin.peak_intensity:
            # Append the ID to the list.
            ids.append(id)

            # Calculate the sn_ratio.
            pint = float(spin.peak_intensity[id])
            pint_err = float(spin.peak_intensity_err[id])
            sn_ratio = pint / pint_err

            # Assign the sn_ratio.
            spin.sn_ratio[id] = sn_ratio

        # Sort the IDs alphanumerically.
        ids = sort_filenames(filenames=ids, rev=False)

        # Collect the data under the sorted IDs.
        data_i = []
        for id in ids:
            # Get the values.
            pint = spin.peak_intensity[id]
            pint_err = spin.peak_intensity_err[id]
            sn_ratio = spin.sn_ratio[id]

            # Store the data.
            data_i.append([id, repr(pint), repr(pint_err), repr(sn_ratio)])

        if verbose:
            section(file=sys.stdout, text="Signal to noise ratio for spin ID '%s'" % spin_id, prespace=1)
            write_data(out=sys.stdout, headings=["Spectrum ID", "Signal", "Noise", "S/N"], data=data_i)


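# Editorial illustration, not part of the original module:  a minimal sketch of
# the S/N workflow, assuming the peak intensity errors have already been
# determined by error_analysis().
def _example_signal_noise_ratio():
    """Calculate the per-spin signal to noise ratios and return one example dictionary."""

    # Store spin.sn_ratio[id] = intensity / error for every loaded spectrum ID.
    signal_noise_ratio(verbose=True)

    # The ratios are then available on each spin container, e.g. via spin_loop().
    for spin in spin_loop():
        if hasattr(spin, 'sn_ratio'):
            return spin.sn_ratio

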
def sn_ratio_deselection(ratio=10.0, operation='<', all_sn=False, select=False, verbose=True):
    """Use the deselect.spin user function on spins with a signal to noise ratio higher or lower than the given ratio, where the operation argument determines the comparison.

    @keyword ratio:     The ratio to compare to.
    @type ratio:        float
    @keyword operation: The comparison operation by which to select the spins, applied as operation(sn_ratio, ratio).  It can be one of: '<', '<=', '>', '>=', '==', '!='.
    @type operation:    str
    @keyword all_sn:    A flag specifying if all the signal to noise ratios per spin should match the comparison operator, or if just a single comparison match is enough.
    @type all_sn:       bool
    @keyword select:    A flag specifying if the select.spin user function should be used instead.
    @type select:       bool
    @keyword verbose:   A flag which if True will print additional information out.
    @type verbose:      bool
    """

    # Tests.
    check_pipe()
    check_mol_res_spin_data()

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("No spectra have been loaded.")

    # Assign the comparison operator.
    # '<' : strictly less than.
    if operation == '<':
        op = operator.lt

    # '<=' : less than or equal.
    elif operation == '<=':
        op = operator.le

    # '>' : strictly greater than.
    elif operation == '>':
        op = operator.gt

    # '>=' : greater than or equal.
    elif operation == '>=':
        op = operator.ge

    # '==' : equal.
    elif operation == '==':
        op = operator.eq

    # '!=' : not equal.
    elif operation == '!=':
        op = operator.ne

    # If not assigned, raise an error.
    else:
        raise RelaxError("The comparison operation does not belong to the allowed list of methods: ['<', '<=', '>', '>=', '==', '!=']")

    # Assign the text for the print out.
    if all_sn:
        text_all_sn = "all"
    else:
        text_all_sn = "any"

    if select:
        text_sel = "selected"
        sel_func = sel_spin
    else:
        text_sel = "deselected"
        sel_func = desel_spin

    # Print out.
    section(file=sys.stdout, text="Signal to noise ratio comparison selection", prespace=1, postspace=0)
    print("For the comparison test: S/N %s %1.1f" % (operation, ratio))

    # Loop over the spins.
    spin_ids = []
    for spin, spin_id in spin_loop(return_id=True):
        # Skip spins missing sn_ratio.
        if not hasattr(spin, 'sn_ratio'):
            # Skip the warning for deselected spins.
            if spin.select:
                warn(RelaxWarning("Spin '%s' does not contain signal to noise calculations.  Perform the user function 'spectrum.sn_ratio'.  This spin is skipped." % spin_id))
            continue

        # Loop over the IDs, collect and sort.
        ids = []
        for id in spin.peak_intensity:
            # Append the ID to the list.
            ids.append(id)

        # Sort the IDs alphanumerically.
        ids = sort_filenames(filenames=ids, rev=False)

        # Loop over the sorted IDs.
        sn_val = []
        for id in ids:
            # Append the signal to noise ratio to the list.
            sn_val.append(spin.sn_ratio[id])

        # Convert the list to an array.
        sn_val = asarray(sn_val)

        # Make the comparison for the whole array.
        test_arr = op(sn_val, ratio)

        # Determine how the test should evaluate.
        if all_sn:
            test = test_arr.all()
        else:
            test = test_arr.any()

        # Make a numpy array of the IDs, and extract those for which the test is True.
        ids_arr = asarray(ids)
        ids_test_arr = ids_arr[test_arr]

        # Invert the boolean array.
        test_arr_inv = test_arr == False
        ids_test_arr_inv = ids_arr[test_arr_inv]

        # Print out.
        if verbose:
            subsection(file=sys.stdout, text="Signal to noise ratio comparison for spin ID '%s'" % spin_id, prespace=1, postspace=0)
            print("The following spectrum IDs evaluated to True: %s" % ids_test_arr)
            print("The following spectrum IDs evaluated to False: %s" % ids_test_arr_inv)
            print("'%s' comparisons have been used for the evaluation, which evaluated to: %s" % (text_all_sn, test))
            if test:
                print("The spin ID '%s' is %s" % (spin_id, text_sel))
            else:
                print("The spin ID '%s' is skipped" % spin_id)

        # If the test evaluates to True, then perform the selection action.
        if test:
            # Select/deselect the spin.
            sel_func(spin_id=spin_id)

            # Assign the spin_id to the list, for printing.
            spin_ids.append(spin_id)

    # Summary print out.
    if verbose:
        if len(spin_ids) > 0:
            subsection(file=sys.stdout, text="For all of the S/N comparison tests, the following spin IDs were %s" % text_sel, prespace=1, postspace=0)
            print(spin_ids)


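# Editorial illustration, not part of the original module:  a minimal sketch of
# deselecting weak spins, assuming signal_noise_ratio() has already been run.
def _example_sn_ratio_deselection():
    """Deselect spins whose S/N ratio falls below a hypothetical cut-off."""

    # Deselect a spin if any of its spectra has S/N < 10.
    sn_ratio_deselection(ratio=10.0, operation='<', all_sn=False)

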
def sn_ratio_selection(ratio=10.0, operation='>', all_sn=True, verbose=True):
    """Use the select.spin user function on spins with a signal to noise ratio higher or lower than the given ratio, where the operation argument determines the comparison.

    @keyword ratio:     The ratio to compare to.
    @type ratio:        float
    @keyword operation: The comparison operation by which to select the spins, applied as operation(sn_ratio, ratio).  It can be one of: '<', '<=', '>', '>=', '==', '!='.
    @type operation:    str
    @keyword all_sn:    A flag specifying if all the signal to noise ratios per spin should match the comparison operator, or if just a single comparison match is enough.
    @type all_sn:       bool
    @keyword verbose:   A flag which if True will print additional information out.
    @type verbose:      bool
    """

    # Forward the call.
    sn_ratio_deselection(ratio=ratio, operation=operation, all_sn=all_sn, select=True, verbose=verbose)