pipe_control.spectrum

1 ############################################################################### 2 # # 3 # Copyright (C) 2004-2014 Edward d'Auvergne # 4 # Copyright (C) 2008 Sebastien Morin # 5 # Copyright (C) 2013 Troels E. Linnet # 6 # # 7 # This file is part of the program relax (http://www.nmr-relax.com). # 8 # # 9 # This program is free software: you can redistribute it and/or modify # 10 # it under the terms of the GNU General Public License as published by # 11 # the Free Software Foundation, either version 3 of the License, or # 12 # (at your option) any later version. # 13 # # 14 # This program is distributed in the hope that it will be useful, # 15 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 # GNU General Public License for more details. # 18 # # 19 # You should have received a copy of the GNU General Public License # 20 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 21 # # 22 ############################################################################### 23 24 # Module docstring. 25 """Module containing functions for the handling of peak intensities.""" 26 27 28 # Python module imports. 29 from math import sqrt 30 import sys 31 from warnings import warn 32 33 # relax module imports. 34 from lib.errors import RelaxError, RelaxImplementError, RelaxNoSpectraError 35 from lib.io import write_data 36 from lib.spectrum.peak_list import read_peak_list 37 from lib.statistics import std 38 from lib.warnings import RelaxWarning, RelaxNoSpinWarning 39 from pipe_control import pipes 40 from pipe_control.mol_res_spin import check_mol_res_spin_data, create_spin, generate_spin_id_unique, return_spin, spin_loop 41 42

43 -def __errors_height_no_repl():

44 """Calculate the errors for peak heights when no spectra are replicated.""" 45 46 # Loop over the spins and set the error to the RMSD of the base plane noise. 47 for spin, spin_id in spin_loop(return_id=True): 48 # Skip deselected spins. 49 if not spin.select: 50 continue 51 52 # Skip spins missing intensity data. 53 if not hasattr(spin, 'intensities'): 54 continue 55 56 # Test if the RMSD has been set. 57 if not hasattr(spin, 'baseplane_rmsd'): 58 raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id) 59 60 # Set the error to the RMSD. 61 spin.intensity_err = spin.baseplane_rmsd

62 63

64 -def __errors_repl(subset=None, verbosity=0):

65 """Calculate the errors for peak intensities from replicated spectra. 66 67 @keyword subset: The list of spectrum ID strings to restrict the analysis to. 68 @type subset: list of str 69 @keyword verbosity: The amount of information to print. The higher the value, the greater the verbosity. 70 @type verbosity: int 71 """ 72 73 # Replicated spectra. 74 repl = replicated_flags() 75 76 # Are all spectra replicated? 77 if False in repl.values(): 78 all_repl = False 79 print("All spectra replicated: No.") 80 else: 81 all_repl = True 82 print("All spectra replicated: Yes.") 83 84 # Initialise. 85 if not hasattr(cdp, 'sigma_I'): 86 cdp.sigma_I = {} 87 if not hasattr(cdp, 'var_I'): 88 cdp.var_I = {} 89 90 # The subset. 91 subset_flag = False 92 if not subset: 93 subset_flag = True 94 subset = cdp.spectrum_ids 95 96 # Loop over the spectra. 97 for id in subset: 98 # Skip non-replicated spectra. 99 if not repl[id]: 100 continue 101 102 # Skip replicated spectra which already have been used. 103 if id in cdp.var_I and cdp.var_I[id] != 0.0: 104 continue 105 106 # The replicated spectra. 107 for j in range(len(cdp.replicates)): 108 if id in cdp.replicates[j]: 109 spectra = cdp.replicates[j] 110 111 # Number of spectra. 112 num_spectra = len(spectra) 113 114 # Printout. 115 print("\nReplicated spectra: " + repr(spectra)) 116 if verbosity: 117 print("%-20s%-20s" % ("Spin_ID", "SD")) 118 119 # Calculate the mean value. 120 count = 0 121 for spin, spin_id in spin_loop(return_id=True): 122 # Skip deselected spins. 123 if not spin.select: 124 continue 125 126 # Skip and deselect spins which have no data. 127 if not hasattr(spin, 'intensities'): 128 spin.select = False 129 continue 130 131 # Missing data. 132 missing = False 133 for j in range(num_spectra): 134 if not spectra[j] in spin.intensities: 135 missing = True 136 if missing: 137 continue 138 139 # The peak intensities. 140 values = [] 141 for j in range(num_spectra): 142 values.append(spin.intensities[spectra[j]]) 143 144 # The standard deviation. 145 sd = std(values=values, dof=1) 146 147 # Printout. 148 if verbosity: 149 print("%-20s%-20s" % (spin_id, sd)) 150 151 # Sum of variances (for average). 152 if not id in cdp.var_I: 153 cdp.var_I[id] = 0.0 154 cdp.var_I[id] = cdp.var_I[id] + sd**2 155 count = count + 1 156 157 # No data catch. 158 if not count: 159 raise RelaxError("No data is present, unable to calculate errors from replicated spectra.") 160 161 # Average variance. 162 cdp.var_I[id] = cdp.var_I[id] / float(count) 163 164 # Set all spectra variances. 165 for j in range(num_spectra): 166 cdp.var_I[spectra[j]] = cdp.var_I[id] 167 168 # Print out. 169 print("Standard deviation: %s" % sqrt(cdp.var_I[id])) 170 171 172 # Average across all spectra if there are time points with a single spectrum. 173 if not all_repl: 174 # Print out. 175 if subset_flag: 176 print("\nVariance averaging over the spectra subset.") 177 else: 178 print("\nVariance averaging over all spectra.") 179 180 # Initialise. 181 var_I = 0.0 182 num_dups = 0 183 184 # Loop over all time points. 185 for id in cdp.var_I.keys(): 186 # Single spectrum (or extraordinarily accurate NMR spectra!). 187 if cdp.var_I[id] == 0.0: 188 continue 189 190 # Sum and count. 191 var_I = var_I + cdp.var_I[id] 192 num_dups = num_dups + 1 193 194 # Average value. 195 var_I = var_I / float(num_dups) 196 197 # Assign the average value to all time points. 198 for id in subset: 199 cdp.var_I[id] = var_I 200 201 # Print out. 202 print("Standard deviation for all spins: " + repr(sqrt(var_I))) 203 204 # Loop over the spectra. 205 for id in cdp.var_I.keys(): 206 # Create the standard deviation data structure. 207 cdp.sigma_I[id] = sqrt(cdp.var_I[id]) 208 209 # Set the spin specific errors. 210 for spin in spin_loop(): 211 # Skip deselected spins. 212 if not spin.select: 213 continue 214 215 # Set the error. 216 spin.intensity_err = cdp.sigma_I

217 218

219 -def __errors_volume_no_repl(subset=None):

220 """Calculate the errors for peak volumes when no spectra are replicated.""" 221 222 # Loop over the spins and set the error to the RMSD of the base plane noise. 223 for spin, spin_id in spin_loop(return_id=True): 224 # Skip deselected spins. 225 if not spin.select: 226 continue 227 228 # Skip spins missing intensity data. 229 if not hasattr(spin, 'intensities'): 230 continue 231 232 # Test if the RMSD has been set. 233 if not hasattr(spin, 'baseplane_rmsd'): 234 raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id) 235 236 # Test that the total number of points have been set. 237 if not hasattr(spin, 'N'): 238 raise RelaxError("The total number of points used in the volume integration has not been specified for spin '%s'." % spin_id) 239 240 # Set the error to the RMSD multiplied by the square root of the total number of points. 241 for key in spin.intensity.keys(): 242 spin.intensity_err[key] = spin.baseplane_rmsd[key] * sqrt(spin.N)

243 244

245 -def add_spectrum_id(spectrum_id=None):

246 """Add the spectrum ID to the data store. 247 248 @keyword spectrum_id: The spectrum ID string. 249 @type spectrum_id: str 250 """ 251 252 # Initialise the structure, if needed. 253 if not hasattr(cdp, 'spectrum_ids'): 254 cdp.spectrum_ids = [] 255 256 # The ID already exists. 257 if spectrum_id in cdp.spectrum_ids: 258 return 259 260 # Add the ID. 261 cdp.spectrum_ids.append(spectrum_id)

262 263

264 -def baseplane_rmsd(error=0.0, spectrum_id=None, spin_id=None):

265 """Set the peak intensity errors, as defined as the baseplane RMSD. 266 267 @param error: The peak intensity error value defined as the RMSD of the base plane 268 noise. 269 @type error: float 270 @keyword spectrum_id: The spectrum id. 271 @type spectrum_id: str 272 @param spin_id: The spin identification string. 273 @type spin_id: str 274 """ 275 276 # Data checks. 277 pipes.test() 278 check_mol_res_spin_data() 279 check_spectrum_id(spectrum_id) 280 281 # The scaling by NC_proc. 282 if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc: 283 scale = 1.0 / 2**cdp.ncproc[spectrum_id] 284 else: 285 scale = 1.0 286 287 # Loop over the spins. 288 for spin in spin_loop(spin_id): 289 # Skip deselected spins. 290 if not spin.select: 291 continue 292 293 # Initialise or update the baseplane_rmsd data structure as necessary. 294 if not hasattr(spin, 'baseplane_rmsd'): 295 spin.baseplane_rmsd = {} 296 297 # Set the error. 298 spin.baseplane_rmsd[spectrum_id] = float(error) * scale

299 300

301 -def check_spectrum_id(id):

302 """Check that the give spectrum ID exists. 303 304 @param id: The spectrum ID to check for. 305 @type id: str 306 @raises RelaxNoSpectraError: If the ID does not exist. 307 """ 308 309 # Check that the spectrum ID structure exists. 310 if not hasattr(cdp, 'spectrum_ids'): 311 raise RelaxNoSpectraError(id) 312 313 # Test if the spectrum ID exists. 314 if id not in cdp.spectrum_ids: 315 raise RelaxNoSpectraError(id)

316 317

318 -def delete(spectrum_id=None):

319 """Delete spectral data corresponding to the spectrum ID. 320 321 @keyword spectrum_id: The spectrum ID string. 322 @type spectrum_id: str 323 """ 324 325 # Data checks. 326 pipes.test() 327 check_mol_res_spin_data() 328 check_spectrum_id(spectrum_id) 329 330 # Remove the ID. 331 cdp.spectrum_ids.pop(cdp.spectrum_ids.index(spectrum_id)) 332 333 # The ncproc parameter. 334 if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc: 335 del cdp.ncproc[spectrum_id] 336 337 # Replicates. 338 if hasattr(cdp, 'replicates'): 339 # Loop over the replicates. 340 for i in range(len(cdp.replicates)): 341 # The spectrum is replicated. 342 if spectrum_id in cdp.replicates[i]: 343 # Duplicate. 344 if len(cdp.replicates[i]) == 2: 345 cdp.replicates.pop(i) 346 347 # More than two spectra: 348 else: 349 cdp.replicates[i].pop(cdp.replicates[i].index(spectrum_id)) 350 351 # No need to check further. 352 break 353 354 # Errors. 355 if hasattr(cdp, 'sigma_I') and spectrum_id in cdp.sigma_I: 356 del cdp.sigma_I[spectrum_id] 357 if hasattr(cdp, 'var_I') and spectrum_id in cdp.var_I: 358 del cdp.var_I[spectrum_id] 359 360 # Loop over the spins. 361 for spin in spin_loop(): 362 # Intensity data. 363 if hasattr(spin, 'intensities') and spectrum_id in spin.intensities: 364 del spin.intensities[spectrum_id]

365 366

367 -def error_analysis(subset=None):

368 """Determine the peak intensity standard deviation. 369 370 @keyword subset: The list of spectrum ID strings to restrict the analysis to. 371 @type subset: list of str 372 """ 373 374 # Tests. 375 pipes.test() 376 check_mol_res_spin_data() 377 378 # Test if spectra have been loaded. 379 if not hasattr(cdp, 'spectrum_ids'): 380 raise RelaxError("Error analysis is not possible, no spectra have been loaded.") 381 382 # Check the IDs. 383 if subset: 384 for id in subset: 385 if id not in cdp.spectrum_ids: 386 raise RelaxError("The spectrum ID '%s' has not been loaded into relax." % id) 387 388 # Peak height category. 389 if cdp.int_method == 'height': 390 # Print out. 391 print("Intensity measure: Peak heights.") 392 393 # Do we have replicated spectra? 394 if hasattr(cdp, 'replicates'): 395 # Print out. 396 print("Replicated spectra: Yes.") 397 398 # Set the errors. 399 __errors_repl(subset=subset) 400 401 # No replicated spectra. 402 else: 403 # Print out. 404 print("Replicated spectra: No.") 405 if subset: 406 print("Spectra ID subset ignored.") 407 408 # Set the errors. 409 __errors_height_no_repl() 410 411 # Peak volume category. 412 if cdp.int_method == 'point sum': 413 # Print out. 414 print("Intensity measure: Peak volumes.") 415 416 # Do we have replicated spectra? 417 if hasattr(cdp, 'replicates'): 418 # Print out. 419 print("Replicated spectra: Yes.") 420 421 # Set the errors. 422 __errors_repl(subset=subset) 423 424 # No replicated spectra. 425 else: 426 # Print out. 427 print("Replicated spectra: No.") 428 429 # No implemented. 430 raise RelaxImplementError 431 432 # Set the errors. 433 __errors_vol_no_repl()

434 435

436 -def get_ids():

437 """Return a list of all spectrum IDs. 438 439 @return: The list of spectrum IDs. 440 @rtype: list of str 441 """ 442 443 # No IDs. 444 if not hasattr(cdp, 'spectrum_ids'): 445 return [] 446 447 # Return the IDs. 448 return cdp.spectrum_ids

449 450

451 -def integration_points(N=0, spectrum_id=None, spin_id=None):

452 """Set the number of integration points for the given spectrum. 453 454 @param N: The number of integration points. 455 @type N: int 456 @keyword spectrum_id: The spectrum ID string. 457 @type spectrum_id: str 458 @keyword spin_id: The spin ID string used to restrict the value to. 459 @type spin_id: None or str 460 """ 461 462 raise RelaxImplementError

463 464

465 -def read(file=None, dir=None, spectrum_id=None, dim=1, int_col=None, int_method=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, ncproc=None, verbose=True):

466 """Read the peak intensity data. 467 468 @keyword file: The name of the file(s) containing the peak intensities. 469 @type file: str or list of str 470 @keyword dir: The directory where the file is located. 471 @type dir: str 472 @keyword spectrum_id: The spectrum identification string. 473 @type spectrum_id: str or list of str 474 @keyword dim: The dimension of the peak list to associate the data with. 475 @type dim: int 476 @keyword int_col: The column containing the peak intensity data (used by the generic intensity file format). 477 @type int_col: int or list of int 478 @keyword int_method: The integration method, one of 'height', 'point sum' or 'other'. 479 @type int_method: str 480 @keyword spin_id_col: The column containing the spin ID strings (used by the generic intensity file format). If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none. 481 @type spin_id_col: int or None 482 @keyword mol_name_col: The column containing the molecule name information (used by the generic intensity file format). If supplied, spin_id_col must be None. 483 @type mol_name_col: int or None 484 @keyword res_name_col: The column containing the residue name information (used by the generic intensity file format). If supplied, spin_id_col must be None. 485 @type res_name_col: int or None 486 @keyword res_num_col: The column containing the residue number information (used by the generic intensity file format). If supplied, spin_id_col must be None. 487 @type res_num_col: int or None 488 @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format). If supplied, spin_id_col must be None. 489 @type spin_name_col: int or None 490 @keyword spin_num_col: The column containing the spin number information (used by the generic intensity file format). If supplied, spin_id_col must be None. 491 @type spin_num_col: int or None 492 @keyword sep: The column separator which, if None, defaults to whitespace. 493 @type sep: str or None 494 @keyword spin_id: The spin ID string used to restrict data loading to a subset of all spins. If 'auto' is provided for a NMRPipe seriesTab formatted file, the ID's are auto generated in form of Z_Ai. 495 @type spin_id: None or str 496 @keyword ncproc: The Bruker ncproc binary intensity scaling factor. 497 @type ncproc: int or None 498 @keyword verbose: A flag which if True will cause all relaxation data loaded to be printed out. 499 @type verbose: bool 500 """ 501 502 # Data checks. 503 pipes.test() 504 check_mol_res_spin_data() 505 506 # Check the file name. 507 if file == None: 508 raise RelaxError("The file name must be supplied.") 509 510 # Test that the intensity measures are identical. 511 if hasattr(cdp, 'int_method') and cdp.int_method != int_method: 512 raise RelaxError("The '%s' measure of peak intensities does not match '%s' of the previously loaded spectra." % (int_method, cdp.int_method)) 513 514 # Multiple ID flags. 515 flag_multi = False 516 flag_multi_file = False 517 flag_multi_col = False 518 if isinstance(spectrum_id, list) or spectrum_id == 'auto': 519 flag_multi = True 520 if isinstance(file, list): 521 flag_multi_file = True 522 if isinstance(int_col, list) or spectrum_id == 'auto': 523 flag_multi_col = True 524 525 # List argument checks. 526 if flag_multi: 527 # Too many lists. 528 if flag_multi_file and flag_multi_col: 529 raise RelaxError("If a list of spectrum IDs is supplied, the file names and intensity column arguments cannot both be lists.") 530 531 # Not enough lists. 532 if not flag_multi_file and not flag_multi_col: 533 raise RelaxError("If a list of spectrum IDs is supplied, either the file name or intensity column arguments must be a list of equal length.") 534 535 # List lengths for multiple files. 536 if flag_multi_file and len(spectrum_id) != len(file): 537 raise RelaxError("The file list %s and spectrum ID list %s do not have the same number of elements." % (file, spectrum_id)) 538 539 # List lengths for multiple intensity columns. 540 if flag_multi_col and spectrum_id != 'auto' and len(spectrum_id) != len(int_col): 541 raise RelaxError("The spectrum ID list %s and intensity column list %s do not have the same number of elements." % (spectrum_id, int_col)) 542 543 # More list argument checks (when only one spectrum ID is supplied). 544 else: 545 # Multiple files. 546 if flag_multi_file: 547 raise RelaxError("If multiple files are supplied, then multiple spectrum IDs must also be supplied.") 548 549 # Multiple intensity columns. 550 if flag_multi_col: 551 raise RelaxError("If multiple intensity columns are supplied, then multiple spectrum IDs must also be supplied.") 552 553 # Intensity column checks. 554 if spectrum_id != 'auto' and not flag_multi and flag_multi_col: 555 raise RelaxError("If a list of intensity columns is supplied, the spectrum ID argument must also be a list of equal length.") 556 557 # Check the intensity measure. 558 if not int_method in ['height', 'point sum', 'other']: 559 raise RelaxError("The intensity measure '%s' is not one of 'height', 'point sum', 'other'." % int_method) 560 561 # Set the peak intensity measure. 562 cdp.int_method = int_method 563 564 # Convert the file argument to a list if necessary. 565 if not isinstance(file, list): 566 file = [file] 567 568 # Loop over all files. 569 for file_index in range(len(file)): 570 # Read the peak list data. 571 peak_list = read_peak_list(file=file[file_index], dir=dir, int_col=int_col, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id) 572 573 # Automatic spectrum IDs. 574 if spectrum_id == 'auto': 575 spectrum_id = peak_list[0].intensity_name 576 577 # Loop over the assignments. 578 data = [] 579 data_flag = False 580 for assign in peak_list: 581 # Generate the spin_id. 582 spin_id = generate_spin_id_unique(res_num=assign.res_nums[dim-1], spin_name=assign.spin_names[dim-1]) 583 584 # Convert the intensity data to a list if needed. 585 intensity = assign.intensity 586 if not isinstance(intensity, list): 587 intensity = [intensity] 588 589 # Loop over the intensity data. 590 for int_index in range(len(intensity)): 591 # Sanity check. 592 if intensity[int_index] == 0.0: 593 warn(RelaxWarning("A peak intensity of zero has been encountered for the spin '%s' - this could be fatal later on." % spin_id)) 594 595 # Get the spin container. 596 spin = return_spin(spin_id) 597 if not spin: 598 warn(RelaxNoSpinWarning(spin_id)) 599 continue 600 601 # Skip deselected spins. 602 if not spin.select: 603 continue 604 605 # Initialise. 606 if not hasattr(spin, 'intensities'): 607 spin.intensities = {} 608 609 # Intensity scaling. 610 if ncproc != None: 611 intensity[int_index] = intensity[int_index] / float(2**ncproc) 612 613 # Add the data. 614 if flag_multi_file: 615 id = spectrum_id[file_index] 616 elif flag_multi_col: 617 id = spectrum_id[int_index] 618 else: 619 id = spectrum_id 620 spin.intensities[id] = intensity[int_index] 621 622 # Switch the flag. 623 data_flag = True 624 625 # Append the data for printing out. 626 data.append([spin_id, repr(intensity[int_index])]) 627 628 # Add the spectrum id (and ncproc) to the relax data store. 629 spectrum_ids = spectrum_id 630 if isinstance(spectrum_id, str): 631 spectrum_ids = [spectrum_id] 632 if ncproc != None and not hasattr(cdp, 'ncproc'): 633 cdp.ncproc = {} 634 for i in range(len(spectrum_ids)): 635 add_spectrum_id(spectrum_ids[i]) 636 if ncproc != None: 637 cdp.ncproc[spectrum_ids[i]] = ncproc 638 639 # No data. 640 if not data_flag: 641 # Delete all the data. 642 delete(spectrum_id) 643 644 # Raise the error. 645 raise RelaxError("No data could be loaded from the peak list") 646 647 # Printout. 648 if verbose: 649 print("\nThe following intensities have been loaded into the relax data store:\n") 650 write_data(out=sys.stdout, headings=["Spin_ID", "Intensity"], data=data) 651 print('')

652 653

654 -def read_spins(file=None, dir=None, dim=1, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, verbose=True):

655 """Read the peak intensity data. 656 657 @keyword file: The name of the file containing the peak intensities. 658 @type file: str 659 @keyword dir: The directory where the file is located. 660 @type dir: str 661 @keyword dim: The dimension of the peak list to associate the data with. 662 @type dim: int 663 @keyword spin_id_col: The column containing the spin ID strings (used by the generic intensity file format). If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none. 664 @type spin_id_col: int or None 665 @keyword mol_name_col: The column containing the molecule name information (used by the generic intensity file format). If supplied, spin_id_col must be None. 666 @type mol_name_col: int or None 667 @keyword res_name_col: The column containing the residue name information (used by the generic intensity file format). If supplied, spin_id_col must be None. 668 @type res_name_col: int or None 669 @keyword res_num_col: The column containing the residue number information (used by the generic intensity file format). If supplied, spin_id_col must be None. 670 @type res_num_col: int or None 671 @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format). If supplied, spin_id_col must be None. 672 @type spin_name_col: int or None 673 @keyword spin_num_col: The column containing the spin number information (used by the generic intensity file format). If supplied, spin_id_col must be None. 674 @type spin_num_col: int or None 675 @keyword sep: The column separator which, if None, defaults to whitespace. 676 @type sep: str or None 677 @keyword spin_id: The spin ID string used to restrict data loading to a subset of all spins. If 'auto' is provided for a NMRPipe seriesTab formatted file, the ID's are auto generated in form of Z_Ai. 678 @type spin_id: None or str 679 @keyword verbose: A flag which if True will cause all relaxation data loaded to be printed out. 680 @type verbose: bool 681 """ 682 683 # Data checks. 684 pipes.test() 685 686 # Check the file name. 687 if file == None: 688 raise RelaxError("The file name must be supplied.") 689 690 # Read the peak list data. 691 peak_list = read_peak_list(file=file, dir=dir, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id) 692 693 # Loop over the peak_list. 694 created_spins = [] 695 for assign in peak_list: 696 mol_name = assign.mol_names[dim-1] 697 res_num = assign.res_nums[dim-1] 698 res_name = assign.res_names[dim-1] 699 spin_num = assign.spin_nums[dim-1] 700 spin_name = assign.spin_names[dim-1] 701 702 # Generate the spin_id. 703 spin_id = generate_spin_id_unique(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_name=spin_name) 704 705 # Check if the spin already exist. 706 if return_spin(spin_id=spin_id) == None: 707 # Create the spin if not exist. 708 create_spin(spin_num=spin_num, spin_name=spin_name, res_num=res_num, res_name=res_name, mol_name=mol_name) 709 710 # Test that data exists. 711 check_mol_res_spin_data()

712

713 -def replicated(spectrum_ids=None):

714 """Set which spectra are replicates. 715 716 @keyword spectrum_ids: A list of spectrum ids corresponding to replicated spectra. 717 @type spectrum_ids: list of str 718 """ 719 720 # Test if the current pipe exists. 721 pipes.test() 722 723 # Test for None. 724 if spectrum_ids == None: 725 warn(RelaxWarning("The spectrum ID list cannot be None.")) 726 return 727 728 # Test if spectra have been loaded. 729 if not hasattr(cdp, 'spectrum_ids'): 730 raise RelaxError("No spectra have been loaded therefore replicates cannot be specified.") 731 732 # Test the spectrum id strings. 733 for spectrum_id in spectrum_ids: 734 check_spectrum_id(spectrum_id) 735 736 # Test for more than one element. 737 if len(spectrum_ids) == 1: 738 warn(RelaxWarning("The number of spectrum IDs in the list %s must be greater than one." % spectrum_ids)) 739 return 740 741 # Initialise. 742 if not hasattr(cdp, 'replicates'): 743 cdp.replicates = [] 744 745 # Check if the spectrum IDs are already in the list. 746 found = False 747 for i in range(len(cdp.replicates)): 748 # Loop over all elements of the first. 749 for j in range(len(spectrum_ids)): 750 if spectrum_ids[j] in cdp.replicates[i]: 751 found = True 752 753 # One of the spectrum IDs already have a replicate specified. 754 if found: 755 # Add the remaining replicates to the list and quit this function. 756 for j in range(len(spectrum_ids)): 757 if spectrum_ids[j] not in cdp.replicates[i]: 758 cdp.replicates[i].append(spectrum_ids[j]) 759 760 # Nothing more to do. 761 return 762 763 # A new set of replicates. 764 cdp.replicates.append(spectrum_ids)

765 766

767 -def replicated_flags():

768 """Create and return a dictionary of flags of whether the spectrum is replicated or not. 769 770 @return: The dictionary of flags of whether the spectrum is replicated or not. 771 @rtype: dict of bool 772 """ 773 774 # Initialise all IDs to false. 775 repl = {} 776 for id in cdp.spectrum_ids: 777 repl[id] = False 778 779 # Loop over the replicates. 780 for i in range(len(cdp.replicates)): 781 for j in range(len(cdp.replicates[i])): 782 repl[cdp.replicates[i][j]] = True 783 784 # Return the dictionary. 785 return repl

786 787

788 -def replicated_ids(spectrum_id):

789 """Create and return a list of spectra ID which are replicates of the given ID. 790 791 @param spectrum_id: The spectrum ID to find all the replicates of. 792 @type spectrum_id: str 793 @return: The list of spectrum IDs which are replicates of spectrum_id. 794 @rtype: list of str 795 """ 796 797 # Initialise the ID list. 798 repl = [] 799 800 # Loop over the replicate lists. 801 for i in range(len(cdp.replicates)): 802 # The spectrum ID is in the list. 803 if spectrum_id in cdp.replicates[i]: 804 # Loop over the inner list. 805 for j in range(len(cdp.replicates[i])): 806 # Spectrum ID match. 807 if spectrum_id == cdp.replicates[i][j]: 808 continue 809 810 # Append the replicated ID. 811 repl.append(cdp.replicates[i][j]) 812 813 # Nothing. 814 if repl == []: 815 return repl 816 817 # Sort the list. 818 repl.sort() 819 820 # Remove duplicates (backward). 821 id = repl[-1] 822 for i in range(len(repl)-2, -1, -1): 823 # Duplicate. 824 if id == repl[i]: 825 del repl[i] 826 827 # Unique. 828 else: 829 id = repl[i] 830 831 # Return the list. 832 return repl

833

Source Code for Module pipe_control.spectrum