Package generic_fns :: Module spectrum
[hide private]
[frames] | [no frames]

Source Code for Module generic_fns.spectrum

   1  ############################################################################### 
   2  #                                                                             # 
   3  # Copyright (C) 2004-2012 Edward d'Auvergne                                   # 
   4  # Copyright (C) 2008 Sebastien Morin                                          # 
   5  #                                                                             # 
   6  # This file is part of the program relax.                                     # 
   7  #                                                                             # 
   8  # relax is free software; you can redistribute it and/or modify               # 
   9  # it under the terms of the GNU General Public License as published by        # 
  10  # the Free Software Foundation; either version 2 of the License, or           # 
  11  # (at your option) any later version.                                         # 
  12  #                                                                             # 
  13  # relax is distributed in the hope that it will be useful,                    # 
  14  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
  15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
  16  # GNU General Public License for more details.                                # 
  17  #                                                                             # 
  18  # You should have received a copy of the GNU General Public License           # 
  19  # along with relax; if not, write to the Free Software                        # 
  20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA   # 
  21  #                                                                             # 
  22  ############################################################################### 
  23   
  24  # Module docstring. 
  25  """Module containing functions for the handling of peak intensities.""" 
  26   
  27   
  28  # Python module imports. 
  29  from math import sqrt 
  30  from re import split 
  31  import string 
  32  from warnings import warn 
  33   
  34  # relax module imports. 
  35  from generic_fns.mol_res_spin import exists_mol_res_spin_data, generate_spin_id, generate_spin_id_data_array, return_spin, spin_loop 
  36  from generic_fns import pipes 
  37  from relax_errors import RelaxArgNotNoneError, RelaxError, RelaxImplementError, RelaxNoSequenceError, RelaxNoSpectraError 
  38  from relax_io import extract_data, read_spin_data, strip 
  39  from relax_warnings import RelaxWarning, RelaxNoSpinWarning 
  40   
  41   
  42   
class Bruker_import:
    """Importer for Bruker Protein Dynamic Center (PDC) files.

    The PDC output file is read, the NOE/T1/T2 entries are collected, and a Sparky-like
    tab-separated dummy file is written (or a DummyFileObject is created when no output file
    name is given).  The resulting file name or file object is stored in self.value.

    NOTE(review):  'sep' (os.path separator) and 'DummyFileObject' are used below but are not
    imported in this module's visible import block — confirm they are provided elsewhere.
    """

    def __init__(self, dir=None, exp_type=None, file=None, UI='prompt', output_file=None):
        """Function to import Bruker Protein Dynamic Center (PDC) files.

        @param dir:             The directory to save the new file in.
        @type dir:              str
        @param file:            The Bruker PDC output file.
        @type file:             str
        @param exp_type:        The type of experiment, e.g. NOE, T1 or T2
        @type exp_type:         str
        @param UI:              The relax user interface (either 'prompt' or 'GUI').
        @type UI:               str
        @param output_file:     The file to save the imported list.
        @type output_file:      str
        """

        # Create experiment type
        self.exp_type = exp_type

        # The output file
        self.output_file = output_file
        self.dir = dir

        # The user interface.
        self.UI = UI

        # Read the file (a file name is mandatory).
        if file:
            self.read_file(file)
        else:
            raise RelaxError('No file selected.')

        # Collect the entries for the file.
        self.collect_entries()

        # Create dummy file.
        self.create_file()


    def __str__(self):
        """Return the stored result (file name or dummy file object) as a string."""
        return str(self.value)


    def collect_entries(self):
        """Function to collect entries for the NOE/R1/R2 relax dummy file.

        Parses self.entries (the tab-split lines of the PDC file) and fills self.file_entries
        with one row per peak:
        [ Mol_name, Res_num, Res_name, Spin_num, Spin_name, value, error ].
        T1/T2 values are converted to rates (R1/R2) by inversion.
        """

        # storage for file entries
        # [ [Mol_name, Res_num, Res_name, Spin_num, value, error], ... ]
        self.file_entries = []

        # Data flag - set while inside a results SECTION.
        is_data = False

        # Spin name (default to nitrogen).
        spinname = 'N'

        # loop over line
        for line in range(0, len(self.entries)):
            # Detect the experiment
            if 'Project:' in self.entries[line][0]:
                exp_type = ''

                # NOE
                if 'Dynamic method/Hetero NOE' in self.entries[line][1]:
                    exp_type = 'NOE'
                # T1
                elif 'Dynamic method/T1' in self.entries[line][1]:
                    exp_type = 'T1'
                # T2
                elif 'Dynamic method/T2' in self.entries[line][1]:
                    exp_type = 'T2'

                # Check agreement of file and user input.
                if self.exp_type:
                    if not self.exp_type == exp_type:
                        raise RelaxError('Selected file is not a '+self.exp_type+'-file.')
                        # NOTE(review):  this return is unreachable after the raise above.
                        return

                # Feedback
                print("Reading BRUKER PDC "+exp_type+" file.\n")

            # The entries - toggle the data flag at SECTION boundaries.
            if 'SECTION:' in self.entries[line][0]:
                # NOE/T1/T2 results
                if 'results' in self.entries[line][1]:
                    is_data = True
                    continue

                # Other entries
                else:
                    is_data = False

            # Spin name from the labelling scheme line.
            if 'Labelling:' in self.entries[line][0]:
                # 15N
                if 'N' in self.entries[line][0]:
                    spinname = 'N'

                # 13C
                # NOTE(review):  both tests inspect column 0 (the 'Labelling:' cell); if the
                # isotope is in another column this never matches — confirm against a real file.
                if 'C' in self.entries[line][0]:
                    spinname = 'C'

            # Collect NOE/T1/T2 values
            if is_data:
                # Exclude header or blank line.
                if self.entries[line][0] in ['Peak name', '']:
                    continue

                # Label, with spaces and square brackets removed (e.g. '[Gly 4]' -> 'Gly4').
                label_tmp = self.entries[line][0]
                label_tmp = label_tmp.replace(' ', '')
                label_tmp = label_tmp.replace('[', '')
                label_tmp = label_tmp.replace(']', '')

                # Detect residue number
                resnum = label_tmp
                # find start of number (first digit in the label)
                start = 0
                while resnum[start].isdigit()==False: start = start+1

                # find end of number
                end = start
                try:
                    while resnum[end].isdigit()==True: end = end+1
                # Label ends with number (IndexError when digits run to the end of the string).
                except:
                    end = end

                # cut out residue number integer
                resnum = resnum[start:end]

                # Residue name (the text before the first digit).
                resname = label_tmp[0:start]

                # Spin num - the file line index is used as a surrogate spin number.
                spin_no = line

                # The value
                value_tmp = float(self.entries[line][3])

                # Convert T1/T2 to R1/R2
                if exp_type in ['T1', 'T2']:
                    value_tmp = 1.0/value_tmp

                # error
                error = float(self.entries[line][4])

                # Store file entry
                self.file_entries.append(['Bruker_PDC_'+exp_type, resnum, resname, spin_no, spinname, value_tmp, error])


    def create_file(self):
        """Function to write the file.

        Builds a tab-separated text block from self.file_entries and writes it either to the
        requested output file or to a DummyFileObject.  The destination (path or file object)
        is stored in self.value.
        """

        # The Sparky header
        text = 'Mol_name\tRes_num\tRes_name\tSpin_num\tSpin_name\tValue\tError \n'

        for line in range(0, len(self.file_entries)):
            # Build entries.
            tmp_text = ''
            for i in range(0, len(self.file_entries[line])):
                tmp_text = tmp_text + str(self.file_entries[line][i])+'\t'

            # Add entries.
            text = text+tmp_text + '\n'

        # Print entries (suppressed in the GUI).
        if not self.UI == 'GUI':
            print(text)

        # create output file
        # NOTE(review):  'sep' is not imported here — 'from os import sep' appears to be needed.
        if self.output_file:
            if self.dir:
                file = open(self.dir+sep+self.output_file, 'w')
            else:
                file = open(self.output_file, 'w')
        # create dummy file
        else:
            file = DummyFileObject()

        # Write the file
        file.write(text)
        file.close()

        # Feedback
        if self.output_file:
            if self.dir:
                print('Created BRUKER PDC file '+self.dir+sep+self.output_file)
                # The return value
                self.value = self.dir+sep+self.output_file
            else:
                print('Created BRUKER PDC file '+self.output_file)
                # The return value
                self.value = self.output_file
        else:
            print('Created BRUKER PDC file.')
            # Return the dummy file
            self.value = file


    def read_file(self, filename):
        """Function to read the file.

        Each line of the PDC file is stripped and tab-split, and the resulting lists are
        accumulated in self.entries.
        """

        # Open the file.
        file = open(filename, 'r')

        # Storage of lines.
        self.entries = []

        # Loop over line in PDC file
        for line in file:
            # Read entries in line
            entry = line
            entry = line.strip()
            entry = entry.split('\t')

            # Add entries to storage
            self.entries.append(entry)

        # close the file
        file.close()
265 266
def __errors_height_no_repl():
    """Set the peak height errors when no spectra are replicated.

    Every selected spin that possesses intensity data receives the RMSD of the base plane
    noise (previously stored on the spin as 'baseplane_rmsd') as its intensity error.

    @raises RelaxError: If a spin with intensity data has no base plane RMSD set.
    """

    # The base plane RMSD becomes the error for each suitable spin.
    for current_spin, current_id in spin_loop(return_id=True):
        # Only process selected spins.
        if not current_spin.select:
            continue

        # Nothing to do without intensity data.
        if not hasattr(current_spin, 'intensities'):
            continue

        # The RMSD is mandatory at this point.
        if not hasattr(current_spin, 'baseplane_rmsd'):
            raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % current_id)

        # The error is the RMSD itself.
        current_spin.intensity_err = current_spin.baseplane_rmsd
286 287
def __errors_repl(verbosity=0):
    """Calculate the errors for peak intensities from replicated spectra.

    For each set of replicated spectra the per-spin variance sigma^2 = sum((Xi - Xav)^2)/(n-1)
    is averaged over all spins and assigned to every spectrum ID of the set (cdp.var_I).  If
    some spectra are unreplicated, the averaged variance over all replicated sets is assigned
    to every spectrum ID.  Finally the standard deviations cdp.sigma_I are computed and stored
    on each selected spin.

    @keyword verbosity: The amount of information to print.  The higher the value, the greater the verbosity.
    @type verbosity:    int
    """

    # Replicated spectra flags (replicated_flags() is a sibling function of this module).
    repl = replicated_flags()

    # Are all spectra replicated?
    if False in repl.values():
        all_repl = False
        print("All spectra replicated: No.")
    else:
        all_repl = True
        print("All spectra replicated: Yes.")

    # Initialise the variance and standard deviation containers (keyed by spectrum ID).
    cdp.sigma_I = {}
    cdp.var_I = {}

    # Loop over the spectra.
    for id in cdp.spectrum_ids:
        # Skip non-replicated spectra.
        if not repl[id]:
            continue

        # Skip replicated spectra which already have been used.
        if cdp.var_I.has_key(id) and cdp.var_I[id] != 0.0:
            continue

        # The replicated spectra:  find the replicate set containing this ID.
        for j in xrange(len(cdp.replicates)):
            if id in cdp.replicates[j]:
                spectra = cdp.replicates[j]

        # Number of spectra.
        num_spectra = len(spectra)

        # Print out.
        print(("\nReplicated spectra:  " + repr(spectra)))
        if verbosity:
            print(("%-5s%-6s%-20s%-20s" % ("Num", "Name", "Average", "SD")))

        # Calculate the mean value.
        count = 0
        for spin in spin_loop():
            # Skip deselected spins.
            if not spin.select:
                continue

            # Skip and deselect spins which have no data.
            if not hasattr(spin, 'intensities'):
                spin.select = False
                continue

            # Missing data:  the spin must have a value for every spectrum of the set.
            missing = False
            for j in xrange(num_spectra):
                if not spin.intensities.has_key(spectra[j]):
                    missing = True
            if missing:
                continue

            # Average intensity.
            ave_intensity = 0.0
            for j in xrange(num_spectra):
                ave_intensity = ave_intensity + spin.intensities[spectra[j]]
            ave_intensity = ave_intensity / num_spectra

            # Sum of squared errors.
            SSE = 0.0
            for j in xrange(num_spectra):
                SSE = SSE + (spin.intensities[spectra[j]] - ave_intensity) ** 2

            # Variance.
            #
            #                   1
            #       sigma^2 = ----- * sum({Xi - Xav}^2)]
            #                 n - 1
            #
            var_I = 1.0 / (num_spectra - 1.0) * SSE

            # Print out.
            if verbosity:
                print(("%-5i%-6s%-20s%-20s" % (spin.num, spin.name, repr(ave_intensity), repr(var_I))))

            # Sum of variances (for average).
            if not cdp.var_I.has_key(id):
                cdp.var_I[id] = 0.0
            cdp.var_I[id] = cdp.var_I[id] + var_I
            count = count + 1

        # No data catch.
        if not count:
            raise RelaxError("No data is present, unable to calculate errors from replicated spectra.")

        # Average variance over all contributing spins.
        cdp.var_I[id] = cdp.var_I[id] / float(count)

        # Set all spectra variances of this replicate set to the common value.
        for j in range(num_spectra):
            cdp.var_I[spectra[j]] = cdp.var_I[id]

        # Print out.
        print(("Standard deviation:  %s" % sqrt(cdp.var_I[id])))


    # Average across all spectra if there are time points with a single spectrum.
    if not all_repl:
        # Print out.
        print("\nVariance averaging over all spectra.")

        # Initialise.
        var_I = 0.0
        num_dups = 0

        # Loop over all time points.
        for id in cdp.var_I.keys():
            # Single spectrum (or extraordinarily accurate NMR spectra!).
            if cdp.var_I[id] == 0.0:
                continue

            # Sum and count.
            var_I = var_I + cdp.var_I[id]
            num_dups = num_dups + 1

        # Average value.
        var_I = var_I / float(num_dups)

        # Assign the average value to all time points.
        for id in cdp.spectrum_ids:
            cdp.var_I[id] = var_I

        # Print out.
        print(("Standard deviation for all spins: " + repr(sqrt(var_I))))

    # Loop over the spectra.
    for id in cdp.var_I.keys():
        # Create the standard deviation data structure.
        cdp.sigma_I[id] = sqrt(cdp.var_I[id])

    # Set the spin specific errors.
    for spin in spin_loop():
        # Skip deselected spins.
        if not spin.select:
            continue

        # Set the error.
        # NOTE(review):  this assigns the whole sigma_I dictionary (keyed by spectrum ID) to
        # every spin, unlike the scalar assignment in __errors_height_no_repl() — confirm
        # downstream consumers expect a dictionary here.
        spin.intensity_err = cdp.sigma_I
439 440
def __errors_volume_no_repl():
    """Calculate the errors for peak volumes when no spectra are replicated.

    For each selected spin with intensity data, the per-spectrum error is set to the base
    plane RMSD multiplied by the square root of the total number of integration points N,
    stored in the spin's 'intensity_err' dictionary keyed by spectrum ID.

    @raises RelaxError: If the base plane RMSD or the number of integration points N has not
                        been set for a spin possessing intensity data.
    """

    # Loop over the spins and set the error to the RMSD of the base plane noise.
    for spin, spin_id in spin_loop(return_id=True):
        # Skip deselected spins.
        if not spin.select:
            continue

        # Skip spins missing intensity data.
        if not hasattr(spin, 'intensities'):
            continue

        # Test if the RMSD has been set.
        if not hasattr(spin, 'baseplane_rmsd'):
            raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id)

        # Test that the total number of points have been set.
        if not hasattr(spin, 'N'):
            raise RelaxError("The total number of points used in the volume integration has not been specified for spin '%s'." % spin_id)

        # Bug fix:  initialise the error dictionary, which was previously never created.
        if not hasattr(spin, 'intensity_err'):
            spin.intensity_err = {}

        # Set the error to the RMSD multiplied by the square root of the total number of points.
        # Bug fix:  the intensity attribute is called 'intensities' everywhere else in this
        # module, not 'intensity', so the old loop raised an AttributeError.
        for key in spin.intensities.keys():
            spin.intensity_err[key] = spin.baseplane_rmsd[key] * sqrt(spin.N)
465 466
def autodetect_format(file_data):
    """Automatically detect the format of the peak list.

    The first non-empty line is inspected for the characteristic Sparky, NMRView, or XEasy
    headers; anything else is assumed to be the generic format.

    @param file_data:   The processed results file data.
    @type file_data:    list of lists of str
    @return:            The format identifier, one of 'sparky', 'nmrview', 'xeasy' or 'generic'.
    @rtype:             str
    """

    # Find the first non-empty (header) line.
    line = None
    for line in file_data:
        if line != []:
            break

    # Bug fix:  empty input (or only blank lines) previously raised a NameError/IndexError
    # below — fall back to the generic format instead.
    if not line:
        return 'generic'

    # Sparky format.
    if line[0] == 'Assignment':
        return 'sparky'

    # NMRView format.
    if line == ['label', 'dataset', 'sw', 'sf']:
        return 'nmrview'

    # XEasy format.
    if line == ['No.', 'Color', 'w1', 'w2', 'ass.', 'in', 'w1', 'ass.', 'in', 'w2', 'Volume', 'Vol.', 'Err.', 'Method', 'Comment']:
        return 'xeasy'

    # Assume a generic format.
    return 'generic'
493 494
def baseplane_rmsd(error=0.0, spectrum_id=None, spin_id=None):
    """Set the peak intensity errors, as defined as the baseplane RMSD.

    @param error:           The peak intensity error value defined as the RMSD of the base
                            plane noise.
    @type error:            float
    @keyword spectrum_id:   The spectrum id.
    @type spectrum_id:      str
    @param spin_id:         The spin identification string.
    @type spin_id:          str
    @raises RelaxError:     If the spectrum ID is unknown.
    """

    # A data pipe must exist.
    pipes.test()

    # Sequence data must be loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # The spectrum ID must be known.
    if spectrum_id not in cdp.spectrum_ids:
        raise RelaxError("The peak intensities corresponding to the spectrum id '%s' do not exist." % spectrum_id)

    # Determine the Bruker NC_proc scaling factor (unity when not applicable).
    scale = 1.0
    if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
        scale = 1.0 / 2**cdp.ncproc[spectrum_id]

    # Store the scaled error on every selected spin.
    for current_spin in spin_loop(spin_id):
        # Only selected spins are touched.
        if not current_spin.select:
            continue

        # Create the per-spectrum error structure on first use.
        if not hasattr(current_spin, 'baseplane_rmsd'):
            current_spin.baseplane_rmsd = {}

        # The scaled RMSD for this spectrum.
        current_spin.baseplane_rmsd[spectrum_id] = float(error) * scale
536 537
def delete(spectrum_id=None):
    """Delete spectral data corresponding to the spectrum ID.

    @keyword spectrum_id:       The spectrum ID string.
    @type spectrum_id:          str
    @raises RelaxNoSpectraError:    If the spectrum ID is not known.
    """

    # A data pipe must exist.
    pipes.test()

    # Sequence data must be loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # The spectrum ID must be present.
    if not hasattr(cdp, 'spectrum_ids') or spectrum_id not in cdp.spectrum_ids:
        raise RelaxNoSpectraError(spectrum_id)

    # Drop the ID from the master list.
    cdp.spectrum_ids.remove(spectrum_id)

    # The ncproc parameter.
    if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
        del cdp.ncproc[spectrum_id]

    # Replicate bookkeeping.
    if hasattr(cdp, 'replicates'):
        for index in range(len(cdp.replicates)):
            # Only the set containing the ID is touched.
            if spectrum_id not in cdp.replicates[index]:
                continue

            # A duplicate pair collapses entirely, larger sets just lose the ID.
            if len(cdp.replicates[index]) == 2:
                cdp.replicates.pop(index)
            else:
                cdp.replicates[index].remove(spectrum_id)

            # The ID can only occur in one set.
            break

    # Error structures.
    if hasattr(cdp, 'sigma_I') and spectrum_id in cdp.sigma_I:
        del cdp.sigma_I[spectrum_id]
    if hasattr(cdp, 'var_I') and spectrum_id in cdp.var_I:
        del cdp.var_I[spectrum_id]

    # Per-spin intensity data.
    for current_spin in spin_loop():
        if hasattr(current_spin, 'intensities') and spectrum_id in current_spin.intensities:
            del current_spin.intensities[spectrum_id]
591 592
def error_analysis():
    """Determine the peak intensity standard deviation.

    Dispatches to the appropriate error calculation function depending on the intensity
    measure (peak heights or peak volumes) and on whether replicated spectra are present.

    @raises RelaxError:             If no spectra have been loaded.
    @raises RelaxImplementError:    For peak volumes without replicated spectra (the volume
                                    error analysis is not yet implemented).
    """

    # Test if the current pipe exists
    pipes.test()

    # Test if the sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("Error analysis is not possible, no spectra have been loaded.")

    # Peak height category.
    if cdp.int_method == 'height':
        # Print out.
        print("Intensity measure: Peak heights.")

        # Do we have replicated spectra?
        if hasattr(cdp, 'replicates'):
            # Print out.
            print("Replicated spectra: Yes.")

            # Set the errors.
            __errors_repl()

        # No replicated spectra.
        else:
            # Print out.
            print("Replicated spectra: No.")

            # Set the errors.
            __errors_height_no_repl()

    # Peak volume category.
    if cdp.int_method == 'point sum':
        # Print out.
        print("Intensity measure: Peak volumes.")

        # Do we have replicated spectra?
        if hasattr(cdp, 'replicates'):
            # Print out.
            print("Replicated spectra: Yes.")

            # Set the errors.
            __errors_repl()

        # No replicated spectra.
        else:
            # Print out.
            print("Replicated spectra: No.")

            # Not implemented.
            # Bug fix:  the old code followed this raise with a call to the non-existent
            # __errors_vol_no_repl() (the function is named __errors_volume_no_repl); the
            # call was unreachable dead code and has been removed.
            raise RelaxImplementError
651 652
def get_ids():
    """Return a list of all spectrum IDs.

    @return:    The list of spectrum IDs, or an empty list when none have been loaded.
    @rtype:     list of str
    """

    # Hand back the stored IDs when present, otherwise an empty list.
    if hasattr(cdp, 'spectrum_ids'):
        return cdp.spectrum_ids
    return []
666 667
def integration_points(N=0, spectrum_id=None, spin_id=None):
    """Set the number of integration points for the given spectrum.

    This functionality has not been implemented yet.

    @param N:                       The number of integration points.
    @type N:                        int
    @keyword spectrum_id:           The spectrum ID string.
    @type spectrum_id:              str
    @keyword spin_id:               The spin ID string used to restrict the value to.
    @type spin_id:                  None or str
    @raises RelaxImplementError:    Always, as this feature is unimplemented.
    """

    # Unimplemented stub.
    raise RelaxImplementError
680 681
def intensity_generic(file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, sep=None, spin_id=None):
    """Return the process data from the generic peak intensity file.

    The residue number, heteronucleus and proton names, and peak intensity will be returned.


    @keyword file_data:     The data extracted from the file converted into a list of lists.
    @type file_data:        list of lists of str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic
                            intensity file format).  If supplied, the mol_name_col,
                            res_name_col, res_num_col, spin_name_col, and spin_num_col
                            arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the
                            generic intensity file format).  If supplied, spin_id_col must be
                            None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the
                            generic intensity file format).  If supplied, spin_id_col must be
                            None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the
                            generic intensity file format).  If supplied, spin_id_col must be
                            None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the
                            generic intensity file format).  If supplied, spin_id_col must be
                            None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the
                            generic intensity file format).  If supplied, spin_id_col must be
                            None.
    @type spin_num_col:     int or None
    @keyword data_col:      The column containing the peak intensities.
    @type data_col:         int
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of
                            all spins.
    @type spin_id:          None or str
    @raises RelaxError:     When the expected peak intensity is not a float.
    @return:                The extracted data as a list of lists.  The first dimension
                            corresponds to the spin.  The second dimension consists of the
                            proton name, heteronucleus name, spin ID string, and the
                            intensity value.
    @rtype:                 list of lists of str, str, str, float
    """

    # Remove comment and blank lines before processing.
    stripped_data = strip(file_data)

    # Build one entry per spin produced by the generic file reader.
    data = []
    for mol_name, res_num, res_name, spin_num, spin_name, value in read_spin_data(file_data=stripped_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, sep=sep, spin_id=spin_id):
        # Assemble the spin ID string from the available identifiers.
        current_id = generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name)

        # No proton/heteronucleus names exist in this format, and the ID doubles as the
        # original-line element.
        data.append([None, None, current_id, value, current_id])

    # Hand back the assembled data.
    return data
734 735
def intensity_nmrview(file_data=None, int_col=None):
    """Return the process data from the NMRView peak intensity file.

    The residue number, heteronucleus and proton names, and peak intensity will be returned.


    @keyword file_data: The data extracted from the file converted into a list of lists.
    @type file_data:    list of lists of str
    @keyword int_col:   The column containing the peak intensity data.  The default is 16 for intensities.  Setting the int_col argument to 15 will use the volumes (or evolumes).  For a non-standard formatted file, use a different value.
    @type int_col:      int
    @raises RelaxError: When the expected peak intensity is not a float.
    @return:            The extracted data as a list of lists.  The first dimension corresponds to the spin.  The second dimension consists of the proton name, heteronucleus name, spin ID string, the intensity value, and the original line of text
    @rtype:             list of lists of str, str, str, float, str
    """

    # Assume the NMRView file has six header lines!
    num = 6
    print(("Number of header lines: " + repr(num)))

    # Remove the header.
    file_data = file_data[num:]

    # Strip the data.
    file_data = strip(file_data)

    # The peak intensity column (default of 16 selects heights, 15 selects volumes).
    if int_col == None:
        int_col = 16
    if int_col == 16:
        print('Using peak heights.')
    if int_col == 15:
        print('Using peak volumes (or evolumes).')

    # Loop over the file data.
    data = []
    for line in file_data:
        # Unknown assignment.
        if line[1] == '{}':
            warn(RelaxWarning("The assignment '%s' is unknown, skipping this peak." % line[1]))
            continue

        # The residue number, extracted from assignments of the form '{num.name}'.
        res_num = ''
        try:
            res_num = string.strip(line[1], '{')
            res_num = string.strip(res_num, '}')
            res_num = string.split(res_num, '.')
            res_num = res_num[0]
        except ValueError:
            raise RelaxError("The peak list is invalid.")

        # Nuclei names.
        # NOTE(review):  assumes column 8 holds the heteronucleus and column 1 the proton
        # assignment, both as '{num.name}' — confirm against the NMRView export format.
        x_name = ''
        if line[8]!='{}':
            x_name = string.strip(line[8], '{')
            x_name = string.strip(x_name, '}')
            x_name = string.split(x_name, '.')
            x_name = x_name[1]
        h_name = ''
        if line[1]!='{}':
            h_name = string.strip(line[1], '{')
            h_name = string.strip(h_name, '}')
            h_name = string.split(h_name, '.')
            h_name = h_name[1]

        # Intensity.
        try:
            intensity = float(line[int_col])
        except ValueError:
            # Bug fix:  'intensity' is unbound when the float() conversion fails, so the old
            # message raised a NameError instead of this RelaxError.  Report the raw text.
            raise RelaxError("The peak intensity value " + repr(line[int_col]) + " from the line " + repr(line) + " is invalid.")

        # Generate the spin_id.
        spin_id = generate_spin_id(res_num=res_num, spin_name=x_name)

        # Append the data.
        data.append([h_name, x_name, spin_id, intensity, line])

    # Return the data.
    return data
815 816
def intensity_sparky(file_data=None, int_col=None):
    """Return the process data from the Sparky peak intensity file.

    The residue number, heteronucleus and proton names, and peak intensity will be returned.


    @keyword file_data: The data extracted from the file converted into a list of lists.
    @type file_data:    list of lists of str
    @keyword int_col:   The column containing the peak intensity data (for a non-standard formatted file).
    @type int_col:      int
    @raises RelaxError: When the expected peak intensity is not a float.
    @return:            The extracted data as a list of lists.  The first dimension corresponds to the spin.  The second dimension consists of the proton name, heteronucleus name, spin ID string, the intensity value, and the original line of text.
    @rtype:             list of lists of str, str, str, float, str
    """

    # The number of header lines.
    num = 0
    if file_data[0][0] == 'Assignment':
        num = num + 1
    # Bug fix:  the rows are lists, so a blank second line is [] and never the string ''
    # previously tested for; the length guard also prevents an IndexError on one-line files.
    if len(file_data) > 1 and file_data[1] in ([], ''):
        num = num + 1
    print("Number of header lines found: %s" % num)

    # Remove the header.
    file_data = file_data[num:]

    # Strip the data.
    file_data = strip(file_data)

    # Loop over the file data.
    data = []
    for line in file_data:
        # The Sparky assignment.
        assignment = ''
        res_num = ''
        h_name = ''
        x_name = ''
        intensity = ''

        # Skip non-assigned peaks.
        if line[0] == '?-?':
            continue

        # First split by the 2D separator.
        x_assign, h_assign = split('-', line[0])

        # The proton info (the name is the last letter group of the assignment).
        h_name = split('([A-Z]+)', h_assign)[-2]

        # The heteronucleus info.
        x_row = split('([A-Z]+)', x_assign)
        x_name = x_row[-2]

        # The residue number (the digits preceding the heteronucleus name).
        try:
            res_num = int(x_row[-3])
        except:
            raise RelaxError("Improperly formatted Sparky file.")

        # The peak intensity column.
        if int_col == None:
            int_col = 3

        # Intensity.
        try:
            intensity = float(line[int_col])
        except ValueError:
            # Bug fix:  'intensity' held the empty string when the conversion failed, so the
            # old message was misleading.  Report the raw text instead.
            raise RelaxError("The peak intensity value " + repr(line[int_col]) + " from the line " + repr(line) + " is invalid.")

        # Generate the spin_id.
        spin_id = generate_spin_id(res_num=res_num, spin_name=x_name)

        # Append the data.
        data.append([h_name, x_name, spin_id, intensity, line])

    # Return the data.
    return data
894 895
def intensity_xeasy(file_data=None, heteronuc=None, proton=None, int_col=None):
    """Return the process data from the XEasy peak intensity file.

    The residue number, heteronucleus and proton names, and peak intensity will be returned.


    @keyword file_data: The data extracted from the file converted into a list of lists.
    @type file_data:    list of lists of str
    @keyword heteronuc: The name of the heteronucleus as specified in the peak intensity file.
    @type heteronuc:    str
    @keyword proton:    The name of the proton as specified in the peak intensity file.
    @type proton:       str
    @keyword int_col:   The column containing the peak intensity data (for a non-standard formatted file).
    @type int_col:      int
    @raises RelaxError: When the expected peak intensity is not a float.
    @return:            The extracted data as a list of lists.  The first dimension corresponds to the spin.  The second dimension consists of the proton name, heteronucleus name, spin ID string, the intensity value, and the original line of text.
    @rtype:             list of lists of str, str, str, float, str
    """

    # The columns.
    w1_col = 4
    w2_col = 7
    if int_col == None:
        int_col = 10

    # Set the default proton dimension.
    H_dim = 'w1'

    # Determine the number of header lines.
    num = 0
    for line in file_data:
        # Try to see if the intensity can be extracted.
        try:
            intensity = float(line[int_col])
        except ValueError:
            num = num + 1
        except IndexError:
            num = num + 1
        else:
            break
    print(("Number of header lines found: " + repr(num)))

    # Remove the header.
    file_data = file_data[num:]

    # Strip the data.
    file_data = strip(file_data)

    # Determine the proton and heteronucleus dimensions.
    for line in file_data:
        # Proton in w1, heteronucleus in w2.
        if line[w1_col] == proton and line[w2_col] == heteronuc:
            # Set the proton dimension.
            H_dim = 'w1'

            # Print out.
            print("The proton dimension is w1")

            # Don't continue (waste of time).
            break

        # Heteronucleus in w1, proton in w2.
        if line[w1_col] == heteronuc and line[w2_col] == proton:
            # Set the proton dimension.
            H_dim = 'w2'

            # Print out.
            print("The proton dimension is w2")

            # Don't continue (waste of time).
            break

    # Loop over the file data.
    data = []
    for line in file_data:
        # Test for invalid assignment lines which have the column numbers changed and return empty data.
        if line[w1_col] == 'inv.':
            continue

        # The residue number.
        try:
            res_num = int(line[5])
        # Bug fix:  narrowed from a bare 'except:' which also swallowed KeyboardInterrupt
        # and SystemExit; only conversion and short-line failures are format errors.
        except (ValueError, IndexError):
            raise RelaxError("Improperly formatted XEasy file.")

        # Nuclei names, assigned according to the detected proton dimension.
        if H_dim == 'w1':
            h_name = line[w1_col]
            x_name = line[w2_col]
        else:
            x_name = line[w1_col]
            h_name = line[w2_col]

        # Intensity.
        try:
            intensity = float(line[int_col])
        except ValueError:
            # Bug fix:  'intensity' is unbound when the conversion fails on a data line,
            # which previously raised a NameError here.  Report the raw text instead.
            raise RelaxError("The peak intensity value " + repr(line[int_col]) + " from the line " + repr(line) + " is invalid.")

        # Generate the spin_id.
        spin_id = generate_spin_id(res_num=res_num, spin_name=x_name)

        # Append the data.
        data.append([h_name, x_name, spin_id, intensity, line])

    # Return the data.
    return data
1003 1004
def read(file=None, dir=None, spectrum_id=None, heteronuc=None, proton=None, int_col=None, int_method=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, ncproc=None):
    """Read the peak intensity data.

    @keyword file:          The name of the file containing the peak intensities.
    @type file:             str
    @keyword dir:           The directory where the file is located.
    @type dir:              str
    @keyword spectrum_id:   The spectrum identification string.
    @type spectrum_id:      str
    @keyword heteronuc:     The name of the heteronucleus as specified in the peak intensity file.
    @type heteronuc:        str
    @keyword proton:        The name of the proton as specified in the peak intensity file.
    @type proton:           str
    @keyword int_col:       The column containing the peak intensity data (used by the generic intensity file format).
    @type int_col:          int
    @keyword int_method:    The integration method, one of 'height', 'point sum' or 'other'.
    @type int_method:       str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.
    @type spin_id:          None or str
    @keyword ncproc:        The Bruker ncproc binary intensity scaling factor.
    @type ncproc:           int or None
    """

    # Test if the current data pipe exists.
    pipes.test()

    # Test if sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Check the file name.
    if file == None:
        raise RelaxError("The file name must be supplied.")

    # Test that the intensity measures are identical.
    if hasattr(cdp, 'int_method') and cdp.int_method != int_method:
        raise RelaxError("The '%s' measure of peak intensities does not match '%s' of the previously loaded spectra." % (int_method, cdp.int_method))

    # Check the intensity measure.
    if not int_method in ['height', 'point sum', 'other']:
        raise RelaxError("The intensity measure '%s' is not one of 'height', 'point sum', 'other'." % int_method)

    # Set the peak intensity measure.
    cdp.int_method = int_method

    # Extract the data from the file.
    file_data = extract_data(file, dir, sep=sep)

    # Automatic format detection.
    format = autodetect_format(file_data)

    # Generic.
    if format == 'generic':
        # Print out.
        print("Generic formatted data file.\n")

        # Extract the data.
        intensity_data = intensity_generic(file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=int_col, sep=sep, spin_id=spin_id)

    # NMRView.
    elif format == 'nmrview':
        # Print out.
        print("NMRView formatted data file.\n")

        # Extract the data.
        intensity_data = intensity_nmrview(file_data=file_data)

    # Sparky.
    elif format == 'sparky':
        # Print out.
        print("Sparky formatted data file.\n")

        # Extract the data.
        intensity_data = intensity_sparky(file_data=file_data, int_col=int_col)

    # XEasy.
    elif format == 'xeasy':
        # Print out.
        print("XEasy formatted data file.\n")

        # Extract the data.
        intensity_data = intensity_xeasy(file_data=file_data, proton=proton, heteronuc=heteronuc, int_col=int_col)

    # Unknown format - fail cleanly rather than hitting a NameError on intensity_data below.
    else:
        raise RelaxError("The format '%s' of the peak intensity file is not supported." % format)

    # Add the spectrum id (and ncproc) to the relax data store.
    if not hasattr(cdp, 'spectrum_ids'):
        cdp.spectrum_ids = []
    # Bug fix:  the ncproc structure must be created whenever it is missing, not only when
    # spectrum_ids is also missing.  Otherwise loading a first spectrum without ncproc and a
    # second with ncproc raised an AttributeError below.
    if ncproc != None and not hasattr(cdp, 'ncproc'):
        cdp.ncproc = {}
    if spectrum_id in cdp.spectrum_ids:
        raise RelaxError("The spectrum identification string '%s' already exists." % spectrum_id)
    else:
        cdp.spectrum_ids.append(spectrum_id)
        if ncproc != None:
            cdp.ncproc[spectrum_id] = ncproc

    # Loop over the peak intensity data.
    data_flag = False
    for i in xrange(len(intensity_data)):
        # Extract the data (note that this deliberately rebinds the spin_id name to the
        # per-peak spin ID, as in the original code).
        H_name, X_name, spin_id, intensity, line = intensity_data[i]

        # Skip data with mismatching nucleus names.
        if (X_name and X_name != heteronuc) or (H_name and H_name != proton):
            warn(RelaxWarning("Proton and heteronucleus names do not match, skipping the data %s." % line))
            continue

        # Get the spin container.
        spin = return_spin(spin_id)
        if not spin:
            warn(RelaxNoSpinWarning(spin_id))
            continue

        # Skip deselected spins.
        if not spin.select:
            continue

        # Initialise.
        if not hasattr(spin, 'intensities'):
            spin.intensities = {}

        # Intensity scaling by the Bruker ncproc factor (stored values are scaled by 2**ncproc).
        if ncproc != None:
            intensity = intensity / float(2**ncproc)

        # Add the data.
        spin.intensities[spectrum_id] = intensity

        # Switch the flag.
        data_flag = True

    # No data.
    if not data_flag:
        # Delete all the data.
        delete(spectrum_id)

        # Raise the error.
        raise RelaxError("No data could be loaded from the peak list")


def replicated(spectrum_ids=None):
    """Set which spectra are replicates.

    @keyword spectrum_ids:  A list of spectrum ids corresponding to replicated spectra.
    @type spectrum_ids:     list of str
    """

    # Test if the current pipe exists
    pipes.test()

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("No spectra have been loaded therefore replicates cannot be specified.")

    # Test for None first.  Bug fix:  the original code iterated over spectrum_ids before this
    # check, so a None argument raised a TypeError instead of the intended warning.
    if spectrum_ids == None:
        warn(RelaxWarning("The spectrum ID list cannot be None."))
        return

    # Test the spectrum id strings.
    for spectrum_id in spectrum_ids:
        if spectrum_id not in cdp.spectrum_ids:
            raise RelaxError("The peak intensities corresponding to the spectrum id '%s' do not exist." % spectrum_id)

    # Test for more than one element.
    if len(spectrum_ids) == 1:
        warn(RelaxWarning("The number of spectrum IDs in the list %s must be greater than one." % spectrum_ids))
        return

    # Initialise.
    if not hasattr(cdp, 'replicates'):
        cdp.replicates = []

    # Check if the spectrum IDs are already in the list.
    found = False
    for i in xrange(len(cdp.replicates)):
        # Loop over all elements of the first.
        for j in xrange(len(spectrum_ids)):
            if spectrum_ids[j] in cdp.replicates[i]:
                found = True

        # One of the spectrum IDs already have a replicate specified.
        if found:
            # Add the remaining replicates to the existing set and quit this function.
            for j in xrange(len(spectrum_ids)):
                if spectrum_ids[j] not in cdp.replicates[i]:
                    cdp.replicates[i].append(spectrum_ids[j])

            # Nothing more to do.
            return

    # A new set of replicates.
    cdp.replicates.append(spectrum_ids)


def replicated_flags():
    """Create and return a dictionary of flags of whether the spectrum is replicated or not.

    @return:    The dictionary of flags of whether the spectrum is replicated or not.
    @rtype:     dict of bool
    """

    # Start with every loaded spectrum ID flagged as non-replicated.
    flags = dict((spectrum_id, False) for spectrum_id in cdp.spectrum_ids)

    # Any ID appearing in a replicate set gets its flag raised.
    for replicate_set in cdp.replicates:
        for spectrum_id in replicate_set:
            flags[spectrum_id] = True

    # Return the dictionary.
    return flags


def replicated_ids(spectrum_id):
    """Create and return a list of spectra ID which are replicates of the given ID.

    @param spectrum_id: The spectrum ID to find all the replicates of.
    @type spectrum_id:  str
    @return:            The list of spectrum IDs which are replicates of spectrum_id.
    @rtype:             list of str
    """

    # Collect every ID sharing a replicate set with spectrum_id, excluding spectrum_id itself.
    repl = []
    for replicate_set in cdp.replicates:
        # The spectrum ID is in this set.
        if spectrum_id in replicate_set:
            for other_id in replicate_set:
                if other_id != spectrum_id:
                    repl.append(other_id)

    # Return the sorted, duplicate-free list.  This replaces the original hand-rolled sort plus
    # backwards duplicate-deletion loop with the equivalent set-based idiom (the empty-list
    # short-circuit is also unnecessary as sorted(set([])) == []).
    return sorted(set(repl))