Package generic_fns :: Module spectrum
[hide private]
[frames] | [no frames]

Source Code for Module generic_fns.spectrum

   1  ############################################################################### 
   2  #                                                                             # 
   3  # Copyright (C) 2004-2013 Edward d'Auvergne                                   # 
   4  # Copyright (C) 2008 Sebastien Morin                                          # 
   5  #                                                                             # 
   6  # This file is part of the program relax (http://www.nmr-relax.com).          # 
   7  #                                                                             # 
   8  # This program is free software: you can redistribute it and/or modify        # 
   9  # it under the terms of the GNU General Public License as published by        # 
  10  # the Free Software Foundation, either version 3 of the License, or           # 
  11  # (at your option) any later version.                                         # 
  12  #                                                                             # 
  13  # This program is distributed in the hope that it will be useful,             # 
  14  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
  15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
  16  # GNU General Public License for more details.                                # 
  17  #                                                                             # 
  18  # You should have received a copy of the GNU General Public License           # 
  19  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
  20  #                                                                             # 
  21  ############################################################################### 
  22   
  23  # Module docstring. 
  24  """Module containing functions for the handling of peak intensities.""" 
  25   
  26   
  27  # Python module imports. 
  28  from math import sqrt 
  29  from re import split 
  30  import sys 
  31  from warnings import warn 
  32   
  33  # relax module imports. 
  34  from generic_fns.mol_res_spin import exists_mol_res_spin_data, generate_spin_id_unique, return_spin, spin_loop 
  35  from generic_fns import pipes 
  36  from relax_errors import RelaxArgNotNoneError, RelaxError, RelaxImplementError, RelaxNoSequenceError, RelaxNoSpectraError 
  37  from relax_io import extract_data, read_spin_data, strip, write_data 
  38  from relax_warnings import RelaxWarning, RelaxNoSpinWarning 
  39   
  40   
  41   
class Bruker_import:
    """Importer for Bruker Protein Dynamics Center (PDC) peak list files.

    The PDC file is parsed and converted into a relax 'generic' formatted
    dummy file, either written to disk (if output_file is given) or held in
    a DummyFileObject.
    """

    def __init__(self, dir=None, exp_type=None, file=None, UI='prompt', output_file=None):
        """Import the Bruker Protein Dynamic Center (PDC) file.

        @keyword dir:           The directory to save the new file in.
        @type dir:              str
        @keyword exp_type:      The type of experiment, e.g. NOE, T1 or T2.
        @type exp_type:         str
        @keyword file:          The Bruker PDC output file.
        @type file:             str
        @keyword UI:            The relax user interface (either 'prompt' or 'GUI').
        @type UI:               str
        @keyword output_file:   The file to save the imported list.
        @type output_file:      str
        @raises RelaxError:     If no file is supplied, or the file does not
                                match the requested experiment type.
        """

        # Store the experiment type.
        self.exp_type = exp_type

        # The output file and directory.
        self.output_file = output_file
        self.dir = dir

        # The user interface.
        self.UI = UI

        # Read the file.
        if file:
            self.read_file(file)
        else:
            raise RelaxError('No file selected.')

        # Collect the entries for the file.
        self.collect_entries()

        # Create the dummy file.
        self.create_file()


    def __str__(self):
        """Return the created file path (or dummy file object) as a string."""

        return str(self.value)


    def collect_entries(self):
        """Collect entries for the NOE/R1/R2 relax dummy file.

        Each entry stored in self.file_entries has the form
        [Mol_name, Res_num, Res_name, Spin_num, Spin_name, value, error].
        """

        # Storage for the file entries.
        self.file_entries = []

        # Flag set while inside a results section.
        is_data = False

        # The spin name, defaulting to nitrogen.
        spinname = 'N'

        # Loop over the lines of the PDC file.
        for line in range(0, len(self.entries)):
            # Detect the experiment type from the project line.
            if 'Project:' in self.entries[line][0]:
                exp_type = ''

                # NOE.
                if 'Dynamic method/Hetero NOE' in self.entries[line][1]:
                    exp_type = 'NOE'

                # T1.
                elif 'Dynamic method/T1' in self.entries[line][1]:
                    exp_type = 'T1'

                # T2.
                elif 'Dynamic method/T2' in self.entries[line][1]:
                    exp_type = 'T2'

                # Check agreement of file and user input (the original also
                # had an unreachable 'return' after this raise - removed).
                if self.exp_type and self.exp_type != exp_type:
                    raise RelaxError('Selected file is not a '+self.exp_type+'-file.')

                # Feedback.
                print("Reading BRUKER PDC "+exp_type+" file.\n")

            # Section boundaries toggle the data flag.
            if 'SECTION:' in self.entries[line][0]:
                # The NOE/T1/T2 results section.
                if 'results' in self.entries[line][1]:
                    is_data = True
                    continue

                # Any other section.
                else:
                    is_data = False

            # The isotope labelling determines the spin name.
            if 'Labelling:' in self.entries[line][0]:
                # 15N.
                if 'N' in self.entries[line][0]:
                    spinname = 'N'

                # 13C.
                if 'C' in self.entries[line][0]:
                    spinname = 'C'

            # Collect the NOE/T1/T2 values.
            if is_data:
                # Exclude the header and blank lines.
                if self.entries[line][0] in ['Peak name', '']:
                    continue

                # The peak label with spaces and brackets removed, e.g. 'Gly [4]' -> 'Gly4'.
                label_tmp = self.entries[line][0]
                for char in (' ', '[', ']'):
                    label_tmp = label_tmp.replace(char, '')

                # Locate the residue number within the label (bounds-checked
                # scan instead of the original bare try/except).
                start = 0
                while not label_tmp[start].isdigit():
                    start = start + 1
                end = start
                while end < len(label_tmp) and label_tmp[end].isdigit():
                    end = end + 1

                # The residue number and name.
                resnum = label_tmp[start:end]
                resname = label_tmp[0:start]

                # The spin number (simply the line number of the entry).
                spin_no = line

                # The value, converting T1/T2 times into R1/R2 rates.
                value_tmp = float(self.entries[line][3])
                if exp_type in ['T1', 'T2']:
                    value_tmp = 1.0/value_tmp

                # The error.
                error = float(self.entries[line][4])

                # Store the file entry.
                self.file_entries.append(['Bruker_PDC_'+exp_type, resnum, resname, spin_no, spinname, value_tmp, error])


    def create_file(self):
        """Write the collected entries as a relax 'generic' formatted dummy file."""

        # Local import for the path separator (this module does not import os globally).
        import os

        # The header, matching the relax generic file format.
        text = 'Mol_name\tRes_num\tRes_name\tSpin_num\tSpin_name\tValue\tError \n'

        # Build one tab-terminated line per entry.
        for entry in self.file_entries:
            for field in entry:
                text = text + str(field) + '\t'
            text = text + '\n'

        # Print the table, except in GUI mode.
        if self.UI != 'GUI':
            print(text)

        # Create the output file.
        if self.output_file:
            if self.dir:
                # Bug fix: 'sep' was an undefined name here - use os.sep.
                file_path = self.dir + os.sep + self.output_file
            else:
                file_path = self.output_file
            file = open(file_path, 'w')

        # Or create a dummy file object.
        else:
            # NOTE(review): DummyFileObject is not imported anywhere in this
            # module - this branch raises NameError; confirm the missing import.
            file = DummyFileObject()

        # Write out the text.
        file.write(text)
        file.close()

        # Feedback and the return value.
        if self.output_file:
            print('Created BRUKER PDC file '+file_path)
            self.value = file_path
        else:
            print('Created BRUKER PDC file.')
            self.value = file


    def read_file(self, filename):
        """Read the PDC file, storing each line as a list of tab-separated fields."""

        # Storage of lines.
        self.entries = []

        # Loop over the lines of the PDC file, splitting on tabs.
        with open(filename, 'r') as file:
            for line in file:
                self.entries.append(line.strip().split('\t'))
264 265
def __errors_height_no_repl():
    """Set peak height errors from the base plane RMSD (no replicated spectra).

    Every selected spin carrying intensity data is assigned an intensity
    error equal to its previously stored base plane RMSD structure.
    """

    # Walk through all spins, copying the RMSD across as the error.
    for current_spin, current_id in spin_loop(return_id=True):
        # Only selected spins with intensity data are processed.
        if not current_spin.select or not hasattr(current_spin, 'intensities'):
            continue

        # The RMSD must have been supplied beforehand.
        if not hasattr(current_spin, 'baseplane_rmsd'):
            raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % current_id)

        # The error is simply the RMSD.
        current_spin.intensity_err = current_spin.baseplane_rmsd
285 286
def __errors_repl(verbosity=0):
    """Calculate the errors for peak intensities from replicated spectra.

    The variance for each replicated set is the sample variance of the spin
    intensities averaged over all spins, and is copied to every spectrum of
    the set.  If some spectra are unreplicated, the average variance over all
    replicated sets is assigned to them.  Results are stored in cdp.var_I and
    cdp.sigma_I (keyed by spectrum ID), and on each selected spin.

    @keyword verbosity: The amount of information to print.  The higher the value, the greater the verbosity.
    @type verbosity:    int
    @raises RelaxError: If no spin has usable data in a replicated set.
    """

    # Replicated spectra flags, keyed by spectrum ID.
    repl = replicated_flags()

    # Are all spectra replicated?
    if False in repl.values():
        all_repl = False
        print("All spectra replicated: No.")
    else:
        all_repl = True
        print("All spectra replicated: Yes.")

    # Initialise the variance and standard deviation containers.
    cdp.sigma_I = {}
    cdp.var_I = {}

    # Loop over the spectra.
    for id in cdp.spectrum_ids:
        # Skip non-replicated spectra.
        if not repl[id]:
            continue

        # Skip replicated spectra which already have been used.
        if id in cdp.var_I and cdp.var_I[id] != 0.0:
            continue

        # The replicated spectra (find the replicate set containing this ID).
        for j in range(len(cdp.replicates)):
            if id in cdp.replicates[j]:
                spectra = cdp.replicates[j]

        # Number of spectra.
        num_spectra = len(spectra)

        # Print out.
        print("\nReplicated spectra: " + repr(spectra))
        if verbosity:
            print("%-5s%-6s%-20s%-20s" % ("Num", "Name", "Average", "SD"))

        # Calculate the mean value, accumulating the variance over all spins.
        count = 0
        for spin in spin_loop():
            # Skip deselected spins.
            if not spin.select:
                continue

            # Skip and deselect spins which have no data.
            if not hasattr(spin, 'intensities'):
                spin.select = False
                continue

            # Missing data for any spectrum of the set disqualifies the spin.
            missing = False
            for j in range(num_spectra):
                if not spectra[j] in spin.intensities:
                    missing = True
            if missing:
                continue

            # Average intensity over the replicates.
            ave_intensity = 0.0
            for j in range(num_spectra):
                ave_intensity = ave_intensity + spin.intensities[spectra[j]]
            ave_intensity = ave_intensity / num_spectra

            # Sum of squared errors about the average.
            SSE = 0.0
            for j in range(num_spectra):
                SSE = SSE + (spin.intensities[spectra[j]] - ave_intensity) ** 2

            # The sample variance:
            #
            #                  1
            #    sigma^2 = --------- * sum({Xi - Xav}^2)
            #               (n - 1)
            #
            var_I = 1.0 / (num_spectra - 1.0) * SSE

            # Print out.
            if verbosity:
                print("%-5i%-6s%-20s%-20s" % (spin.num, spin.name, repr(ave_intensity), repr(var_I)))

            # Sum of variances (for averaging over all spins below).
            if not id in cdp.var_I:
                cdp.var_I[id] = 0.0
            cdp.var_I[id] = cdp.var_I[id] + var_I
            count = count + 1

        # No data catch.
        if not count:
            raise RelaxError("No data is present, unable to calculate errors from replicated spectra.")

        # Average the variance over all contributing spins.
        cdp.var_I[id] = cdp.var_I[id] / float(count)

        # Copy the variance to every spectrum of the replicate set.
        for j in range(num_spectra):
            cdp.var_I[spectra[j]] = cdp.var_I[id]

        # Print out.
        print("Standard deviation: %s" % sqrt(cdp.var_I[id]))


    # Average across all spectra if there are time points with a single spectrum.
    if not all_repl:
        # Print out.
        print("\nVariance averaging over all spectra.")

        # Initialise.
        var_I = 0.0
        num_dups = 0

        # Loop over all time points, summing the non-zero variances.
        for id in cdp.var_I.keys():
            # Single spectrum (or extraordinarily accurate NMR spectra!).
            if cdp.var_I[id] == 0.0:
                continue

            # Sum and count.
            var_I = var_I + cdp.var_I[id]
            num_dups = num_dups + 1

        # Average value.
        var_I = var_I / float(num_dups)

        # Assign the average value to all time points.
        for id in cdp.spectrum_ids:
            cdp.var_I[id] = var_I

        # Print out.
        print("Standard deviation for all spins: " + repr(sqrt(var_I)))

    # Loop over the spectra, converting variances to standard deviations.
    for id in cdp.var_I.keys():
        # Create the standard deviation data structure.
        cdp.sigma_I[id] = sqrt(cdp.var_I[id])

    # Set the spin specific errors.
    for spin in spin_loop():
        # Skip deselected spins.
        if not spin.select:
            continue

        # Set the error (the whole per-spectrum-ID dictionary is shared by
        # every spin - presumably intentional; confirm against consumers).
        spin.intensity_err = cdp.sigma_I
438 439
def __errors_volume_no_repl():
    """Calculate the errors for peak volumes when no spectra are replicated.

    For each selected spin with intensity data, the error for each spectrum
    is set to the base plane RMSD scaled by the square root of the number of
    points used in the volume integration.
    """

    # Loop over the spins and set the error to the RMSD of the base plane noise.
    for spin, spin_id in spin_loop(return_id=True):
        # Skip deselected spins.
        if not spin.select:
            continue

        # Skip spins missing intensity data.
        if not hasattr(spin, 'intensities'):
            continue

        # Test if the RMSD has been set.
        if not hasattr(spin, 'baseplane_rmsd'):
            raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id)

        # Test that the total number of points have been set.
        if not hasattr(spin, 'N'):
            raise RelaxError("The total number of points used in the volume integration has not been specified for spin '%s'." % spin_id)

        # Initialise the error structure if necessary (it was previously
        # indexed without ever being created).
        if not hasattr(spin, 'intensity_err'):
            spin.intensity_err = {}

        # Set the error to the RMSD multiplied by the square root of the total number of points.
        # Bug fix: the original iterated the non-existent 'spin.intensity'
        # attribute; the intensity dictionary is 'spin.intensities'.
        for key in spin.intensities.keys():
            spin.intensity_err[key] = spin.baseplane_rmsd[key] * sqrt(spin.N)
464 465
def autodetect_format(file_data):
    """Automatically detect the format of the peak list.

    @param file_data:   The processed results file data.
    @type file_data:    list of lists of str
    @return:            The detected format: one of 'sparky', 'nmrview',
                        'xeasy', or 'generic'.
    @rtype:             str
    """

    # Find the first non-blank line (the header).  Bug fix: an empty file
    # previously raised NameError, and an all-blank file raised IndexError -
    # both now fall back to the generic format.
    line = None
    for line in file_data:
        if line != []:
            break
    if not line:
        return 'generic'

    # Sparky format.
    if line[0] == 'Assignment':
        return 'sparky'

    # NMRView format.
    if line == ['label', 'dataset', 'sw', 'sf']:
        return 'nmrview'

    # XEasy format.
    if line == ['No.', 'Color', 'w1', 'w2', 'ass.', 'in', 'w1', 'ass.', 'in', 'w2', 'Volume', 'Vol.', 'Err.', 'Method', 'Comment']:
        return 'xeasy'

    # Assume a generic format.
    return 'generic'
492 493
def baseplane_rmsd(error=0.0, spectrum_id=None, spin_id=None):
    """Set the peak intensity errors, as defined as the baseplane RMSD.

    @param error:         The peak intensity error value defined as the RMSD of the base plane noise.
    @type error:          float
    @keyword spectrum_id: The spectrum id.
    @type spectrum_id:    str
    @param spin_id:       The spin identification string.
    @type spin_id:        str
    """

    # Test if the current pipe exists.
    pipes.test()

    # Test if the sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Test the spectrum id string.  Bug fix: guard against no spectra having
    # been loaded at all (cdp.spectrum_ids missing), consistent with delete().
    if not hasattr(cdp, 'spectrum_ids') or spectrum_id not in cdp.spectrum_ids:
        raise RelaxError("The peak intensities corresponding to the spectrum id '%s' do not exist." % spectrum_id)

    # The scaling by NC_proc (Bruker intensities are scaled by 2**NC_proc).
    if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
        scale = 1.0 / 2**cdp.ncproc[spectrum_id]
    else:
        scale = 1.0

    # Loop over the spins.
    for spin in spin_loop(spin_id):
        # Skip deselected spins.
        if not spin.select:
            continue

        # Initialise or update the baseplane_rmsd data structure as necessary.
        if not hasattr(spin, 'baseplane_rmsd'):
            spin.baseplane_rmsd = {}

        # Set the scaled error for this spectrum.
        spin.baseplane_rmsd[spectrum_id] = float(error) * scale
535 536
def delete(spectrum_id=None):
    """Delete spectral data corresponding to the spectrum ID.

    All traces of the ID are removed: the ID list itself, the ncproc value,
    replicate bookkeeping, error structures, and per-spin intensities.

    @keyword spectrum_id: The spectrum ID string.
    @type spectrum_id:    str
    """

    # A data pipe must exist.
    pipes.test()

    # Sequence data must be present.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # The ID must correspond to loaded spectral data.
    if not hasattr(cdp, 'spectrum_ids') or spectrum_id not in cdp.spectrum_ids:
        raise RelaxNoSpectraError(spectrum_id)

    # Remove the ID from the list.
    cdp.spectrum_ids.remove(spectrum_id)

    # Discard any ncproc entry for the ID.
    if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
        del cdp.ncproc[spectrum_id]

    # Handle the replicated spectra bookkeeping.
    if hasattr(cdp, 'replicates'):
        for index in range(len(cdp.replicates)):
            if spectrum_id not in cdp.replicates[index]:
                continue

            # A pair collapses to nothing; larger sets just lose the ID.
            if len(cdp.replicates[index]) == 2:
                cdp.replicates.pop(index)
            else:
                cdp.replicates[index].remove(spectrum_id)

            # The ID occurs at most once, so stop searching.
            break

    # Remove any error data for the ID.
    if hasattr(cdp, 'sigma_I') and spectrum_id in cdp.sigma_I:
        del cdp.sigma_I[spectrum_id]
    if hasattr(cdp, 'var_I') and spectrum_id in cdp.var_I:
        del cdp.var_I[spectrum_id]

    # Remove the intensity value from each spin.
    for spin in spin_loop():
        if hasattr(spin, 'intensities') and spectrum_id in spin.intensities:
            del spin.intensities[spectrum_id]
590 591
def error_analysis():
    """Determine the peak intensity standard deviation."""

    # Test if the current pipe exists.
    pipes.test()

    # Test if the sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("Error analysis is not possible, no spectra have been loaded.")

    # Peak height category.
    if cdp.int_method == 'height':
        # Print out.
        print("Intensity measure: Peak heights.")

        # Do we have replicated spectra?
        if hasattr(cdp, 'replicates'):
            # Print out.
            print("Replicated spectra: Yes.")

            # Set the errors.
            __errors_repl()

        # No replicated spectra.
        else:
            # Print out.
            print("Replicated spectra: No.")

            # Set the errors.
            __errors_height_no_repl()

    # Peak volume category.
    if cdp.int_method == 'point sum':
        # Print out.
        print("Intensity measure: Peak volumes.")

        # Do we have replicated spectra?
        if hasattr(cdp, 'replicates'):
            # Print out.
            print("Replicated spectra: Yes.")

            # Set the errors.
            __errors_repl()

        # No replicated spectra.
        else:
            # Print out.
            print("Replicated spectra: No.")

            # Not implemented.
            raise RelaxImplementError

            # Set the errors (unreachable until the above is implemented).
            # Bug fix: the original called the non-existent
            # __errors_vol_no_repl(); the defined function is below.
            __errors_volume_no_repl()
650 651
def get_ids():
    """Return a list of all spectrum IDs.

    @return:    The list of spectrum IDs.
    @rtype:     list of str
    """

    # The IDs, defaulting to an empty list when no spectra are loaded.
    return getattr(cdp, 'spectrum_ids', [])
665 666
def integration_points(N=0, spectrum_id=None, spin_id=None):
    """Set the number of integration points for the given spectrum.

    @param N:               The number of integration points.
    @type N:                int
    @keyword spectrum_id:   The spectrum ID string.
    @type spectrum_id:      str
    @keyword spin_id:       The spin ID string used to restrict the value to.
    @type spin_id:          None or str
    @raises RelaxImplementError:    Always - this feature is not yet implemented.
    """

    # This user function is a placeholder and always fails.
    raise RelaxImplementError
679 680
def intensity_generic(file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, sep=None, spin_id=None):
    """Return the process data from the generic peak intensity file.

    The residue number, heteronucleus and proton names, and peak intensity will be returned.


    @keyword file_data:     The data extracted from the file converted into a list of lists.
    @type file_data:        list of lists of str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword data_col:      The column containing the peak intensities.
    @type data_col:         int
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.
    @type spin_id:          None or str
    @raises RelaxError:     When the expected peak intensity is not a float.
    @return:                The extracted data as a list of lists.  The first dimension corresponds to the spin.  The second dimension consists of the proton name, heteronucleus name, spin ID string, and the intensity value.
    @rtype:                 list of lists of str, str, str, float
    """

    # Remove comments and blank lines first.
    cleaned = strip(file_data)

    # Assemble one entry per spin yielded by the generic spin data reader.
    data = []
    reader = read_spin_data(file_data=cleaned, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, sep=sep, spin_id=spin_id)
    for mol_name, res_num, res_name, spin_num, spin_name, value in reader:
        # Build the unique spin ID for this row.
        spin_id_str = generate_spin_id_unique(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name)

        # No proton or heteronucleus names exist in the generic format, so the
        # spin ID also stands in for the original line of text.
        data.append([None, None, spin_id_str, value, spin_id_str])

    # Return the data.
    return data
733 734
def intensity_nmrview(file_data=None, int_col=None):
    """Return the process data from the NMRView peak intensity file.

    The residue number, heteronucleus and proton names, and peak intensity will be returned.


    @keyword file_data: The data extracted from the file converted into a list of lists.
    @type file_data:    list of lists of str
    @keyword int_col:   The column containing the peak intensity data.  The default is 16 for intensities.  Setting the int_col argument to 15 will use the volumes (or evolumes).  For a non-standard formatted file, use a different value.
    @type int_col:      int
    @raises RelaxError: When the expected peak intensity is not a float.
    @return:            The extracted data as a list of lists.  The first dimension corresponds to the spin.  The second dimension consists of the proton name, heteronucleus name, spin ID string, the intensity value, and the original line of text.
    @rtype:             list of lists of str, str, str, float, str
    """

    # Assume the NMRView file has six header lines!
    num = 6
    print("Number of header lines: " + repr(num))

    # Remove the header.
    file_data = file_data[num:]

    # Strip the data.
    file_data = strip(file_data)

    # The peak intensity column.
    if int_col == None:
        int_col = 16
    if int_col == 16:
        print('Using peak heights.')
    if int_col == 15:
        print('Using peak volumes (or evolumes).')

    # Loop over the file data.
    data = []
    for line in file_data:
        # Unknown assignment.
        if line[1] == '{}':
            warn(RelaxWarning("The assignment '%s' is unknown, skipping this peak." % line[1]))
            continue

        # The residue number, e.g. '{70.N}' -> '70'.
        res_num = ''
        try:
            res_num = line[1].strip('{')
            res_num = res_num.strip('}')
            res_num = res_num.split('.')
            res_num = res_num[0]
        except ValueError:
            raise RelaxError("The peak list is invalid.")

        # Nuclei names, e.g. '{70.N}' -> 'N'.
        x_name = ''
        if line[8] != '{}':
            x_name = line[8].strip('{')
            x_name = x_name.strip('}')
            x_name = x_name.split('.')
            x_name = x_name[1]
        h_name = ''
        if line[1] != '{}':
            h_name = line[1].strip('{')
            h_name = h_name.strip('}')
            h_name = h_name.split('.')
            h_name = h_name[1]

        # Intensity.
        try:
            intensity = float(line[int_col])
        except ValueError:
            # Bug fix: report the raw column text - 'intensity' is unbound
            # when the conversion fails, so the original error path itself
            # crashed with an UnboundLocalError.
            raise RelaxError("The peak intensity value " + repr(line[int_col]) + " from the line " + repr(line) + " is invalid.")

        # Generate the spin_id.
        spin_id = generate_spin_id_unique(res_num=res_num, spin_name=x_name)

        # Append the data.
        data.append([h_name, x_name, spin_id, intensity, line])

    # Return the data.
    return data
814 815
def intensity_sparky(file_data=None, int_col=None):
    """Return the process data from the Sparky peak intensity file.

    The residue number, heteronucleus and proton names, and peak intensity will be returned.


    @keyword file_data: The data extracted from the file converted into a list of lists.
    @type file_data:    list of lists of str
    @keyword int_col:   The column containing the peak intensity data (for a non-standard formatted file).
    @type int_col:      int
    @raises RelaxError: When the expected peak intensity is not a float.
    @return:            The extracted data as a list of lists.  The first dimension corresponds to the spin.  The second dimension consists of the proton name, heteronucleus name, spin ID string, the intensity value, and the original line of text.
    @rtype:             list of lists of str, str, str, float, str
    """

    # The number of header lines (the 'Assignment' line plus an optional blank).
    num = 0
    if file_data[0][0] == 'Assignment':
        num = num + 1
    # Bug fix: guard the index for single-line files.
    # NOTE(review): a blank row from extract_data() is a list, so the ''
    # comparison looks like it can never match - confirm upstream; strip()
    # below removes blank rows regardless.
    if len(file_data) > 1 and file_data[1] == '':
        num = num + 1
    print("Number of header lines found: %s" % num)

    # Remove the header.
    file_data = file_data[num:]

    # Strip the data.
    file_data = strip(file_data)

    # The peak intensity column (hoisted out of the loop - the default only
    # needs setting once).
    if int_col == None:
        int_col = 3

    # Loop over the file data.
    data = []
    for line in file_data:
        # Skip non-assigned peaks.
        if line[0] == '?-?':
            continue

        # First split by the 2D separator.
        x_assign, h_assign = split('-', line[0])

        # The proton info, e.g. 'HN' from 'C4HN'.
        h_row = split('([A-Z]+)', h_assign)
        h_name = h_row[-2] + h_row[-1]

        # The heteronucleus info, e.g. 'N' from 'G4N'.
        x_row = split('([A-Z]+)', x_assign)
        x_name = x_row[-2] + x_row[-1]

        # The residue number.
        try:
            res_num = int(x_row[-3])
        except:
            raise RelaxError("Improperly formatted Sparky file.")

        # Intensity.
        try:
            intensity = float(line[int_col])
        except ValueError:
            # Bug fix: report the raw column text rather than a stale
            # 'intensity' placeholder (an empty string).
            raise RelaxError("The peak intensity value " + repr(line[int_col]) + " from the line " + repr(line) + " is invalid.")

        # Generate the spin_id.
        spin_id = generate_spin_id_unique(res_num=res_num, spin_name=x_name)

        # Append the data.
        data.append([h_name, x_name, spin_id, intensity, line])

    # Return the data.
    return data
894 895
def intensity_xeasy(file_data=None, heteronuc=None, proton=None, int_col=None):
    """Return the process data from the XEasy peak intensity file.

    The residue number, heteronucleus and proton names, and peak intensity will be returned.


    @keyword file_data: The data extracted from the file converted into a list of lists.
    @type file_data:    list of lists of str
    @keyword heteronuc: The name of the heteronucleus as specified in the peak intensity file.
    @type heteronuc:    str
    @keyword proton:    The name of the proton as specified in the peak intensity file.
    @type proton:       str
    @keyword int_col:   The column containing the peak intensity data (for a non-standard formatted file).
    @type int_col:      int
    @raises RelaxError: When the expected peak intensity is not a float.
    @return:            The extracted data as a list of lists.  The first dimension corresponds to the spin.  The second dimension consists of the proton name, heteronucleus name, spin ID string, the intensity value, and the original line of text.
    @rtype:             list of lists of str, str, str, float, str
    """

    # The fixed XEasy columns.
    w1_col = 4
    w2_col = 7
    if int_col == None:
        int_col = 10

    # Set the default proton dimension.
    H_dim = 'w1'

    # Determine the number of header lines (the header ends at the first line
    # with a parsable intensity value).
    num = 0
    for line in file_data:
        # Try to see if the intensity can be extracted.
        try:
            intensity = float(line[int_col])
        except ValueError:
            num = num + 1
        except IndexError:
            num = num + 1
        else:
            break
    print("Number of header lines found: " + repr(num))

    # Remove the header.
    file_data = file_data[num:]

    # Strip the data.
    file_data = strip(file_data)

    # Determine the proton and heteronucleus dimensions.
    for line in file_data:
        # Proton in w1, heteronucleus in w2.
        if line[w1_col] == proton and line[w2_col] == heteronuc:
            # Set the proton dimension.
            H_dim = 'w1'

            # Print out.
            print("The proton dimension is w1")

            # Don't continue (waste of time).
            break

        # Heteronucleus in w1, proton in w2.
        if line[w1_col] == heteronuc and line[w2_col] == proton:
            # Set the proton dimension.
            H_dim = 'w2'

            # Print out.
            print("The proton dimension is w2")

            # Don't continue (waste of time).
            break

    # Loop over the file data.
    data = []
    for line in file_data:
        # Test for invalid assignment lines which have the column numbers changed and return empty data.
        if line[w1_col] == 'inv.':
            continue

        # The residue number (narrowed from a bare except - only conversion
        # and short-line failures indicate a malformed file).
        try:
            res_num = int(line[5])
        except (ValueError, IndexError):
            raise RelaxError("Improperly formatted XEasy file.")

        # Nuclei names, assigned according to the detected proton dimension.
        if H_dim == 'w1':
            h_name = line[w1_col]
            x_name = line[w2_col]
        else:
            x_name = line[w1_col]
            h_name = line[w2_col]

        # Intensity.
        try:
            intensity = float(line[int_col])
        except ValueError:
            # Bug fix: report the offending text - previously a stale
            # 'intensity' value left over from the header scan was printed.
            raise RelaxError("The peak intensity value " + repr(line[int_col]) + " from the line " + repr(line) + " is invalid.")

        # Generate the spin_id.
        spin_id = generate_spin_id_unique(res_num=res_num, spin_name=x_name)

        # Append the data.
        data.append([h_name, x_name, spin_id, intensity, line])

    # Return the data.
    return data
1003 1004
def read(file=None, dir=None, spectrum_id=None, heteronuc=None, proton=None, int_col=None, int_method=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, ncproc=None, verbose=True):
    """Read the peak intensity data.

    @keyword file:          The name of the file containing the peak intensities.
    @type file:             str
    @keyword dir:           The directory where the file is located.
    @type dir:              str
    @keyword spectrum_id:   The spectrum identification string.
    @type spectrum_id:      str
    @keyword heteronuc:     The name of the heteronucleus as specified in the peak intensity file.
    @type heteronuc:        str
    @keyword proton:        The name of the proton as specified in the peak intensity file.
    @type proton:           str
    @keyword int_col:       The column containing the peak intensity data (used by the generic intensity file format).
    @type int_col:          int
    @keyword int_method:    The integration method, one of 'height', 'point sum' or 'other'.
    @type int_method:       str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.
    @type spin_id:          None or str
    @keyword ncproc:        The Bruker ncproc binary intensity scaling factor.
    @type ncproc:           int or None
    @keyword verbose:       A flag which if True will cause all relaxation data loaded to be printed out.
    @type verbose:          bool
    """

    # Test if the current data pipe exists.
    pipes.test()

    # Test if sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Check the file name.
    if file == None:
        raise RelaxError("The file name must be supplied.")

    # Test that the intensity measures are identical.
    if hasattr(cdp, 'int_method') and cdp.int_method != int_method:
        raise RelaxError("The '%s' measure of peak intensities does not match '%s' of the previously loaded spectra." % (int_method, cdp.int_method))

    # Check the intensity measure.
    if not int_method in ['height', 'point sum', 'other']:
        raise RelaxError("The intensity measure '%s' is not one of 'height', 'point sum', 'other'." % int_method)

    # Set the peak intensity measure.
    cdp.int_method = int_method

    # Extract the data from the file.
    file_data = extract_data(file, dir, sep=sep)

    # Automatic format detection.
    format = autodetect_format(file_data)

    # Generic.
    if format == 'generic':
        # Print out.
        print("Generic formatted data file.\n")

        # Extract the data.
        intensity_data = intensity_generic(file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=int_col, sep=sep, spin_id=spin_id)

    # NMRView.
    elif format == 'nmrview':
        # Print out.
        print("NMRView formatted data file.\n")

        # Extract the data.
        intensity_data = intensity_nmrview(file_data=file_data)

    # Sparky.
    elif format == 'sparky':
        # Print out.
        print("Sparky formatted data file.\n")

        # Extract the data.
        intensity_data = intensity_sparky(file_data=file_data, int_col=int_col)

    # XEasy.
    elif format == 'xeasy':
        # Print out.
        print("XEasy formatted data file.\n")

        # Extract the data.
        intensity_data = intensity_xeasy(file_data=file_data, proton=proton, heteronuc=heteronuc, int_col=int_col)

    # Unknown format - fail loudly here rather than crash later with a NameError on intensity_data.
    else:
        raise RelaxError("The format '%s' returned by the peak list auto-detection is unknown." % format)

    # Add the spectrum id (and ncproc) to the relax data store.
    if not hasattr(cdp, 'spectrum_ids'):
        cdp.spectrum_ids = []
    # Initialise the ncproc structure independently of spectrum_ids.  Previously it was only
    # created together with spectrum_ids, so loading a first spectrum without ncproc and a
    # later one with ncproc caused an AttributeError at the assignment below.
    if ncproc != None and not hasattr(cdp, 'ncproc'):
        cdp.ncproc = {}
    if not spectrum_id in cdp.spectrum_ids:
        cdp.spectrum_ids.append(spectrum_id)
        if ncproc != None:
            cdp.ncproc[spectrum_id] = ncproc

    # Loop over the peak intensity data.
    data = []
    data_flag = False
    for i in range(len(intensity_data)):
        # Extract the data.
        H_name, X_name, spin_id, intensity, line = intensity_data[i]

        # Skip data where the atom names do not match the requested proton/heteronucleus.
        if (X_name and X_name != heteronuc) or (H_name and H_name != proton):
            warn(RelaxWarning("Proton and heteronucleus names do not match, skipping the data %s." % line))
            continue

        # Get the spin container.
        spin = return_spin(spin_id)
        if not spin:
            warn(RelaxNoSpinWarning(spin_id))
            continue

        # Skip deselected spins.
        if not spin.select:
            continue

        # Initialise the per-spin intensity store.
        if not hasattr(spin, 'intensities'):
            spin.intensities = {}

        # Bruker intensity scaling: raw values are stored divided by 2**ncproc.
        if ncproc != None:
            intensity = intensity / float(2**ncproc)

        # Add the data.
        spin.intensities[spectrum_id] = intensity

        # Switch the flag.
        data_flag = True

        # Append the data for printing out.
        data.append([spin_id, repr(intensity)])

    # No data - clean up the partially created structures and fail.
    if not data_flag:
        # Delete all the data.
        delete(spectrum_id)

        # Raise the error.
        raise RelaxError("No data could be loaded from the peak list")

    # Print out.
    if verbose:
        print("\nThe following intensities have been loaded into the relax data store:\n")
        write_data(out=sys.stdout, headings=["Spin_ID", "Intensity"], data=data)
def replicated(spectrum_ids=None):
    """Set which spectra are replicates.

    @keyword spectrum_ids:  A list of spectrum ids corresponding to replicated spectra.
    @type spectrum_ids:     list of str
    """

    # Test if the current pipe exists.
    pipes.test()

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("No spectra have been loaded therefore replicates cannot be specified.")

    # Test for None.  This must come before iterating over the IDs - previously the loop below
    # ran first and a None argument raised a TypeError instead of this warning.
    if spectrum_ids == None:
        warn(RelaxWarning("The spectrum ID list cannot be None."))
        return

    # Test for more than one element.
    if len(spectrum_ids) == 1:
        warn(RelaxWarning("The number of spectrum IDs in the list %s must be greater than one." % spectrum_ids))
        return

    # Test the spectrum id strings.
    for spectrum_id in spectrum_ids:
        if spectrum_id not in cdp.spectrum_ids:
            raise RelaxError("The peak intensities corresponding to the spectrum id '%s' do not exist." % spectrum_id)

    # Initialise.
    if not hasattr(cdp, 'replicates'):
        cdp.replicates = []

    # Check if the spectrum IDs are already in the list.
    found = False
    for i in range(len(cdp.replicates)):
        # Loop over all elements of the first.
        for j in range(len(spectrum_ids)):
            if spectrum_ids[j] in cdp.replicates[i]:
                found = True

        # One of the spectrum IDs already have a replicate specified.
        if found:
            # Add the remaining replicates to the list and quit this function.
            for j in range(len(spectrum_ids)):
                if spectrum_ids[j] not in cdp.replicates[i]:
                    cdp.replicates[i].append(spectrum_ids[j])

            # Nothing more to do.
            return

    # A new set of replicates.
    cdp.replicates.append(spectrum_ids)
def replicated_flags():
    """Create and return a dictionary of flags of whether the spectrum is replicated or not.

    @return:    The dictionary of flags of whether the spectrum is replicated or not.
    @rtype:     dict of bool
    """

    # Start with every loaded spectrum ID flagged as non-replicated.
    flags = dict((spec_id, False) for spec_id in cdp.spectrum_ids)

    # Switch the flag on for every ID appearing in any replicate grouping.
    for replicate_set in cdp.replicates:
        for spec_id in replicate_set:
            flags[spec_id] = True

    # Return the dictionary.
    return flags
def replicated_ids(spectrum_id):
    """Create and return a list of spectra ID which are replicates of the given ID.

    @param spectrum_id: The spectrum ID to find all the replicates of.
    @type spectrum_id:  str
    @return:            The list of spectrum IDs which are replicates of spectrum_id.
    @rtype:             list of str
    """

    # Collect the partner IDs from every replicate grouping containing the given ID.
    partners = []
    for replicate_set in cdp.replicates:
        # Skip groupings which do not contain the ID of interest.
        if spectrum_id not in replicate_set:
            continue

        # Gather all the other IDs in this grouping.
        for other_id in replicate_set:
            if other_id != spectrum_id:
                partners.append(other_id)

    # Return the sorted, duplicate-free list (empty if no replicates were found).
    return sorted(set(partners))