1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 """Module containing functions for the handling of peak intensities."""
26
27
28
29 from math import sqrt
30 import sys
31 from warnings import warn
32
33
34 from lib.errors import RelaxError, RelaxImplementError, RelaxNoSpectraError
35 from lib.io import write_data
36 from lib.spectrum.peak_list import read_peak_list
37 from lib.statistics import std
38 from lib.warnings import RelaxWarning, RelaxNoSpinWarning
39 from pipe_control import pipes
40 from pipe_control.mol_res_spin import check_mol_res_spin_data, create_spin, generate_spin_id_unique, return_spin, spin_loop
41
42
44 """Calculate the errors for peak heights when no spectra are replicated."""
45
46
47 for spin, spin_id in spin_loop(return_id=True):
48
49 if not spin.select:
50 continue
51
52
53 if not hasattr(spin, 'peak_intensity'):
54 continue
55
56
57 if not hasattr(spin, 'baseplane_rmsd'):
58 raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id)
59
60
61 spin.peak_intensity_err = spin.baseplane_rmsd
62
63
65 """Calculate the errors for peak intensities from replicated spectra.
66
67 @keyword subset: The list of spectrum ID strings to restrict the analysis to.
68 @type subset: list of str
69 @keyword verbosity: The amount of information to print. The higher the value, the greater the verbosity.
70 @type verbosity: int
71 """
72
73
74 repl = replicated_flags()
75
76
77 if False in repl.values():
78 all_repl = False
79 print("All spectra replicated: No.")
80 else:
81 all_repl = True
82 print("All spectra replicated: Yes.")
83
84
85 if not hasattr(cdp, 'sigma_I'):
86 cdp.sigma_I = {}
87 if not hasattr(cdp, 'var_I'):
88 cdp.var_I = {}
89
90
91 subset_flag = False
92 if not subset:
93 subset_flag = True
94 subset = cdp.spectrum_ids
95
96
97 for id in subset:
98
99 if not repl[id]:
100 continue
101
102
103 if id in cdp.var_I and cdp.var_I[id] != 0.0:
104 continue
105
106
107 for j in range(len(cdp.replicates)):
108 if id in cdp.replicates[j]:
109 spectra = cdp.replicates[j]
110
111
112 num_spectra = len(spectra)
113
114
115 print("\nReplicated spectra: " + repr(spectra))
116 if verbosity:
117 print("%-20s%-20s" % ("Spin_ID", "SD"))
118
119
120 count = 0
121 for spin, spin_id in spin_loop(return_id=True):
122
123 if not spin.select:
124 continue
125
126
127 if not hasattr(spin, 'peak_intensity'):
128 spin.select = False
129 continue
130
131
132 missing = False
133 for j in range(num_spectra):
134 if not spectra[j] in spin.peak_intensity:
135 missing = True
136 if missing:
137 continue
138
139
140 values = []
141 for j in range(num_spectra):
142 values.append(spin.peak_intensity[spectra[j]])
143
144
145 sd = std(values=values, dof=1)
146
147
148 if verbosity:
149 print("%-20s%-20s" % (spin_id, sd))
150
151
152 if not id in cdp.var_I:
153 cdp.var_I[id] = 0.0
154 cdp.var_I[id] = cdp.var_I[id] + sd**2
155 count = count + 1
156
157
158 if not count:
159 raise RelaxError("No data is present, unable to calculate errors from replicated spectra.")
160
161
162 cdp.var_I[id] = cdp.var_I[id] / float(count)
163
164
165 for j in range(num_spectra):
166 cdp.var_I[spectra[j]] = cdp.var_I[id]
167
168
169 print("Standard deviation: %s" % sqrt(cdp.var_I[id]))
170
171
172
173 if not all_repl:
174
175 if subset_flag:
176 print("\nVariance averaging over the spectra subset.")
177 else:
178 print("\nVariance averaging over all spectra.")
179
180
181 var_I = 0.0
182 num_dups = 0
183
184
185 for id in cdp.var_I.keys():
186
187 if id not in subset:
188 continue
189
190
191 if cdp.var_I[id] == 0.0:
192 continue
193
194
195 var_I = var_I + cdp.var_I[id]
196 num_dups = num_dups + 1
197
198
199 var_I = var_I / float(num_dups)
200
201
202 for id in subset:
203 cdp.var_I[id] = var_I
204
205
206 print("Standard deviation for all spins: " + repr(sqrt(var_I)))
207
208
209 for id in cdp.var_I.keys():
210
211 cdp.sigma_I[id] = sqrt(cdp.var_I[id])
212
213
214 for spin in spin_loop():
215
216 if not spin.select:
217 continue
218
219
220 spin.peak_intensity_err = cdp.sigma_I
221
222
224 """Calculate the errors for peak volumes when no spectra are replicated."""
225
226
227 for spin, spin_id in spin_loop(return_id=True):
228
229 if not spin.select:
230 continue
231
232
233 if not hasattr(spin, 'peak_intensity'):
234 continue
235
236
237 if not hasattr(spin, 'baseplane_rmsd'):
238 raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id)
239
240
241 if not hasattr(spin, 'N'):
242 raise RelaxError("The total number of points used in the volume integration has not been specified for spin '%s'." % spin_id)
243
244
245 for key in spin.peak_intensity.keys():
246 spin.peak_intensity_err[key] = spin.baseplane_rmsd[key] * sqrt(spin.N)
247
248
250 """Add the spectrum ID to the data store.
251
252 @keyword spectrum_id: The spectrum ID string.
253 @type spectrum_id: str
254 """
255
256
257 if not hasattr(cdp, 'spectrum_ids'):
258 cdp.spectrum_ids = []
259
260
261 if spectrum_id in cdp.spectrum_ids:
262 return
263
264
265 cdp.spectrum_ids.append(spectrum_id)
266
267
269 """Set the peak intensity errors, as defined as the baseplane RMSD.
270
271 @param error: The peak intensity error value defined as the RMSD of the base plane
272 noise.
273 @type error: float
274 @keyword spectrum_id: The spectrum id.
275 @type spectrum_id: str
276 @param spin_id: The spin identification string.
277 @type spin_id: str
278 """
279
280
281 pipes.test()
282 check_mol_res_spin_data()
283 check_spectrum_id(spectrum_id)
284
285
286 if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
287 scale = 1.0 / 2**cdp.ncproc[spectrum_id]
288 else:
289 scale = 1.0
290
291
292 for spin in spin_loop(spin_id):
293
294 if not spin.select:
295 continue
296
297
298 if not hasattr(spin, 'baseplane_rmsd'):
299 spin.baseplane_rmsd = {}
300
301
302 spin.baseplane_rmsd[spectrum_id] = float(error) * scale
303
304
306 """Check that the give spectrum ID exists.
307
308 @param id: The spectrum ID to check for.
309 @type id: str
310 @raises RelaxNoSpectraError: If the ID does not exist.
311 """
312
313
314 if not hasattr(cdp, 'spectrum_ids'):
315 raise RelaxNoSpectraError(id)
316
317
318 if id not in cdp.spectrum_ids:
319 raise RelaxNoSpectraError(id)
320
321
323 """Delete spectral data corresponding to the spectrum ID.
324
325 @keyword spectrum_id: The spectrum ID string.
326 @type spectrum_id: str
327 """
328
329
330 pipes.test()
331 check_mol_res_spin_data()
332 check_spectrum_id(spectrum_id)
333
334
335 cdp.spectrum_ids.pop(cdp.spectrum_ids.index(spectrum_id))
336
337
338 if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
339 del cdp.ncproc[spectrum_id]
340
341
342 if hasattr(cdp, 'replicates'):
343
344 for i in range(len(cdp.replicates)):
345
346 if spectrum_id in cdp.replicates[i]:
347
348 if len(cdp.replicates[i]) == 2:
349 cdp.replicates.pop(i)
350
351
352 else:
353 cdp.replicates[i].pop(cdp.replicates[i].index(spectrum_id))
354
355
356 break
357
358
359 if hasattr(cdp, 'sigma_I') and spectrum_id in cdp.sigma_I:
360 del cdp.sigma_I[spectrum_id]
361 if hasattr(cdp, 'var_I') and spectrum_id in cdp.var_I:
362 del cdp.var_I[spectrum_id]
363
364
365 for spin in spin_loop():
366
367 if hasattr(spin, 'peak_intensity') and spectrum_id in spin.peak_intensity:
368 del spin.peak_intensity[spectrum_id]
369
370
372 """Determine the peak intensity standard deviation.
373
374 @keyword subset: The list of spectrum ID strings to restrict the analysis to.
375 @type subset: list of str
376 """
377
378
379 pipes.test()
380 check_mol_res_spin_data()
381
382
383 if not hasattr(cdp, 'spectrum_ids'):
384 raise RelaxError("Error analysis is not possible, no spectra have been loaded.")
385
386
387 if subset:
388 for id in subset:
389 if id not in cdp.spectrum_ids:
390 raise RelaxError("The spectrum ID '%s' has not been loaded into relax." % id)
391
392
393 if cdp.int_method == 'height':
394
395 print("Intensity measure: Peak heights.")
396
397
398 if hasattr(cdp, 'replicates'):
399
400 print("Replicated spectra: Yes.")
401
402
403 __errors_repl(subset=subset)
404
405
406 else:
407
408 print("Replicated spectra: No.")
409 if subset:
410 print("Spectra ID subset ignored.")
411
412
413 __errors_height_no_repl()
414
415
416 if cdp.int_method == 'point sum':
417
418 print("Intensity measure: Peak volumes.")
419
420
421 if hasattr(cdp, 'replicates'):
422
423 print("Replicated spectra: Yes.")
424
425
426 __errors_repl(subset=subset)
427
428
429 else:
430
431 print("Replicated spectra: No.")
432
433
434 raise RelaxImplementError
435
436
437 __errors_vol_no_repl()
438
439
441 """Return a list of all spectrum IDs.
442
443 @return: The list of spectrum IDs.
444 @rtype: list of str
445 """
446
447
448 if not hasattr(cdp, 'spectrum_ids'):
449 return []
450
451
452 return cdp.spectrum_ids
453
454
456 """Set the number of integration points for the given spectrum.
457
458 @param N: The number of integration points.
459 @type N: int
460 @keyword spectrum_id: The spectrum ID string.
461 @type spectrum_id: str
462 @keyword spin_id: The spin ID string used to restrict the value to.
463 @type spin_id: None or str
464 """
465
466 raise RelaxImplementError
467
468
469 -def read(file=None, dir=None, spectrum_id=None, dim=1, int_col=None, int_method=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, ncproc=None, verbose=True):
470 """Read the peak intensity data.
471
472 @keyword file: The name of the file(s) containing the peak intensities.
473 @type file: str or list of str
474 @keyword dir: The directory where the file is located.
475 @type dir: str
476 @keyword spectrum_id: The spectrum identification string.
477 @type spectrum_id: str or list of str
478 @keyword dim: The dimension of the peak list to associate the data with.
479 @type dim: int
480 @keyword int_col: The column containing the peak intensity data (used by the generic intensity file format).
481 @type int_col: int or list of int
482 @keyword int_method: The integration method, one of 'height', 'point sum' or 'other'.
483 @type int_method: str
484 @keyword spin_id_col: The column containing the spin ID strings (used by the generic intensity file format). If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
485 @type spin_id_col: int or None
486 @keyword mol_name_col: The column containing the molecule name information (used by the generic intensity file format). If supplied, spin_id_col must be None.
487 @type mol_name_col: int or None
488 @keyword res_name_col: The column containing the residue name information (used by the generic intensity file format). If supplied, spin_id_col must be None.
489 @type res_name_col: int or None
490 @keyword res_num_col: The column containing the residue number information (used by the generic intensity file format). If supplied, spin_id_col must be None.
491 @type res_num_col: int or None
492 @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format). If supplied, spin_id_col must be None.
493 @type spin_name_col: int or None
494 @keyword spin_num_col: The column containing the spin number information (used by the generic intensity file format). If supplied, spin_id_col must be None.
495 @type spin_num_col: int or None
496 @keyword sep: The column separator which, if None, defaults to whitespace.
497 @type sep: str or None
498 @keyword spin_id: The spin ID string used to restrict data loading to a subset of all spins. If 'auto' is provided for a NMRPipe seriesTab formatted file, the ID's are auto generated in form of Z_Ai.
499 @type spin_id: None or str
500 @keyword ncproc: The Bruker ncproc binary intensity scaling factor.
501 @type ncproc: int or None
502 @keyword verbose: A flag which if True will cause all relaxation data loaded to be printed out.
503 @type verbose: bool
504 """
505
506
507 pipes.test()
508 check_mol_res_spin_data()
509
510
511 if file == None:
512 raise RelaxError("The file name must be supplied.")
513
514
515 if hasattr(cdp, 'int_method') and cdp.int_method != int_method:
516 raise RelaxError("The '%s' measure of peak intensities does not match '%s' of the previously loaded spectra." % (int_method, cdp.int_method))
517
518
519 flag_multi = False
520 flag_multi_file = False
521 flag_multi_col = False
522 if isinstance(spectrum_id, list) or spectrum_id == 'auto':
523 flag_multi = True
524 if isinstance(file, list):
525 flag_multi_file = True
526 if isinstance(int_col, list) or spectrum_id == 'auto':
527 flag_multi_col = True
528
529
530 if flag_multi:
531
532 if flag_multi_file and flag_multi_col:
533 raise RelaxError("If a list of spectrum IDs is supplied, the file names and intensity column arguments cannot both be lists.")
534
535
536 if not flag_multi_file and not flag_multi_col:
537 raise RelaxError("If a list of spectrum IDs is supplied, either the file name or intensity column arguments must be a list of equal length.")
538
539
540 if flag_multi_file and len(spectrum_id) != len(file):
541 raise RelaxError("The file list %s and spectrum ID list %s do not have the same number of elements." % (file, spectrum_id))
542
543
544 if flag_multi_col and spectrum_id != 'auto' and len(spectrum_id) != len(int_col):
545 raise RelaxError("The spectrum ID list %s and intensity column list %s do not have the same number of elements." % (spectrum_id, int_col))
546
547
548 else:
549
550 if flag_multi_file:
551 raise RelaxError("If multiple files are supplied, then multiple spectrum IDs must also be supplied.")
552
553
554 if flag_multi_col:
555 raise RelaxError("If multiple intensity columns are supplied, then multiple spectrum IDs must also be supplied.")
556
557
558 if spectrum_id != 'auto' and not flag_multi and flag_multi_col:
559 raise RelaxError("If a list of intensity columns is supplied, the spectrum ID argument must also be a list of equal length.")
560
561
562 if not int_method in ['height', 'point sum', 'other']:
563 raise RelaxError("The intensity measure '%s' is not one of 'height', 'point sum', 'other'." % int_method)
564
565
566 cdp.int_method = int_method
567
568
569 if not isinstance(file, list):
570 file = [file]
571
572
573 for file_index in range(len(file)):
574
575 peak_list = read_peak_list(file=file[file_index], dir=dir, int_col=int_col, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)
576
577
578 if spectrum_id == 'auto':
579 spectrum_id = peak_list[0].intensity_name
580
581
582 data = []
583 data_flag = False
584 for assign in peak_list:
585
586 spin_id = generate_spin_id_unique(res_num=assign.res_nums[dim-1], spin_name=assign.spin_names[dim-1])
587
588
589 intensity = assign.intensity
590 if not isinstance(intensity, list):
591 intensity = [intensity]
592
593
594 for int_index in range(len(intensity)):
595
596 if intensity[int_index] == 0.0:
597 warn(RelaxWarning("A peak intensity of zero has been encountered for the spin '%s' - this could be fatal later on." % spin_id))
598
599
600 spin = return_spin(spin_id)
601 if not spin:
602 warn(RelaxNoSpinWarning(spin_id))
603 continue
604
605
606 if not spin.select:
607 continue
608
609
610 if not hasattr(spin, 'peak_intensity'):
611 spin.peak_intensity = {}
612
613
614 if ncproc != None:
615 intensity[int_index] = intensity[int_index] / float(2**ncproc)
616
617
618 if flag_multi_file:
619 id = spectrum_id[file_index]
620 elif flag_multi_col:
621 id = spectrum_id[int_index]
622 else:
623 id = spectrum_id
624 spin.peak_intensity[id] = intensity[int_index]
625
626
627 data_flag = True
628
629
630 data.append([spin_id, repr(intensity[int_index])])
631
632
633 spectrum_ids = spectrum_id
634 if isinstance(spectrum_id, str):
635 spectrum_ids = [spectrum_id]
636 if ncproc != None and not hasattr(cdp, 'ncproc'):
637 cdp.ncproc = {}
638 for i in range(len(spectrum_ids)):
639 add_spectrum_id(spectrum_ids[i])
640 if ncproc != None:
641 cdp.ncproc[spectrum_ids[i]] = ncproc
642
643
644 if not data_flag:
645
646 delete(spectrum_id)
647
648
649 raise RelaxError("No data could be loaded from the peak list")
650
651
652 if verbose:
653 print("\nThe following intensities have been loaded into the relax data store:\n")
654 write_data(out=sys.stdout, headings=["Spin_ID", "Intensity"], data=data)
655 print('')
656
657
658 -def read_spins(file=None, dir=None, dim=1, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, verbose=True):
659 """Read the peak intensity data.
660
661 @keyword file: The name of the file containing the peak intensities.
662 @type file: str
663 @keyword dir: The directory where the file is located.
664 @type dir: str
665 @keyword dim: The dimension of the peak list to associate the data with.
666 @type dim: int
667 @keyword spin_id_col: The column containing the spin ID strings (used by the generic intensity file format). If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
668 @type spin_id_col: int or None
669 @keyword mol_name_col: The column containing the molecule name information (used by the generic intensity file format). If supplied, spin_id_col must be None.
670 @type mol_name_col: int or None
671 @keyword res_name_col: The column containing the residue name information (used by the generic intensity file format). If supplied, spin_id_col must be None.
672 @type res_name_col: int or None
673 @keyword res_num_col: The column containing the residue number information (used by the generic intensity file format). If supplied, spin_id_col must be None.
674 @type res_num_col: int or None
675 @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format). If supplied, spin_id_col must be None.
676 @type spin_name_col: int or None
677 @keyword spin_num_col: The column containing the spin number information (used by the generic intensity file format). If supplied, spin_id_col must be None.
678 @type spin_num_col: int or None
679 @keyword sep: The column separator which, if None, defaults to whitespace.
680 @type sep: str or None
681 @keyword spin_id: The spin ID string used to restrict data loading to a subset of all spins. If 'auto' is provided for a NMRPipe seriesTab formatted file, the ID's are auto generated in form of Z_Ai.
682 @type spin_id: None or str
683 @keyword verbose: A flag which if True will cause all relaxation data loaded to be printed out.
684 @type verbose: bool
685 """
686
687
688 pipes.test()
689
690
691 if file == None:
692 raise RelaxError("The file name must be supplied.")
693
694
695 peak_list = read_peak_list(file=file, dir=dir, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)
696
697
698 created_spins = []
699 for assign in peak_list:
700 mol_name = assign.mol_names[dim-1]
701 res_num = assign.res_nums[dim-1]
702 res_name = assign.res_names[dim-1]
703 spin_num = assign.spin_nums[dim-1]
704 spin_name = assign.spin_names[dim-1]
705
706
707 spin_id = generate_spin_id_unique(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_name=spin_name)
708
709
710 if return_spin(spin_id=spin_id) == None:
711
712 create_spin(spin_num=spin_num, spin_name=spin_name, res_num=res_num, res_name=res_name, mol_name=mol_name)
713
714
715 check_mol_res_spin_data()
716
718 """Set which spectra are replicates.
719
720 @keyword spectrum_ids: A list of spectrum ids corresponding to replicated spectra.
721 @type spectrum_ids: list of str
722 """
723
724
725 pipes.test()
726
727
728 if spectrum_ids == None:
729 warn(RelaxWarning("The spectrum ID list cannot be None."))
730 return
731
732
733 if not hasattr(cdp, 'spectrum_ids'):
734 raise RelaxError("No spectra have been loaded therefore replicates cannot be specified.")
735
736
737 for spectrum_id in spectrum_ids:
738 check_spectrum_id(spectrum_id)
739
740
741 if len(spectrum_ids) == 1:
742 warn(RelaxWarning("The number of spectrum IDs in the list %s must be greater than one." % spectrum_ids))
743 return
744
745
746 if not hasattr(cdp, 'replicates'):
747 cdp.replicates = []
748
749
750 found = False
751 for i in range(len(cdp.replicates)):
752
753 for j in range(len(spectrum_ids)):
754 if spectrum_ids[j] in cdp.replicates[i]:
755 found = True
756
757
758 if found:
759
760 for j in range(len(spectrum_ids)):
761 if spectrum_ids[j] not in cdp.replicates[i]:
762 cdp.replicates[i].append(spectrum_ids[j])
763
764
765 return
766
767
768 cdp.replicates.append(spectrum_ids)
769
770
772 """Create and return a dictionary of flags of whether the spectrum is replicated or not.
773
774 @return: The dictionary of flags of whether the spectrum is replicated or not.
775 @rtype: dict of bool
776 """
777
778
779 repl = {}
780 for id in cdp.spectrum_ids:
781 repl[id] = False
782
783
784 for i in range(len(cdp.replicates)):
785 for j in range(len(cdp.replicates[i])):
786 repl[cdp.replicates[i][j]] = True
787
788
789 return repl
790
791
793 """Create and return a list of spectra ID which are replicates of the given ID.
794
795 @param spectrum_id: The spectrum ID to find all the replicates of.
796 @type spectrum_id: str
797 @return: The list of spectrum IDs which are replicates of spectrum_id.
798 @rtype: list of str
799 """
800
801
802 repl = []
803
804
805 for i in range(len(cdp.replicates)):
806
807 if spectrum_id in cdp.replicates[i]:
808
809 for j in range(len(cdp.replicates[i])):
810
811 if spectrum_id == cdp.replicates[i][j]:
812 continue
813
814
815 repl.append(cdp.replicates[i][j])
816
817
818 if repl == []:
819 return repl
820
821
822 repl.sort()
823
824
825 id = repl[-1]
826 for i in range(len(repl)-2, -1, -1):
827
828 if id == repl[i]:
829 del repl[i]
830
831
832 else:
833 id = repl[i]
834
835
836 return repl
837