1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 """Module containing functions for the handling of peak intensities."""
26
27
28
29 from math import sqrt
30 import sys
31 from warnings import warn
32
33
34 from lib.errors import RelaxError, RelaxImplementError, RelaxNoSpectraError
35 from lib.io import write_data
36 from lib.spectrum.peak_list import read_peak_list
37 from lib.statistics import std
38 from lib.warnings import RelaxWarning, RelaxNoSpinWarning
39 from pipe_control import pipes
40 from pipe_control.mol_res_spin import check_mol_res_spin_data, create_spin, generate_spin_id_unique, return_spin, spin_loop
41
42
44 """Calculate the errors for peak heights when no spectra are replicated."""
45
46
47 for spin, spin_id in spin_loop(return_id=True):
48
49 if not spin.select:
50 continue
51
52
53 if not hasattr(spin, 'intensities'):
54 continue
55
56
57 if not hasattr(spin, 'baseplane_rmsd'):
58 raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id)
59
60
61 spin.intensity_err = spin.baseplane_rmsd
62
63
65 """Calculate the errors for peak intensities from replicated spectra.
66
67 @keyword subset: The list of spectrum ID strings to restrict the analysis to.
68 @type subset: list of str
69 @keyword verbosity: The amount of information to print. The higher the value, the greater the verbosity.
70 @type verbosity: int
71 """
72
73
74 repl = replicated_flags()
75
76
77 if False in repl.values():
78 all_repl = False
79 print("All spectra replicated: No.")
80 else:
81 all_repl = True
82 print("All spectra replicated: Yes.")
83
84
85 if not hasattr(cdp, 'sigma_I'):
86 cdp.sigma_I = {}
87 if not hasattr(cdp, 'var_I'):
88 cdp.var_I = {}
89
90
91 subset_flag = False
92 if not subset:
93 subset_flag = True
94 subset = cdp.spectrum_ids
95
96
97 for id in subset:
98
99 if not repl[id]:
100 continue
101
102
103 if id in cdp.var_I and cdp.var_I[id] != 0.0:
104 continue
105
106
107 for j in range(len(cdp.replicates)):
108 if id in cdp.replicates[j]:
109 spectra = cdp.replicates[j]
110
111
112 num_spectra = len(spectra)
113
114
115 print("\nReplicated spectra: " + repr(spectra))
116 if verbosity:
117 print("%-20s%-20s" % ("Spin_ID", "SD"))
118
119
120 count = 0
121 for spin, spin_id in spin_loop(return_id=True):
122
123 if not spin.select:
124 continue
125
126
127 if not hasattr(spin, 'intensities'):
128 spin.select = False
129 continue
130
131
132 missing = False
133 for j in range(num_spectra):
134 if not spectra[j] in spin.intensities:
135 missing = True
136 if missing:
137 continue
138
139
140 values = []
141 for j in range(num_spectra):
142 values.append(spin.intensities[spectra[j]])
143
144
145 sd = std(values=values, dof=1)
146
147
148 if verbosity:
149 print("%-20s%-20s" % (spin_id, sd))
150
151
152 if not id in cdp.var_I:
153 cdp.var_I[id] = 0.0
154 cdp.var_I[id] = cdp.var_I[id] + sd**2
155 count = count + 1
156
157
158 if not count:
159 raise RelaxError("No data is present, unable to calculate errors from replicated spectra.")
160
161
162 cdp.var_I[id] = cdp.var_I[id] / float(count)
163
164
165 for j in range(num_spectra):
166 cdp.var_I[spectra[j]] = cdp.var_I[id]
167
168
169 print("Standard deviation: %s" % sqrt(cdp.var_I[id]))
170
171
172
173 if not all_repl:
174
175 if subset_flag:
176 print("\nVariance averaging over the spectra subset.")
177 else:
178 print("\nVariance averaging over all spectra.")
179
180
181 var_I = 0.0
182 num_dups = 0
183
184
185 for id in cdp.var_I.keys():
186
187 if cdp.var_I[id] == 0.0:
188 continue
189
190
191 var_I = var_I + cdp.var_I[id]
192 num_dups = num_dups + 1
193
194
195 var_I = var_I / float(num_dups)
196
197
198 for id in subset:
199 cdp.var_I[id] = var_I
200
201
202 print("Standard deviation for all spins: " + repr(sqrt(var_I)))
203
204
205 for id in cdp.var_I.keys():
206
207 cdp.sigma_I[id] = sqrt(cdp.var_I[id])
208
209
210 for spin in spin_loop():
211
212 if not spin.select:
213 continue
214
215
216 spin.intensity_err = cdp.sigma_I
217
218
220 """Calculate the errors for peak volumes when no spectra are replicated."""
221
222
223 for spin, spin_id in spin_loop(return_id=True):
224
225 if not spin.select:
226 continue
227
228
229 if not hasattr(spin, 'intensities'):
230 continue
231
232
233 if not hasattr(spin, 'baseplane_rmsd'):
234 raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id)
235
236
237 if not hasattr(spin, 'N'):
238 raise RelaxError("The total number of points used in the volume integration has not been specified for spin '%s'." % spin_id)
239
240
241 for key in spin.intensity.keys():
242 spin.intensity_err[key] = spin.baseplane_rmsd[key] * sqrt(spin.N)
243
244
246 """Add the spectrum ID to the data store.
247
248 @keyword spectrum_id: The spectrum ID string.
249 @type spectrum_id: str
250 """
251
252
253 if not hasattr(cdp, 'spectrum_ids'):
254 cdp.spectrum_ids = []
255
256
257 if spectrum_id in cdp.spectrum_ids:
258 return
259
260
261 cdp.spectrum_ids.append(spectrum_id)
262
263
265 """Set the peak intensity errors, as defined as the baseplane RMSD.
266
267 @param error: The peak intensity error value defined as the RMSD of the base plane
268 noise.
269 @type error: float
270 @keyword spectrum_id: The spectrum id.
271 @type spectrum_id: str
272 @param spin_id: The spin identification string.
273 @type spin_id: str
274 """
275
276
277 pipes.test()
278 check_mol_res_spin_data()
279 check_spectrum_id(spectrum_id)
280
281
282 if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
283 scale = 1.0 / 2**cdp.ncproc[spectrum_id]
284 else:
285 scale = 1.0
286
287
288 for spin in spin_loop(spin_id):
289
290 if not spin.select:
291 continue
292
293
294 if not hasattr(spin, 'baseplane_rmsd'):
295 spin.baseplane_rmsd = {}
296
297
298 spin.baseplane_rmsd[spectrum_id] = float(error) * scale
299
300
302 """Check that the give spectrum ID exists.
303
304 @param id: The spectrum ID to check for.
305 @type id: str
306 @raises RelaxNoSpectraError: If the ID does not exist.
307 """
308
309
310 if not hasattr(cdp, 'spectrum_ids'):
311 raise RelaxNoSpectraError(id)
312
313
314 if id not in cdp.spectrum_ids:
315 raise RelaxNoSpectraError(id)
316
317
319 """Delete spectral data corresponding to the spectrum ID.
320
321 @keyword spectrum_id: The spectrum ID string.
322 @type spectrum_id: str
323 """
324
325
326 pipes.test()
327 check_mol_res_spin_data()
328 check_spectrum_id(spectrum_id)
329
330
331 cdp.spectrum_ids.pop(cdp.spectrum_ids.index(spectrum_id))
332
333
334 if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
335 del cdp.ncproc[spectrum_id]
336
337
338 if hasattr(cdp, 'replicates'):
339
340 for i in range(len(cdp.replicates)):
341
342 if spectrum_id in cdp.replicates[i]:
343
344 if len(cdp.replicates[i]) == 2:
345 cdp.replicates.pop(i)
346
347
348 else:
349 cdp.replicates[i].pop(cdp.replicates[i].index(spectrum_id))
350
351
352 break
353
354
355 if hasattr(cdp, 'sigma_I') and spectrum_id in cdp.sigma_I:
356 del cdp.sigma_I[spectrum_id]
357 if hasattr(cdp, 'var_I') and spectrum_id in cdp.var_I:
358 del cdp.var_I[spectrum_id]
359
360
361 for spin in spin_loop():
362
363 if hasattr(spin, 'intensities') and spectrum_id in spin.intensities:
364 del spin.intensities[spectrum_id]
365
366
368 """Determine the peak intensity standard deviation.
369
370 @keyword subset: The list of spectrum ID strings to restrict the analysis to.
371 @type subset: list of str
372 """
373
374
375 pipes.test()
376 check_mol_res_spin_data()
377
378
379 if not hasattr(cdp, 'spectrum_ids'):
380 raise RelaxError("Error analysis is not possible, no spectra have been loaded.")
381
382
383 if subset:
384 for id in subset:
385 if id not in cdp.spectrum_ids:
386 raise RelaxError("The spectrum ID '%s' has not been loaded into relax." % id)
387
388
389 if cdp.int_method == 'height':
390
391 print("Intensity measure: Peak heights.")
392
393
394 if hasattr(cdp, 'replicates'):
395
396 print("Replicated spectra: Yes.")
397
398
399 __errors_repl(subset=subset)
400
401
402 else:
403
404 print("Replicated spectra: No.")
405 if subset:
406 print("Spectra ID subset ignored.")
407
408
409 __errors_height_no_repl()
410
411
412 if cdp.int_method == 'point sum':
413
414 print("Intensity measure: Peak volumes.")
415
416
417 if hasattr(cdp, 'replicates'):
418
419 print("Replicated spectra: Yes.")
420
421
422 __errors_repl(subset=subset)
423
424
425 else:
426
427 print("Replicated spectra: No.")
428
429
430 raise RelaxImplementError
431
432
433 __errors_vol_no_repl()
434
435
437 """Return a list of all spectrum IDs.
438
439 @return: The list of spectrum IDs.
440 @rtype: list of str
441 """
442
443
444 if not hasattr(cdp, 'spectrum_ids'):
445 return []
446
447
448 return cdp.spectrum_ids
449
450
452 """Set the number of integration points for the given spectrum.
453
454 @param N: The number of integration points.
455 @type N: int
456 @keyword spectrum_id: The spectrum ID string.
457 @type spectrum_id: str
458 @keyword spin_id: The spin ID string used to restrict the value to.
459 @type spin_id: None or str
460 """
461
462 raise RelaxImplementError
463
464
465 -def read(file=None, dir=None, spectrum_id=None, dim=1, int_col=None, int_method=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, ncproc=None, verbose=True):
466 """Read the peak intensity data.
467
468 @keyword file: The name of the file(s) containing the peak intensities.
469 @type file: str or list of str
470 @keyword dir: The directory where the file is located.
471 @type dir: str
472 @keyword spectrum_id: The spectrum identification string.
473 @type spectrum_id: str or list of str
474 @keyword dim: The dimension of the peak list to associate the data with.
475 @type dim: int
476 @keyword int_col: The column containing the peak intensity data (used by the generic intensity file format).
477 @type int_col: int or list of int
478 @keyword int_method: The integration method, one of 'height', 'point sum' or 'other'.
479 @type int_method: str
480 @keyword spin_id_col: The column containing the spin ID strings (used by the generic intensity file format). If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
481 @type spin_id_col: int or None
482 @keyword mol_name_col: The column containing the molecule name information (used by the generic intensity file format). If supplied, spin_id_col must be None.
483 @type mol_name_col: int or None
484 @keyword res_name_col: The column containing the residue name information (used by the generic intensity file format). If supplied, spin_id_col must be None.
485 @type res_name_col: int or None
486 @keyword res_num_col: The column containing the residue number information (used by the generic intensity file format). If supplied, spin_id_col must be None.
487 @type res_num_col: int or None
488 @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format). If supplied, spin_id_col must be None.
489 @type spin_name_col: int or None
490 @keyword spin_num_col: The column containing the spin number information (used by the generic intensity file format). If supplied, spin_id_col must be None.
491 @type spin_num_col: int or None
492 @keyword sep: The column separator which, if None, defaults to whitespace.
493 @type sep: str or None
494 @keyword spin_id: The spin ID string used to restrict data loading to a subset of all spins. If 'auto' is provided for a NMRPipe seriesTab formatted file, the ID's are auto generated in form of Z_Ai.
495 @type spin_id: None or str
496 @keyword ncproc: The Bruker ncproc binary intensity scaling factor.
497 @type ncproc: int or None
498 @keyword verbose: A flag which if True will cause all relaxation data loaded to be printed out.
499 @type verbose: bool
500 """
501
502
503 pipes.test()
504 check_mol_res_spin_data()
505
506
507 if file == None:
508 raise RelaxError("The file name must be supplied.")
509
510
511 if hasattr(cdp, 'int_method') and cdp.int_method != int_method:
512 raise RelaxError("The '%s' measure of peak intensities does not match '%s' of the previously loaded spectra." % (int_method, cdp.int_method))
513
514
515 flag_multi = False
516 flag_multi_file = False
517 flag_multi_col = False
518 if isinstance(spectrum_id, list) or spectrum_id == 'auto':
519 flag_multi = True
520 if isinstance(file, list):
521 flag_multi_file = True
522 if isinstance(int_col, list) or spectrum_id == 'auto':
523 flag_multi_col = True
524
525
526 if flag_multi:
527
528 if flag_multi_file and flag_multi_col:
529 raise RelaxError("If a list of spectrum IDs is supplied, the file names and intensity column arguments cannot both be lists.")
530
531
532 if not flag_multi_file and not flag_multi_col:
533 raise RelaxError("If a list of spectrum IDs is supplied, either the file name or intensity column arguments must be a list of equal length.")
534
535
536 if flag_multi_file and len(spectrum_id) != len(file):
537 raise RelaxError("The file list %s and spectrum ID list %s do not have the same number of elements." % (file, spectrum_id))
538
539
540 if flag_multi_col and spectrum_id != 'auto' and len(spectrum_id) != len(int_col):
541 raise RelaxError("The spectrum ID list %s and intensity column list %s do not have the same number of elements." % (spectrum_id, int_col))
542
543
544 else:
545
546 if flag_multi_file:
547 raise RelaxError("If multiple files are supplied, then multiple spectrum IDs must also be supplied.")
548
549
550 if flag_multi_col:
551 raise RelaxError("If multiple intensity columns are supplied, then multiple spectrum IDs must also be supplied.")
552
553
554 if spectrum_id != 'auto' and not flag_multi and flag_multi_col:
555 raise RelaxError("If a list of intensity columns is supplied, the spectrum ID argument must also be a list of equal length.")
556
557
558 if not int_method in ['height', 'point sum', 'other']:
559 raise RelaxError("The intensity measure '%s' is not one of 'height', 'point sum', 'other'." % int_method)
560
561
562 cdp.int_method = int_method
563
564
565 if not isinstance(file, list):
566 file = [file]
567
568
569 for file_index in range(len(file)):
570
571 peak_list = read_peak_list(file=file[file_index], dir=dir, int_col=int_col, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)
572
573
574 if spectrum_id == 'auto':
575 spectrum_id = peak_list[0].intensity_name
576
577
578 data = []
579 data_flag = False
580 for assign in peak_list:
581
582 spin_id = generate_spin_id_unique(res_num=assign.res_nums[dim-1], spin_name=assign.spin_names[dim-1])
583
584
585 intensity = assign.intensity
586 if not isinstance(intensity, list):
587 intensity = [intensity]
588
589
590 for int_index in range(len(intensity)):
591
592 if intensity[int_index] == 0.0:
593 warn(RelaxWarning("A peak intensity of zero has been encountered for the spin '%s' - this could be fatal later on." % spin_id))
594
595
596 spin = return_spin(spin_id)
597 if not spin:
598 warn(RelaxNoSpinWarning(spin_id))
599 continue
600
601
602 if not spin.select:
603 continue
604
605
606 if not hasattr(spin, 'intensities'):
607 spin.intensities = {}
608
609
610 if ncproc != None:
611 intensity[int_index] = intensity[int_index] / float(2**ncproc)
612
613
614 if flag_multi_file:
615 id = spectrum_id[file_index]
616 elif flag_multi_col:
617 id = spectrum_id[int_index]
618 else:
619 id = spectrum_id
620 spin.intensities[id] = intensity[int_index]
621
622
623 data_flag = True
624
625
626 data.append([spin_id, repr(intensity[int_index])])
627
628
629 spectrum_ids = spectrum_id
630 if isinstance(spectrum_id, str):
631 spectrum_ids = [spectrum_id]
632 if ncproc != None and not hasattr(cdp, 'ncproc'):
633 cdp.ncproc = {}
634 for i in range(len(spectrum_ids)):
635 add_spectrum_id(spectrum_ids[i])
636 if ncproc != None:
637 cdp.ncproc[spectrum_ids[i]] = ncproc
638
639
640 if not data_flag:
641
642 delete(spectrum_id)
643
644
645 raise RelaxError("No data could be loaded from the peak list")
646
647
648 if verbose:
649 print("\nThe following intensities have been loaded into the relax data store:\n")
650 write_data(out=sys.stdout, headings=["Spin_ID", "Intensity"], data=data)
651 print('')
652
653
654 -def read_spins(file=None, dir=None, dim=1, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, verbose=True):
655 """Read the peak intensity data.
656
657 @keyword file: The name of the file containing the peak intensities.
658 @type file: str
659 @keyword dir: The directory where the file is located.
660 @type dir: str
661 @keyword dim: The dimension of the peak list to associate the data with.
662 @type dim: int
663 @keyword spin_id_col: The column containing the spin ID strings (used by the generic intensity file format). If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
664 @type spin_id_col: int or None
665 @keyword mol_name_col: The column containing the molecule name information (used by the generic intensity file format). If supplied, spin_id_col must be None.
666 @type mol_name_col: int or None
667 @keyword res_name_col: The column containing the residue name information (used by the generic intensity file format). If supplied, spin_id_col must be None.
668 @type res_name_col: int or None
669 @keyword res_num_col: The column containing the residue number information (used by the generic intensity file format). If supplied, spin_id_col must be None.
670 @type res_num_col: int or None
671 @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format). If supplied, spin_id_col must be None.
672 @type spin_name_col: int or None
673 @keyword spin_num_col: The column containing the spin number information (used by the generic intensity file format). If supplied, spin_id_col must be None.
674 @type spin_num_col: int or None
675 @keyword sep: The column separator which, if None, defaults to whitespace.
676 @type sep: str or None
677 @keyword spin_id: The spin ID string used to restrict data loading to a subset of all spins. If 'auto' is provided for a NMRPipe seriesTab formatted file, the ID's are auto generated in form of Z_Ai.
678 @type spin_id: None or str
679 @keyword verbose: A flag which if True will cause all relaxation data loaded to be printed out.
680 @type verbose: bool
681 """
682
683
684 pipes.test()
685
686
687 if file == None:
688 raise RelaxError("The file name must be supplied.")
689
690
691 peak_list = read_peak_list(file=file, dir=dir, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)
692
693
694 created_spins = []
695 for assign in peak_list:
696 mol_name = assign.mol_names[dim-1]
697 res_num = assign.res_nums[dim-1]
698 res_name = assign.res_names[dim-1]
699 spin_num = assign.spin_nums[dim-1]
700 spin_name = assign.spin_names[dim-1]
701
702
703 spin_id = generate_spin_id_unique(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_name=spin_name)
704
705
706 if return_spin(spin_id=spin_id) == None:
707
708 create_spin(spin_num=spin_num, spin_name=spin_name, res_num=res_num, res_name=res_name, mol_name=mol_name)
709
710
711 check_mol_res_spin_data()
712
714 """Set which spectra are replicates.
715
716 @keyword spectrum_ids: A list of spectrum ids corresponding to replicated spectra.
717 @type spectrum_ids: list of str
718 """
719
720
721 pipes.test()
722
723
724 if spectrum_ids == None:
725 warn(RelaxWarning("The spectrum ID list cannot be None."))
726 return
727
728
729 if not hasattr(cdp, 'spectrum_ids'):
730 raise RelaxError("No spectra have been loaded therefore replicates cannot be specified.")
731
732
733 for spectrum_id in spectrum_ids:
734 check_spectrum_id(spectrum_id)
735
736
737 if len(spectrum_ids) == 1:
738 warn(RelaxWarning("The number of spectrum IDs in the list %s must be greater than one." % spectrum_ids))
739 return
740
741
742 if not hasattr(cdp, 'replicates'):
743 cdp.replicates = []
744
745
746 found = False
747 for i in range(len(cdp.replicates)):
748
749 for j in range(len(spectrum_ids)):
750 if spectrum_ids[j] in cdp.replicates[i]:
751 found = True
752
753
754 if found:
755
756 for j in range(len(spectrum_ids)):
757 if spectrum_ids[j] not in cdp.replicates[i]:
758 cdp.replicates[i].append(spectrum_ids[j])
759
760
761 return
762
763
764 cdp.replicates.append(spectrum_ids)
765
766
768 """Create and return a dictionary of flags of whether the spectrum is replicated or not.
769
770 @return: The dictionary of flags of whether the spectrum is replicated or not.
771 @rtype: dict of bool
772 """
773
774
775 repl = {}
776 for id in cdp.spectrum_ids:
777 repl[id] = False
778
779
780 for i in range(len(cdp.replicates)):
781 for j in range(len(cdp.replicates[i])):
782 repl[cdp.replicates[i][j]] = True
783
784
785 return repl
786
787
789 """Create and return a list of spectra ID which are replicates of the given ID.
790
791 @param spectrum_id: The spectrum ID to find all the replicates of.
792 @type spectrum_id: str
793 @return: The list of spectrum IDs which are replicates of spectrum_id.
794 @rtype: list of str
795 """
796
797
798 repl = []
799
800
801 for i in range(len(cdp.replicates)):
802
803 if spectrum_id in cdp.replicates[i]:
804
805 for j in range(len(cdp.replicates[i])):
806
807 if spectrum_id == cdp.replicates[i][j]:
808 continue
809
810
811 repl.append(cdp.replicates[i][j])
812
813
814 if repl == []:
815 return repl
816
817
818 repl.sort()
819
820
821 id = repl[-1]
822 for i in range(len(repl)-2, -1, -1):
823
824 if id == repl[i]:
825 del repl[i]
826
827
828 else:
829 id = repl[i]
830
831
832 return repl
833