1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 """Module containing functions for the handling of peak intensities."""
26
27
28
29 from math import sqrt
30 import sys
31 from warnings import warn
32
33
34 from lib.errors import RelaxError, RelaxImplementError, RelaxNoSpectraError
35 from lib.io import write_data
36 from lib.spectrum.peak_list import read_peak_list
37 from lib.statistics import std
38 from lib.warnings import RelaxWarning, RelaxNoSpinWarning
39 from pipe_control import pipes
40 from pipe_control.mol_res_spin import check_mol_res_spin_data, generate_spin_id_unique, return_spin, spin_loop
41
42
44 """Calculate the errors for peak heights when no spectra are replicated."""
45
46
47 for spin, spin_id in spin_loop(return_id=True):
48
49 if not spin.select:
50 continue
51
52
53 if not hasattr(spin, 'intensities'):
54 continue
55
56
57 if not hasattr(spin, 'baseplane_rmsd'):
58 raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id)
59
60
61 spin.intensity_err = spin.baseplane_rmsd
62
63
65 """Calculate the errors for peak intensities from replicated spectra.
66
67 @keyword subset: The list of spectrum ID strings to restrict the analysis to.
68 @type subset: list of str
69 @keyword verbosity: The amount of information to print. The higher the value, the greater the verbosity.
70 @type verbosity: int
71 """
72
73
74 repl = replicated_flags()
75
76
77 if False in repl.values():
78 all_repl = False
79 print("All spectra replicated: No.")
80 else:
81 all_repl = True
82 print("All spectra replicated: Yes.")
83
84
85 if not hasattr(cdp, 'sigma_I'):
86 cdp.sigma_I = {}
87 if not hasattr(cdp, 'var_I'):
88 cdp.var_I = {}
89
90
91 subset_flag = False
92 if not subset:
93 subset_flag = True
94 subset = cdp.spectrum_ids
95
96
97 for id in subset:
98
99 if not repl[id]:
100 continue
101
102
103 if id in cdp.var_I and cdp.var_I[id] != 0.0:
104 continue
105
106
107 for j in range(len(cdp.replicates)):
108 if id in cdp.replicates[j]:
109 spectra = cdp.replicates[j]
110
111
112 num_spectra = len(spectra)
113
114
115 print("\nReplicated spectra: " + repr(spectra))
116 if verbosity:
117 print("%-20s%-20s" % ("Spin_ID", "SD"))
118
119
120 count = 0
121 for spin, spin_id in spin_loop(return_id=True):
122
123 if not spin.select:
124 continue
125
126
127 if not hasattr(spin, 'intensities'):
128 spin.select = False
129 continue
130
131
132 missing = False
133 for j in range(num_spectra):
134 if not spectra[j] in spin.intensities:
135 missing = True
136 if missing:
137 continue
138
139
140 values = []
141 for j in range(num_spectra):
142 values.append(spin.intensities[spectra[j]])
143
144
145 sd = std(values=values, dof=1)
146
147
148 if verbosity:
149 print("%-20s%-20s" % (spin_id, sd))
150
151
152 if not id in cdp.var_I:
153 cdp.var_I[id] = 0.0
154 cdp.var_I[id] = cdp.var_I[id] + sd**2
155 count = count + 1
156
157
158 if not count:
159 raise RelaxError("No data is present, unable to calculate errors from replicated spectra.")
160
161
162 cdp.var_I[id] = cdp.var_I[id] / float(count)
163
164
165 for j in range(num_spectra):
166 cdp.var_I[spectra[j]] = cdp.var_I[id]
167
168
169 print("Standard deviation: %s" % sqrt(cdp.var_I[id]))
170
171
172
173 if not all_repl:
174
175 if subset_flag:
176 print("\nVariance averaging over the spectra subset.")
177 else:
178 print("\nVariance averaging over all spectra.")
179
180
181 var_I = 0.0
182 num_dups = 0
183
184
185 for id in cdp.var_I.keys():
186
187 if cdp.var_I[id] == 0.0:
188 continue
189
190
191 var_I = var_I + cdp.var_I[id]
192 num_dups = num_dups + 1
193
194
195 var_I = var_I / float(num_dups)
196
197
198 for id in subset:
199 cdp.var_I[id] = var_I
200
201
202 print("Standard deviation for all spins: " + repr(sqrt(var_I)))
203
204
205 for id in cdp.var_I.keys():
206
207 cdp.sigma_I[id] = sqrt(cdp.var_I[id])
208
209
210 for spin in spin_loop():
211
212 if not spin.select:
213 continue
214
215
216 spin.intensity_err = cdp.sigma_I
217
218
220 """Calculate the errors for peak volumes when no spectra are replicated."""
221
222
223 for spin, spin_id in spin_loop(return_id=True):
224
225 if not spin.select:
226 continue
227
228
229 if not hasattr(spin, 'intensities'):
230 continue
231
232
233 if not hasattr(spin, 'baseplane_rmsd'):
234 raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id)
235
236
237 if not hasattr(spin, 'N'):
238 raise RelaxError("The total number of points used in the volume integration has not been specified for spin '%s'." % spin_id)
239
240
241 for key in spin.intensity.keys():
242 spin.intensity_err[key] = spin.baseplane_rmsd[key] * sqrt(spin.N)
243
244
246 """Add the spectrum ID to the data store.
247
248 @keyword spectrum_id: The spectrum ID string.
249 @type spectrum_id: str
250 """
251
252
253 if not hasattr(cdp, 'spectrum_ids'):
254 cdp.spectrum_ids = []
255
256
257 if spectrum_id in cdp.spectrum_ids:
258 return
259
260
261 cdp.spectrum_ids.append(spectrum_id)
262
263
265 """Set the peak intensity errors, as defined as the baseplane RMSD.
266
267 @param error: The peak intensity error value defined as the RMSD of the base plane
268 noise.
269 @type error: float
270 @keyword spectrum_id: The spectrum id.
271 @type spectrum_id: str
272 @param spin_id: The spin identification string.
273 @type spin_id: str
274 """
275
276
277 pipes.test()
278 check_mol_res_spin_data()
279 check_spectrum_id(spectrum_id)
280
281
282 if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
283 scale = 1.0 / 2**cdp.ncproc[spectrum_id]
284 else:
285 scale = 1.0
286
287
288 for spin in spin_loop(spin_id):
289
290 if not spin.select:
291 continue
292
293
294 if not hasattr(spin, 'baseplane_rmsd'):
295 spin.baseplane_rmsd = {}
296
297
298 spin.baseplane_rmsd[spectrum_id] = float(error) * scale
299
300
302 """Check that the give spectrum ID exists.
303
304 @param id: The spectrum ID to check for.
305 @type id: str
306 @raises RelaxNoSpectraError: If the ID does not exist.
307 """
308
309
310 if not hasattr(cdp, 'spectrum_ids'):
311 raise RelaxNoSpectraError(id)
312
313
314 if id not in cdp.spectrum_ids:
315 raise RelaxNoSpectraError(id)
316
317
319 """Delete spectral data corresponding to the spectrum ID.
320
321 @keyword spectrum_id: The spectrum ID string.
322 @type spectrum_id: str
323 """
324
325
326 pipes.test()
327 check_mol_res_spin_data()
328 check_spectrum_id(spectrum_id)
329
330
331 cdp.spectrum_ids.pop(cdp.spectrum_ids.index(spectrum_id))
332
333
334 if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
335 del cdp.ncproc[spectrum_id]
336
337
338 if hasattr(cdp, 'replicates'):
339
340 for i in range(len(cdp.replicates)):
341
342 if spectrum_id in cdp.replicates[i]:
343
344 if len(cdp.replicates[i]) == 2:
345 cdp.replicates.pop(i)
346
347
348 else:
349 cdp.replicates[i].pop(cdp.replicates[i].index(spectrum_id))
350
351
352 break
353
354
355 if hasattr(cdp, 'sigma_I') and spectrum_id in cdp.sigma_I:
356 del cdp.sigma_I[spectrum_id]
357 if hasattr(cdp, 'var_I') and spectrum_id in cdp.var_I:
358 del cdp.var_I[spectrum_id]
359
360
361 for spin in spin_loop():
362
363 if hasattr(spin, 'intensities') and spectrum_id in spin.intensities:
364 del spin.intensities[spectrum_id]
365
366
368 """Determine the peak intensity standard deviation.
369
370 @keyword subset: The list of spectrum ID strings to restrict the analysis to.
371 @type subset: list of str
372 """
373
374
375 pipes.test()
376 check_mol_res_spin_data()
377
378
379 if not hasattr(cdp, 'spectrum_ids'):
380 raise RelaxError("Error analysis is not possible, no spectra have been loaded.")
381
382
383 if subset:
384 for id in subset:
385 if id not in cdp.spectrum_ids:
386 raise RelaxError("The spectrum ID '%s' has not been loaded into relax." % id)
387
388
389 if cdp.int_method == 'height':
390
391 print("Intensity measure: Peak heights.")
392
393
394 if hasattr(cdp, 'replicates'):
395
396 print("Replicated spectra: Yes.")
397
398
399 __errors_repl(subset=subset)
400
401
402 else:
403
404 print("Replicated spectra: No.")
405 if subset:
406 print("Spectra ID subset ignored.")
407
408
409 __errors_height_no_repl()
410
411
412 if cdp.int_method == 'point sum':
413
414 print("Intensity measure: Peak volumes.")
415
416
417 if hasattr(cdp, 'replicates'):
418
419 print("Replicated spectra: Yes.")
420
421
422 __errors_repl(subset=subset)
423
424
425 else:
426
427 print("Replicated spectra: No.")
428
429
430 raise RelaxImplementError
431
432
433 __errors_vol_no_repl()
434
435
437 """Return a list of all spectrum IDs.
438
439 @return: The list of spectrum IDs.
440 @rtype: list of str
441 """
442
443
444 if not hasattr(cdp, 'spectrum_ids'):
445 return []
446
447
448 return cdp.spectrum_ids
449
450
452 """Set the number of integration points for the given spectrum.
453
454 @param N: The number of integration points.
455 @type N: int
456 @keyword spectrum_id: The spectrum ID string.
457 @type spectrum_id: str
458 @keyword spin_id: The spin ID string used to restrict the value to.
459 @type spin_id: None or str
460 """
461
462 raise RelaxImplementError
463
464
def read(file=None, dir=None, spectrum_id=None, dim=1, int_col=None, int_method=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, ncproc=None, verbose=True):
    """Read the peak intensity data.

    @keyword file:          The name of the file containing the peak intensities.
    @type file:             str
    @keyword dir:           The directory where the file is located.
    @type dir:              str
    @keyword spectrum_id:   The spectrum identification string.
    @type spectrum_id:      str or list of str
    @keyword dim:           The dimension of the peak list to associate the data with.
    @type dim:              int
    @keyword int_col:       The column containing the peak intensity data (used by the generic intensity file format).
    @type int_col:          int or list of int
    @keyword int_method:    The integration method, one of 'height', 'point sum' or 'other'.
    @type int_method:       str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.  If 'auto' is provided for a NMRPipe seriesTab formatted file, the ID's are auto generated in form of Z_Ai.
    @type spin_id:          None or str
    @keyword ncproc:        The Bruker ncproc binary intensity scaling factor.
    @type ncproc:           int or None
    @keyword verbose:       A flag which if True will cause all relaxation data loaded to be printed out.
    @type verbose:          bool
    """

    # Checks: current data pipe and spin data.
    pipes.test()
    check_mol_res_spin_data()

    # The file name is mandatory.
    if file == None:
        raise RelaxError("The file name must be supplied.")

    # The intensity measure must match the previously loaded spectra.
    if hasattr(cdp, 'int_method') and cdp.int_method != int_method:
        raise RelaxError("The '%s' measure of peak intensities does not match '%s' of the previously loaded spectra." % (int_method, cdp.int_method))

    # Consistency checks for the spectrum ID and intensity column arguments.
    if isinstance(spectrum_id, list) and not isinstance(int_col, list):
        raise RelaxError("If a list of spectrum IDs is supplied, the intensity column argument must also be a list of equal length.")
    if spectrum_id != 'auto' and not isinstance(spectrum_id, list) and isinstance(int_col, list):
        raise RelaxError("If a list of intensity columns is supplied, the spectrum ID argument must also be a list of equal length.")
    if isinstance(spectrum_id, list) and len(spectrum_id) != len(int_col):
        raise RelaxError("The spectrum ID list %s has a different number of elements to the intensity column list %s." % (spectrum_id, int_col))

    # Check the intensity measure.
    if not int_method in ['height', 'point sum', 'other']:
        raise RelaxError("The intensity measure '%s' is not one of 'height', 'point sum', 'other'." % int_method)

    # Store the intensity measure.
    cdp.int_method = int_method

    # Parse the peak list file.
    peak_list = read_peak_list(file=file, dir=dir, int_col=int_col, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id)

    # Automatic spectrum IDs from the peak list.
    if spectrum_id == 'auto':
        spectrum_id = peak_list[0].intensity_name

    # Loop over the assignments, storing the intensities.
    data = []
    data_flag = False
    for assign in peak_list:
        # The spin identification string (this rebinds the spin_id argument, matching the
        # original behaviour).
        spin_id = generate_spin_id_unique(res_num=assign.res_nums[dim-1], spin_name=assign.spin_names[dim-1])

        # Normalise the intensity data and spectrum IDs to lists.
        intensity = assign.intensity
        if not isinstance(intensity, list):
            intensity = [intensity]
        if not isinstance(spectrum_id, list):
            spectrum_id = [spectrum_id]

        # The two lists must match in length.
        if len(spectrum_id) != len(intensity):
            raise RelaxError("The spectrum ID list %s has a different number of elements to the intensity column list %s." % (spectrum_id, len(intensity)))

        # Loop over the intensity data.
        for i in range(len(intensity)):
            # Sanity check - a zero intensity is suspicious.
            if intensity[i] == 0.0:
                warn(RelaxWarning("A peak intensity of zero has been encountered for the spin '%s' - this could be fatal later on." % spin_id))

            # Get the spin container.
            spin = return_spin(spin_id)
            if not spin:
                warn(RelaxNoSpinWarning(spin_id))
                continue

            # Skip deselected spins.
            if not spin.select:
                continue

            # Initialise the intensity structure.
            if not hasattr(spin, 'intensities'):
                spin.intensities = {}

            # Scale by the Bruker ncproc factor.
            if ncproc != None:
                intensity[i] = intensity[i] / float(2**ncproc)

            # Store the intensity.
            spin.intensities[spectrum_id[i]] = intensity[i]

            # Data has been loaded.
            data_flag = True

            # Collect the data for the final print out.
            data.append([spin_id, repr(intensity[i])])

    # Register the spectrum IDs and the ncproc factors in the data store.
    spectrum_ids = spectrum_id
    if isinstance(spectrum_id, str):
        spectrum_ids = [spectrum_id]
    if ncproc != None and not hasattr(cdp, 'ncproc'):
        cdp.ncproc = {}
    for i in range(len(spectrum_ids)):
        add_spectrum_id(spectrum_ids[i])
        if ncproc != None:
            cdp.ncproc[spectrum_ids[i]] = ncproc

    # No data could be loaded.
    if not data_flag:
        # Remove the IDs again.  Bug fix: spectrum_id may have been converted to a list in the
        # loop above, so delete each ID of the normalised list individually rather than passing
        # the raw spectrum_id object to delete().
        for id in spectrum_ids:
            delete(id)

        # Fail.
        raise RelaxError("No data could be loaded from the peak list")

    # Print out.
    if verbose:
        print("\nThe following intensities have been loaded into the relax data store:\n")
        write_data(out=sys.stdout, headings=["Spin_ID", "Intensity"], data=data)
610
611
612
614 """Set which spectra are replicates.
615
616 @keyword spectrum_ids: A list of spectrum ids corresponding to replicated spectra.
617 @type spectrum_ids: list of str
618 """
619
620
621 pipes.test()
622
623
624 if spectrum_ids == None:
625 warn(RelaxWarning("The spectrum ID list cannot be None."))
626 return
627
628
629 if not hasattr(cdp, 'spectrum_ids'):
630 raise RelaxError("No spectra have been loaded therefore replicates cannot be specified.")
631
632
633 for spectrum_id in spectrum_ids:
634 check_spectrum_id(spectrum_id)
635
636
637 if len(spectrum_ids) == 1:
638 warn(RelaxWarning("The number of spectrum IDs in the list %s must be greater than one." % spectrum_ids))
639 return
640
641
642 if not hasattr(cdp, 'replicates'):
643 cdp.replicates = []
644
645
646 found = False
647 for i in range(len(cdp.replicates)):
648
649 for j in range(len(spectrum_ids)):
650 if spectrum_ids[j] in cdp.replicates[i]:
651 found = True
652
653
654 if found:
655
656 for j in range(len(spectrum_ids)):
657 if spectrum_ids[j] not in cdp.replicates[i]:
658 cdp.replicates[i].append(spectrum_ids[j])
659
660
661 return
662
663
664 cdp.replicates.append(spectrum_ids)
665
666
668 """Create and return a dictionary of flags of whether the spectrum is replicated or not.
669
670 @return: The dictionary of flags of whether the spectrum is replicated or not.
671 @rtype: dict of bool
672 """
673
674
675 repl = {}
676 for id in cdp.spectrum_ids:
677 repl[id] = False
678
679
680 for i in range(len(cdp.replicates)):
681 for j in range(len(cdp.replicates[i])):
682 repl[cdp.replicates[i][j]] = True
683
684
685 return repl
686
687
689 """Create and return a list of spectra ID which are replicates of the given ID.
690
691 @param spectrum_id: The spectrum ID to find all the replicates of.
692 @type spectrum_id: str
693 @return: The list of spectrum IDs which are replicates of spectrum_id.
694 @rtype: list of str
695 """
696
697
698 repl = []
699
700
701 for i in range(len(cdp.replicates)):
702
703 if spectrum_id in cdp.replicates[i]:
704
705 for j in range(len(cdp.replicates[i])):
706
707 if spectrum_id == cdp.replicates[i][j]:
708 continue
709
710
711 repl.append(cdp.replicates[i][j])
712
713
714 if repl == []:
715 return repl
716
717
718 repl.sort()
719
720
721 id = repl[-1]
722 for i in range(len(repl)-2, -1, -1):
723
724 if id == repl[i]:
725 del repl[i]
726
727
728 else:
729 id = repl[i]
730
731
732 return repl
733