1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 """Module containing functions for the handling of peak intensities."""
26
27
28
29 from math import sqrt
30 from re import split
31 import string
32 from warnings import warn
33
34
35 from generic_fns.mol_res_spin import exists_mol_res_spin_data, generate_spin_id, generate_spin_id_data_array, return_spin, spin_loop
36 from generic_fns import pipes
37 from relax_errors import RelaxArgNotNoneError, RelaxError, RelaxImplementError, RelaxNoSequenceError, RelaxNoSpectraError
38 from relax_io import extract_data, read_spin_data, strip
39 from relax_warnings import RelaxWarning, RelaxNoSpinWarning
40
41
42
44 - def __init__(self, dir=None, exp_type=None, file=None, UI='prompt', output_file=None):
45 """Function to import Bruker Protein Dynamic Center (PDC) files.
46
47 @param dir: The directory to save the new file in.
48 @type dir: str
49 @param file: The Bruker PDC output file.
50 @type file: str
51 @param exp_type: The type of experiment, e.g. NOE, T1 or T2
52 @type exp_type: str
53 @param UI: The relax user interface (either 'prompt' or 'GUI').
54 @type UI: str
55 @param output_file: The file to save the imported list.
56 @type output_file: str
57 """
58
59
60 self.exp_type = exp_type
61
62
63 self.output_file = output_file
64 self.dir = dir
65
66
67 self.UI = UI
68
69
70 if file:
71 self.read_file(file)
72 else:
73 raise RelaxError('No file selected.')
74
75
76 self.collect_entries()
77
78
79 self.create_file()
80
81
83 """Function to allow to return a value."""
84 return str(self.value)
85
86
88 """Function to collect entries for the NOE/R1/R2 relax dummy file."""
89
90
91
92 self.file_entries = []
93
94
95 is_data = False
96
97
98 spinname = 'N'
99
100
101 for line in range(0, len(self.entries)):
102
103 if 'Project:' in self.entries[line][0]:
104 exp_type = ''
105
106
107 if 'Dynamic method/Hetero NOE' in self.entries[line][1]:
108 exp_type = 'NOE'
109
110 elif 'Dynamic method/T1' in self.entries[line][1]:
111 exp_type = 'T1'
112
113 elif 'Dynamic method/T2' in self.entries[line][1]:
114 exp_type = 'T2'
115
116
117 if self.exp_type:
118 if not self.exp_type == exp_type:
119 raise RelaxError('Selected file is not a '+self.exp_type+'-file.')
120 return
121
122
123 print("Reading BRUKER PDC "+exp_type+" file.\n")
124
125
126 if 'SECTION:' in self.entries[line][0]:
127
128 if 'results' in self.entries[line][1]:
129 is_data = True
130 continue
131
132
133 else:
134 is_data = False
135
136
137 if 'Labelling:' in self.entries[line][0]:
138
139 if 'N' in self.entries[line][0]:
140 spinname = 'N'
141
142
143 if 'C' in self.entries[line][0]:
144 spinname = 'C'
145
146
147 if is_data:
148
149 if self.entries[line][0] in ['Peak name', '']:
150 continue
151
152
153 label_tmp = self.entries[line][0]
154 label_tmp = label_tmp.replace(' ', '')
155 label_tmp = label_tmp.replace('[', '')
156 label_tmp = label_tmp.replace(']', '')
157
158
159 resnum = label_tmp
160
161 start = 0
162 while resnum[start].isdigit()==False: start = start+1
163
164
165 end = start
166 try:
167 while resnum[end].isdigit()==True: end = end+1
168
169 except:
170 end = end
171
172
173 resnum = resnum[start:end]
174
175
176 resname = label_tmp[0:start]
177
178
179 spin_no = line
180
181
182 value_tmp = float(self.entries[line][3])
183
184
185 if exp_type in ['T1', 'T2']:
186 value_tmp = 1.0/value_tmp
187
188
189 error = float(self.entries[line][4])
190
191
192 self.file_entries.append(['Bruker_PDC_'+exp_type, resnum, resname, spin_no, spinname, value_tmp, error])
193
194
196 """Function to write the file."""
197
198
199 text = 'Mol_name\tRes_num\tRes_name\tSpin_num\tSpin_name\tValue\tError \n'
200
201 for line in range(0, len(self.file_entries)):
202
203 tmp_text = ''
204 for i in range(0, len(self.file_entries[line])):
205 tmp_text = tmp_text + str(self.file_entries[line][i])+'\t'
206
207
208 text = text+tmp_text + '\n'
209
210
211 if not self.UI == 'GUI':
212 print(text)
213
214
215 if self.output_file:
216 if self.dir:
217 file = open(self.dir+sep+self.output_file, 'w')
218 else:
219 file = open(self.output_file, 'w')
220
221 else:
222 file = DummyFileObject()
223
224
225 file.write(text)
226 file.close()
227
228
229 if self.output_file:
230 if self.dir:
231 print('Created BRUKER PDC file '+self.dir+sep+self.output_file)
232
233 self.value = self.dir+sep+self.output_file
234 else:
235 print('Created BRUKER PDC file '+self.output_file)
236
237 self.value = self.output_file
238 else:
239 print('Created BRUKER PDC file.')
240
241 self.value = file
242
243
245 """Function to read the file."""
246
247
248 file = open(filename, 'r')
249
250
251 self.entries = []
252
253
254 for line in file:
255
256 entry = line
257 entry = line.strip()
258 entry = entry.split('\t')
259
260
261 self.entries.append(entry)
262
263
264 file.close()
265
266
268 """Calculate the errors for peak heights when no spectra are replicated."""
269
270
271 for spin, spin_id in spin_loop(return_id=True):
272
273 if not spin.select:
274 continue
275
276
277 if not hasattr(spin, 'intensities'):
278 continue
279
280
281 if not hasattr(spin, 'baseplane_rmsd'):
282 raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id)
283
284
285 spin.intensity_err = spin.baseplane_rmsd
286
287
289 """Calculate the errors for peak intensities from replicated spectra.
290
291 @keyword verbosity: The amount of information to print. The higher the value, the greater the verbosity.
292 @type verbosity: int
293 """
294
295
296 repl = replicated_flags()
297
298
299 if False in repl.values():
300 all_repl = False
301 print("All spectra replicated: No.")
302 else:
303 all_repl = True
304 print("All spectra replicated: Yes.")
305
306
307 cdp.sigma_I = {}
308 cdp.var_I = {}
309
310
311 for id in cdp.spectrum_ids:
312
313 if not repl[id]:
314 continue
315
316
317 if cdp.var_I.has_key(id) and cdp.var_I[id] != 0.0:
318 continue
319
320
321 for j in xrange(len(cdp.replicates)):
322 if id in cdp.replicates[j]:
323 spectra = cdp.replicates[j]
324
325
326 num_spectra = len(spectra)
327
328
329 print(("\nReplicated spectra: " + repr(spectra)))
330 if verbosity:
331 print(("%-5s%-6s%-20s%-20s" % ("Num", "Name", "Average", "SD")))
332
333
334 count = 0
335 for spin in spin_loop():
336
337 if not spin.select:
338 continue
339
340
341 if not hasattr(spin, 'intensities'):
342 spin.select = False
343 continue
344
345
346 missing = False
347 for j in xrange(num_spectra):
348 if not spin.intensities.has_key(spectra[j]):
349 missing = True
350 if missing:
351 continue
352
353
354 ave_intensity = 0.0
355 for j in xrange(num_spectra):
356 ave_intensity = ave_intensity + spin.intensities[spectra[j]]
357 ave_intensity = ave_intensity / num_spectra
358
359
360 SSE = 0.0
361 for j in xrange(num_spectra):
362 SSE = SSE + (spin.intensities[spectra[j]] - ave_intensity) ** 2
363
364
365
366
367
368
369
370 var_I = 1.0 / (num_spectra - 1.0) * SSE
371
372
373 if verbosity:
374 print(("%-5i%-6s%-20s%-20s" % (spin.num, spin.name, repr(ave_intensity), repr(var_I))))
375
376
377 if not cdp.var_I.has_key(id):
378 cdp.var_I[id] = 0.0
379 cdp.var_I[id] = cdp.var_I[id] + var_I
380 count = count + 1
381
382
383 if not count:
384 raise RelaxError("No data is present, unable to calculate errors from replicated spectra.")
385
386
387 cdp.var_I[id] = cdp.var_I[id] / float(count)
388
389
390 for j in range(num_spectra):
391 cdp.var_I[spectra[j]] = cdp.var_I[id]
392
393
394 print(("Standard deviation: %s" % sqrt(cdp.var_I[id])))
395
396
397
398 if not all_repl:
399
400 print("\nVariance averaging over all spectra.")
401
402
403 var_I = 0.0
404 num_dups = 0
405
406
407 for id in cdp.var_I.keys():
408
409 if cdp.var_I[id] == 0.0:
410 continue
411
412
413 var_I = var_I + cdp.var_I[id]
414 num_dups = num_dups + 1
415
416
417 var_I = var_I / float(num_dups)
418
419
420 for id in cdp.spectrum_ids:
421 cdp.var_I[id] = var_I
422
423
424 print(("Standard deviation for all spins: " + repr(sqrt(var_I))))
425
426
427 for id in cdp.var_I.keys():
428
429 cdp.sigma_I[id] = sqrt(cdp.var_I[id])
430
431
432 for spin in spin_loop():
433
434 if not spin.select:
435 continue
436
437
438 spin.intensity_err = cdp.sigma_I
439
440
442 """Calculate the errors for peak volumes when no spectra are replicated."""
443
444
445 for spin, spin_id in spin_loop(return_id=True):
446
447 if not spin.select:
448 continue
449
450
451 if not hasattr(spin, 'intensities'):
452 continue
453
454
455 if not hasattr(spin, 'baseplane_rmsd'):
456 raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id)
457
458
459 if not hasattr(spin, 'N'):
460 raise RelaxError("The total number of points used in the volume integration has not been specified for spin '%s'." % spin_id)
461
462
463 for key in spin.intensity.keys():
464 spin.intensity_err[key] = spin.baseplane_rmsd[key] * sqrt(spin.N)
465
466
493
494
496 """Set the peak intensity errors, as defined as the baseplane RMSD.
497
498 @param error: The peak intensity error value defined as the RMSD of the base plane
499 noise.
500 @type error: float
501 @keyword spectrum_id: The spectrum id.
502 @type spectrum_id: str
503 @param spin_id: The spin identification string.
504 @type spin_id: str
505 """
506
507
508 pipes.test()
509
510
511 if not exists_mol_res_spin_data():
512 raise RelaxNoSequenceError
513
514
515 if spectrum_id not in cdp.spectrum_ids:
516 raise RelaxError("The peak intensities corresponding to the spectrum id '%s' do not exist." % spectrum_id)
517
518
519 if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
520 scale = 1.0 / 2**cdp.ncproc[spectrum_id]
521 else:
522 scale = 1.0
523
524
525 for spin in spin_loop(spin_id):
526
527 if not spin.select:
528 continue
529
530
531 if not hasattr(spin, 'baseplane_rmsd'):
532 spin.baseplane_rmsd = {}
533
534
535 spin.baseplane_rmsd[spectrum_id] = float(error) * scale
536
537
539 """Delete spectral data corresponding to the spectrum ID.
540
541 @keyword spectrum_id: The spectrum ID string.
542 @type spectrum_id: str
543 """
544
545
546 pipes.test()
547
548
549 if not exists_mol_res_spin_data():
550 raise RelaxNoSequenceError
551
552
553 if not hasattr(cdp, 'spectrum_ids') or spectrum_id not in cdp.spectrum_ids:
554 raise RelaxNoSpectraError(spectrum_id)
555
556
557 cdp.spectrum_ids.pop(cdp.spectrum_ids.index(spectrum_id))
558
559
560 if hasattr(cdp, 'ncproc') and cdp.ncproc.has_key(spectrum_id):
561 del cdp.ncproc[spectrum_id]
562
563
564 if hasattr(cdp, 'replicates'):
565
566 for i in range(len(cdp.replicates)):
567
568 if spectrum_id in cdp.replicates[i]:
569
570 if len(cdp.replicates[i]) == 2:
571 cdp.replicates.pop(i)
572
573
574 else:
575 cdp.replicates[i].pop(cdp.replicates[i].index(spectrum_id))
576
577
578 break
579
580
581 if hasattr(cdp, 'sigma_I') and cdp.sigma_I.has_key(spectrum_id):
582 del cdp.sigma_I[spectrum_id]
583 if hasattr(cdp, 'var_I') and cdp.var_I.has_key(spectrum_id):
584 del cdp.var_I[spectrum_id]
585
586
587 for spin in spin_loop():
588
589 if hasattr(spin, 'intensities') and spin.intensities.has_key(spectrum_id):
590 del spin.intensities[spectrum_id]
591
592
594 """Determine the peak intensity standard deviation."""
595
596
597 pipes.test()
598
599
600 if not exists_mol_res_spin_data():
601 raise RelaxNoSequenceError
602
603
604 if not hasattr(cdp, 'spectrum_ids'):
605 raise RelaxError("Error analysis is not possible, no spectra have been loaded.")
606
607
608 if cdp.int_method == 'height':
609
610 print("Intensity measure: Peak heights.")
611
612
613 if hasattr(cdp, 'replicates'):
614
615 print("Replicated spectra: Yes.")
616
617
618 __errors_repl()
619
620
621 else:
622
623 print("Replicated spectra: No.")
624
625
626 __errors_height_no_repl()
627
628
629 if cdp.int_method == 'point sum':
630
631 print("Intensity measure: Peak volumes.")
632
633
634 if hasattr(cdp, 'replicates'):
635
636 print("Replicated spectra: Yes.")
637
638
639 __errors_repl()
640
641
642 else:
643
644 print("Replicated spectra: No.")
645
646
647 raise RelaxImplementError
648
649
650 __errors_vol_no_repl()
651
652
654 """Return a list of all spectrum IDs.
655
656 @return: The list of spectrum IDs.
657 @rtype: list of str
658 """
659
660
661 if not hasattr(cdp, 'spectrum_ids'):
662 return []
663
664
665 return cdp.spectrum_ids
666
667
669 """Set the number of integration points for the given spectrum.
670
671 @param N: The number of integration points.
672 @type N: int
673 @keyword spectrum_id: The spectrum ID string.
674 @type spectrum_id: str
675 @keyword spin_id: The spin ID string used to restrict the value to.
676 @type spin_id: None or str
677 """
678
679 raise RelaxImplementError
680
681
def intensity_generic(file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, sep=None, spin_id=None):
    """Return the process data from the generic peak intensity file.

    The residue number, heteronucleus and proton names, and peak intensity will be returned.


    @keyword file_data:     The data extracted from the file converted into a list of lists.
    @type file_data:        list of lists of str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword data_col:      The column containing the peak intensities.
    @type data_col:         int
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.
    @type spin_id:          None or str
    @raises RelaxError:     When the expected peak intensity is not a float.
    @return:                The extracted data as a list of lists.  The first dimension corresponds to the spin.  The second dimension consists of the proton name, heteronucleus name, spin ID string, and the intensity value.
    @rtype:                 list of lists of str, str, str, float
    """

    # Remove comments and blank lines from the raw file data.
    file_data = strip(file_data)

    # Iterate over the parsed spin data, assembling one entry per spin.
    data = []
    reader = read_spin_data(file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, sep=sep, spin_id=spin_id)
    for mol_name, res_num, res_name, spin_num, spin_name, value in reader:
        # Rebuild the spin ID string from the parsed identification data.
        ident = generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name)

        # The generic format carries no proton/heteronucleus names, so store None placeholders; the spin ID also stands in for the 'original line' element.
        data.append([None, None, ident, value, ident])

    # Return the assembled data.
    return data
734
735
737 """Return the process data from the NMRView peak intensity file.
738
739 The residue number, heteronucleus and proton names, and peak intensity will be returned.
740
741
742 @keyword file_data: The data extracted from the file converted into a list of lists.
743 @type file_data: list of lists of str
744 @keyword int_col: The column containing the peak intensity data. The default is 16 for intensities. Setting the int_col argument to 15 will use the volumes (or evolumes). For a non-standard formatted file, use a different value.
745 @type int_col: int
746 @raises RelaxError: When the expected peak intensity is not a float.
747 @return: The extracted data as a list of lists. The first dimension corresponds to the spin. The second dimension consists of the proton name, heteronucleus name, spin ID string, the intensity value, and the original line of text
748 @rtype: list of lists of str, str, str, float, str
749 """
750
751
752 num = 6
753 print(("Number of header lines: " + repr(num)))
754
755
756 file_data = file_data[num:]
757
758
759 file_data = strip(file_data)
760
761
762 if int_col == None:
763 int_col = 16
764 if int_col == 16:
765 print('Using peak heights.')
766 if int_col == 15:
767 print('Using peak volumes (or evolumes).')
768
769
770 data = []
771 for line in file_data:
772
773 if line[1] == '{}':
774 warn(RelaxWarning("The assignment '%s' is unknown, skipping this peak." % line[1]))
775 continue
776
777
778 res_num = ''
779 try:
780 res_num = string.strip(line[1], '{')
781 res_num = string.strip(res_num, '}')
782 res_num = string.split(res_num, '.')
783 res_num = res_num[0]
784 except ValueError:
785 raise RelaxError("The peak list is invalid.")
786
787
788 x_name = ''
789 if line[8]!='{}':
790 x_name = string.strip(line[8], '{')
791 x_name = string.strip(x_name, '}')
792 x_name = string.split(x_name, '.')
793 x_name = x_name[1]
794 h_name = ''
795 if line[1]!='{}':
796 h_name = string.strip(line[1], '{')
797 h_name = string.strip(h_name, '}')
798 h_name = string.split(h_name, '.')
799 h_name = h_name[1]
800
801
802 try:
803 intensity = float(line[int_col])
804 except ValueError:
805 raise RelaxError("The peak intensity value " + repr(intensity) + " from the line " + repr(line) + " is invalid.")
806
807
808 spin_id = generate_spin_id(res_num=res_num, spin_name=x_name)
809
810
811 data.append([h_name, x_name, spin_id, intensity, line])
812
813
814 return data
815
816
818 """Return the process data from the Sparky peak intensity file.
819
820 The residue number, heteronucleus and proton names, and peak intensity will be returned.
821
822
823 @keyword file_data: The data extracted from the file converted into a list of lists.
824 @type file_data: list of lists of str
825 @keyword int_col: The column containing the peak intensity data (for a non-standard formatted file).
826 @type int_col: int
827 @raises RelaxError: When the expected peak intensity is not a float.
828 @return: The extracted data as a list of lists. The first dimension corresponds to the spin. The second dimension consists of the proton name, heteronucleus name, spin ID string, the intensity value, and the original line of text.
829 @rtype: list of lists of str, str, str, float, str
830 """
831
832
833 num = 0
834 if file_data[0][0] == 'Assignment':
835 num = num + 1
836 if file_data[1] == '':
837 num = num + 1
838 print("Number of header lines found: %s" % num)
839
840
841 file_data = file_data[num:]
842
843
844 file_data = strip(file_data)
845
846
847 data = []
848 for line in file_data:
849
850 assignment = ''
851 res_num = ''
852 h_name = ''
853 x_name = ''
854 intensity = ''
855
856
857 if line[0] == '?-?':
858 continue
859
860
861 x_assign, h_assign = split('-', line[0])
862
863
864 h_name = split('([A-Z]+)', h_assign)[-2]
865
866
867 x_row = split('([A-Z]+)', x_assign)
868 x_name = x_row[-2]
869
870
871 try:
872 res_num = int(x_row[-3])
873 except:
874 raise RelaxError("Improperly formatted Sparky file.")
875
876
877 if int_col == None:
878 int_col = 3
879
880
881 try:
882 intensity = float(line[int_col])
883 except ValueError:
884 raise RelaxError("The peak intensity value " + repr(intensity) + " from the line " + repr(line) + " is invalid.")
885
886
887 spin_id = generate_spin_id(res_num=res_num, spin_name=x_name)
888
889
890 data.append([h_name, x_name, spin_id, intensity, line])
891
892
893 return data
894
895
def intensity_xeasy(file_data=None, heteronuc=None, proton=None, int_col=None):
    """Return the process data from the XEasy peak intensity file.

    The residue number, heteronucleus and proton names, and peak intensity will be returned.


    @keyword file_data:     The data extracted from the file converted into a list of lists.
    @type file_data:        list of lists of str
    @keyword heteronuc:     The name of the heteronucleus as specified in the peak intensity file.
    @type heteronuc:        str
    @keyword proton:        The name of the proton as specified in the peak intensity file.
    @type proton:           str
    @keyword int_col:       The column containing the peak intensity data (for a non-standard formatted file).
    @type int_col:          int
    @raises RelaxError:     When the expected peak intensity is not a float.
    @return:                The extracted data as a list of lists.  The first dimension corresponds to the spin.  The second dimension consists of the proton name, heteronucleus name, spin ID string, the intensity value, and the original line of text.
    @rtype:                 list of lists of str, str, str, float, str
    """

    # The columns of the standard XEasy peak list format.
    w1_col = 4
    w2_col = 7
    if int_col == None:
        int_col = 10

    # Set the default proton dimension.
    H_dim = 'w1'

    # Determine the number of header lines: the first line whose intensity column parses as a float marks the start of the data.
    num = 0
    for line in file_data:
        try:
            float(line[int_col])
        except (ValueError, IndexError):
            # Not a data line (unparsable or too short), so count it as header.
            num = num + 1
        else:
            break
    print(("Number of header lines found: " + repr(num)))

    # Remove the header.
    file_data = file_data[num:]

    # Strip the data of all comments and empty lines.
    file_data = strip(file_data)

    # Determine which dimension holds the proton by matching the supplied names against the first assigned peak.
    for line in file_data:
        if line[w1_col] == proton and line[w2_col] == heteronuc:
            H_dim = 'w1'
            print("The proton dimension is w1")
            break
        if line[w1_col] == heteronuc and line[w2_col] == proton:
            H_dim = 'w2'
            print("The proton dimension is w2")
            break

    # Loop over the file data.
    data = []
    for line in file_data:
        # Skip invisible peaks.
        if line[w1_col] == 'inv.':
            continue

        # The residue number (the bare 'except:' has been narrowed so that e.g. KeyboardInterrupt is no longer swallowed).
        try:
            res_num = int(line[5])
        except (ValueError, IndexError):
            raise RelaxError("Improperly formatted XEasy file.")

        # Nuclei names, mapped according to the detected proton dimension.
        if H_dim == 'w1':
            h_name = line[w1_col]
            x_name = line[w2_col]
        else:
            x_name = line[w1_col]
            h_name = line[w2_col]

        # Intensity.  Bug fix: report the raw column text on failure - the old code printed repr(intensity), a stale value left over from the header scan.
        try:
            intensity = float(line[int_col])
        except ValueError:
            raise RelaxError("The peak intensity value " + repr(line[int_col]) + " from the line " + repr(line) + " is invalid.")

        # Generate the spin ID string.
        spin_id = generate_spin_id(res_num=res_num, spin_name=x_name)

        # Append the extracted data.
        data.append([h_name, x_name, spin_id, intensity, line])

    # Return the data.
    return data
1003
1004
def read(file=None, dir=None, spectrum_id=None, heteronuc=None, proton=None, int_col=None, int_method=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, ncproc=None):
    """Read the peak intensity data.

    @keyword file:          The name of the file containing the peak intensities.
    @type file:             str
    @keyword dir:           The directory where the file is located.
    @type dir:              str
    @keyword spectrum_id:   The spectrum identification string.
    @type spectrum_id:      str
    @keyword heteronuc:     The name of the heteronucleus as specified in the peak intensity file.
    @type heteronuc:        str
    @keyword proton:        The name of the proton as specified in the peak intensity file.
    @type proton:           str
    @keyword int_col:       The column containing the peak intensity data (used by the generic intensity file format).
    @type int_col:          int
    @keyword int_method:    The integration method, one of 'height', 'point sum' or 'other'.
    @type int_method:       str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.
    @type spin_id:          None or str
    @keyword ncproc:        The Bruker ncproc binary intensity scaling factor.
    @type ncproc:           int or None
    """

    # Test if the current data pipe exists (cdp is presumably the current-data-pipe global injected by the pipes module -- TODO confirm).
    pipes.test()

    # Test if sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # A file name is mandatory.
    if file == None:
        raise RelaxError("The file name must be supplied.")

    # The intensity measure must match that of any previously loaded spectra.
    if hasattr(cdp, 'int_method') and cdp.int_method != int_method:
        raise RelaxError("The '%s' measure of peak intensities does not match '%s' of the previously loaded spectra." % (int_method, cdp.int_method))

    # Check that the intensity measure is one of the allowed values.
    if not int_method in ['height', 'point sum', 'other']:
        raise RelaxError("The intensity measure '%s' is not one of 'height', 'point sum', 'other'." % int_method)

    # Store the intensity measure in the data pipe.
    cdp.int_method = int_method

    # Extract the data from the file.
    file_data = extract_data(file, dir, sep=sep)

    # Automatic format detection (autodetect_format is defined elsewhere in this module; NOTE(review): if it returns something other than the four formats below, intensity_data stays unbound and a NameError follows -- confirm it always returns one of them).
    format = autodetect_format(file_data)

    # Generic.
    if format == 'generic':
        # Print out.
        print("Generic formatted data file.\n")

        # Extract the data.
        intensity_data = intensity_generic(file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=int_col, sep=sep, spin_id=spin_id)

    # NMRView.
    elif format == 'nmrview':
        # Print out.
        print("NMRView formatted data file.\n")

        # Extract the data.
        intensity_data = intensity_nmrview(file_data=file_data)

    # Sparky.
    elif format == 'sparky':
        # Print out.
        print("Sparky formatted data file.\n")

        # Extract the data.
        intensity_data = intensity_sparky(file_data=file_data, int_col=int_col)

    # XEasy.
    elif format == 'xeasy':
        # Print out.
        print("XEasy formatted data file.\n")

        # Extract the data.
        intensity_data = intensity_xeasy(file_data=file_data, proton=proton, heteronuc=heteronuc, int_col=int_col)

    # Add the spectrum ID (and the ncproc scaling dictionary) to the data pipe.
    if not hasattr(cdp, 'spectrum_ids'):
        cdp.spectrum_ids = []
        if ncproc != None:
            cdp.ncproc = {}
    if spectrum_id in cdp.spectrum_ids:
        raise RelaxError("The spectrum identification string '%s' already exists." % spectrum_id)
    else:
        cdp.spectrum_ids.append(spectrum_id)
        if ncproc != None:
            cdp.ncproc[spectrum_id] = ncproc

    # Loop over the peak intensity data, storing it on the matching spins.
    data_flag = False
    for i in xrange(len(intensity_data)):
        # Extract the data (proton name, heteronucleus name, spin ID, intensity, original line).
        H_name, X_name, spin_id, intensity, line = intensity_data[i]

        # Skip data lines whose nuclei names (when present) do not match the requested proton/heteronucleus.
        if (X_name and X_name != heteronuc) or (H_name and H_name != proton):
            warn(RelaxWarning("Proton and heteronucleus names do not match, skipping the data %s." % line))
            continue

        # Get the spin container, skipping (with a warning) IDs with no matching spin.
        spin = return_spin(spin_id)
        if not spin:
            warn(RelaxNoSpinWarning(spin_id))
            continue

        # Skip deselected spins.
        if not spin.select:
            continue

        # Initialise the per-spin intensity dictionary if necessary.
        if not hasattr(spin, 'intensities'):
            spin.intensities = {}

        # Scale by the Bruker ncproc factor of 2**ncproc, if supplied.
        if ncproc != None:
            intensity = intensity / float(2**ncproc)

        # Store the intensity, keyed by the spectrum ID.
        spin.intensities[spectrum_id] = intensity

        # At least one value was successfully loaded.
        data_flag = True

    # Nothing was loaded, so roll back the spectrum ID registration and fail.
    if not data_flag:
        # Delete all the data just added for this spectrum ID.
        delete(spectrum_id)

        # Raise the error.
        raise RelaxError("No data could be loaded from the peak list")
1156
1157
1159 """Set which spectra are replicates.
1160
1161 @keyword spectrum_ids: A list of spectrum ids corresponding to replicated spectra.
1162 @type spectrum_ids: list of str
1163 """
1164
1165
1166 pipes.test()
1167
1168
1169 if not hasattr(cdp, 'spectrum_ids'):
1170 raise RelaxError("No spectra have been loaded therefore replicates cannot be specified.")
1171
1172
1173 for spectrum_id in spectrum_ids:
1174 if spectrum_id not in cdp.spectrum_ids:
1175 raise RelaxError("The peak intensities corresponding to the spectrum id '%s' do not exist." % spectrum_id)
1176
1177
1178 if spectrum_ids == None:
1179 warn(RelaxWarning("The spectrum ID list cannot be None."))
1180 return
1181
1182
1183 if len(spectrum_ids) == 1:
1184 warn(RelaxWarning("The number of spectrum IDs in the list %s must be greater than one." % spectrum_ids))
1185 return
1186
1187
1188 if not hasattr(cdp, 'replicates'):
1189 cdp.replicates = []
1190
1191
1192 found = False
1193 for i in xrange(len(cdp.replicates)):
1194
1195 for j in xrange(len(spectrum_ids)):
1196 if spectrum_ids[j] in cdp.replicates[i]:
1197 found = True
1198
1199
1200 if found:
1201
1202 for j in xrange(len(spectrum_ids)):
1203 if spectrum_ids[j] not in cdp.replicates[i]:
1204 cdp.replicates[i].append(spectrum_ids[j])
1205
1206
1207 return
1208
1209
1210 cdp.replicates.append(spectrum_ids)
1211
1212
1214 """Create and return a dictionary of flags of whether the spectrum is replicated or not.
1215
1216 @return: The dictionary of flags of whether the spectrum is replicated or not.
1217 @rtype: dict of bool
1218 """
1219
1220
1221 repl = {}
1222 for id in cdp.spectrum_ids:
1223 repl[id] = False
1224
1225
1226 for i in range(len(cdp.replicates)):
1227 for j in range(len(cdp.replicates[i])):
1228 repl[cdp.replicates[i][j]] = True
1229
1230
1231 return repl
1232
1233
1235 """Create and return a list of spectra ID which are replicates of the given ID.
1236
1237 @param spectrum_id: The spectrum ID to find all the replicates of.
1238 @type spectrum_id: str
1239 @return: The list of spectrum IDs which are replicates of spectrum_id.
1240 @rtype: list of str
1241 """
1242
1243
1244 repl = []
1245
1246
1247 for i in range(len(cdp.replicates)):
1248
1249 if spectrum_id in cdp.replicates[i]:
1250
1251 for j in range(len(cdp.replicates[i])):
1252
1253 if spectrum_id == cdp.replicates[i][j]:
1254 continue
1255
1256
1257 repl.append(cdp.replicates[i][j])
1258
1259
1260 if repl == []:
1261 return repl
1262
1263
1264 repl.sort()
1265
1266
1267 id = repl[-1]
1268 for i in range(len(repl)-2, -1, -1):
1269
1270 if id == repl[i]:
1271 del repl[i]
1272
1273
1274 else:
1275 id = repl[i]
1276
1277
1278 return repl
1279