1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 """Module containing functions for the handling of peak intensities."""
26
27
28
29 from math import sqrt
30 from re import split
31 import string
32 from warnings import warn
33
34
35 from generic_fns.mol_res_spin import exists_mol_res_spin_data, generate_spin_id, generate_spin_id_data_array, return_spin, spin_loop
36 from generic_fns import pipes
37 from relax_errors import RelaxArgNotNoneError, RelaxError, RelaxImplementError, RelaxNoSequenceError, RelaxNoSpectraError
38 from relax_io import extract_data, read_spin_data, strip
39 from relax_warnings import RelaxWarning, RelaxNoSpinWarning
40
41
42
def __init__(self, dir=None, exp_type=None, file=None, UI='prompt', output_file=None):
    """Import a Bruker Protein Dynamic Center (PDC) file.

    The settings are stored on the object, then the file is read with read_file(), converted with collect_entries() and written out with create_file().

    @param dir:         The directory to save the new file in.
    @type dir:          str
    @param exp_type:    The type of experiment, e.g. NOE, T1 or T2.
    @type exp_type:     str
    @param file:        The Bruker PDC output file.
    @type file:         str
    @param UI:          The relax user interface (either 'prompt' or 'GUI').
    @type UI:           str
    @param output_file: The file to save the imported list.
    @type output_file:  str
    @raises RelaxError: If no file is given.
    """

    # Store the user interface, the output target and the expected experiment type.
    self.UI = UI
    self.dir = dir
    self.output_file = output_file
    self.exp_type = exp_type

    # Nothing can be imported without an input file.
    if not file:
        raise RelaxError('No file selected.')

    # Read the PDC file, convert its contents, then write the result.
    self.read_file(file)
    self.collect_entries()
    self.create_file()
80
81
83 """Function to allow to return a value."""
84 return str(self.value)
85
86
def collect_entries(self):
    """Collect the entries for the NOE/R1/R2 relax dummy file.

    The rows of self.entries (the tab-split lines of the PDC file) are scanned in order.  The 'Project:' row sets the experiment type, 'SECTION:' rows switch the data mode on (results section) or off, and the 'Labelling:' row selects the spin name.  Each row of the results section is converted into the list [mol_name, res_num, res_name, spin_num, spin_name, value, error] and appended to self.file_entries.  For T1 and T2 files the value is stored as 1/value, the error is stored unchanged.

    @raises RelaxError: If the experiment type of the file does not match self.exp_type, or if a peak label contains no residue number.
    """

    # The converted data.
    self.file_entries = []

    # The state of the scan.  The experiment type starts out empty so that a file without a 'Project:' row does not fail with an unbound name.
    is_data = False
    spinname = 'N'
    exp_type = ''

    for index, entry in enumerate(self.entries):
        key = entry[0]

        # The experiment type.
        if 'Project:' in key:
            exp_type = ''
            if 'Dynamic method/Hetero NOE' in entry[1]:
                exp_type = 'NOE'
            elif 'Dynamic method/T1' in entry[1]:
                exp_type = 'T1'
            elif 'Dynamic method/T2' in entry[1]:
                exp_type = 'T2'

            # The file must be of the requested experiment type, if one was requested.
            if self.exp_type and self.exp_type != exp_type:
                raise RelaxError('Selected file is not a '+self.exp_type+'-file.')

            print("Reading BRUKER PDC "+exp_type+" file.\n")

        # Section switching - only the results section holds data, and the section row itself is skipped.
        if 'SECTION:' in key:
            is_data = 'results' in entry[1]
            if is_data:
                continue

        # The spin name.  The keyword itself is removed before testing, as 'Labelling:' contains neither 'N' nor 'C' and the label would otherwise never be found when it sits in its own column.
        if 'Labelling:' in key:
            label = '\t'.join(entry).replace('Labelling:', '')
            if 'N' in label:
                spinname = 'N'
            if 'C' in label:
                spinname = 'C'

        # Everything below only applies to data rows.
        if not is_data:
            continue

        # Skip the table header and empty rows.
        if key in ['Peak name', '']:
            continue

        # The peak label with spaces and brackets removed, e.g. 'Gly [12]' becomes 'Gly12'.
        peak = key.replace(' ', '').replace('[', '').replace(']', '')

        # The first digit marks the end of the residue name.
        start = 0
        while start < len(peak) and not peak[start].isdigit():
            start = start + 1
        if start == len(peak):
            raise RelaxError('The peak label ' + repr(key) + ' contains no residue number.')

        # The residue number is the first run of digits.
        end = start
        while end < len(peak) and peak[end].isdigit():
            end = end + 1

        resnum = peak[start:end]
        resname = peak[:start]

        # The spin number is the row index within the file.
        spin_no = index

        # The value, converted to a rate for T1 and T2 data.
        value = float(entry[3])
        if exp_type in ['T1', 'T2']:
            value = 1.0/value

        # The error.
        error = float(entry[4])

        self.file_entries.append(['Bruker_PDC_'+exp_type, resnum, resname, spin_no, spinname, value, error])
193
194
def create_file(self):
    """Write the collected entries to the output file.

    The text consists of a header line followed by one line per element of self.file_entries, every field being followed by a tab.  Unless the user interface is 'GUI' the text is also printed.  If self.output_file is set, the text is written to that file (inside self.dir if set) and self.value is set to the file path.  Otherwise the text is written to a dummy file object which is stored in self.value.
    """

    # The path separator is not imported at the module level, hence the local import.
    from os import sep

    # The header followed by the tab-terminated rows.
    text = 'Mol_name\tRes_num\tRes_name\tSpin_num\tSpin_name\tValue\tError \n'
    for row in self.file_entries:
        text = text + ''.join([str(field) + '\t' for field in row]) + '\n'

    # Print the file contents.
    if self.UI != 'GUI':
        print(text)

    # The output target.
    path = None
    if self.output_file:
        if self.dir:
            path = self.dir + sep + self.output_file
        else:
            path = self.output_file
        file = open(path, 'w')
    else:
        # NOTE(review): DummyFileObject is not imported by this module, it is assumed to be provided by relax_io - confirm.
        from relax_io import DummyFileObject
        file = DummyFileObject()

    # Write the text, closing the file even if the write fails.
    try:
        file.write(text)
    finally:
        file.close()

    # Feedback, and the value returned by the string representation of the object.
    if path is not None:
        print('Created BRUKER PDC file ' + path)
        self.value = path
    else:
        print('Created BRUKER PDC file.')
        self.value = file
242
243
def read_file(self, filename):
    """Read the Bruker PDC file into self.entries.

    Each line is stripped of its surrounding whitespace and split at the tab characters.  The resulting list of strings is appended to self.entries, so that an empty line gives the entry [''].

    @param filename:    The name of the file to read.
    @type filename:     str
    """

    # Open the file.
    file = open(filename, 'r')

    # Split every line into its columns, closing the file even if reading fails.
    try:
        self.entries = []
        for line in file:
            self.entries.append(line.strip().split('\t'))
    finally:
        file.close()
265
266
def __errors_height_no_repl():
    """Set the peak height errors when no spectra are replicated.

    For every selected spin with intensity data, the error is taken directly from the spin's base plane RMSD.

    @raises RelaxError: If a selected spin with intensities has no base plane RMSD set.
    """

    for spin, spin_id in spin_loop(return_id=True):
        # Deselected spins and spins without intensity data are left untouched.
        if not (spin.select and hasattr(spin, 'intensities')):
            continue

        # The base plane noise must have been set beforehand.
        if not hasattr(spin, 'baseplane_rmsd'):
            raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id)

        # The error is the base plane RMSD.
        spin.intensity_err = spin.baseplane_rmsd
286
287
289 """Calculate the errors for peak intensities from replicated spectra.
290
291 @keyword verbosity: The amount of information to print. The higher the value, the greater the verbosity.
292 @type verbosity: int
293 """
294
295
296 repl = replicated_flags()
297
298
299 if False in repl.values():
300 all_repl = False
301 print("All spectra replicated: No.")
302 else:
303 all_repl = True
304 print("All spectra replicated: Yes.")
305
306
307 cdp.sigma_I = {}
308 cdp.var_I = {}
309
310
311 for id in cdp.spectrum_ids:
312
313 if not repl[id]:
314 continue
315
316
317 if cdp.var_I.has_key(id) and cdp.var_I[id] != 0.0:
318 continue
319
320
321 for j in xrange(len(cdp.replicates)):
322 if id in cdp.replicates[j]:
323 spectra = cdp.replicates[j]
324
325
326 num_spectra = len(spectra)
327
328
329 print(("\nReplicated spectra: " + repr(spectra)))
330 if verbosity:
331 print(("%-5s%-6s%-20s%-20s" % ("Num", "Name", "Average", "SD")))
332
333
334 count = 0
335 for spin in spin_loop():
336
337 if not spin.select:
338 continue
339
340
341 if not hasattr(spin, 'intensities'):
342 spin.select = False
343 continue
344
345
346 missing = False
347 for j in xrange(num_spectra):
348 if not spin.intensities.has_key(spectra[j]):
349 missing = True
350 if missing:
351 continue
352
353
354 ave_intensity = 0.0
355 for j in xrange(num_spectra):
356 ave_intensity = ave_intensity + spin.intensities[spectra[j]]
357 ave_intensity = ave_intensity / num_spectra
358
359
360 SSE = 0.0
361 for j in xrange(num_spectra):
362 SSE = SSE + (spin.intensities[spectra[j]] - ave_intensity) ** 2
363
364
365
366
367
368
369
370 var_I = 1.0 / (num_spectra - 1.0) * SSE
371
372
373 if verbosity:
374 print(("%-5i%-6s%-20s%-20s" % (spin.num, spin.name, repr(ave_intensity), repr(var_I))))
375
376
377 if not cdp.var_I.has_key(id):
378 cdp.var_I[id] = 0.0
379 cdp.var_I[id] = cdp.var_I[id] + var_I
380 count = count + 1
381
382
383 if not count:
384 raise RelaxError("No data is present, unable to calculate errors from replicated spectra.")
385
386
387 cdp.var_I[id] = cdp.var_I[id] / float(count)
388
389
390 for j in range(num_spectra):
391 cdp.var_I[spectra[j]] = cdp.var_I[id]
392
393
394 print(("Standard deviation: %s" % sqrt(cdp.var_I[id])))
395
396
397
398 if not all_repl:
399
400 print("\nVariance averaging over all spectra.")
401
402
403 var_I = 0.0
404 num_dups = 0
405
406
407 for id in cdp.var_I.keys():
408
409 if cdp.var_I[id] == 0.0:
410 continue
411
412
413 var_I = var_I + cdp.var_I[id]
414 num_dups = num_dups + 1
415
416
417 var_I = var_I / float(num_dups)
418
419
420 for id in cdp.spectrum_ids:
421 cdp.var_I[id] = var_I
422
423
424 print(("Standard deviation for all spins: " + repr(sqrt(var_I))))
425
426
427 for id in cdp.var_I.keys():
428
429 cdp.sigma_I[id] = sqrt(cdp.var_I[id])
430
431
432 for spin in spin_loop():
433
434 if not spin.select:
435 continue
436
437
438 spin.intensity_err = cdp.sigma_I
439
440
442 """Calculate the errors for peak volumes when no spectra are replicated."""
443
444
445 for spin, spin_id in spin_loop(return_id=True):
446
447 if not spin.select:
448 continue
449
450
451 if not hasattr(spin, 'intensities'):
452 continue
453
454
455 if not hasattr(spin, 'baseplane_rmsd'):
456 raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id)
457
458
459 if not hasattr(spin, 'N'):
460 raise RelaxError("The total number of points used in the volume integration has not been specified for spin '%s'." % spin_id)
461
462
463 for key in spin.intensity.keys():
464 spin.intensity_err[key] = spin.baseplane_rmsd[key] * sqrt(spin.N)
465
466
493
494
496 """Set the peak intensity errors, as defined as the baseplane RMSD.
497
498 @param error: The peak intensity error value defined as the RMSD of the base plane
499 noise.
500 @type error: float
501 @keyword spectrum_id: The spectrum id.
502 @type spectrum_id: str
503 @param spin_id: The spin identification string.
504 @type spin_id: str
505 """
506
507
508 pipes.test()
509
510
511 if not exists_mol_res_spin_data():
512 raise RelaxNoSequenceError
513
514
515 if spectrum_id not in cdp.spectrum_ids:
516 raise RelaxError("The peak intensities corresponding to the spectrum id '%s' do not exist." % spectrum_id)
517
518
519 if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
520 scale = 1.0 / 2**cdp.ncproc[spectrum_id]
521 else:
522 scale = 1.0
523
524
525 for spin in spin_loop(spin_id):
526
527 if not spin.select:
528 continue
529
530
531 if not hasattr(spin, 'baseplane_rmsd'):
532 spin.baseplane_rmsd = {}
533
534
535 spin.baseplane_rmsd[spectrum_id] = float(error) * scale
536
537
def delete(spectrum_id=None):
    """Remove all data stored for the given spectrum ID.

    The ID is removed from cdp.spectrum_ids, and its entries are removed from cdp.ncproc, cdp.replicates, cdp.sigma_I, cdp.var_I and the intensities of every spin, wherever present.

    @keyword spectrum_id:           The spectrum ID string.
    @type spectrum_id:              str
    @raises RelaxNoSequenceError:   If no sequence data is loaded.
    @raises RelaxNoSpectraError:    If the spectrum ID is unknown.
    """

    # Test if the current pipe exists.
    pipes.test()

    # Test if the sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # The spectrum must have been loaded.
    if not hasattr(cdp, 'spectrum_ids') or spectrum_id not in cdp.spectrum_ids:
        raise RelaxNoSpectraError(spectrum_id)

    # Drop the ID itself.
    cdp.spectrum_ids.remove(spectrum_id)

    # Drop the ncproc scaling factor.
    if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
        del cdp.ncproc[spectrum_id]

    # Drop the ID from the first replicate group holding it.
    if hasattr(cdp, 'replicates'):
        for position, group in enumerate(cdp.replicates):
            if spectrum_id not in group:
                continue

            # A pair is no longer a set of replicates once one member is gone, so remove the whole group.
            if len(group) == 2:
                cdp.replicates.pop(position)
            else:
                group.remove(spectrum_id)

            # Only the first matching group is handled.
            break

    # Drop the error estimates.
    for name in ('sigma_I', 'var_I'):
        if hasattr(cdp, name) and spectrum_id in getattr(cdp, name):
            del getattr(cdp, name)[spectrum_id]

    # Drop the peak intensities from all spins.
    for spin in spin_loop():
        if hasattr(spin, 'intensities') and spectrum_id in spin.intensities:
            del spin.intensities[spectrum_id]
591
592
594 """Determine the peak intensity standard deviation."""
595
596
597 pipes.test()
598
599
600 if not exists_mol_res_spin_data():
601 raise RelaxNoSequenceError
602
603
604 if not hasattr(cdp, 'spectrum_ids'):
605 raise RelaxError("Error analysis is not possible, no spectra have been loaded.")
606
607
608 if cdp.int_method == 'height':
609
610 print("Intensity measure: Peak heights.")
611
612
613 if hasattr(cdp, 'replicates'):
614
615 print("Replicated spectra: Yes.")
616
617
618 __errors_repl()
619
620
621 else:
622
623 print("Replicated spectra: No.")
624
625
626 __errors_height_no_repl()
627
628
629 if cdp.int_method == 'point sum':
630
631 print("Intensity measure: Peak volumes.")
632
633
634 if hasattr(cdp, 'replicates'):
635
636 print("Replicated spectra: Yes.")
637
638
639 __errors_repl()
640
641
642 else:
643
644 print("Replicated spectra: No.")
645
646
647 __errors_repl()
648
649
def intensity_generic(file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, sep=None, spin_id=None):
    """Return the processed data from the generic peak intensity file.

    The file data is passed through strip() and then handed to read_spin_data(), which supplies the spin ID and value pairs.


    @keyword file_data:     The data extracted from the file converted into a list of lists.
    @type file_data:        list of lists of str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword data_col:      The column containing the peak intensities.
    @type data_col:         int
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.
    @type spin_id:          None or str
    @return:                The extracted data as a list of lists.  The first dimension corresponds to the spin.  The second dimension consists of None (no proton name), None (no heteronucleus name), the spin ID string, the intensity value, and the spin ID string again in place of the original line of text.
    @rtype:                 list of lists of None, None, str, float, str
    """

    # Clean up the file data.
    cleaned = strip(file_data)

    # The spin ID and value pairs.
    pairs = read_spin_data(file_data=cleaned, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, sep=sep, spin_id=spin_id)

    # The generic format carries no proton or heteronucleus names, and the spin ID stands in for the line of text.
    return [[None, None, ident, value, ident] for ident, value in pairs]
701
702
def intensity_nmrview(file_data=None, int_col=None):
    """Return the processed data from the NMRView peak intensity file.

    The first six lines are treated as the header and discarded.  The residue number and proton name are taken from the assignment in column 1, the heteronucleus name from the assignment in column 8.


    @keyword file_data: The data extracted from the file converted into a list of lists.
    @type file_data:    list of lists of str
    @keyword int_col:   The column containing the peak intensity data.  The default is 16 for intensities.  Setting the int_col argument to 15 will use the volumes (or evolumes).  For a non-standard formatted file, use a different value.
    @type int_col:      int
    @raises RelaxError: When an assignment cannot be split into the residue number and atom name, or when the expected peak intensity is missing or not a float.
    @return:            The extracted data as a list of lists.  The first dimension corresponds to the spin.  The second dimension consists of the proton name, heteronucleus name, spin ID string, the intensity value, and the original line of text.
    @rtype:             list of lists of str, str, str, float, str
    """

    # The fixed number of header lines.
    num = 6
    print("Number of header lines: " + repr(num))

    # Remove the header, then clean up the remaining data.
    file_data = strip(file_data[num:])

    # The intensity column.
    if int_col == None:
        int_col = 16
    if int_col == 16:
        print('Using peak heights.')
    if int_col == 15:
        print('Using peak volumes (or evolumes).')

    # Loop over the file data.
    data = []
    for line in file_data:
        # Unknown assignment.
        if line[1] == '{}':
            warn(RelaxWarning("The assignment '%s' is unknown, skipping this peak." % line[1]))
            continue

        # The residue number and proton name from the braced assignment in column 1.
        h_assign = line[1].strip('{').strip('}').split('.')
        try:
            res_num = h_assign[0]
            h_name = h_assign[1]
        except IndexError:
            raise RelaxError("The peak list is invalid.")

        # The heteronucleus name from the braced assignment in column 8.
        x_name = ''
        if line[8] != '{}':
            x_assign = line[8].strip('{').strip('}').split('.')
            try:
                x_name = x_assign[1]
            except IndexError:
                raise RelaxError("The peak list is invalid.")

        # The peak intensity.  The offending text itself is reported, as no intensity value exists when the conversion fails.
        try:
            intensity = float(line[int_col])
        except ValueError:
            raise RelaxError("The peak intensity value " + repr(line[int_col]) + " from the line " + repr(line) + " is invalid.")
        except IndexError:
            raise RelaxError("The peak intensity column " + repr(int_col) + " is missing from the line " + repr(line) + ".")

        # Generate the spin ID.
        spin_id = generate_spin_id(res_num=res_num, spin_name=x_name)

        # Store the data.
        data.append([h_name, x_name, spin_id, intensity, line])

    return data
782
783
def intensity_sparky(file_data=None, int_col=None):
    """Return the processed data from the Sparky peak intensity file.

    The assignment in column 0 is expected in the form of the heteronucleus assignment, a dash, then the proton assignment.  The residue number and heteronucleus name come from the first part, the proton name from the second.


    @keyword file_data: The data extracted from the file converted into a list of lists.
    @type file_data:    list of lists of str
    @keyword int_col:   The column containing the peak intensity data (for a non-standard formatted file).  The default is column 3.
    @type int_col:      int
    @raises RelaxError: When the assignment cannot be parsed, or when the expected peak intensity is missing or not a float.
    @return:            The extracted data as a list of lists.  The first dimension corresponds to the spin.  The second dimension consists of the proton name, heteronucleus name, spin ID string, the intensity value, and the original line of text.
    @rtype:             list of lists of str, str, str, float, str
    """

    # Count the header lines, guarding against data too short to hold a header.
    num = 0
    if len(file_data) > 0 and len(file_data[0]) > 0 and file_data[0][0] == 'Assignment':
        num = num + 1
    if len(file_data) > 1 and file_data[1] == '':
        num = num + 1
    print("Number of header lines found: %s" % num)

    # Remove the header, then clean up the remaining data.
    file_data = strip(file_data[num:])

    # The default intensity column.
    if int_col == None:
        int_col = 3

    # Loop over the file data.
    data = []
    for line in file_data:
        # Skip non-assigned peaks.
        if line[0] == '?-?':
            continue

        # Parse the assignment.  Splitting at the captured run of capital letters gives [..., residue number, atom name, ''], hence the indices counted from the end.
        try:
            x_assign, h_assign = split('-', line[0])
            h_name = split('([A-Z]+)', h_assign)[-2]
            x_row = split('([A-Z]+)', x_assign)
            x_name = x_row[-2]
            res_num = int(x_row[-3])
        except (ValueError, IndexError):
            raise RelaxError("Improperly formatted Sparky file.")

        # The peak intensity.  The offending text itself is reported, as no intensity value exists when the conversion fails.
        try:
            intensity = float(line[int_col])
        except ValueError:
            raise RelaxError("The peak intensity value " + repr(line[int_col]) + " from the line " + repr(line) + " is invalid.")
        except IndexError:
            raise RelaxError("The peak intensity column " + repr(int_col) + " is missing from the line " + repr(line) + ".")

        # Generate the spin ID.
        spin_id = generate_spin_id(res_num=res_num, spin_name=x_name)

        # Store the data.
        data.append([h_name, x_name, spin_id, intensity, line])

    return data
861
862
def intensity_xeasy(file_data=None, heteronuc=None, proton=None, int_col=None):
    """Return the processed data from the XEasy peak intensity file.

    The leading lines for which the intensity column is missing or not a number are treated as the header.  The proton dimension is then determined from the first line whose w1 and w2 columns (4 and 7) hold the proton and heteronucleus names, defaulting to w1.


    @keyword file_data: The data extracted from the file converted into a list of lists.
    @type file_data:    list of lists of str
    @keyword heteronuc: The name of the heteronucleus as specified in the peak intensity file.
    @type heteronuc:    str
    @keyword proton:    The name of the proton as specified in the peak intensity file.
    @type proton:       str
    @keyword int_col:   The column containing the peak intensity data (for a non-standard formatted file).  The default is column 10.
    @type int_col:      int
    @raises RelaxError: When the residue number in column 5 is missing or not an integer, or when the expected peak intensity is missing or not a float.
    @return:            The extracted data as a list of lists.  The first dimension corresponds to the spin.  The second dimension consists of the proton name, heteronucleus name, spin ID string, the intensity value, and the original line of text.
    @rtype:             list of lists of str, str, str, float, str
    """

    # The columns.
    w1_col = 4
    w2_col = 7
    if int_col == None:
        int_col = 10

    # The default proton dimension.
    H_dim = 'w1'

    # The header ends at the first line with a numeric intensity column.
    num = 0
    for line in file_data:
        try:
            float(line[int_col])
        except (ValueError, IndexError):
            num = num + 1
        else:
            break
    print("Number of header lines found: " + repr(num))

    # Remove the header, then clean up the remaining data.
    file_data = strip(file_data[num:])

    # Determine the proton and heteronucleus dimensions from the first line matching either ordering.
    for line in file_data:
        # Proton in w1, heteronucleus in w2.
        if line[w1_col] == proton and line[w2_col] == heteronuc:
            H_dim = 'w1'
            print("The proton dimension is w1")
            break

        # Heteronucleus in w1, proton in w2.
        if line[w1_col] == heteronuc and line[w2_col] == proton:
            H_dim = 'w2'
            print("The proton dimension is w2")
            break

    # Loop over the file data.
    data = []
    for line in file_data:
        # Skip the lines marked 'inv.' in the w1 column.
        if line[w1_col] == 'inv.':
            continue

        # The residue number.
        try:
            res_num = int(line[5])
        except (ValueError, IndexError):
            raise RelaxError("Improperly formatted XEasy file.")

        # The nuclei names.
        if H_dim == 'w1':
            h_name = line[w1_col]
            x_name = line[w2_col]
        else:
            x_name = line[w1_col]
            h_name = line[w2_col]

        # The peak intensity.  The offending text itself is reported, rather than a value left over from an earlier line.
        try:
            intensity = float(line[int_col])
        except ValueError:
            raise RelaxError("The peak intensity value " + repr(line[int_col]) + " from the line " + repr(line) + " is invalid.")
        except IndexError:
            raise RelaxError("The peak intensity column " + repr(int_col) + " is missing from the line " + repr(line) + ".")

        # Generate the spin ID.
        spin_id = generate_spin_id(res_num=res_num, spin_name=x_name)

        # Store the data.
        data.append([h_name, x_name, spin_id, intensity, line])

    return data
970
971
def read(file=None, dir=None, spectrum_id=None, heteronuc=None, proton=None, int_col=None, int_method=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, ncproc=None):
    """Read the peak intensity data.

    The file format is autodetected and the matching parser called.  The spectrum ID is then registered in cdp.spectrum_ids and the intensity of every peak belonging to a known, selected spin is stored in spin.intensities[spectrum_id], divided by 2**ncproc if ncproc is given.

    @keyword file:          The name of the file containing the peak intensities.
    @type file:             str
    @keyword dir:           The directory where the file is located.
    @type dir:              str
    @keyword spectrum_id:   The spectrum identification string.
    @type spectrum_id:      str
    @keyword heteronuc:     The name of the heteronucleus as specified in the peak intensity file.
    @type heteronuc:        str
    @keyword proton:        The name of the proton as specified in the peak intensity file.
    @type proton:           str
    @keyword int_col:       The column containing the peak intensity data.  This is passed on to the parser of the detected file format.
    @type int_col:          int
    @keyword int_method:    The integration method, one of 'height', 'point sum' or 'other'.
    @type int_method:       str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.
    @type spin_id:          None or str
    @keyword ncproc:        The Bruker ncproc binary intensity scaling factor.
    @type ncproc:           int or None
    @raises RelaxNoSequenceError:   If no sequence data is loaded.
    @raises RelaxError:     If the integration method is invalid or does not match that of the previously loaded spectra, if the file format is not handled, if the spectrum ID already exists, or if no data could be loaded.
    """

    # Test if the current data pipe exists.
    pipes.test()

    # Test if sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # The integration method must match that of the previously loaded spectra.
    if hasattr(cdp, 'int_method') and cdp.int_method != int_method:
        raise RelaxError("The '%s' measure of peak intensities does not match '%s' of the previously loaded spectra." % (int_method, cdp.int_method))

    # Check the integration method.
    if int_method not in ['height', 'point sum', 'other']:
        raise RelaxError("The intensity measure '%s' is not one of 'height', 'point sum', 'other'." % int_method)

    # Store the integration method.
    cdp.int_method = int_method

    # Extract the data from the file.
    file_data = extract_data(file, dir, sep=sep)

    # Automatic format detection.
    format = autodetect_format(file_data)

    # Generic.
    if format == 'generic':
        print("Generic formatted data file.\n")
        intensity_data = intensity_generic(file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=int_col, sep=sep, spin_id=spin_id)

    # NMRView.  The intensity column is passed on so that the volumes can be selected, the parser falling back to its default when it is None.
    elif format == 'nmrview':
        print("NMRView formatted data file.\n")
        intensity_data = intensity_nmrview(file_data=file_data, int_col=int_col)

    # Sparky.
    elif format == 'sparky':
        print("Sparky formatted data file.\n")
        intensity_data = intensity_sparky(file_data=file_data, int_col=int_col)

    # XEasy.
    elif format == 'xeasy':
        print("XEasy formatted data file.\n")
        intensity_data = intensity_xeasy(file_data=file_data, proton=proton, heteronuc=heteronuc, int_col=int_col)

    # Anything else would otherwise fail later on with an unbound name.
    else:
        raise RelaxError("The peak list format '%s' is not supported." % format)

    # Set up the spectrum ID list.
    if not hasattr(cdp, 'spectrum_ids'):
        cdp.spectrum_ids = []

    # The ncproc dictionary is created independently of the ID list, as earlier spectra may have been loaded without ncproc.
    if ncproc != None and not hasattr(cdp, 'ncproc'):
        cdp.ncproc = {}

    # Register the spectrum ID.
    if spectrum_id in cdp.spectrum_ids:
        raise RelaxError("The spectrum identification string '%s' already exists." % spectrum_id)
    cdp.spectrum_ids.append(spectrum_id)
    if ncproc != None:
        cdp.ncproc[spectrum_id] = ncproc

    # Loop over the peak intensity data.
    data_flag = False
    for H_name, X_name, spin_id, intensity, line in intensity_data:
        # Skip data whose nuclei names are given but do not match.
        if (X_name and X_name != heteronuc) or (H_name and H_name != proton):
            warn(RelaxWarning("Proton and heteronucleus names do not match, skipping the data %s." % line))
            continue

        # Get the spin container.
        spin = return_spin(spin_id)
        if not spin:
            warn(RelaxNoSpinWarning(spin_id))
            continue

        # Skip deselected spins.
        if not spin.select:
            continue

        # Initialise the intensity dictionary.
        if not hasattr(spin, 'intensities'):
            spin.intensities = {}

        # Intensity scaling.
        if ncproc != None:
            intensity = intensity / float(2**ncproc)

        # Store the intensity.
        spin.intensities[spectrum_id] = intensity

        # At least one value has been loaded.
        data_flag = True

    # No data, so undo the registration of the spectrum before failing.
    if not data_flag:
        delete(spectrum_id)
        raise RelaxError("No data could be loaded from the peak list")
1119
1120
1122 """Set which spectra are replicates.
1123
1124 @keyword spectrum_ids: A list of spectrum ids corresponding to replicated spectra.
1125 @type spectrum_ids: list of str
1126 """
1127
1128
1129 pipes.test()
1130
1131
1132 if not hasattr(cdp, 'spectrum_ids'):
1133 raise RelaxError("No spectra have been loaded therefore replicates cannot be specified.")
1134
1135
1136 for spectrum_id in spectrum_ids:
1137 if spectrum_id not in cdp.spectrum_ids:
1138 raise RelaxError("The peak intensities corresponding to the spectrum id '%s' do not exist." % spectrum_id)
1139
1140
1141 if not hasattr(cdp, 'replicates'):
1142 cdp.replicates = []
1143
1144
1145 found = False
1146 for i in xrange(len(cdp.replicates)):
1147
1148 for j in xrange(len(spectrum_ids)):
1149 if spectrum_ids[j] in cdp.replicates[i]:
1150 found = True
1151
1152
1153 if found:
1154
1155 for j in xrange(len(spectrum_ids)):
1156 if spectrum_ids[j] not in cdp.replicates[i]:
1157 cdp.replicates[i].append(spectrum_ids[j])
1158
1159
1160 return
1161
1162
1163 cdp.replicates.append(spectrum_ids)
1164
1165
def replicated_flags():
    """Build a dictionary flagging which spectra are replicated.

    Every ID in cdp.spectrum_ids starts out as False, and every ID found in one of the cdp.replicates lists is then set to True.

    @return:    The dictionary of flags of whether the spectrum is replicated or not.
    @rtype:     dict of bool
    """

    # Start with all spectra flagged as not replicated.
    flags = dict.fromkeys(cdp.spectrum_ids, False)

    # Flag the members of all the replicate groups.
    for group in cdp.replicates:
        for spectrum_id in group:
            flags[spectrum_id] = True

    return flags
1185
1186
1188 """Create and return a list of spectra ID which are replicates of the given ID.
1189
1190 @param spectrum_id: The spectrum ID to find all the replicates of.
1191 @type spectrum_id: str
1192 @return: The list of spectrum IDs which are replicates of spectrum_id.
1193 @rtype: list of str
1194 """
1195
1196
1197 repl = []
1198
1199
1200 for i in range(len(cdp.replicates)):
1201
1202 if spectrum_id in cdp.replicates[i]:
1203
1204 for j in range(len(cdp.replicates[i])):
1205
1206 if spectrum_id == cdp.replicates[i][j]:
1207 continue
1208
1209
1210 repl.append(cdp.replicates[i][j])
1211
1212
1213 repl.sort()
1214
1215
1216 id = repl[-1]
1217 for i in range(len(repl)-2, -1, -1):
1218
1219 if id == repl[i]:
1220 del repl[i]
1221
1222
1223 else:
1224 id = repl[i]
1225
1226
1227 return repl
1228