1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """Module containing functions for the handling of peak intensities."""
25
26
27
28 from math import sqrt
29 from re import split
30 import sys
31 from warnings import warn
32
33
34 from generic_fns.mol_res_spin import exists_mol_res_spin_data, generate_spin_id_unique, return_spin, spin_loop
35 from generic_fns import pipes
36 from relax_errors import RelaxArgNotNoneError, RelaxError, RelaxImplementError, RelaxNoSequenceError, RelaxNoSpectraError
37 from relax_io import extract_data, read_spin_data, strip, write_data
38 from relax_warnings import RelaxWarning, RelaxNoSpinWarning
39
40
41
def __init__(self, dir=None, exp_type=None, file=None, UI='prompt', output_file=None):
    """Import a Bruker Protein Dynamic Center (PDC) file.

    The settings are stored on the object, the file is read, its entries are collected and the
    converted file is finally created.

    @param dir:         The directory to save the new file in.
    @type dir:          str
    @param file:        The Bruker PDC output file.
    @type file:         str
    @param exp_type:    The type of experiment, e.g. NOE, T1 or T2.
    @type exp_type:     str
    @param UI:          The relax user interface (either 'prompt' or 'GUI').
    @type UI:           str
    @param output_file: The file to save the imported list.
    @type output_file:  str
    @raises RelaxError: If no file is given.
    """

    # Store the experiment type, the output target and the user interface mode.
    self.exp_type = exp_type
    self.output_file, self.dir = output_file, dir
    self.UI = UI

    # Nothing can be done without an input file.
    if not file:
        raise RelaxError('No file selected.')

    # Read the PDC file, extract its entries, then write out the converted file.
    self.read_file(file)
    self.collect_entries()
    self.create_file()
79
80
def __str__(self):
    """Return the stored value as a string.

    @return:    The string form of the 'value' attribute.
    @rtype:     str
    """

    # NOTE(review): the 'def' line is absent from the listing this block was taken from; the name and signature are reconstructed - confirm against the original file.
    value = self.value
    return str(value)
84
85
def collect_entries(self):
    """Collect the entries for the NOE/R1/R2 relax dummy file.

    The rows stored in self.entries are scanned in order.  The experiment type is taken from the
    'Project:' row and every row of the 'results' section is converted into the list

        [mol_name, res_num, res_name, spin_num, spin_name, value, error]

    which is appended to self.file_entries.  For T1 and T2 files the value is inverted (the error
    column is stored as read).

    @raises RelaxError: If the experiment type found in the file does not match self.exp_type, or
                        if a peak name does not contain a residue number.
    """

    # NOTE(review): the 'def' line is absent from the listing this block was taken from; the name and signature are reconstructed - confirm against the original file.

    # Storage for the converted rows.
    self.file_entries = []

    # Flag which is True while inside the 'results' section.
    is_data = False

    # The default spin name.
    spinname = 'N'

    # Fall back to the user supplied type so that a file without a 'Project:' row no longer fails on an unbound name.
    exp_type = self.exp_type or ''

    # Loop over the rows (the row index doubles as the spin number).
    for index, entry in enumerate(self.entries):
        # Detect the experiment type.
        if 'Project:' in entry[0]:
            exp_type = ''
            if 'Dynamic method/Hetero NOE' in entry[1]:
                exp_type = 'NOE'
            elif 'Dynamic method/T1' in entry[1]:
                exp_type = 'T1'
            elif 'Dynamic method/T2' in entry[1]:
                exp_type = 'T2'

            # The file must agree with the user selection, if one was made.
            if self.exp_type and self.exp_type != exp_type:
                raise RelaxError('Selected file is not a '+self.exp_type+'-file.')

            # Feedback.
            print("Reading BRUKER PDC "+exp_type+" file.\n")

        # Section changes switch the data flag on or off.
        if 'SECTION:' in entry[0]:
            if 'results' in entry[1]:
                is_data = True
                continue
            is_data = False

        # The spin name.
        if 'Labelling:' in entry[0]:
            # NOTE(review): both tests look at the first column (the one holding 'Labelling:') rather than the value column - confirm this is intended.
            if 'N' in entry[0]:
                spinname = 'N'
            if 'C' in entry[0]:
                spinname = 'C'

        # Only rows of the results section are converted.
        if not is_data:
            continue

        # Skip the column header and blank rows.
        if entry[0] in ['Peak name', '']:
            continue

        # The peak label without spaces and brackets, e.g. 'Gln [2]' becomes 'Gln2'.
        label = entry[0].replace(' ', '').replace('[', '').replace(']', '')
        size = len(label)

        # Locate the first run of digits, staying within the label.
        start = 0
        while start < size and not label[start].isdigit():
            start = start + 1
        if start == size:
            raise RelaxError("The peak name '%s' does not contain a residue number." % entry[0])
        end = start
        while end < size and label[end].isdigit():
            end = end + 1

        # The residue number (kept as a string) and the residue name preceding it.
        resnum = label[start:end]
        resname = label[:start]

        # The value, converted from a time to a rate for T1 and T2 data.
        value = float(entry[3])
        if exp_type in ['T1', 'T2']:
            value = 1.0/value

        # The error.
        error = float(entry[4])

        # Store the converted row.
        self.file_entries.append(['Bruker_PDC_'+exp_type, resnum, resname, index, spinname, value, error])
192
193
def create_file(self):
    """Write the collected entries to the output file.

    The rows of self.file_entries are written as tab separated text below a fixed header line.
    The text is printed unless the UI is 'GUI'.  If self.output_file is set, the text is written
    to that file (inside self.dir when given) and self.value is set to the file path.  Otherwise
    the text is written to a dummy file object which is stored in self.value.
    """

    # NOTE(review): the 'def' line is absent from the listing this block was taken from; the name and signature are reconstructed - confirm against the original file.

    # 'sep' is not imported in the visible module level import block, so it is imported here.
    from os import sep

    # The header followed by one line per entry, each field being terminated by a tab.
    lines = ['Mol_name\tRes_num\tRes_name\tSpin_num\tSpin_name\tValue\tError \n']
    for entry in self.file_entries:
        lines.append(''.join([str(field) + '\t' for field in entry]) + '\n')
    text = ''.join(lines)

    # Print the text when not in the GUI.
    if not self.UI == 'GUI':
        print(text)

    # The output target.
    if self.output_file:
        if self.dir:
            path = self.dir+sep+self.output_file
        else:
            path = self.output_file
        out = open(path, 'w')
    else:
        path = None

        # NOTE(review): DummyFileObject is not imported in the visible import block either; it is presumably provided by relax_io - confirm.
        from relax_io import DummyFileObject
        out = DummyFileObject()

    # Write the text, closing the file even if the write fails.
    try:
        out.write(text)
    finally:
        out.close()

    # Feedback, and the value returned via the string representation.
    if path is not None:
        print('Created BRUKER PDC file '+path)
        self.value = path
    else:
        print('Created BRUKER PDC file.')
        self.value = out
241
242
def read_file(self, filename):
    """Read the file into self.entries.

    Every line is stripped of leading and trailing whitespace and then split on tab characters,
    so that self.entries becomes a list of lists of strings (a blank line gives ['']).

    @param filename:    The name of the file to read.
    @type filename:     str
    """

    # NOTE(review): the 'def' line is absent from the listing this block was taken from; the name and signature are reconstructed - confirm against the original file.

    # The context manager closes the file even if reading fails.
    with open(filename, 'r') as handle:
        self.entries = [line.strip().split('\t') for line in handle]
264
265
def __errors_height_no_repl():
    """Calculate the errors for peak heights when no spectra are replicated.

    For every selected spin with intensity data, the intensity error is set to the base plane
    RMSD of that spin.

    @raises RelaxError: If the base plane RMSD of a spin has not been set.
    """

    # NOTE(review): the 'def' line is absent from the listing this block was taken from; the name and signature are reconstructed - confirm against the original file.

    for spin, spin_id in spin_loop(return_id=True):
        # Only selected spins carrying intensity data are handled.
        if not (spin.select and hasattr(spin, 'intensities')):
            continue

        # The error is the base plane RMSD (the same object is stored, not a copy).
        if hasattr(spin, 'baseplane_rmsd'):
            spin.intensity_err = spin.baseplane_rmsd
        else:
            raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id)
285
286
def __errors_repl(verbosity=0):
    """Calculate the errors for peak intensities from replicated spectra.

    For each set of replicated spectra the variance of the intensities is calculated per spin and
    averaged over all spins.  If not all spectra are replicated, the variances are then averaged
    over all spectra.  The results are stored in cdp.var_I and cdp.sigma_I, and cdp.sigma_I is
    assigned as the intensity error of every selected spin.

    @keyword verbosity: The amount of information to print.  The higher the value, the greater the verbosity.
    @type verbosity:    int
    @raises RelaxError: If no spin has data for a set of replicated spectra.
    """

    # NOTE(review): the 'def' line is absent from the listing this block was taken from; the name, signature and the default of 'verbosity' are reconstructed - confirm against the original file.

    # The replication flag of each spectrum.
    flags = replicated_flags()

    # Are all spectra replicated?
    all_repl = False not in flags.values()
    if all_repl:
        print("All spectra replicated: Yes.")
    else:
        print("All spectra replicated: No.")

    # Reset the storage.
    cdp.sigma_I = {}
    cdp.var_I = {}

    # Loop over the spectra.
    for spec_id in cdp.spectrum_ids:
        # Non-replicated spectra are handled later.
        if not flags[spec_id]:
            continue

        # Already covered as a member of a previously processed replicate set.
        if spec_id in cdp.var_I and cdp.var_I[spec_id] != 0.0:
            continue

        # The replicate set containing this spectrum (the last matching set is used).
        for group in cdp.replicates:
            if spec_id in group:
                spectra = group
        num_spectra = len(spectra)

        # Print out.
        print("\nReplicated spectra: " + repr(spectra))
        if verbosity:
            print("%-5s%-6s%-20s%-20s" % ("Num", "Name", "Average", "SD"))

        # Sum the per-spin variances.
        count = 0
        for spin in spin_loop():
            if not spin.select:
                continue

            # Spins without any intensity data are deselected.
            if not hasattr(spin, 'intensities'):
                spin.select = False
                continue

            # All replicates must be present for the spin.
            missing = False
            for name in spectra:
                if name not in spin.intensities:
                    missing = True
            if missing:
                continue

            # The average intensity.
            total = 0.0
            for name in spectra:
                total = total + spin.intensities[name]
            ave_intensity = total / num_spectra

            # The sum of squared deviations from the average.
            SSE = 0.0
            for name in spectra:
                SSE = SSE + (spin.intensities[name] - ave_intensity) ** 2

            # The variance with the n-1 normalisation.
            spin_var = 1.0 / (num_spectra - 1.0) * SSE

            # Print out.
            if verbosity:
                print("%-5i%-6s%-20s%-20s" % (spin.num, spin.name, repr(ave_intensity), repr(spin_var)))

            # Accumulate.
            if spec_id not in cdp.var_I:
                cdp.var_I[spec_id] = 0.0
            cdp.var_I[spec_id] = cdp.var_I[spec_id] + spin_var
            count = count + 1

        # No data at all.
        if not count:
            raise RelaxError("No data is present, unable to calculate errors from replicated spectra.")

        # The variance averaged over the spins, shared by all members of the replicate set.
        cdp.var_I[spec_id] = cdp.var_I[spec_id] / float(count)
        for name in spectra:
            cdp.var_I[name] = cdp.var_I[spec_id]

        # Print out.
        print("Standard deviation: %s" % sqrt(cdp.var_I[spec_id]))

    # Average the variances over all spectra when some are not replicated.
    if not all_repl:
        print("\nVariance averaging over all spectra.")

        var_sum = 0.0
        num_dups = 0
        for key in cdp.var_I.keys():
            # Skip zero entries.
            if cdp.var_I[key] == 0.0:
                continue
            var_sum = var_sum + cdp.var_I[key]
            num_dups = num_dups + 1
        var_ave = var_sum / float(num_dups)

        # The same variance for every spectrum.
        for key in cdp.spectrum_ids:
            cdp.var_I[key] = var_ave

        print("Standard deviation for all spins: " + repr(sqrt(var_ave)))

    # The standard deviations.
    for key in cdp.var_I.keys():
        cdp.sigma_I[key] = sqrt(cdp.var_I[key])

    # Assign the error dictionary to every selected spin.
    for spin in spin_loop():
        if not spin.select:
            continue
        spin.intensity_err = cdp.sigma_I
438
439
def __errors_vol_no_repl():
    """Calculate the errors for peak volumes when no spectra are replicated.

    For every selected spin with intensity data, the error for each spectrum is the base plane
    RMSD of that spectrum multiplied by the square root of spin.N.

    @raises RelaxError: If the base plane RMSD or the number of integration points of a spin has
                        not been set.
    """

    # NOTE(review): the 'def' line is absent from the listing this block was taken from; the name and signature are reconstructed - confirm against the original file.

    for spin, spin_id in spin_loop(return_id=True):
        # Skip deselected spins.
        if not spin.select:
            continue

        # Skip spins missing intensity data.
        if not hasattr(spin, 'intensities'):
            continue

        # The required data.
        if not hasattr(spin, 'baseplane_rmsd'):
            raise RelaxError("The RMSD of the base plane noise for spin '%s' has not been set." % spin_id)
        if not hasattr(spin, 'N'):
            raise RelaxError("The total number of points used in the volume integration has not been specified for spin '%s'." % spin_id)

        # Loop over the 'intensities' structure tested for above (the original looped over 'spin.intensity', which nothing visible in this file sets).
        # A new dictionary is built so that a missing 'intensity_err' attribute is not a problem and 'baseplane_rmsd' is never modified through an alias.
        errors = {}
        for key in spin.intensities:
            errors[key] = spin.baseplane_rmsd[key] * sqrt(spin.N)
        spin.intensity_err = errors
464
465
492
493
def baseplane_rmsd(error=0.0, spectrum_id=None, spin_id=None):
    """Set the peak intensity errors, as defined as the baseplane RMSD.

    The value is stored per spectrum in the 'baseplane_rmsd' dictionary of every selected spin.
    If an ncproc value is stored for the spectrum, the error is divided by 2**ncproc.

    @param error:           The peak intensity error value defined as the RMSD of the base plane noise.
    @type error:            float
    @keyword spectrum_id:   The spectrum id.
    @type spectrum_id:      str
    @param spin_id:         The spin identification string.
    @type spin_id:          str
    @raises RelaxNoSequenceError:   If no sequence data exists.
    @raises RelaxError:             If the spectrum id does not exist.
    """

    # NOTE(review): the 'def' line is absent from the listing this block was taken from; the name, signature and defaults are reconstructed - confirm against the original file.

    # Test if the current pipe exists.
    pipes.test()

    # Test if the sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Test the spectrum id, guarding against no spectra having been loaded at all.
    if not hasattr(cdp, 'spectrum_ids') or spectrum_id not in cdp.spectrum_ids:
        raise RelaxError("The peak intensities corresponding to the spectrum id '%s' do not exist." % spectrum_id)

    # The scaling by the ncproc value of the spectrum, if one is stored.
    if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
        scale = 1.0 / 2**cdp.ncproc[spectrum_id]
    else:
        scale = 1.0

    # Loop over the spins.
    for spin in spin_loop(spin_id):
        # Skip deselected spins.
        if not spin.select:
            continue

        # Initialise the storage if needed.
        if not hasattr(spin, 'baseplane_rmsd'):
            spin.baseplane_rmsd = {}

        # Store the scaled error.
        spin.baseplane_rmsd[spectrum_id] = float(error) * scale
535
536
def delete(spectrum_id=None):
    """Delete spectral data corresponding to the spectrum ID.

    The ID is removed from the list of spectrum IDs, from the ncproc, replicate, sigma_I and
    var_I structures of the current data pipe, and from the intensities of all spins.

    @keyword spectrum_id:   The spectrum ID string.
    @type spectrum_id:      str
    @raises RelaxNoSequenceError:   If no sequence data exists.
    @raises RelaxNoSpectraError:    If the spectrum ID does not exist.
    """

    # NOTE(review): the 'def' line is absent from the listing this block was taken from; the name and signature are reconstructed - confirm against the original file.

    # Test if the current pipe exists.
    pipes.test()

    # Test if the sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Test if data exists.
    if not (hasattr(cdp, 'spectrum_ids') and spectrum_id in cdp.spectrum_ids):
        raise RelaxNoSpectraError(spectrum_id)

    # Remove the ID.
    cdp.spectrum_ids.remove(spectrum_id)

    # The ncproc parameter.
    if hasattr(cdp, 'ncproc') and spectrum_id in cdp.ncproc:
        del cdp.ncproc[spectrum_id]

    # The replicates.
    if hasattr(cdp, 'replicates'):
        for index, group in enumerate(cdp.replicates):
            if spectrum_id not in group:
                continue

            # A pair: the whole set is dropped.
            if len(group) == 2:
                cdp.replicates.pop(index)

            # A larger set: only the ID is removed.
            else:
                group.remove(spectrum_id)

            # Only the first matching set is handled.
            break

    # The errors.
    for name in ('sigma_I', 'var_I'):
        if hasattr(cdp, name) and spectrum_id in getattr(cdp, name):
            del getattr(cdp, name)[spectrum_id]

    # The peak intensities of the spins.
    for spin in spin_loop():
        if hasattr(spin, 'intensities') and spectrum_id in spin.intensities:
            del spin.intensities[spectrum_id]
590
591
def error_analysis():
    """Determine the peak intensity standard deviation.

    The calculation used depends on the integration method stored in cdp.int_method ('height' or
    'point sum') and on whether replicated spectra have been specified.  Nothing is done for any
    other integration method.

    @raises RelaxNoSequenceError:   If no sequence data exists.
    @raises RelaxError:             If no spectra have been loaded.
    @raises RelaxImplementError:    For peak volumes without replicated spectra.
    """

    # NOTE(review): the 'def' line is absent from the listing this block was taken from; the name and signature are reconstructed - confirm against the original file.

    # Test if the current pipe exists.
    pipes.test()

    # Test if the sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("Error analysis is not possible, no spectra have been loaded.")

    # An empty replicates list means no replication (previously this ended in a division by zero in the replicate analysis).
    has_repl = bool(getattr(cdp, 'replicates', None))

    # Peak height category.
    if cdp.int_method == 'height':
        print("Intensity measure: Peak heights.")

        if has_repl:
            print("Replicated spectra: Yes.")
            __errors_repl()
        else:
            print("Replicated spectra: No.")
            __errors_height_no_repl()

    # Peak volume category.
    elif cdp.int_method == 'point sum':
        print("Intensity measure: Peak volumes.")

        if has_repl:
            print("Replicated spectra: Yes.")
            __errors_repl()
        else:
            print("Replicated spectra: No.")

            # The volume error calculation without replicates (__errors_vol_no_repl) is not enabled - the call after this raise was unreachable.
            raise RelaxImplementError
650
651
def get_ids():
    """Return a list of all spectrum IDs.

    @return:    The list of spectrum IDs of the current data pipe, or an empty list if none exist.
    @rtype:     list of str
    """

    # NOTE(review): the 'def' line is absent from the listing this block was taken from; the name and signature are reconstructed - confirm against the original file.

    # The stored list itself, falling back to a new empty list.
    return getattr(cdp, 'spectrum_ids', [])
665
666
def integration_points(N=0, spectrum_id=None, spin_id=None):
    """Set the number of integration points for the given spectrum.

    This is not implemented: calling the function always raises RelaxImplementError.

    @param N:               The number of integration points.
    @type N:                int
    @keyword spectrum_id:   The spectrum ID string.
    @type spectrum_id:      str
    @keyword spin_id:       The spin ID string used to restrict the value to.
    @type spin_id:          None or str
    @raises RelaxImplementError:    Always.
    """

    # NOTE(review): the 'def' line is absent from the listing this block was taken from; the name, signature and defaults are reconstructed - confirm against the original file.
    raise RelaxImplementError()
679
680
def intensity_generic(file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, sep=None, spin_id=None):
    """Return the processed data from the generic peak intensity file.

    The file data is passed through strip() and read_spin_data(), and a unique spin ID is
    generated for each spin returned.

    @keyword file_data:     The data extracted from the file converted into a list of lists.
    @type file_data:        list of lists of str
    @keyword spin_id_col:   The column containing the spin ID strings.  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be None.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information.  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information.  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information.  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword data_col:      The column containing the peak intensities.
    @type data_col:         int
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.
    @type spin_id:          None or str
    @return:                The extracted data as a list of lists.  The first dimension corresponds to the spin.  The second dimension consists of None (proton name), None (heteronucleus name), the spin ID string, the intensity value, and the spin ID string again (in the position where the other parsers place the original line).
    @rtype:                 list of lists of None, None, str, float, str
    """

    # Strip the data.
    cleaned = strip(file_data)

    # The spin information and value of each row.
    rows = read_spin_data(file_data=cleaned, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, sep=sep, spin_id=spin_id)

    # Assemble the data, no nucleus names being available in this format.
    data = []
    for mol_name, res_num, res_name, spin_num, spin_name, value in rows:
        unique_id = generate_spin_id_unique(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name)
        data.append([None, None, unique_id, value, unique_id])

    return data
733
734
def intensity_nmrview(file_data=None, int_col=None):
    """Return the processed data from the NMRView peak intensity file.

    The residue number, heteronucleus and proton names, and peak intensity will be returned.

    @keyword file_data: The data extracted from the file converted into a list of lists.
    @type file_data:    list of lists of str
    @keyword int_col:   The column containing the peak intensity data.  The default is 16 for intensities.  Setting the int_col argument to 15 will use the volumes (or evolumes).  For a non-standard formatted file, use a different value.
    @type int_col:      int
    @raises RelaxError: When an assignment is not of the form '{number.name}', or when the expected peak intensity is not a float.
    @return:            The extracted data as a list of lists.  The first dimension corresponds to the spin.  The second dimension consists of the proton name, heteronucleus name, spin ID string, the intensity value, and the original line of text.
    @rtype:             list of lists of str, str, str, float, str
    """

    # NOTE(review): the 'def' line is absent from the listing this block was taken from; the name, signature and the default of 'int_col' are reconstructed - confirm against the original file.

    # The fixed number of header lines.
    num = 6
    print("Number of header lines: " + repr(num))

    # Remove the header, then strip the data.
    file_data = file_data[num:]
    file_data = strip(file_data)

    # The intensity column, defaulting to the peak heights.
    if int_col is None:
        int_col = 16
    if int_col == 16:
        print('Using peak heights.')
    if int_col == 15:
        print('Using peak volumes (or evolumes).')

    # Loop over the file data.
    data = []
    for line in file_data:
        # Unknown assignment.
        if line[1] == '{}':
            warn(RelaxWarning("The assignment '%s' is unknown, skipping this peak." % line[1]))
            continue

        # The proton assignment, e.g. '{70.HN}' gives the residue number '70' and the name 'HN'.
        h_assign = line[1].strip('{').strip('}').split('.')
        if len(h_assign) < 2:
            raise RelaxError("The peak list is invalid.")
        res_num = h_assign[0]
        h_name = h_assign[1]

        # The heteronucleus name, left empty when unassigned.
        x_name = ''
        if line[8] != '{}':
            x_assign = line[8].strip('{').strip('}').split('.')
            if len(x_assign) < 2:
                raise RelaxError("The peak list is invalid.")
            x_name = x_assign[1]

        # The intensity (the error reports the offending text rather than a stale or unbound variable).
        try:
            intensity = float(line[int_col])
        except ValueError:
            raise RelaxError("The peak intensity value " + repr(line[int_col]) + " from the line " + repr(line) + " is invalid.")

        # Generate the spin ID and store the data.
        spin_id = generate_spin_id_unique(res_num=res_num, spin_name=x_name)
        data.append([h_name, x_name, spin_id, intensity, line])

    return data
814
815
def intensity_sparky(file_data=None, int_col=None):
    """Return the processed data from the Sparky peak intensity file.

    The residue number, heteronucleus and proton names, and peak intensity will be returned.

    @keyword file_data: The data extracted from the file converted into a list of lists.
    @type file_data:    list of lists of str
    @keyword int_col:   The column containing the peak intensity data (for a non-standard formatted file).  The default is column 3.
    @type int_col:      int
    @raises RelaxError: When an assignment cannot be parsed, or when the expected peak intensity is not a float.
    @return:            The extracted data as a list of lists.  The first dimension corresponds to the spin.  The second dimension consists of the proton name, heteronucleus name, spin ID string, the intensity value, and the original line of text.
    @rtype:             list of lists of str, str, str, float, str
    """

    # NOTE(review): the 'def' line is absent from the listing this block was taken from; the name and signature are reconstructed - confirm against the original file.

    # Count the header lines: the 'Assignment' line, optionally followed by a blank line.
    # The length checks avoid an IndexError on empty or single line input.
    num = 0
    if file_data and file_data[0] and file_data[0][0] == 'Assignment':
        num = 1
        if len(file_data) > 1 and file_data[1] in ('', []):
            num = 2
    print("Number of header lines found: %s" % num)

    # Remove the header, then strip the data.
    file_data = file_data[num:]
    file_data = strip(file_data)

    # The default intensity column.
    if int_col is None:
        int_col = 3

    # Loop over the file data.
    data = []
    for line in file_data:
        # Skip non-assigned peaks.
        if line[0] == '?-?':
            continue

        try:
            # Split the assignment, e.g. 'G3N-HN', into the heteronucleus and proton parts.
            x_assign, h_assign = split('-', line[0])

            # Splitting on the runs of capital letters leaves the atom name as the last two elements.
            h_row = split('([A-Z]+)', h_assign)
            h_name = h_row[-2] + h_row[-1]
            x_row = split('([A-Z]+)', x_assign)
            x_name = x_row[-2] + x_row[-1]

            # The residue number precedes the heteronucleus name.
            res_num = int(x_row[-3])
        except (ValueError, IndexError):
            raise RelaxError("Improperly formatted Sparky file.")

        # The intensity (the error reports the offending text rather than an empty placeholder).
        try:
            intensity = float(line[int_col])
        except ValueError:
            raise RelaxError("The peak intensity value " + repr(line[int_col]) + " from the line " + repr(line) + " is invalid.")

        # Generate the spin ID and store the data.
        spin_id = generate_spin_id_unique(res_num=res_num, spin_name=x_name)
        data.append([h_name, x_name, spin_id, intensity, line])

    return data
894
895
def intensity_xeasy(file_data=None, heteronuc=None, proton=None, int_col=None):
    """Return the processed data from the XEasy peak intensity file.

    The residue number, heteronucleus and proton names, and peak intensity will be returned.

    @keyword file_data: The data extracted from the file converted into a list of lists.
    @type file_data:    list of lists of str
    @keyword heteronuc: The name of the heteronucleus as specified in the peak intensity file.
    @type heteronuc:    str
    @keyword proton:    The name of the proton as specified in the peak intensity file.
    @type proton:       str
    @keyword int_col:   The column containing the peak intensity data (for a non-standard formatted file).  The default is column 10.
    @type int_col:      int
    @raises RelaxError: When the residue number is not an integer, or when the expected peak intensity is not a float.
    @return:            The extracted data as a list of lists.  The first dimension corresponds to the spin.  The second dimension consists of the proton name, heteronucleus name, spin ID string, the intensity value, and the original line of text.
    @rtype:             list of lists of str, str, str, float, str
    """

    # The columns of the two dimensions and of the intensity.
    w1_col = 4
    w2_col = 7
    if int_col is None:
        int_col = 10

    # The proton dimension, defaulting to w1.
    H_dim = 'w1'

    # The header consists of the leading lines without a float in the intensity column.
    num = 0
    for line in file_data:
        try:
            float(line[int_col])
        except (ValueError, IndexError):
            num = num + 1
        else:
            break
    print("Number of header lines found: " + repr(num))

    # Remove the header, then strip the data.
    file_data = file_data[num:]
    file_data = strip(file_data)

    # Determine the proton dimension from the first line matching the two names.
    for line in file_data:
        if line[w1_col] == proton and line[w2_col] == heteronuc:
            H_dim = 'w1'
            print("The proton dimension is w1")
            break

        if line[w1_col] == heteronuc and line[w2_col] == proton:
            H_dim = 'w2'
            print("The proton dimension is w2")
            break

    # Loop over the file data.
    data = []
    for line in file_data:
        # Skip lines marked as 'inv.'.
        if line[w1_col] == 'inv.':
            continue

        # The residue number.
        try:
            res_num = int(line[5])
        except (ValueError, IndexError):
            raise RelaxError("Improperly formatted XEasy file.")

        # The nucleus names.
        if H_dim == 'w1':
            h_name = line[w1_col]
            x_name = line[w2_col]
        else:
            x_name = line[w1_col]
            h_name = line[w2_col]

        # The intensity (the error reports the offending text rather than a value left over from the header scan).
        try:
            intensity = float(line[int_col])
        except ValueError:
            raise RelaxError("The peak intensity value " + repr(line[int_col]) + " from the line " + repr(line) + " is invalid.")

        # Generate the spin ID and store the data.
        spin_id = generate_spin_id_unique(res_num=res_num, spin_name=x_name)
        data.append([h_name, x_name, spin_id, intensity, line])

    return data
1003
1004
def read(file=None, dir=None, spectrum_id=None, heteronuc=None, proton=None, int_col=None, int_method=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, sep=None, spin_id=None, ncproc=None, verbose=True):
    """Read the peak intensity data.

    The file format is auto-detected, the intensities are extracted by the matching parser and
    stored in the 'intensities' dictionary of the selected spins under the spectrum ID.

    @keyword file:          The name of the file containing the peak intensities.
    @type file:             str
    @keyword dir:           The directory where the file is located.
    @type dir:              str
    @keyword spectrum_id:   The spectrum identification string.
    @type spectrum_id:      str
    @keyword heteronuc:     The name of the heteronucleus as specified in the peak intensity file.
    @type heteronuc:        str
    @keyword proton:        The name of the proton as specified in the peak intensity file.
    @type proton:           str
    @keyword int_col:       The column containing the peak intensity data.
    @type int_col:          int
    @keyword int_method:    The integration method, one of 'height', 'point sum' or 'other'.
    @type int_method:       str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be None.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.
    @type spin_id:          None or str
    @keyword ncproc:        The Bruker ncproc binary intensity scaling factor.
    @type ncproc:           int or None
    @keyword verbose:       A flag which if True will cause all relaxation data loaded to be printed out.
    @type verbose:          bool
    @raises RelaxNoSequenceError:   If no sequence data exists.
    @raises RelaxError:             If the file name is missing, the integration method is invalid or inconsistent, the format is not handled, or no data could be loaded.
    """

    # Test if the current data pipe exists.
    pipes.test()

    # Test if sequence data is loaded.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Test that the file name has been supplied.
    if file is None:
        raise RelaxError("The file name must be supplied.")

    # Test that the intensity measures are identical for all spectra.
    if hasattr(cdp, 'int_method') and cdp.int_method != int_method:
        raise RelaxError("The '%s' measure of peak intensities does not match '%s' of the previously loaded spectra." % (int_method, cdp.int_method))

    # Check the intensity measure.
    if int_method not in ['height', 'point sum', 'other']:
        raise RelaxError("The intensity measure '%s' is not one of 'height', 'point sum', 'other'." % int_method)

    # Set the peak intensity measure.
    cdp.int_method = int_method

    # Extract the data from the file and determine its format.
    file_data = extract_data(file, dir, sep=sep)
    file_format = autodetect_format(file_data)

    # Generic.
    if file_format == 'generic':
        print("Generic formatted data file.\n")
        intensity_data = intensity_generic(file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=int_col, sep=sep, spin_id=spin_id)

    # NMRView (the intensity column is now passed on, it was previously silently ignored).
    elif file_format == 'nmrview':
        print("NMRView formatted data file.\n")
        intensity_data = intensity_nmrview(file_data=file_data, int_col=int_col)

    # Sparky.
    elif file_format == 'sparky':
        print("Sparky formatted data file.\n")
        intensity_data = intensity_sparky(file_data=file_data, int_col=int_col)

    # XEasy.
    elif file_format == 'xeasy':
        print("XEasy formatted data file.\n")
        intensity_data = intensity_xeasy(file_data=file_data, proton=proton, heteronuc=heteronuc, int_col=int_col)

    # Anything else would otherwise fail below on an undefined variable.
    # NOTE(review): autodetect_format() is not visible here and may already reject unknown files itself - confirm.
    else:
        raise RelaxError("The peak list format '%s' is not supported." % file_format)

    # Add the spectrum ID (and the ncproc value) to the relax data store.
    if not hasattr(cdp, 'spectrum_ids'):
        cdp.spectrum_ids = []
    if ncproc is not None and not hasattr(cdp, 'ncproc'):
        # Created independently of spectrum_ids, as earlier spectra may have been loaded without an ncproc value.
        cdp.ncproc = {}
    if spectrum_id not in cdp.spectrum_ids:
        cdp.spectrum_ids.append(spectrum_id)
        if ncproc is not None:
            cdp.ncproc[spectrum_id] = ncproc

    # Loop over the peak intensity data.
    data = []
    data_flag = False
    for H_name, X_name, peak_spin_id, intensity, line in intensity_data:
        # Skip data not corresponding to the given nucleus names (the check is bypassed for empty names).
        if (X_name and X_name != heteronuc) or (H_name and H_name != proton):
            warn(RelaxWarning("Proton and heteronucleus names do not match, skipping the data %s." % line))
            continue

        # Get the spin container.
        spin = return_spin(peak_spin_id)
        if not spin:
            warn(RelaxNoSpinWarning(peak_spin_id))
            continue

        # Skip deselected spins.
        if not spin.select:
            continue

        # Initialise the storage if needed.
        if not hasattr(spin, 'intensities'):
            spin.intensities = {}

        # Intensity scaling.
        if ncproc is not None:
            intensity = intensity / float(2**ncproc)

        # Store the peak intensity.
        spin.intensities[spectrum_id] = intensity

        # Data has been loaded.
        data_flag = True

        # Append the data for printing out.
        data.append([peak_spin_id, repr(intensity)])

    # No data.
    if not data_flag:
        # Remove the spectrum ID again before failing.
        delete(spectrum_id)
        raise RelaxError("No data could be loaded from the peak list")

    # Print out.
    if verbose:
        print("\nThe following intensities have been loaded into the relax data store:\n")
        write_data(out=sys.stdout, headings=["Spin_ID", "Intensity"], data=data)
1165
1166
1167
def replicated(spectrum_ids=None):
    """Set which spectra are replicates.

    If any of the IDs already belongs to a set of replicates, the remaining IDs are merged into
    the first such set.  Otherwise a new set is created.

    @keyword spectrum_ids:  A list of spectrum ids corresponding to replicated spectra.
    @type spectrum_ids:     list of str
    @raises RelaxError:     If no spectra have been loaded, or if one of the IDs does not exist.
    """

    # NOTE(review): the 'def' line is absent from the listing this block was taken from; the name and signature are reconstructed - confirm against the original file.

    # Test if the current pipe exists.
    pipes.test()

    # Test if spectra have been loaded.
    if not hasattr(cdp, 'spectrum_ids'):
        raise RelaxError("No spectra have been loaded therefore replicates cannot be specified.")

    # No IDs (checked before the list is iterated over, otherwise None fails with a TypeError).
    if spectrum_ids is None:
        warn(RelaxWarning("The spectrum ID list cannot be None."))
        return

    # Test the spectrum id strings.
    for spectrum_id in spectrum_ids:
        if spectrum_id not in cdp.spectrum_ids:
            raise RelaxError("The peak intensities corresponding to the spectrum id '%s' do not exist." % spectrum_id)

    # Fewer than two IDs cannot form a set of replicates (this now includes the empty list).
    if len(spectrum_ids) < 2:
        warn(RelaxWarning("The number of spectrum IDs in the list %s must be greater than one." % spectrum_ids))
        return

    # Initialise the storage if needed.
    if not hasattr(cdp, 'replicates'):
        cdp.replicates = []

    # Merge into the first existing set sharing one of the IDs.
    for group in cdp.replicates:
        if any(spectrum_id in group for spectrum_id in spectrum_ids):
            for spectrum_id in spectrum_ids:
                if spectrum_id not in group:
                    group.append(spectrum_id)
            return

    # A new set, stored as a copy so that the list of the caller is not modified by later merges.
    cdp.replicates.append(list(spectrum_ids))
1221
1222
def replicated_flags():
    """Create and return a dictionary of flags of whether the spectrum is replicated or not.

    @return:    The dictionary of flags of whether the spectrum is replicated or not, with the spectrum IDs as keys.
    @rtype:     dict of bool
    """

    # NOTE(review): the 'def' line is absent from the listing this block was taken from; the name and signature are reconstructed - confirm against the original file.

    # Start with all spectra flagged as not replicated.
    flags = dict.fromkeys(cdp.spectrum_ids, False)

    # Flag every member of every replicate set.
    for group in cdp.replicates:
        for spec_id in group:
            flags[spec_id] = True

    return flags
1242
1243
def replicated_ids(spectrum_id):
    """Create and return a list of spectra ID which are replicates of the given ID.

    @param spectrum_id: The spectrum ID to find all the replicates of.
    @type spectrum_id:  str
    @return:            The sorted list of spectrum IDs which are replicates of spectrum_id, without duplicates and without spectrum_id itself.
    @rtype:             list of str
    """

    # NOTE(review): the 'def' line is absent from the listing this block was taken from; the name and signature are reconstructed - confirm against the original file.

    # Gather the other members of every replicate set containing the ID.
    others = []
    for group in cdp.replicates:
        if spectrum_id in group:
            others.extend([member for member in group if member != spectrum_id])

    # Sort, then drop repeated neighbours.
    others.sort()
    unique = []
    for member in others:
        if not unique or unique[-1] != member:
            unique.append(member)

    return unique
1289