1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """Module containing functions for handling NMRPipe SeriesTab files."""
25
26
27
28 import re
29 from glob import glob
30 from os import sep
31 from os.path import abspath
32 subprocess_module = False
33 try:
34 import subprocess
35 subprocess_module = True
36 except ImportError:
37 pass
38 from warnings import warn
39
40
41 from lib.errors import RelaxError
42 from lib.io import file_root, get_file_path, open_write_file, sort_filenames, write_data
43 from lib.warnings import RelaxWarning
44
45
47 """Extract the intensity information from the NMRPipe SeriesTab peak intensity file.
48
49 @keyword peak_list: The peak list object to place all data into.
50 @type peak_list: lib.spectrum.objects.Peak_list instance
51 @keyword file_data: The data extracted from the file converted into a list of lists.
52 @type file_data: list of lists of str
53 @keyword int_col: The column which to multiply the peak intensity data (used by the SeriesTab intensity file format).
54 @type int_col: int
55 @raises RelaxError: When the expected peak intensity is not a float.
56 """
57
58
59 modeline = False
60 mode = False
61 varsline = False
62 header = False
63
64
65 line_nr = 0
66 for line in file_data:
67 if len(line) > 0:
68 if line[0] == 'REMARK' and line[1] == 'Mode:':
69 modeline = line[2:]
70 mode = modeline[0]
71 elif line[0] == 'VARS':
72 varsline = line[1:]
73 elif line[0] == '1':
74 header = line_nr
75 break
76 line_nr += 1
77
78
79 if not (modeline and mode):
80 raise RelaxError("MODE not detected. Expecting line 2:\nREMARK Mode: Summation")
81
82
83 if not (varsline):
84 raise RelaxError("VARS not detected. Expecting line 8:\nVARS INDEX X_AXIS Y_AXIS X_PPM Y_PPM VOL ASS Z_A0")
85
86
87 if not header:
88 raise RelaxError("'1' not detected in start of line. Cannot determine header size.")
89
90
91 ass_i = varsline.index('ASS')
92
93
94 if type(int_col) == list:
95
96 set_int_col = list(set(int_col))
97
98
99 if len(set_int_col) == 1:
100 int_col = set_int_col[0]
101 else:
102 warn(RelaxWarning("Multiple int_col is set to '%s'. I only accept a list of same values."%(int_col )))
103
104
105 if int_col == None or type(int_col) == list:
106 if "VOL" in varsline:
107 int_type = "VOL"
108 elif "HEIGHT" in varsline:
109 int_type = "HEIGHT"
110 else:
111 raise RelaxError("The int_col is set to '%s'. Cannot determine which column to multiply with."%(int_col))
112 warn(RelaxWarning("The int_col is set to '%s'. Looking for the '%s' index."%(int_col, int_type) ))
113 int_col = varsline.index('%s'%int_type) + 1
114 warn(RelaxWarning("The int_col is set to '%i' from the '%s' index."%(int_col, int_type) ))
115
116
117 w1_col = None
118 w2_col = None
119
120
121 w1_col = varsline.index('Y_PPM')
122
123
124 w2_col = varsline.index('X_PPM')
125
126
127 Z_A = re.compile("Z_A*")
128 spectra = list(filter(Z_A.search, varsline))
129
130
131 spectra_i = []
132 for y in spectra:
133 spectra_i.append(varsline.index(y))
134
135
136 file_data = file_data[header:]
137
138
139 for line in file_data:
140
141 if line[ass_i] == '?-?':
142 continue
143
144
145 if line[ass_i] == 'None':
146 new_line_ass_i = "A%sN-HN"%line[0]
147 warn(RelaxWarning("Improperly formatted NMRPipe SeriesTab file. The spin assignment column 'ASS' is set to '%s'. Setting to %s." % (line[ass_i], new_line_ass_i)))
148 line[ass_i] = new_line_ass_i
149
150
151 assign1, assign2 = re.split('-', line[ass_i])
152
153
154 row1 = re.split('([a-zA-Z]+)', assign1)
155 name1 = row1[-2] + row1[-1]
156
157
158 row2 = re.split('([a-zA-Z]+)', assign2)
159 name2 = row2[-2] + row2[-1]
160
161
162 got_res_num1 = True
163 try:
164 res_num1 = int(row1[-3])
165 except:
166 got_res_num1 = False
167 raise RelaxError("Improperly formatted NMRPipe SeriesTab file, cannot process the residue number for dimension 1 in assignment: %s." % line[0])
168
169
170 try:
171 res_num2 = int(row2[-3])
172 except:
173
174 if got_res_num1:
175 res_num2 = res_num1
176 else:
177 res_num2 = None
178 warn(RelaxWarning("Improperly formatted NMRPipe SeriesTab file, cannot process the residue number for dimension 2 in assignment: %s. Setting residue number to %s." % (line[0], res_num2)))
179
180
181 got_res_name1 = True
182 try:
183 res_name1 = row1[-4]
184 except:
185 got_res_name1 = False
186 res_name1 = None
187 warn(RelaxWarning("Improperly formatted NMRPipe SeriesTab file, cannot process the residue name for dimension 1 in assignment: %s. Setting residue name to %s." % (line[0], res_name1)))
188
189
190 try:
191 res_name2 = row2[-4]
192 except:
193
194 if got_res_name1:
195 res_name2 = res_name1
196 else:
197 res_name2 = None
198 warn(RelaxWarning("Improperly formatted NMRPipe SeriesTab file, cannot process the residue name for dimension 2 in assignment: %s. Setting residue name to %s." % (line[0], res_name2)))
199
200
201 try:
202
203 intensities = []
204 for i in range(len(spectra)):
205
206 intensities.append( float(line[spectra_i[i]]) * float(line[int_col-1]) )
207
208
209 except ValueError:
210 raise RelaxError("The peak intensity value %s from the line %s is invalid." % (intensity, line))
211
212
213 w1 = None
214 w2 = None
215 if w1_col != None:
216 try:
217 w1 = float(line[w1_col])
218 except ValueError:
219 raise RelaxError("The chemical shift from the line %s is invalid." % line)
220 if w2_col != None:
221 try:
222 w2 = float(line[w2_col])
223 except ValueError:
224 raise RelaxError("The chemical shift from the line %s is invalid." % line)
225
226
227 peak_list.add(res_nums=[res_num1, res_num2], res_names=[res_name1, res_name2], spin_names=[name1, name2], shifts=[w1, w2], intensity=intensities, intensity_name=spectra)
228
229
231 """Extract showApod information for spectrum fourier transformed with NMRPipe.
232
233 @keyword file: The filename of the NMRPipe fourier transformed file.
234 @type file: str
235 @keyword dir: The directory where the file is located.
236 @type dir: str
237 @keyword path_to_command: If showApod not in PATH, then specify absolute path as: /path/to/showApod
238 @type path_to_command: str
239 @return: The output from showApod as list of lines.
240 @rtype: list of lines
241 """
242
243
244 file_path = get_file_path(file_name=file_name, dir=dir)
245
246 if not subprocess_module:
247 raise RelaxError("Python module 'subprocess' not found, cannot call showApod.")
248
249
250 Temp = subprocess.Popen([path_to_command, file_path], stdout=subprocess.PIPE)
251
252
253 (output, errput) = Temp.communicate()
254
255
256 return_value = Temp.wait()
257
258
259 if hasattr(output, 'decode'):
260 output = output.decode()
261
262 return output.splitlines()
263
264
265 -def show_apod_rmsd(file_name=None, dir=None, path_to_command='showApod'):
266 """Extract showApod 'Noise Std Dev' for spectrum fourier transformed with NMRPipe.
267
268 @keyword file: The filename of the NMRPipe fourier transformed file.
269 @type file: str
270 @keyword dir: The directory where the file is located.
271 @type dir: str
272 @keyword path_to_command: If showApod not in PATH, then specify absolute path as: /path/to/showApod
273 @type path_to_command: str
274 @return: The Noise Std Dev from line: 'REMARK Automated Noise Std Dev in Processed Data'
275 @rtype: float
276 """
277
278
279 show_apod_lines = show_apod_extract(file_name=file_name, dir=dir, path_to_command=path_to_command)
280
281
282 found = False
283 for line in show_apod_lines:
284
285 if line[:49] == 'REMARK Automated Noise Std Dev in Processed Data:':
286
287 rmsd = float(line[49:].split()[0])
288 return rmsd
289
290 if not found:
291 print(show_apod_lines)
292 raise RelaxError("Could not find the line: 'REMARK Automated Noise Std Dev in Processed Data:', from the output of showApod.")
293
294
296 """Extract showApod 'Noise Std Dev' from showApod, and write to file with same filename and ending '.rmsd'
297
298 @keyword file: The filename of the NMRPipe fourier transformed file.
299 @type file: str
300 @keyword dir: The directory where the file is located.
301 @type dir: str
302 @keyword path_to_command: If showApod not in PATH, then specify absolute path as: /path/to/showApod
303 @type path_to_command: str
304 @keyword outdir: The directory where to write the file. If 'None', then write in same directory.
305 @type outdir: str
306 @param force: Boolean argument which if True causes the file to be overwritten if it already exists.
307 @type force: bool
308 @return: Write the 'Noise Std Dev' from showApod to a file with same file filename, with ending '.rmsd'. This will be a file path.
309 @rtype: str
310 """
311
312
313 apod_rmsd = show_apod_rmsd(file_name=file_name, dir=dir, path_to_command=path_to_command)
314
315
316 file_name_root = file_root(file_name)
317
318
319 extension = ".rmsd"
320
321
322 file_name_out = file_name_root + extension
323
324
325 if outdir == None:
326 write_outdir = dir
327 else:
328 write_outdir = outdir
329
330
331 wfile, wfile_path = open_write_file(file_name=file_name_out, dir=write_outdir, force=force, verbosity=1, return_path=True)
332
333
334 out_write_data = [['%s'%apod_rmsd]]
335
336
337 write_data(out=wfile, headings=None, data=out_write_data, sep=None)
338
339
340 wfile.close()
341
342
343 return wfile_path
344
345
347 """Searches 'dir' for files with extension 'file_ext'. Extract showApod 'Noise Std Dev' from showApod, and write to file with same filename and ending '.rmsd'.
348
349 @keyword file_ext: The extension for files which is NMRPipe fourier transformed file.
350 @type file_ext: str
351 @keyword dir: The directory where the files is located.
352 @type dir: str
353 @keyword path_to_command: If showApod not in PATH, then specify absolute path as: /path/to/showApod
354 @type path_to_command: str
355 @keyword outdir: The directory where to write the files. If 'None', then write in same directory.
356 @type outdir: str
357 @param force: Boolean argument which if True causes the file to be overwritten if it already exists.
358 @type force: bool
359 @return: Write the 'Noise Std Dev' from showApod to a file with same file filename, with ending '.rmsd'. This will be a list of file paths.
360 @rtype: list of str
361 """
362
363
364 dir_files = abspath(dir)
365
366
367 glob_pat = '*%s' % file_ext
368
369
370 file_list = glob(dir_files + sep + glob_pat)
371
372
373 file_list_sorted = sort_filenames(filenames=file_list, rev=False)
374
375
376 rmsd_files = []
377 for ft_file in file_list_sorted:
378
379 rmsd_file = show_apod_rmsd_to_file(file_name=ft_file, path_to_command=path_to_command, outdir=outdir, force=force)
380
381
382 rmsd_files.append(rmsd_file)
383
384 return rmsd_files
385