1 from __future__ import absolute_import
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """Module for handling the molecule, residue, and spin sequence data."""
25
26
27 import sys
28 from warnings import warn
29
30
31 from lib.check_types import is_float
32 from lib.errors import RelaxError, RelaxInvalidSeqError
33 from lib.io import extract_data, open_write_file, strip, write_data
34 from lib.selection import spin_id_to_data_list
35 from lib.warnings import RelaxWarning, RelaxFileEmptyWarning
36
37
def read_spin_data(file=None, dir=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None, sep=None, spin_id=None):
    """Generator function for reading the spin specific data from file.

    Description
    ===========

    This function reads a columnar formatted file where each line corresponds to a spin system.  Spin identification is either through a spin ID string or through columns containing the molecule name, residue name and number, and/or spin name and number.  Lines which fail validation or contain unparsable numbers are skipped with a RelaxWarning.


    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id_col:   The column containing the spin ID strings.  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be None.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information.  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information.  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information.  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword data_col:      The column containing the data.
    @type data_col:         int or None
    @keyword error_col:     The column containing the errors.
    @type error_col:        int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.  NOTE(review):  this argument is not used anywhere in this function (it is not forwarded to extract_data()) - confirm whether that is intended.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.  NOTE(review):  this argument is not used anywhere in this function - confirm whether the filtering is expected to happen in the caller.
    @type spin_id:          None or str
    @raise RelaxError:      If a column argument is zero, if spin_id_col is mixed with the individual name/number columns, or if no line of the file produced any data.
    @raise ValueError:      If none of the column arguments have been supplied.
    @return:                For each valid line, the molecule name, residue number, residue name, spin number, and spin name are yielded, followed by the data value (if data_col is given) and the error value (if error_col is given).
    @rtype:                 tuple of (str or None, int or None, str or None, int or None, str or None), optionally extended with one or two float or None values
    """

    # Argument tests (column numbering is one-based, so zero is never valid).
    col_args = [spin_id_col, mol_name_col, res_name_col, res_num_col, spin_name_col, spin_num_col, data_col, error_col]
    col_arg_names = ['spin_id_col', 'mol_name_col', 'res_name_col', 'res_num_col', 'spin_name_col', 'spin_num_col', 'data_col', 'error_col']
    for col_arg_name, col_arg in zip(col_arg_names, col_args):
        if col_arg == 0:
            raise RelaxError("The '%s' argument cannot be zero, column numbering starts at one." % col_arg_name)
    if spin_id_col and (mol_name_col or res_name_col or res_num_col or spin_name_col or spin_num_col):
        raise RelaxError("If the 'spin_id_col' argument has been supplied, then the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col must all be set to None.")

    # At least one column is required.  A ValueError is used as this is the exception type that the
    # former max() call on the empty column list produced.
    if not any(col_args):
        raise ValueError("At least one of the column arguments must be supplied.")

    # Extract the data from the file if it has not been supplied directly.
    if not file_data:
        file_data = extract_data(file, dir)

    # Strip the data of all comments and empty lines.  Comment removal is switched off when spin ID
    # strings are present, as these start with the '#' character.
    if spin_id_col is not None:
        file_data = strip(file_data, comments=False)
    else:
        file_data = strip(file_data)

    # No data!
    if not file_data:
        warn(RelaxFileEmptyWarning(file))
        return

    # Yield the data, spin by spin.
    missing_data = True
    for line in file_data:
        # Convert a quoted spin ID to a plain string.  The column is only touched if it exists and
        # is non-empty, so that short lines reach validate_sequence() and are skipped with a
        # warning rather than raising an IndexError here.
        if spin_id_col is not None and len(line) >= spin_id_col:
            spin_id_cell = line[spin_id_col-1]
            if spin_id_cell[:1] in ["\"", "\'"]:
                # NOTE(review):  eval() is executed on text coming from the input file.  This is
                # unsafe for untrusted files and should be replaced by a literal parser.
                line[spin_id_col-1] = eval(spin_id_cell)

        # Validate the sequence, skipping the line with a warning if it is invalid.
        try:
            validate_sequence(line, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, error_col=error_col)
        except RelaxInvalidSeqError:
            # Extract the message string, removing the leading 12 characters and the final character.
            msg = sys.exc_info()[1]
            string = msg.__str__()[12:-1]

            # Give a warning and move on to the next line.
            warn(RelaxWarning(string))
            continue

        # Get the spin data from the ID.
        if spin_id_col:
            # Invalid spin ID.
            if line[spin_id_col-1] == '#':
                warn(RelaxWarning("Invalid spin ID, skipping the line %s" % line))
                continue

            mol_name, res_num, res_name, spin_num, spin_name = spin_id_to_data_list(line[spin_id_col-1])

        # Convert the spin specific data.
        else:
            # The molecule name.
            mol_name = None
            if mol_name_col is not None and line[mol_name_col-1] != 'None':
                mol_name = line[mol_name_col-1]

            # The residue number, catching bad values.
            res_num = None
            if res_num_col is not None and line[res_num_col-1] != 'None':
                try:
                    res_num = int(line[res_num_col-1])
                except ValueError:
                    warn(RelaxWarning("Invalid residue number, skipping the line %s" % line))
                    continue

            # The residue name.
            res_name = None
            if res_name_col is not None and line[res_name_col-1] != 'None':
                res_name = line[res_name_col-1]

            # The spin number, catching bad values.
            spin_num = None
            if spin_num_col is not None and line[spin_num_col-1] != 'None':
                try:
                    spin_num = int(line[spin_num_col-1])
                except ValueError:
                    warn(RelaxWarning("Invalid spin number, skipping the line %s" % line))
                    continue

            # The spin name.
            spin_name = None
            if spin_name_col is not None and line[spin_name_col-1] != 'None':
                spin_name = line[spin_name_col-1]

        # Convert the data (the literal text 'None' maps to a value of None).
        value = None
        if data_col is not None and line[data_col-1] != 'None':
            try:
                value = float(line[data_col-1])
            except ValueError:
                warn(RelaxWarning("Invalid data, skipping the line %s" % line))
                continue

        # Convert the errors (the literal text 'None' maps to an error of None).
        error = None
        if error_col is not None and line[error_col-1] != 'None':
            try:
                error = float(line[error_col-1])
            except ValueError:
                warn(RelaxWarning("Invalid errors, skipping the line %s" % line))
                continue

        # Right, data is OK and exists.
        missing_data = False

        # Yield the sequence information, appending the value and/or error as requested.
        if data_col and error_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, value, error
        elif data_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, value
        elif error_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, error
        else:
            yield mol_name, res_num, res_name, spin_num, spin_name

    # Every line was skipped.
    if missing_data:
        raise RelaxError("No corresponding data could be found within the file.")
227
228
def _validate_number_column(data, col, label):
    """Raise RelaxInvalidSeqError unless the given column holds an integer or None.

    @param data:    The column values of a single line.
    @type data:     list of str
    @param col:     The one-based column number to check.
    @type col:      int
    @param label:   The description used in the error message, e.g. 'residue number'.
    @type label:    str
    """

    # NOTE(review):  eval() is executed on text coming from the input file.  This is unsafe for
    # untrusted files and should be replaced by a strict integer/None parser.
    try:
        number = eval(data[col-1])
        valid = number is None or isinstance(number, int)
    except Exception:
        # Anything which cannot be evaluated is not a valid number.
        valid = False

    if not valid:
        raise RelaxInvalidSeqError(data, "the %s data '%s' is invalid" % (label, data[col-1]))


def validate_sequence(data, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None):
    """Test if the sequence data is valid.

    The only function this performs is to raise a RelaxInvalidSeqError if the data is invalid.  Each supplied column must exist in the line, and the residue and spin number columns must additionally evaluate to an integer or None.


    @param data:                    The sequence data of a single line, one element per column.
    @type data:                     list of str
    @keyword spin_id_col:           The column containing the spin ID strings.
    @type spin_id_col:              int or None
    @keyword mol_name_col:          The column containing the molecule name information.
    @type mol_name_col:             int or None
    @keyword res_num_col:           The column containing the residue number information.
    @type res_num_col:              int or None
    @keyword res_name_col:          The column containing the residue name information.
    @type res_name_col:             int or None
    @keyword spin_num_col:          The column containing the spin number information.
    @type spin_num_col:             int or None
    @keyword spin_name_col:         The column containing the spin name information.
    @type spin_name_col:            int or None
    @keyword data_col:              The column containing the data.
    @type data_col:                 int or None
    @keyword error_col:             The column containing the errors.
    @type error_col:                int or None
    @raise RelaxInvalidSeqError:    If a requested column is missing from the line, or if the residue or spin number is neither an integer nor None.
    """

    num_cols = len(data)

    # Spin ID.
    if spin_id_col and num_cols < spin_id_col:
        raise RelaxInvalidSeqError(data, "the Spin ID data is missing")

    # Molecule name data.
    if mol_name_col and num_cols < mol_name_col:
        raise RelaxInvalidSeqError(data, "the molecule name data is missing")

    # Residue number data.
    if res_num_col:
        if num_cols < res_num_col:
            raise RelaxInvalidSeqError(data, "the residue number data is missing")
        _validate_number_column(data, res_num_col, "residue number")

    # Residue name data.
    if res_name_col and num_cols < res_name_col:
        raise RelaxInvalidSeqError(data, "the residue name data is missing")

    # Spin number data (checked against its own column, not the residue number column).
    if spin_num_col:
        if num_cols < spin_num_col:
            raise RelaxInvalidSeqError(data, "the spin number data is missing")
        _validate_number_column(data, spin_num_col, "spin number")

    # Spin name data.
    if spin_name_col and num_cols < spin_name_col:
        raise RelaxInvalidSeqError(data, "the spin name data is missing")

    # Data.
    if data_col and num_cols < data_col:
        raise RelaxInvalidSeqError(data, "the data is missing")

    # Errors.
    if error_col and num_cols < error_col:
        raise RelaxInvalidSeqError(data, "the error data is missing")
308
309
def _format_spin_value(value, float_format):
    """Return the text for a single data or error cell.

    @param value:           The value to convert.
    @type value:            anything
    @param float_format:    The formatting string applied when is_float() accepts the value.
    @type float_format:     str
    @return:                The formatted float, or the repr() of any other value.
    @rtype:                 str
    """

    if is_float(value):
        return float_format % value
    return repr(value)


def write_spin_data(file, dir=None, sep=None, spin_ids=None, mol_names=None, res_nums=None, res_names=None, spin_nums=None, spin_names=None, force=False, data=None, data_name=None, error=None, error_name=None, float_format="%20.15g"):
    """Write the spin specific data to file.

    Description
    ===========

    This function writes a columnar formatted file where each line corresponds to a spin system.  Spin identification is either through a spin ID string or through columns containing the molecule name, residue name and number, and/or spin name and number.  Nothing is written if there are no rows to output.


    @param file:            The name of the file to write the data to (or alternatively an already opened file object).
    @type file:             str or file object
    @keyword dir:           The directory to place the file into (defaults to the current directory if None and the file argument is not a file object).
    @type dir:              str or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_ids:      The list of spin ID strings.
    @type spin_ids:         None or list of str
    @keyword mol_names:     The list of molecule names.
    @type mol_names:        None or list of str
    @keyword res_nums:      The list of residue numbers.
    @type res_nums:         None or list of int
    @keyword res_names:     The list of residue names.
    @type res_names:        None or list of str
    @keyword spin_nums:     The list of spin numbers.
    @type spin_nums:        None or list of int
    @keyword spin_names:    The list of spin names.
    @type spin_names:       None or list of str
    @keyword force:         A flag which if True will cause an existing file to be overwritten.
    @type force:            bool
    @keyword data:          A list of the data to write out.  The first dimension corresponds to the spins.  A second dimension can also be given if multiple data sets across multiple columns are desired.
    @type data:             list or list of lists
    @keyword data_name:     A name corresponding to the data argument.  If the data argument is a list of lists, then this must also be a list with the same length as the second dimension of the data arg.
    @type data_name:        str or list of str
    @keyword error:         A list of the errors to write out.  The first dimension corresponds to the spins.  A second dimension can also be given if multiple data sets across multiple columns are desired.  These will be interspersed between the data columns, if the data is given.  If the data arg is not None, then this must have the same dimensions as that object.
    @type error:            list or list of lists
    @keyword error_name:    A name corresponding to the error argument.  If the error argument is a list of lists, then this must also be a list with the same length as the second dimension of the error arg.
    @type error_name:       str or list of str
    @keyword float_format:  A float formatting string to use for the data and error whenever a float is found.
    @type float_format:     str
    @raise RelaxError:      If the name arguments do not match the structure of the data or error arguments, if the list arguments differ in length, or if no spin identification list is supplied.
    """

    # Data argument tests.
    if data:
        # Data is a list of lists.
        if isinstance(data[0], list):
            # Data and data_name don't match.
            if not isinstance(data_name, list):
                raise RelaxError("The data_name arg '%s' must be a list as the data argument is a list of lists." % data_name)

            # Error doesn't match.
            if error and (len(data) != len(error) or len(data[0]) != len(error[0])):
                raise RelaxError("The data arg:\n%s\n\ndoes not have the same dimensions as the error arg:\n%s." % (data, error))

        # Data is a simple list.
        else:
            # Data and data_name don't match.
            if not isinstance(data_name, str):
                raise RelaxError("The data_name arg '%s' must be a string as the data argument is a simple list." % data_name)

            # Error doesn't match.
            if error and len(data) != len(error):
                raise RelaxError("The data arg:\n%s\n\ndoes not have the same dimensions as the error arg:\n%s." % (data, error))

    # Error argument tests.
    if error:
        # Error is a list of lists.
        if isinstance(error[0], list):
            if not isinstance(error_name, list):
                raise RelaxError("The error_name arg '%s' must be a list as the error argument is a list of lists." % error_name)

        # Error is a simple list.
        else:
            if not isinstance(error_name, str):
                raise RelaxError("The error_name arg '%s' must be a string as the error argument is a simple list." % error_name)

    # The number of spins, taken from the first identification list and checked against the rest.
    args = [spin_ids, mol_names, res_nums, res_names, spin_nums, spin_names]
    arg_names = ['spin_ids', 'mol_names', 'res_nums', 'res_names', 'spin_nums', 'spin_names']
    N = None
    first_arg = None
    first_arg_name = None
    for arg, arg_name in zip(args, arg_names):
        if not isinstance(arg, list):
            continue

        # First list match.
        if N is None:
            N = len(arg)
            first_arg = arg
            first_arg_name = arg_name

        # Length check.
        if len(arg) != N:
            raise RelaxError("The %s and %s arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, arg_name, len(first_arg), len(arg)))

    # Nothing?
    if N is None:
        raise RelaxError("No spin ID data is present.")

    # Data and error length check.
    if data and len(data) != N:
        raise RelaxError("The %s and data arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, len(first_arg), len(data)))
    if error and len(error) != N:
        raise RelaxError("The %s and error arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, len(first_arg), len(error)))

    # The spin identification columns which are present, paired with their column headings.
    heading_names = ['spin_id', 'mol_name', 'res_num', 'res_name', 'spin_num', 'spin_name']
    id_columns = [(heading, arg) for heading, arg in zip(heading_names, args) if arg]

    # Build the headings, starting with the spin identification columns.
    headings = [heading for heading, arg in id_columns]

    # The data and error headings (the errors are interleaved with the data columns).
    if data:
        # List of lists.
        if isinstance(data[0], list):
            for i in range(len(data[0])):
                headings.append(data_name[i])
                if error:
                    headings.append(error_name[i])

        # Simple list.
        else:
            headings.append(data_name)
            if error:
                headings.append(error_name)

    # Only errors.
    elif error:
        if isinstance(error[0], list):
            for i in range(len(error[0])):
                headings.append(error_name[i])
        else:
            headings.append(error_name)

    # No headings.
    if headings == []:
        headings = None

    # Build the rows, one per spin.
    file_data = []
    for spin_index in range(N):
        row = []

        # The spin identification columns (non-string values are written via repr()).
        for heading, arg in id_columns:
            value = arg[spin_index]
            if not isinstance(value, str):
                value = repr(value)
            row.append(value)

        # The data and errors.
        if data:
            # List of lists.
            if isinstance(data[0], list):
                for i in range(len(data[0])):
                    row.append(_format_spin_value(data[spin_index][i], float_format))
                    if error:
                        row.append(_format_spin_value(error[spin_index][i], float_format))

            # Simple list.
            else:
                row.append(_format_spin_value(data[spin_index], float_format))
                if error:
                    row.append(_format_spin_value(error[spin_index], float_format))

        # Only errors.  The float format is applied here as well, matching the data branches and
        # the documented meaning of the float_format argument.
        elif error:
            if isinstance(error[0], list):
                for i in range(len(error[0])):
                    row.append(_format_spin_value(error[spin_index][i], float_format))
            else:
                row.append(_format_spin_value(error[spin_index], float_format))

        file_data.append(row)

    # No data to write, so do nothing!
    if file_data == [] or file_data == [[]]:
        return

    # Only a file given as a path string is closed here, as an already opened file object remains
    # the property of the caller.
    # NOTE(review):  this assumes open_write_file() returns a closable file object when given a
    # path string - confirm.
    opened_here = isinstance(file, str)

    # Open the file for writing.
    file = open_write_file(file_name=file, dir=dir, force=force)

    # Write out the file data, releasing the file handle even if the write fails.
    try:
        write_data(out=file, headings=headings, data=file_data, sep=sep)
    finally:
        if opened_here:
            file.close()
533