1 from __future__ import absolute_import
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 """Module for handling the molecule, residue, and spin sequence data."""
26
27
28 from warnings import warn
29
30
31 from lib.checks import Check
32 from lib.check_types import is_float
33 from lib.errors import RelaxError, RelaxInvalidSeqError
34 from lib.float import isFinite
35 from lib.io import extract_data, open_write_file, strip, write_data
36 from lib.selection import spin_id_to_data_list
37 from lib.warnings import RelaxWarning, RelaxFileEmptyWarning
38
39
40
# Translation table from the upper case three letter amino acid codes of the 20 standard residues to their one letter codes.
AA_CODES = dict((
    ("ALA", "A"),
    ("ARG", "R"),
    ("ASN", "N"),
    ("ASP", "D"),
    ("CYS", "C"),
    ("GLU", "E"),
    ("GLN", "Q"),
    ("GLY", "G"),
    ("HIS", "H"),
    ("ILE", "I"),
    ("LEU", "L"),
    ("LYS", "K"),
    ("MET", "M"),
    ("PHE", "F"),
    ("PRO", "P"),
    ("SER", "S"),
    ("THR", "T"),
    ("TRP", "W"),
    ("TYR", "Y"),
    ("VAL", "V"),
))
63
64
66 """Convert the given three letter amino acid code to the corresponding one letter code.
67
68 Any non-standard residues will be converted to '*'.
69
70
71 @param code: The three letter amino acid code to convert.
72 @type code: str
73 @return: The corresponding one letter amino acid code, or '*'.
74 @rtype: str
75 """
76
77
78 upper_code = code.upper()
79
80
81 if upper_code in AA_CODES:
82 return AA_CODES[upper_code]
83
84
85 return '*'
86
87
def _is_int_or_none_text(text):
    """Determine if the column text is the literal form of an integer or of None.

    @param text:    The raw column text.
    @type text:     str
    @return:        True if the text evaluates to None or to an int, False otherwise.
    @rtype:         bool
    """

    # Function level import so that no new module level import is required.
    from ast import literal_eval

    # eval() tolerated leading whitespace, so keep accepting it.
    if isinstance(text, str):
        text = text.strip()

    # SECURITY:  This text comes straight from the input file.  It was previously passed to eval(), which would execute
    # arbitrary expressions.  literal_eval() only accepts Python literals, so 'None' and integer literals still pass
    # while names, calls and arithmetic are now rejected as invalid.
    try:
        value = literal_eval(text)

    # Anything which cannot be parsed is invalid data (but KeyboardInterrupt and SystemExit are no longer swallowed).
    except Exception:
        return False

    return value is None or isinstance(value, int)


def check_sequence_func(data, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None):
    """Test if the sequence data of a single line is valid.

    Each supplied column number (numbering starts at one) must exist in the data, and the residue and spin number columns must contain either an integer or the text 'None'.  The checks are performed in a fixed order and only the first problem found is reported.

    Note that the error object is returned rather than raised.  NOTE(review):  presumably the lib.checks.Check wrapper decides whether the returned error is raised or converted to a warning - confirm against lib.checks.


    @param data:            The sequence data for one line, as a list of column strings.
    @type data:             list of str
    @keyword spin_id_col:   The column containing the spin ID strings.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.
    @type mol_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.
    @type res_num_col:      int or None
    @keyword res_name_col:  The column containing the residue name information.
    @type res_name_col:     int or None
    @keyword spin_num_col:  The column containing the spin number information.
    @type spin_num_col:     int or None
    @keyword spin_name_col: The column containing the spin name information.
    @type spin_name_col:    int or None
    @keyword data_col:      The column containing the data.
    @type data_col:         int or None
    @keyword error_col:     The column containing the errors.
    @type error_col:        int or None
    @return:                None if the data is valid, otherwise the error object describing the first problem found.
    @rtype:                 None or RelaxInvalidSeqError instance
    """

    # The number of columns present in the line.
    n_cols = len(data)

    # Spin ID.
    if spin_id_col and n_cols < spin_id_col:
        return RelaxInvalidSeqError(data, "the Spin ID data is missing")

    # Molecule name data.
    if mol_name_col and n_cols < mol_name_col:
        return RelaxInvalidSeqError(data, "the molecule name data is missing")

    # Residue number data.
    if res_num_col:
        # No residue number data.
        if n_cols < res_num_col:
            return RelaxInvalidSeqError(data, "the residue number data is missing")

        # Bad data in the column.
        if not _is_int_or_none_text(data[res_num_col-1]):
            return RelaxInvalidSeqError(data, "the residue number data '%s' is invalid" % data[res_num_col-1])

    # Residue name data.
    if res_name_col and n_cols < res_name_col:
        return RelaxInvalidSeqError(data, "the residue name data is missing")

    # Spin number data.
    if spin_num_col:
        # No spin number data.
        if n_cols < spin_num_col:
            return RelaxInvalidSeqError(data, "the spin number data is missing")

        # Bad data in the column.
        if not _is_int_or_none_text(data[spin_num_col-1]):
            return RelaxInvalidSeqError(data, "the spin number data '%s' is invalid" % data[spin_num_col-1])

    # Spin name data.
    if spin_name_col and n_cols < spin_name_col:
        return RelaxInvalidSeqError(data, "the spin name data is missing")

    # Data.
    if data_col and n_cols < data_col:
        return RelaxInvalidSeqError(data, "the data is missing")

    # Errors.
    if error_col and n_cols < error_col:
        return RelaxInvalidSeqError(data, "the error data is missing")

    # Falling off the end returns None, signalling that no problem was found.
167
168
# Wrap the validation function in a lib.checks.Check object, giving the callable check_sequence().  Within this module it is called with the same keywords as check_sequence_func() plus 'escalate', and a false result causes the line to be skipped.
# NOTE(review):  the meaning of the escalate levels is defined by lib.checks.Check - confirm there.
check_sequence = Check(check_sequence_func)
170
171
def _column_value(text, convert):
    """Convert the text of a single column, mapping the text 'None' to the None object.

    @param text:    The raw column text.
    @type text:     str
    @param convert: The conversion function for all other text (int or float).
    @type convert:  callable
    @return:        The converted value, or None.
    @raise ValueError:  If the text cannot be converted.
    """

    if text == 'None':
        return None
    return convert(text)


def read_spin_data(file=None, dir=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None, sep=None, spin_id=None, raise_flag=True):
    """Generator function for reading the spin specific data from file.

    Description
    ===========

    This function reads a columnar formatted file where each line corresponds to a spin system.  Spin identification is either through a spin ID string or through columns containing the molecule name, residue name and number, and/or spin name and number.

    Lines which fail the sequence check, or which contain an invalid spin ID, residue number, spin number, value or error, are skipped (a RelaxWarning is given for the conversion failures).


    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id_col:   The column containing the spin ID strings.  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information.  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information.  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information.  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword data_col:      The column containing the data.
    @type data_col:         int or None
    @keyword error_col:     The column containing the errors.
    @type error_col:        int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.  NOTE(review):  this argument is accepted but never used by this function, so no restriction is applied here.
    @type spin_id:          None or str
    @keyword raise_flag:    A flag which if True will cause a RelaxError to be raised if no data can be found.
    @type raise_flag:       bool
    @raise RelaxError:      If a column argument is zero, if spin_id_col is mixed with the other spin identification columns, if no column argument is supplied, or if raise_flag is set and no line yielded data.
    @return:                For each valid line, a tuple of the molecule name, residue number, residue name, spin number and spin name is yielded.  The data value is appended if data_col is given, followed by the error value if error_col is given.
    @rtype:                 tuple of (str or None, int or None, str or None, int or None, str or None [, float or None [, float or None]])
    """

    # Function level import so that no new module level import is required.
    from ast import literal_eval

    # The column arguments, in the order in which they are validated.
    col_args = (
        ('spin_id_col', spin_id_col),
        ('mol_name_col', mol_name_col),
        ('res_name_col', res_name_col),
        ('res_num_col', res_num_col),
        ('spin_name_col', spin_name_col),
        ('spin_num_col', spin_num_col),
        ('data_col', data_col),
        ('error_col', error_col),
    )

    # Argument tests.
    for name, col in col_args:
        if col == 0:
            raise RelaxError("The '%s' argument cannot be zero, column numbering starts at one." % name)
    if spin_id_col and (mol_name_col or res_name_col or res_num_col or spin_name_col or spin_num_col):
        raise RelaxError("If the 'spin_id_col' argument has been supplied, then the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col must all be set to None.")

    # At least one column is needed.  This was previously an accidental ValueError from max() on an empty list.
    if not any(col for name, col in col_args):
        raise RelaxError("No column arguments have been supplied, at least one of the '*_col' arguments is required.")

    # Extract the data from the file.
    if not file_data:
        # Extract.
        file_data = extract_data(file, dir, sep=sep)

        # Strip the data of all empty lines (and of the comments, except when spin IDs are used, as these may start with '#').
        if spin_id_col is not None:
            file_data = strip(file_data, comments=False)
        else:
            file_data = strip(file_data)

    # No data!
    if not file_data:
        warn(RelaxFileEmptyWarning(file))
        return

    # Yield the data, spin by spin.
    missing_data = True
    for line in file_data:
        # Convert a quoted spin ID to the bare string.  The length guard leaves short lines for check_sequence() to
        # reject, rather than failing here with an IndexError.
        if spin_id_col is not None and len(line) >= spin_id_col:
            raw_id = line[spin_id_col-1]
            if raw_id[:1] in ('"', "'"):
                # SECURITY:  this is text from the input file.  It was previously passed to eval(), which would
                # execute arbitrary expressions.  literal_eval() only accepts Python literals.
                try:
                    unquoted = literal_eval(raw_id)
                except (ValueError, SyntaxError):
                    unquoted = None

                # Badly quoted text is treated like any other invalid spin ID.
                if not isinstance(unquoted, str):
                    warn(RelaxWarning("Invalid spin ID, skipping the line %s" % line))
                    continue

                # The line is updated in place, as before.
                line[spin_id_col-1] = unquoted

        # Validate the sequence, skipping the line if it fails.
        if not check_sequence(line, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, error_col=error_col, escalate=1):
            continue

        # Get the spin data from the ID.
        if spin_id_col:
            # Invalid spin ID.
            if line[spin_id_col-1] == '#':
                warn(RelaxWarning("Invalid spin ID, skipping the line %s" % line))
                continue

            mol_name, res_num, res_name, spin_num, spin_name = spin_id_to_data_list(line[spin_id_col-1])

        # Convert the spin specific data.
        else:
            # The molecule name.
            mol_name = None
            if mol_name_col is not None and line[mol_name_col-1] != 'None':
                mol_name = line[mol_name_col-1]

            # The residue number, catching bad values.
            res_num = None
            if res_num_col is not None:
                try:
                    res_num = _column_value(line[res_num_col-1], int)
                except ValueError:
                    warn(RelaxWarning("Invalid residue number, skipping the line %s" % line))
                    continue

            # The residue name.
            res_name = None
            if res_name_col is not None and line[res_name_col-1] != 'None':
                res_name = line[res_name_col-1]

            # The spin number, catching bad values.
            spin_num = None
            if spin_num_col is not None:
                try:
                    spin_num = _column_value(line[spin_num_col-1], int)
                except ValueError:
                    warn(RelaxWarning("Invalid spin number, skipping the line %s" % line))
                    continue

            # The spin name.
            spin_name = None
            if spin_name_col is not None and line[spin_name_col-1] != 'None':
                spin_name = line[spin_name_col-1]

        # Convert the data.
        value = None
        if data_col is not None:
            try:
                value = _column_value(line[data_col-1], float)

            # Bad data.
            except ValueError:
                warn(RelaxWarning("Invalid data, skipping the line %s" % line))
                continue

            # Skip NaN and infinite values.
            if value is not None and not isFinite(value):
                warn(RelaxWarning("The value is not finite, skipping the line %s" % line))
                continue

        # Convert the errors.
        error = None
        if error_col is not None:
            try:
                error = _column_value(line[error_col-1], float)

            # Bad data.
            except ValueError:
                warn(RelaxWarning("Invalid errors, skipping the line %s" % line))
                continue

            # Skip NaN and infinite values.
            if error is not None and not isFinite(error):
                warn(RelaxWarning("The error is not finite, skipping the line %s" % line))
                continue

        # Right, data is OK and exists.
        missing_data = False

        # Yield the data, the shape depending on which of the data and error columns were requested.
        if data_col and error_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, value, error
        elif data_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, value
        elif error_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, error
        else:
            yield mol_name, res_num, res_name, spin_num, spin_name

    # Hmmm, no data!
    if raise_flag and missing_data:
        raise RelaxError("No corresponding data could be found within the file.")
363
364
def _format_cell(value, float_format):
    """Convert a single data or error value into its text for the file.

    @param value:           The value to format.
    @type value:            anything
    @param float_format:    The formatting string applied when the value is a float.
    @type float_format:     str
    @return:                The formatted float, or the repr() of any other value.
    @rtype:                 str
    """

    if is_float(value):
        return float_format % value
    return repr(value)


def write_spin_data(file, dir=None, sep=None, spin_ids=None, mol_names=None, res_nums=None, res_names=None, spin_nums=None, spin_names=None, force=False, data=None, data_name=None, error=None, error_name=None, float_format="%20.15g"):
    """Write the spin specific data to file.

    Description
    ===========

    This function writes a columnar formatted file where each line corresponds to a spin system.  Spin identification is either through a spin ID string or through columns containing the molecule name, residue name and number, and/or spin name and number.

    Nothing is written, and the file is not opened, if there are no rows to write.


    @param file:            The name of the file to write the data to (or alternatively an already opened file object).
    @type file:             str or file object
    @keyword dir:           The directory to place the file into (defaults to the current directory if None and the file argument is not a file object).
    @type dir:              str or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_ids:      The list of spin ID strings.
    @type spin_ids:         None or list of str
    @keyword mol_names:     The list of molecule names.
    @type mol_names:        None or list of str
    @keyword res_nums:      The list of residue numbers.
    @type res_nums:         None or list of int
    @keyword res_names:     The list of residue names.
    @type res_names:        None or list of str
    @keyword spin_nums:     The list of spin numbers.
    @type spin_nums:        None or list of int
    @keyword spin_names:    The list of spin names.
    @type spin_names:       None or list of str
    @keyword force:         A flag which if True will cause an existing file to be overwritten.
    @type force:            bool
    @keyword data:          A list of the data to write out.  The first dimension corresponds to the spins.  A second dimension can also be given if multiple data sets across multiple columns are desired.
    @type data:             list or list of lists
    @keyword data_name:     A name corresponding to the data argument.  If the data argument is a list of lists, then this must also be a list with the same length as the second dimension of the data arg.
    @type data_name:        str or list of str
    @keyword error:         A list of the errors to write out.  The first dimension corresponds to the spins.  A second dimension can also be given if multiple data sets across multiple columns are desired.  These will be inter-dispersed between the data columns, if the data is given.  If the data arg is not None, then this must have the same dimensions as that object.
    @type error:            list or list of lists
    @keyword error_name:    A name corresponding to the error argument.  If the error argument is a list of lists, then this must also be a list with the same length at the second dimension of the error arg.
    @type error_name:       str or list of str
    @keyword float_format:  A float formatting string to use for the data and error whenever a float is found.
    @type float_format:     str
    @raise RelaxError:      If the name arguments do not match the shape of the data or error, if the list arguments differ in length, or if no spin identification list is supplied.
    """

    # Data argument tests.
    if data:
        # Data is a list of lists.
        if isinstance(data[0], list):
            # Data and data_name don't match.
            if not isinstance(data_name, list):
                raise RelaxError("The data_name arg '%s' must be a list as the data argument is a list of lists." % data_name)

            # Error doesn't match.
            if error and (len(data) != len(error) or len(data[0]) != len(error[0])):
                raise RelaxError("The data arg:\n%s\n\ndoes not have the same dimensions as the error arg:\n%s." % (data, error))

        # Data is a simple list.
        else:
            # Data and data_name don't match.
            if not isinstance(data_name, str):
                raise RelaxError("The data_name arg '%s' must be a string as the data argument is a simple list." % data_name)

            # Error doesn't match.
            if error and len(data) != len(error):
                raise RelaxError("The data arg:\n%s\n\ndoes not have the same dimensions as the error arg:\n%s." % (data, error))

    # Error argument tests.
    if error:
        # Error is a list of lists.
        if isinstance(error[0], list):
            if not isinstance(error_name, list):
                raise RelaxError("The error_name arg '%s' must be a list as the error argument is a list of lists." % error_name)

        # Error is a simple list.
        else:
            if not isinstance(error_name, str):
                raise RelaxError("The error_name arg '%s' must be a string as the error argument is a simple list." % error_name)

    # The spin identification columns in output order, as (column heading, argument name, values).
    id_columns = (
        ('spin_id', 'spin_ids', spin_ids),
        ('mol_name', 'mol_names', mol_names),
        ('res_num', 'res_nums', res_nums),
        ('res_name', 'res_names', res_names),
        ('spin_num', 'spin_nums', spin_nums),
        ('spin_name', 'spin_names', spin_names),
    )

    # The number of spins is taken from the first list argument, and all other lists must match it.
    N = None
    first_arg = None
    first_arg_name = None
    for heading, arg_name, values in id_columns:
        if isinstance(values, list):
            # First list match.
            if N is None:
                N = len(values)
                first_arg = values
                first_arg_name = arg_name

            # Length check.
            if len(values) != N:
                raise RelaxError("The %s and %s arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, arg_name, len(first_arg), len(values)))

    # Nothing?!?
    if N is None:
        raise RelaxError("No spin ID data is present.")

    # Data and error length check.
    if data and len(data) != N:
        raise RelaxError("The %s and data arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, len(first_arg), len(data)))
    if error and len(error) != N:
        raise RelaxError("The %s and error arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, len(first_arg), len(error)))

    # Only the non-empty spin identification arguments become columns.
    headings = [heading for heading, arg_name, values in id_columns if values]
    used_columns = [values for heading, arg_name, values in id_columns if values]

    # The data and error headings, the errors being interleaved with the data.
    if data:
        if isinstance(data[0], list):
            for i in range(len(data[0])):
                headings.append(data_name[i])
                if error:
                    headings.append(error_name[i])
        else:
            headings.append(data_name)
            if error:
                headings.append(error_name)

    # Only errors.
    elif error:
        if isinstance(error[0], list):
            for i in range(len(error[0])):
                headings.append(error_name[i])
        else:
            headings.append(error_name)

    # No headings.
    if headings == []:
        headings = None

    # Build the rows, one per spin.
    file_data = []
    for spin_index in range(N):
        row = []

        # The spin identification columns (non-strings are written as their repr()).
        for values in used_columns:
            value = values[spin_index]
            if not isinstance(value, str):
                value = repr(value)
            row.append(value)

        # The data, with the matching error directly after each data column.
        if data:
            if isinstance(data[0], list):
                for i in range(len(data[0])):
                    row.append(_format_cell(data[spin_index][i], float_format))
                    if error:
                        row.append(_format_cell(error[spin_index][i], float_format))
            else:
                row.append(_format_cell(data[spin_index], float_format))
                if error:
                    row.append(_format_cell(error[spin_index], float_format))

        # Only errors.  These now honour float_format as documented, rather than always being written with repr().
        elif error:
            if isinstance(error[0], list):
                for i in range(len(error[0])):
                    row.append(_format_cell(error[spin_index][i], float_format))
            else:
                row.append(_format_cell(error[spin_index], float_format))

        file_data.append(row)

    # No data to write, so do nothing!
    if file_data == [] or file_data == [[]]:
        return

    # Open the file for writing.
    # NOTE(review):  the file is not closed here.  As the file argument may be an already open file object owned by the caller, closing is left as is - confirm against lib.io.open_write_file.
    out_file = open_write_file(file_name=file, dir=dir, force=force)

    # Write out the file data.
    write_data(out=out_file, headings=headings, data=file_data, sep=sep)
588