1 from __future__ import absolute_import
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """Module for handling the molecule, residue, and spin sequence data."""
25
26
27 from warnings import warn
28
29
30 from lib.checks import Check
31 from lib.check_types import is_float
32 from lib.errors import RelaxError, RelaxInvalidSeqError
33 from lib.float import isFinite
34 from lib.io import extract_data, open_write_file, strip, write_data
35 from lib.selection import spin_id_to_data_list
36 from lib.warnings import RelaxWarning, RelaxFileEmptyWarning
37
38
39
# Lookup table mapping each standard three letter amino acid code (upper case) to its one letter code.
AA_CODES = dict((
    ("ALA", "A"),
    ("ARG", "R"),
    ("ASN", "N"),
    ("ASP", "D"),
    ("CYS", "C"),
    ("GLU", "E"),
    ("GLN", "Q"),
    ("GLY", "G"),
    ("HIS", "H"),
    ("ILE", "I"),
    ("LEU", "L"),
    ("LYS", "K"),
    ("MET", "M"),
    ("PHE", "F"),
    ("PRO", "P"),
    ("SER", "S"),
    ("THR", "T"),
    ("TRP", "W"),
    ("TYR", "Y"),
    ("VAL", "V"),
))
62
63
# NOTE(review): the 'def' line of this function is missing from the listing (its docstring and body were left at module level).  The header below restores it - the name 'aa_codes_three_to_one' must be confirmed against the callers.
def aa_codes_three_to_one(code):
    """Convert the given three letter amino acid code to the corresponding one letter code.

    The lookup is case insensitive as the code is upper-cased before being matched against the AA_CODES table.  Any non-standard residues will be converted to '*'.


    @param code:    The three letter amino acid code to convert.
    @type code:     str
    @return:        The corresponding one letter amino acid code, or '*'.
    @rtype:         str
    """

    # A single dictionary lookup, falling back to '*' for anything not in the table.
    return AA_CODES.get(code.upper(), '*')
85
86
def check_sequence_func(data, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None):
    """Test if the sequence data is valid.

    This function does not raise anything itself.  It returns a RelaxInvalidSeqError instance describing the first problem found, or None if all of the requested columns are present and the residue and spin numbers are either integers or the text 'None'.  The columns are checked in the order spin ID, molecule name, residue number, residue name, spin number, spin name, data, and error.


    @param data:            The sequence data, i.e. the elements of a single line.
    @type data:             list of str
    @keyword spin_id_col:   The column containing the spin ID strings.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.
    @type mol_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.
    @type res_num_col:      int or None
    @keyword res_name_col:  The column containing the residue name information.
    @type res_name_col:     int or None
    @keyword spin_num_col:  The column containing the spin number information.
    @type spin_num_col:     int or None
    @keyword spin_name_col: The column containing the spin name information.
    @type spin_name_col:    int or None
    @keyword data_col:      The column containing the data.
    @type data_col:         int or None
    @keyword error_col:     The column containing the errors.
    @type error_col:        int or None
    @return:                The error object describing the first problem found, or None if the data is valid.
    @rtype:                 RelaxInvalidSeqError instance or None
    """

    # Imported locally so that the module level import block does not need to change.
    from ast import literal_eval

    def is_missing(col):
        """Return True if the column was requested (non-zero, not None) but the line is too short to contain it."""

        return bool(col) and len(data) < col

    def is_bad_number(col):
        """Return True if the text in the column is neither an integer literal nor 'None'."""

        # SECURITY: this text comes straight from the input file.  It used to be passed to eval(), which executes arbitrary expressions.  literal_eval() only accepts Python literals.
        try:
            number = literal_eval(data[col-1])
        except Exception:
            # Any parse failure (or a non-string element) means the column does not hold a number.
            return True
        return not (number is None or isinstance(number, int))

    # Spin ID.
    if is_missing(spin_id_col):
        return RelaxInvalidSeqError(data, "the Spin ID data is missing")

    # Molecule name data.
    if is_missing(mol_name_col):
        return RelaxInvalidSeqError(data, "the molecule name data is missing")

    # Residue number data.
    if res_num_col:
        # No data in column.
        if is_missing(res_num_col):
            return RelaxInvalidSeqError(data, "the residue number data is missing")

        # Bad data in column.
        if is_bad_number(res_num_col):
            return RelaxInvalidSeqError(data, "the residue number data '%s' is invalid" % data[res_num_col-1])

    # Residue name data.
    if is_missing(res_name_col):
        return RelaxInvalidSeqError(data, "the residue name data is missing")

    # Spin number data.
    if spin_num_col:
        # No data in column.
        if is_missing(spin_num_col):
            return RelaxInvalidSeqError(data, "the spin number data is missing")

        # Bad data in column.
        if is_bad_number(spin_num_col):
            return RelaxInvalidSeqError(data, "the spin number data '%s' is invalid" % data[spin_num_col-1])

    # Spin name data.
    if is_missing(spin_name_col):
        return RelaxInvalidSeqError(data, "the spin name data is missing")

    # Data.
    if is_missing(data_col):
        return RelaxInvalidSeqError(data, "the data is missing")

    # Errors.
    if is_missing(error_col):
        return RelaxInvalidSeqError(data, "the error data is missing")

    # Everything requested is present and valid.
    return None
167
# Wrap the validation function in a Check object.  This wrapped callable is what read_spin_data() uses, passing an additional 'escalate' keyword and testing the truth of the result (presumably Check maps a returned error object onto False, a warning, or a raised error depending on 'escalate' - confirm in lib.checks).
check_sequence = Check(check_sequence_func)
169
170
def read_spin_data(file=None, dir=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None, sep=None, spin_id=None, raise_flag=True):
    """Generator function for reading the spin specific data from file.

    Description
    ===========

    This function reads a columnar formatted file where each line corresponds to a spin system.  Spin identification is either through a spin ID string or through columns containing the molecule name, residue name and number, and/or spin name and number.

    Lines which fail the check_sequence() validation, or whose numbers, data or errors cannot be converted, are skipped (with a RelaxWarning for the conversion failures).  As this is a generator, nothing - including the argument checks - is executed until the first element is requested.


    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id_col:   The column containing the spin ID strings.  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information.  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information.  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information.  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword data_col:      The column containing the data.
    @type data_col:         int or None
    @keyword error_col:     The column containing the errors.
    @type error_col:        int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.  NOTE(review): this argument is not referenced anywhere in the function body, so no restriction is applied here.
    @type spin_id:          None or str
    @keyword raise_flag:    A flag which if True will cause a RelaxError to be raised if no data can be found.
    @type raise_flag:       bool
    @raise RelaxError:      If a column argument is zero, if spin_id_col is mixed with the other spin identification columns, or if raise_flag is set and no line yielded any data.
    @return:                For each valid line, the molecule name, residue number, residue name, spin number and spin name are yielded, followed by the data value (if data_col is given) and the error value (if error_col is given).
    @rtype:                 tuple of 5, 6 or 7 elements (str or None, int or None, str or None, int or None, str or None[, float or None][, float or None])
    """

    # Imported locally so that the module level import block does not need to change.
    from ast import literal_eval

    # Argument tests.
    col_args = (spin_id_col, mol_name_col, res_name_col, res_num_col, spin_name_col, spin_num_col, data_col, error_col)
    col_arg_names = ('spin_id_col', 'mol_name_col', 'res_name_col', 'res_num_col', 'spin_name_col', 'spin_num_col', 'data_col', 'error_col')
    for col_name, col in zip(col_arg_names, col_args):
        if col == 0:
            raise RelaxError("The '%s' argument cannot be zero, column numbering starts at one." % col_name)
    if spin_id_col and (mol_name_col or res_name_col or res_num_col or spin_name_col or spin_num_col):
        raise RelaxError("If the 'spin_id_col' argument has been supplied, then the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col must all be set to None.")

    # NOTE(review): this value is never used.  The statement is kept because max() raises a ValueError when no column argument at all has been supplied, and removing it would silently change that behaviour.
    min_col_num = max([_f for _f in [spin_id_col, mol_name_col, res_num_col, res_name_col, spin_num_col, spin_name_col, data_col, error_col] if _f])

    # Extract the data from the file, unless it has been supplied directly.
    if not file_data:
        file_data = extract_data(file, dir, sep=sep)

        # Strip the data (comment handling is switched off when spin IDs are used, presumably because spin IDs can start with '#' - confirm in lib.io.strip).
        if spin_id_col is not None:
            file_data = strip(file_data, comments=False)
        else:
            file_data = strip(file_data)

    # No data.
    if not file_data:
        warn(RelaxFileEmptyWarning(file))
        return

    # Yield the data, spin by spin.
    missing_data = True
    for line in file_data:
        # Convert a quoted spin ID into the bare string.  The column's presence is tested first so that a short line reaches check_sequence() below rather than raising an IndexError here.
        if spin_id_col is not None and len(line) >= spin_id_col and line[spin_id_col-1][:1] in ("\"", "\'"):
            # SECURITY: the text comes from the input file, so it is parsed as a literal rather than passed to eval().
            try:
                line[spin_id_col-1] = literal_eval(line[spin_id_col-1])
            except (ValueError, SyntaxError):
                warn(RelaxWarning("Invalid spin ID, skipping the line %s" % line))
                continue

        # Validate the sequence, skipping the line if it is not valid.
        if not check_sequence(line, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, error_col=error_col, escalate=1):
            continue

        # Get the spin data from the ID.
        if spin_id_col:
            # A lone '#' is not a usable spin ID.
            if line[spin_id_col-1] == '#':
                warn(RelaxWarning("Invalid spin ID, skipping the line %s" % line))
                continue

            mol_name, res_num, res_name, spin_num, spin_name = spin_id_to_data_list(line[spin_id_col-1])

        # Convert the spin specific columns (the text 'None' always maps to None).
        else:
            # The molecule name.
            mol_name = None
            if mol_name_col is not None and line[mol_name_col-1] != 'None':
                mol_name = line[mol_name_col-1]

            # The residue number, catching bad values.
            res_num = None
            if res_num_col is not None and line[res_num_col-1] != 'None':
                try:
                    res_num = int(line[res_num_col-1])
                except ValueError:
                    warn(RelaxWarning("Invalid residue number, skipping the line %s" % line))
                    continue

            # The residue name.
            res_name = None
            if res_name_col is not None and line[res_name_col-1] != 'None':
                res_name = line[res_name_col-1]

            # The spin number, catching bad values.
            spin_num = None
            if spin_num_col is not None and line[spin_num_col-1] != 'None':
                try:
                    spin_num = int(line[spin_num_col-1])
                except ValueError:
                    warn(RelaxWarning("Invalid spin number, skipping the line %s" % line))
                    continue

            # The spin name.
            spin_name = None
            if spin_name_col is not None and line[spin_name_col-1] != 'None':
                spin_name = line[spin_name_col-1]

        # Convert the data.
        value = None
        if data_col is not None and line[data_col-1] != 'None':
            try:
                value = float(line[data_col-1])
            except ValueError:
                warn(RelaxWarning("Invalid data, skipping the line %s" % line))
                continue

            # Skip non-finite values (as decided by isFinite()).
            if not isFinite(value):
                warn(RelaxWarning("The value is not finite, skipping the line %s" % line))
                continue

        # Convert the errors.
        error = None
        if error_col is not None and line[error_col-1] != 'None':
            try:
                error = float(line[error_col-1])
            except ValueError:
                warn(RelaxWarning("Invalid errors, skipping the line %s" % line))
                continue

            # Skip non-finite errors (as decided by isFinite()).
            if not isFinite(error):
                warn(RelaxWarning("The error is not finite, skipping the line %s" % line))
                continue

        # At least one line has survived all checks.
        missing_data = False

        # Yield the identification data, followed by whichever of the value and error were requested.
        if data_col and error_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, value, error
        elif data_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, value
        elif error_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, error
        else:
            yield mol_name, res_num, res_name, spin_num, spin_name

    # Hmmm, no data!
    if raise_flag and missing_data:
        raise RelaxError("No corresponding data could be found within the file.")
362
363
def write_spin_data(file, dir=None, sep=None, spin_ids=None, mol_names=None, res_nums=None, res_names=None, spin_nums=None, spin_names=None, force=False, data=None, data_name=None, error=None, error_name=None, float_format="%20.15g"):
    """Write the spin specific data to file.

    Description
    ===========

    This function writes a columnar formatted file where each line corresponds to a spin system.  Spin identification is either through a spin ID string or through columns containing the molecule name, residue name and number, and/or spin name and number.

    If there is nothing to write (no rows, or a single empty row), the function returns without opening the file.


    @param file:            The name of the file to write the data to (or alternatively an already opened file object).
    @type file:             str or file object
    @keyword dir:           The directory to place the file into (defaults to the current directory if None and the file argument is not a file object).
    @type dir:              str or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_ids:      The list of spin ID strings.
    @type spin_ids:         None or list of str
    @keyword mol_names:     The list of molecule names.
    @type mol_names:        None or list of str
    @keyword res_nums:      The list of residue numbers.
    @type res_nums:         None or list of int
    @keyword res_names:     The list of residue names.
    @type res_names:        None or list of str
    @keyword spin_nums:     The list of spin numbers.
    @type spin_nums:        None or list of int
    @keyword spin_names:    The list of spin names.
    @type spin_names:       None or list of str
    @keyword force:         A flag which if True will cause an existing file to be overwritten.
    @type force:            bool
    @keyword data:          A list of the data to write out.  The first dimension corresponds to the spins.  A second dimension can also be given if multiple data sets across multiple columns are desired.
    @type data:             list or list of lists
    @keyword data_name:     A name corresponding to the data argument.  If the data argument is a list of lists, then this must also be a list with the same length as the second dimension of the data arg.
    @type data_name:        str or list of str
    @keyword error:         A list of the errors to write out.  The first dimension corresponds to the spins.  A second dimension can also be given if multiple data sets across multiple columns are desired.  These will be inter-dispersed between the data columns, if the data is given.  If the data arg is not None, then this must have the same dimensions as that object.
    @type error:            list or list of lists
    @keyword error_name:    A name corresponding to the error argument.  If the error argument is a list of lists, then this must also be a list with the same length at the second dimension of the error arg.
    @type error_name:       str or list of str
    @keyword float_format:  A float formatting string to use for the data and error whenever a float is found.
    @type float_format:     str
    @raise RelaxError:      If the data/error names do not match the dimensionality of the data/error, if the list arguments have inconsistent lengths, or if no spin identification list is given.
    """

    def format_value(value):
        """Format a single data or error element: floats via float_format, everything else via repr()."""

        if is_float(value):
            return float_format % value
        return repr(value)

    # Data argument tests.
    if data:
        # Data is a list of lists.
        if isinstance(data[0], list):
            # Data and data_name don't match.
            if not isinstance(data_name, list):
                raise RelaxError("The data_name arg '%s' must be a list as the data argument is a list of lists." % data_name)

            # Error doesn't match.
            if error and (len(data) != len(error) or len(data[0]) != len(error[0])):
                raise RelaxError("The data arg:\n%s\n\ndoes not have the same dimensions as the error arg:\n%s." % (data, error))

        # Data is a simple list.
        else:
            # Data and data_name don't match.
            if not isinstance(data_name, str):
                raise RelaxError("The data_name arg '%s' must be a string as the data argument is a simple list." % data_name)

            # Error doesn't match.
            if error and len(data) != len(error):
                raise RelaxError("The data arg:\n%s\n\ndoes not have the same dimensions as the error arg:\n%s." % (data, error))

    # Error argument tests.
    if error:
        # Error is a list of lists.
        if isinstance(error[0], list):
            if not isinstance(error_name, list):
                raise RelaxError("The error_name arg '%s' must be a list as the error argument is a list of lists." % error_name)

        # Error is a simple list.
        else:
            if not isinstance(error_name, str):
                raise RelaxError("The error_name arg '%s' must be a string as the error argument is a simple list." % error_name)

    # Number of spins check - the first list argument found fixes the expected length.
    args = [spin_ids, mol_names, res_nums, res_names, spin_nums, spin_names]
    arg_names = ['spin_ids', 'mol_names', 'res_nums', 'res_names', 'spin_nums', 'spin_names']
    N = None
    first_arg = None
    first_arg_name = None
    for name, arg in zip(arg_names, args):
        if isinstance(arg, list):
            # First list match.
            if N is None:
                N = len(arg)
                first_arg = arg
                first_arg_name = name

            # Length check.
            if len(arg) != N:
                raise RelaxError("The %s and %s arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, name, len(first_arg), len(arg)))

    # Nothing?!?
    if N is None:
        raise RelaxError("No spin ID data is present.")

    # Data and error length check.
    if data and len(data) != N:
        raise RelaxError("The %s and data arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, len(first_arg), len(data)))
    if error and len(error) != N:
        raise RelaxError("The %s and error arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, len(first_arg), len(error)))

    # The spin identification columns and their (singular) heading names.
    args = [spin_ids, mol_names, res_nums, res_names, spin_nums, spin_names]
    arg_names = ['spin_id', 'mol_name', 'res_num', 'res_name', 'spin_num', 'spin_name']

    # Init.
    headings = []
    file_data = []

    # Headers - the spin ID info.
    for name, arg in zip(arg_names, args):
        if arg:
            headings.append(name)

    # Headers - the data, with each error column directly following its data column.
    if data:
        # List of lists.
        if isinstance(data[0], list):
            for i in range(len(data[0])):
                headings.append(data_name[i])
                if error:
                    headings.append(error_name[i])

        # Simple list.
        else:
            headings.append(data_name)
            if error:
                headings.append(error_name)

    # Headers - only errors.
    elif error:
        # List of lists.
        if isinstance(error[0], list):
            for i in range(len(error[0])):
                headings.append(error_name[i])

        # Simple list.
        else:
            headings.append(error_name)

    # No headings.
    if headings == []:
        headings = None

    # Spin specific data.
    for spin_index in range(N):
        row = []

        # The spin ID info (non-string values are written via repr()).
        for arg in args:
            if arg:
                value = arg[spin_index]
                if not isinstance(value, str):
                    value = repr(value)
                row.append(value)

        # The data, interleaved with the errors if present.
        if data:
            # List of lists.
            if isinstance(data[0], list):
                for i in range(len(data[0])):
                    row.append(format_value(data[spin_index][i]))
                    if error:
                        row.append(format_value(error[spin_index][i]))

            # Simple list.
            else:
                row.append(format_value(data[spin_index]))
                if error:
                    row.append(format_value(error[spin_index]))

        # Only errors.  These are formatted exactly as errors are when data is present (floats via float_format), as promised by the float_format documentation - previously this branch always used repr().
        elif error:
            # List of lists.
            if isinstance(error[0], list):
                for i in range(len(error[0])):
                    row.append(format_value(error[spin_index][i]))

            # Simple list.
            else:
                row.append(format_value(error[spin_index]))

        file_data.append(row)

    # No data to write, so do nothing!
    if file_data == [] or file_data == [[]]:
        return

    # Open the file for writing.
    # NOTE(review): the returned handle is never explicitly closed here.  It may be the caller's own file object (see the 'file' argument), so confirm the ownership rules of open_write_file() before adding a close().
    file = open_write_file(file_name=file, dir=dir, force=force)

    # Write out the file data.
    write_data(out=file, headings=headings, data=file_data, sep=sep)
587