1 from __future__ import absolute_import
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """Module for handling the molecule, residue, and spin sequence data."""
25
26
# Python module imports.
from ast import literal_eval
import sys
from warnings import warn

# relax module imports.
from lib.checks import Check
from lib.check_types import is_float
from lib.errors import RelaxError, RelaxInvalidSeqError
from lib.io import extract_data, open_write_file, strip, write_data
from lib.selection import spin_id_to_data_list
from lib.warnings import RelaxWarning, RelaxFileEmptyWarning
37
38
39
# Lookup table mapping the upper case three letter amino acid codes to the corresponding one letter codes.
AA_CODES = {
    "ALA": "A",
    "ARG": "R",
    "ASN": "N",
    "ASP": "D",
    "CYS": "C",
    "GLU": "E",
    "GLN": "Q",
    "GLY": "G",
    "HIS": "H",
    "ILE": "I",
    "LEU": "L",
    "LYS": "K",
    "MET": "M",
    "PHE": "F",
    "PRO": "P",
    "SER": "S",
    "THR": "T",
    "TRP": "W",
    "TYR": "Y",
    "VAL": "V",
}
62
63
# NOTE(review): the 'def' line of this function was absent from the reviewed text.  The function name
# has been reconstructed - confirm it against the callers before merging.
def aa_codes_three_to_one(code):
    """Convert the given three letter amino acid code to the corresponding one letter code.

    Any non-standard residues will be converted to '*'.


    @param code:    The three letter amino acid code to convert.
    @type code:     str
    @return:        The corresponding one letter amino acid code, or '*'.
    @rtype:         str
    """

    # The AA_CODES keys are all upper case, so the code is upper cased to make the lookup case insensitive.
    return AA_CODES.get(code.upper(), '*')
85
86
def _is_int_or_none(text):
    """Determine if a raw column entry is the text form of an integer or of None.

    The entry is parsed with ast.literal_eval() rather than eval(), so that text coming from a user supplied file is only ever treated as a Python literal and can never be executed as code.


    @param text:    The raw column entry.
    @type text:     str
    @return:        True if the entry parses to None or to an int, False for anything else (including entries which cannot be parsed at all).
    @rtype:         bool
    """

    # Anything which cannot be parsed as a literal is simply invalid, whatever the parser error is.
    try:
        value = literal_eval(text)
    except Exception:
        return False

    return value is None or isinstance(value, int)


def check_sequence_func(data, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None):
    """Test if the sequence data is valid.

    Each supplied column number (numbering starts at one) is checked against the number of elements in the data.  The residue and spin number entries must additionally be the text form of an integer or of None.  The first problem found is reported.


    @param data:            The sequence data, i.e. the elements of a single line.
    @type data:             list of str
    @keyword spin_id_col:   The column containing the spin ID strings.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.
    @type mol_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.
    @type res_num_col:      int or None
    @keyword res_name_col:  The column containing the residue name information.
    @type res_name_col:     int or None
    @keyword spin_num_col:  The column containing the spin number information.
    @type spin_num_col:     int or None
    @keyword spin_name_col: The column containing the spin name information.
    @type spin_name_col:    int or None
    @keyword data_col:      The column containing the data.
    @type data_col:         int or None
    @keyword error_col:     The column containing the errors.
    @type error_col:        int or None
    @return:                None if the data is valid, otherwise a RelaxInvalidSeqError instance describing the first problem.  The error is returned rather than raised.
    @rtype:                 None or RelaxInvalidSeqError instance
    """

    # The number of elements available.
    num_cols = len(data)

    # Spin ID.
    if spin_id_col and num_cols < spin_id_col:
        return RelaxInvalidSeqError(data, "the Spin ID data is missing")

    # Molecule name data.
    if mol_name_col and num_cols < mol_name_col:
        return RelaxInvalidSeqError(data, "the molecule name data is missing")

    # Residue number data.
    if res_num_col:
        # No data in column.
        if num_cols < res_num_col:
            return RelaxInvalidSeqError(data, "the residue number data is missing")

        # Bad data in column.
        if not _is_int_or_none(data[res_num_col-1]):
            return RelaxInvalidSeqError(data, "the residue number data '%s' is invalid" % data[res_num_col-1])

    # Residue name data.
    if res_name_col and num_cols < res_name_col:
        return RelaxInvalidSeqError(data, "the residue name data is missing")

    # Spin number data.
    if spin_num_col:
        # No data in column.
        if num_cols < spin_num_col:
            return RelaxInvalidSeqError(data, "the spin number data is missing")

        # Bad data in column.
        if not _is_int_or_none(data[spin_num_col-1]):
            return RelaxInvalidSeqError(data, "the spin number data '%s' is invalid" % data[spin_num_col-1])

    # Spin name data.
    if spin_name_col and num_cols < spin_name_col:
        return RelaxInvalidSeqError(data, "the spin name data is missing")

    # Data.
    if data_col and num_cols < data_col:
        return RelaxInvalidSeqError(data, "the data is missing")

    # Errors.
    if error_col and num_cols < error_col:
        return RelaxInvalidSeqError(data, "the error data is missing")
166
167
# The check object wrapping check_sequence_func().  Within this module it is called with the same arguments plus an
# 'escalate' keyword, and its return value is used as a truth value for whether the line passed the check.
# NOTE(review): the exact escalation semantics are defined by lib.checks.Check - confirm there.
check_sequence = Check(check_sequence_func)
169
170
def _convert_column(line, col, convert=None):
    """Extract one column from a line, treating the text 'None' as the value None.

    @param line:        The elements of a single line.
    @type line:         list of str
    @param col:         The column number (numbering starts at one), or None if the column is not in use.
    @type col:          int or None
    @keyword convert:   The conversion to apply to the text, e.g. int or float.  If None, the text is returned unmodified.
    @type convert:      callable or None
    @return:            The converted value, or None if the column is not in use or contains the text 'None'.
    @rtype:             anything
    """

    # The column is not in use.
    if col is None:
        return None

    # The explicit None value.
    text = line[col-1]
    if text == 'None':
        return None

    # The raw text.
    if convert is None:
        return text

    # The converted value (conversion errors are deliberately left for the caller to handle).
    return convert(text)


def read_spin_data(file=None, dir=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None, sep=None, spin_id=None, raise_flag=True):
    """Generator function for reading the spin specific data from file.

    Description
    ===========

    This function reads a columnar formatted file where each line corresponds to a spin system.  Spin identification is either through a spin ID string or through columns containing the molecule name, residue name and number, and/or spin name and number.  Lines which fail the sequence check, or which contain values that cannot be converted, are skipped with a warning.


    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id_col:   The column containing the spin ID strings.  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information.  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information.  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information.  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword data_col:      The column containing the data.
    @type data_col:         int or None
    @keyword error_col:     The column containing the errors.
    @type error_col:        int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.  NOTE(review): this argument is not used anywhere within this function - confirm if the restriction is expected to happen in the caller.
    @type spin_id:          None or str
    @keyword raise_flag:    A flag which if True will cause a RelaxError to be raised if no data can be found.
    @type raise_flag:       bool
    @raises RelaxError:     If a column argument is zero, if spin_id_col is mixed with the other identification columns, if no column argument is supplied at all, or if raise_flag is set and no line yielded any data.
    @return:                For each valid line, the molecule name, residue number, residue name, spin number, and spin name are yielded, followed by the data value (if data_col is given) and then the error value (if error_col is given).
    @rtype:                 tuple of (str or None, int or None, str or None, int or None, str or None), optionally extended by one or two float or None values
    """

    # Argument tests.
    col_args = [spin_id_col, mol_name_col, res_name_col, res_num_col, spin_name_col, spin_num_col, data_col, error_col]
    col_arg_names = ['spin_id_col', 'mol_name_col', 'res_name_col', 'res_num_col', 'spin_name_col', 'spin_num_col', 'data_col', 'error_col']
    for name, col in zip(col_arg_names, col_args):
        if col == 0:
            raise RelaxError("The '%s' argument cannot be zero, column numbering starts at one." % name)
    if spin_id_col and (mol_name_col or res_name_col or res_num_col or spin_name_col or spin_num_col):
        raise RelaxError("If the 'spin_id_col' argument has been supplied, then the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col must all be set to None.")

    # Without a single column there is nothing to read (this used to fail with an obscure ValueError from max()).
    if not any(col_args):
        raise RelaxError("No column arguments have been supplied, so no data can be read from the file.")

    # Extract the data from the file.
    if not file_data:
        # Extract.
        file_data = extract_data(file, dir, sep=sep)

        # Strip the data of all comments and empty lines (a spin ID can start with '#', hence comments are kept in that case).
        if spin_id_col is not None:
            file_data = strip(file_data, comments=False)
        else:
            file_data = strip(file_data)

    # No data!
    if not file_data:
        warn(RelaxFileEmptyWarning(file))
        return

    # Yield the data, spin by spin.
    missing_data = True
    for line in file_data:
        # Convert the spin IDs - the length and emptiness are tested first so that short lines reach the sequence check instead of failing with an IndexError.
        if spin_id_col is not None and len(line) >= spin_id_col and line[spin_id_col-1] and line[spin_id_col-1][0] in ["\"", "\'"]:
            # The text comes from a user supplied file, so only literals are accepted and nothing is ever executed.
            try:
                line[spin_id_col-1] = literal_eval(line[spin_id_col-1])
            except (ValueError, SyntaxError):
                warn(RelaxWarning("Invalid spin ID, skipping the line %s" % line))
                continue

        # Validate the sequence, skipping the line on failure.
        if not check_sequence(line, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, error_col=error_col, escalate=1):
            continue

        # Get the spin data from the ID.
        if spin_id_col:
            # Invalid spin ID.
            if line[spin_id_col-1] == '#':
                warn(RelaxWarning("Invalid spin ID, skipping the line %s" % line))
                continue

            mol_name, res_num, res_name, spin_num, spin_name = spin_id_to_data_list(line[spin_id_col-1])

        # Convert the spin data.
        else:
            # The molecule name.
            mol_name = _convert_column(line, mol_name_col)

            # The residue number.
            try:
                res_num = _convert_column(line, res_num_col, int)
            except ValueError:
                warn(RelaxWarning("Invalid residue number, skipping the line %s" % line))
                continue

            # The residue name.
            res_name = _convert_column(line, res_name_col)

            # The spin number.
            try:
                spin_num = _convert_column(line, spin_num_col, int)
            except ValueError:
                warn(RelaxWarning("Invalid spin number, skipping the line %s" % line))
                continue

            # The spin name.
            spin_name = _convert_column(line, spin_name_col)

        # Convert the data.
        try:
            value = _convert_column(line, data_col, float)
        except ValueError:
            warn(RelaxWarning("Invalid data, skipping the line %s" % line))
            continue

        # Convert the errors.
        try:
            error = _convert_column(line, error_col, float)
        except ValueError:
            warn(RelaxWarning("Invalid errors, skipping the line %s" % line))
            continue

        # Right, data is OK and exists.
        missing_data = False

        # Yield the data.
        if data_col and error_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, value, error
        elif data_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, value
        elif error_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, error
        else:
            yield mol_name, res_num, res_name, spin_num, spin_name

    # Hmmm, no data!
    if raise_flag and missing_data:
        raise RelaxError("No corresponding data could be found within the file.")
352
353
def _format_spin_value(value, float_format):
    """Convert a single data or error value into the text to place in the file.

    @param value:           The value to convert.
    @type value:            anything
    @param float_format:    The formatting string applied when is_float() accepts the value.
    @type float_format:     str
    @return:                The formatted float, or the repr() text for all other values.
    @rtype:                 str
    """

    if is_float(value):
        return float_format % value
    return repr(value)


def write_spin_data(file, dir=None, sep=None, spin_ids=None, mol_names=None, res_nums=None, res_names=None, spin_nums=None, spin_names=None, force=False, data=None, data_name=None, error=None, error_name=None, float_format="%20.15g"):
    """Write the spin specific data to file.

    Description
    ===========

    This function writes a columnar formatted file where each line corresponds to a spin system.  Spin identification is either through a spin ID string or through columns containing the molecule name, residue name and number, and/or spin name and number.  Nothing is written, and no file is opened, if there are no spins.


    @param file:            The name of the file to write the data to (or alternatively an already opened file object).
    @type file:             str or file object
    @keyword dir:           The directory to place the file into (defaults to the current directory if None and the file argument is not a file object).
    @type dir:              str or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_ids:      The list of spin ID strings.
    @type spin_ids:         None or list of str
    @keyword mol_names:     The list of molecule names.
    @type mol_names:        None or list of str
    @keyword res_nums:      The list of residue numbers.
    @type res_nums:         None or list of int
    @keyword res_names:     The list of residue names.
    @type res_names:        None or list of str
    @keyword spin_nums:     The list of spin numbers.
    @type spin_nums:        None or list of int
    @keyword spin_names:    The list of spin names.
    @type spin_names:       None or list of str
    @keyword force:         A flag which if True will cause an existing file to be overwritten.
    @type force:            bool
    @keyword data:          A list of the data to write out.  The first dimension corresponds to the spins.  A second dimension can also be given if multiple data sets across multiple columns are desired.
    @type data:             list or list of lists
    @keyword data_name:     A name corresponding to the data argument.  If the data argument is a list of lists, then this must also be a list with the same length as the second dimension of the data arg.
    @type data_name:        str or list of str
    @keyword error:         A list of the errors to write out.  The first dimension corresponds to the spins.  A second dimension can also be given if multiple data sets across multiple columns are desired.  These will be inter-dispersed between the data columns, if the data is given.  If the data arg is not None, then this must have the same dimensions as that object.
    @type error:            list or list of lists
    @keyword error_name:    A name corresponding to the error argument.  If the error argument is a list of lists, then this must also be a list with the same length at the second dimension of the error arg.
    @type error_name:       str or list of str
    @keyword float_format:  A float formatting string to use for the data and error whenever a float is found.
    @type float_format:     str
    @raises RelaxError:     If the name arguments do not match the structure of the data or error arguments, if the dimensions of the arguments are inconsistent, or if no spin identification list is supplied.
    """

    # Data argument tests.
    if data:
        # Data is a list of lists.
        if isinstance(data[0], list):
            # Data and data_name don't match.
            if not isinstance(data_name, list):
                raise RelaxError("The data_name arg '%s' must be a list as the data argument is a list of lists." % data_name)

            # Error doesn't match.
            if error and (len(data) != len(error) or len(data[0]) != len(error[0])):
                raise RelaxError("The data arg:\n%s\n\ndoes not have the same dimensions as the error arg:\n%s." % (data, error))

        # Data is a simple list.
        else:
            # Data and data_name don't match.
            if not isinstance(data_name, str):
                raise RelaxError("The data_name arg '%s' must be a string as the data argument is a simple list." % data_name)

            # Error doesn't match.
            if error and len(data) != len(error):
                raise RelaxError("The data arg:\n%s\n\ndoes not have the same dimensions as the error arg:\n%s." % (data, error))

    # Error argument tests.
    if error:
        # Error is a list of lists.
        if isinstance(error[0], list):
            # Error and error_name don't match.
            if not isinstance(error_name, list):
                raise RelaxError("The error_name arg '%s' must be a list as the error argument is a list of lists." % error_name)

        # Error is a simple list.
        else:
            # Error and error_name don't match.
            if not isinstance(error_name, str):
                raise RelaxError("The error_name arg '%s' must be a string as the error argument is a simple list." % error_name)

    # The spin identification lists together with the argument names (for the messages) and the column headings.
    id_args = [spin_ids, mol_names, res_nums, res_names, spin_nums, spin_names]
    id_arg_names = ['spin_ids', 'mol_names', 'res_nums', 'res_names', 'spin_nums', 'spin_names']
    id_headings = ['spin_id', 'mol_name', 'res_num', 'res_name', 'spin_num', 'spin_name']

    # Number of spins check - the first list found fixes the spin count which all others must match.
    N = None
    first_arg_name = None
    for name, arg in zip(id_arg_names, id_args):
        if isinstance(arg, list):
            # First list match.
            if N is None:
                N = len(arg)
                first_arg_name = name

            # Length check.
            if len(arg) != N:
                raise RelaxError("The %s and %s arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, name, N, len(arg)))

    # Nothing?!?
    if N is None:
        raise RelaxError("No spin ID data is present.")

    # Data and error length check.
    if data and len(data) != N:
        raise RelaxError("The %s and data arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, N, len(data)))
    if error and len(error) != N:
        raise RelaxError("The %s and error arguments do not have the same number of spins ('%s' vs. '%s' respectively)." % (first_arg_name, N, len(error)))

    # Only the identification lists with content produce a column.
    id_columns = [(heading, arg) for heading, arg in zip(id_headings, id_args) if arg]

    # The structure of the data and errors.
    data_2d = bool(data) and isinstance(data[0], list)
    error_2d = bool(error) and isinstance(error[0], list)

    # The spin identification headings.
    headings = [heading for heading, arg in id_columns]

    # The data (with the errors interleaved) headings.
    if data:
        if data_2d:
            for i in range(len(data[0])):
                headings.append(data_name[i])
                if error:
                    headings.append(error_name[i])
        else:
            headings.append(data_name)
            if error:
                headings.append(error_name)

    # Only errors.
    elif error:
        if error_2d:
            for i in range(len(error[0])):
                headings.append(error_name[i])
        else:
            headings.append(error_name)

    # No headings.
    if headings == []:
        headings = None

    # Assemble the rows, one per spin.
    file_data = []
    for spin_index in range(N):
        row = []

        # The spin identification columns, with the non-string values stored as their repr() text.
        for heading, arg in id_columns:
            value = arg[spin_index]
            if not isinstance(value, str):
                value = repr(value)
            row.append(value)

        # The data (with the errors interleaved).
        if data:
            if data_2d:
                for i in range(len(data[0])):
                    row.append(_format_spin_value(data[spin_index][i], float_format))
                    if error:
                        row.append(_format_spin_value(error[spin_index][i], float_format))
            else:
                row.append(_format_spin_value(data[spin_index], float_format))
                if error:
                    row.append(_format_spin_value(error[spin_index], float_format))

        # Only errors - formatted exactly as the errors accompanying data, as float_format is documented to cover both.
        elif error:
            if error_2d:
                for i in range(len(error[0])):
                    row.append(_format_spin_value(error[spin_index][i], float_format))
            else:
                row.append(_format_spin_value(error[spin_index], float_format))

        file_data.append(row)

    # No data to write, so do nothing!
    if file_data == [] or file_data == [[]]:
        return

    # Open the file for writing.  A file object handed in by the caller stays under the caller's control, so only a file opened here from its name is closed again.
    opened_here = isinstance(file, str)
    out = open_write_file(file_name=file, dir=dir, force=force)

    # Write out the file data.
    try:
        write_data(out=out, headings=headings, data=file_data, sep=sep)
    finally:
        if opened_here and hasattr(out, 'close'):
            out.close()
577