1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """Module for the manipulation of RDC data."""
25
26
27 from copy import deepcopy
28 from math import pi, sqrt
29 from numpy import float64, ones, zeros
30 from numpy.linalg import norm
31 import sys
32 from warnings import warn
33
34
35 from float import nan
36 from generic_fns import grace, pipes
37 from generic_fns.align_tensor import get_tensor_index
38 from generic_fns.mol_res_spin import exists_mol_res_spin_data, return_spin, spin_loop
39 from maths_fns.rdc import ave_rdc_tensor
40 from physical_constants import dipolar_constant, return_gyromagnetic_ratio
41 from relax_errors import RelaxError, RelaxNoRDCError, RelaxNoSequenceError, RelaxSpinTypeError
42 from relax_io import open_write_file, read_spin_data, write_spin_data
43 from relax_warnings import RelaxWarning, RelaxNoSpinWarning
44
45
47 """Back calculate the RDC from the alignment tensor and unit bond vectors.
48
49 @keyword align_id: The alignment tensor ID string.
50 @type align_id: str
51 """
52
53
54 if align_id and align_id not in cdp.align_ids:
55 raise RelaxError, "The alignment ID '%s' is not in the alignment ID list %s." % (align_id, cdp.align_ids)
56
57
58 if align_id:
59 align_ids = [align_id]
60 else:
61 align_ids = cdp.align_ids
62
63
64 for align_id in align_ids:
65
66 if not hasattr(cdp, 'rdc_ids'):
67 cdp.rdc_ids = []
68
69
70 if align_id not in cdp.rdc_ids:
71 cdp.rdc_ids.append(align_id)
72
73
74 weights = ones(cdp.N, float64) / cdp.N
75
76
77 unit_vect = zeros((cdp.N, 3), float64)
78
79
80 for spin in spin_loop():
81
82 if not hasattr(spin, 'bond_vect') and not hasattr(spin, 'xh_vect'):
83 continue
84
85
86 if not hasattr(spin, 'heteronuc_type'):
87 raise RelaxSpinTypeError
88
89
90 if hasattr(spin, 'bond_vect'):
91 vectors = spin.bond_vect
92 else:
93 vectors = spin.xh_vect
94
95
96 if type(vectors[0]) in [float, float64]:
97 vectors = [vectors]
98
99
100 gx = return_gyromagnetic_ratio(spin.heteronuc_type)
101 gh = return_gyromagnetic_ratio(spin.proton_type)
102
103
104 dj = 3.0/(2.0*pi) * dipolar_constant(gx, gh, spin.r)
105
106
107 for c in range(cdp.N):
108 unit_vect[c] = vectors[c] / norm(vectors[c])
109
110
111 if not hasattr(spin, 'rdc_bc'):
112 spin.rdc_bc = {}
113
114
115 for id in align_ids:
116 spin.rdc_bc[id] = ave_rdc_tensor(dj, unit_vect, cdp.N, cdp.align_tensors[get_tensor_index(id)].A, weights=weights)
117
118
119 -def convert(value, align_id, to_intern=False):
120 """Convert the RDC based on the 'D' or '2D' data type.
121
122 @param value: The value or error to convert.
123 @type value: float or None
124 @param align_id: The alignment tensor ID string.
125 @type align_id: str
126 @keyword to_intern: A flag which if True will convert to the internal D notation if needed, or if False will convert from the internal D notation to the external D or 2D format.
127 @type to_intern: bool
128 @return: The converted value.
129 @rtype: float or None
130 """
131
132
133 if value == None:
134 return None
135
136
137 factor = 1.0
138 if hasattr(cdp, 'rdc_data_types') and cdp.rdc_data_types.has_key(align_id) and cdp.rdc_data_types[align_id] == '2D':
139
140 if to_intern:
141 factor = 2.0
142
143
144 else:
145 factor = 0.5
146
147
148 return value * factor
149
150
151 -def corr_plot(format=None, file=None, dir=None, force=False):
152 """Generate a correlation plot of the measured vs. back-calculated RDCs.
153
154 @keyword format: The format for the plot file. The following values are accepted: 'grace', a Grace plot; None, a plain text file.
155 @type format: str or None
156 @keyword file: The file name or object to write to.
157 @type file: str or file object
158 @keyword dir: The name of the directory to place the file into (defaults to the current directory).
159 @type dir: str
160 @keyword force: A flag which if True will cause any pre-existing file to be overwritten.
161 @type force: bool
162 """
163
164
165 pipes.test()
166
167
168 if not exists_mol_res_spin_data():
169 raise RelaxNoSequenceError
170
171
172 if not hasattr(cdp, 'rdc_ids') or not cdp.rdc_ids:
173 warn(RelaxWarning("No RDC data exists, skipping file creation."))
174 return
175
176
177 file = open_write_file(file, dir, force)
178
179
180 data = []
181
182
183 data.append([[-100, -100, 0], [100, 100, 0]])
184
185
186 for align_id in cdp.rdc_ids:
187
188 data.append([])
189
190
191 err_flag = False
192 for spin in spin_loop():
193
194 if not spin.select:
195 continue
196
197
198 if hasattr(spin, 'rdc_err') and align_id in spin.rdc_err.keys():
199 err_flag = True
200 break
201
202
203 for spin, spin_id in spin_loop(return_id=True):
204
205 if not spin.select:
206 continue
207
208
209 if not hasattr(spin, 'rdc') or not hasattr(spin, 'rdc_bc') or not align_id in spin.rdc.keys() or not align_id in spin.rdc_bc.keys():
210 continue
211
212
213 data[-1].append([convert(spin.rdc_bc[align_id], align_id), convert(spin.rdc[align_id], align_id)])
214
215
216 if err_flag:
217 if hasattr(spin, 'rdc_err') and align_id in spin.rdc_err.keys():
218 data[-1][-1].append(convert(spin.rdc_err[align_id], align_id))
219 else:
220 data[-1][-1].append(None)
221
222
223 data[-1][-1].append(spin_id)
224
225
226 size = len(data)
227
228
229 data = [data]
230
231
232 if err_flag:
233 graph_type = 'xydy'
234 else:
235 graph_type = 'xy'
236
237
238 if format == 'grace':
239
240 grace.write_xy_header(file=file, title="RDC correlation plot", sets=size, set_names=[None]+cdp.rdc_ids, linestyle=[2]+[0]*size, data_type=['rdc_bc', 'rdc'], axis_min=[-10, -10], axis_max=[10, 10], legend_pos=[1, 0.5])
241
242
243 grace.write_xy_data(data=data, file=file, graph_type=graph_type)
244
245
247 """Delete the RDC data corresponding to the alignment ID.
248
249 @keyword align_id: The alignment tensor ID string. If not specified, all data will be deleted.
250 @type align_id: str or None
251 """
252
253
254 pipes.test()
255
256
257 if not exists_mol_res_spin_data():
258 raise RelaxNoSequenceError
259
260
261 if align_id and not align_id in cdp.rdc_ids:
262 raise RelaxError("The RDC alignment id '%s' does not exist" % align_id)
263
264
265 if not align_id:
266 ids = deepcopy(cdp.rdc_ids)
267 else:
268 ids = [align_id]
269
270
271 for id in ids:
272
273 cdp.rdc_ids.pop(cdp.rdc_ids.index(id))
274
275
276 if hasattr(cdp, 'rdc_data_types') and cdp.rdc_data_types.has_key(id):
277 cdp.rdc_data_types.pop(id)
278
279
280 for spin in spin_loop():
281
282 if hasattr(spin, 'rdc') and spin.rdc.has_key(id):
283 spin.rdc.pop(id)
284
285
286 if hasattr(spin, 'rdc_err') and spin.rdc_err.has_key(id):
287 spin.rdc_err.pop(id)
288
289
290 if not hasattr(cdp, 'pcs_ids') or id not in cdp.pcs_ids:
291 cdp.align_ids.pop(id)
292
293
294 -def display(align_id=None, bc=False):
295 """Display the RDC data corresponding to the alignment ID.
296
297 @keyword align_id: The alignment tensor ID string.
298 @type align_id: str
299 @keyword bc: The back-calculation flag which if True will cause the back-calculated rather than measured data to be displayed.
300 @type bc: bool
301 """
302
303
304 write(align_id=align_id, file=sys.stdout, bc=bc)
305
306
308 """Calculate the Q-factors for the RDC data.
309
310 @keyword spin_id: The spin ID string used to restrict the Q-factor calculation to a subset of all spins.
311 @type spin_id: None or str
312 """
313
314
315 if not hasattr(cdp, 'rdc_ids') or not len(cdp.rdc_ids):
316 warn(RelaxWarning("No RDC data exists, Q factors cannot be calculated."))
317 return
318
319
320 cdp.q_factors_rdc = {}
321 cdp.q_factors_rdc_norm2 = {}
322
323
324 for align_id in cdp.rdc_ids:
325
326 D2_sum = 0.0
327 sse = 0.0
328
329
330 dj = None
331 N = 0
332 spin_count = 0
333 rdc_data = False
334 rdc_bc_data = False
335 for spin in spin_loop(spin_id):
336
337 if not spin.select:
338 continue
339
340
341 spin_count += 1
342
343
344 if hasattr(spin, 'rdc') and spin.rdc.has_key(align_id):
345 rdc_data = True
346 if hasattr(spin, 'rdc_bc') and spin.rdc_bc.has_key(align_id):
347 rdc_bc_data = True
348
349
350 if not hasattr(spin, 'rdc') or not hasattr(spin, 'rdc_bc') or not spin.rdc.has_key(align_id) or spin.rdc[align_id] == None or not spin.rdc_bc.has_key(align_id) or spin.rdc_bc[align_id] == None:
351 continue
352
353
354 sse = sse + (spin.rdc[align_id] - spin.rdc_bc[align_id])**2
355
356
357 D2_sum = D2_sum + spin.rdc[align_id]**2
358
359
360 gx = return_gyromagnetic_ratio(spin.heteronuc_type)
361 gh = return_gyromagnetic_ratio(spin.proton_type)
362
363
364 dj_new = 3.0/(2.0*pi) * dipolar_constant(gx, gh, spin.r)
365 if dj and dj_new != dj:
366 raise RelaxError("All the RDCs must come from the same nucleus type.")
367 else:
368 dj = dj_new
369
370
371 N = N + 1
372
373
374 if not spin_count:
375 warn(RelaxWarning("No spins have been used in the calculation."))
376 return
377 if not rdc_data:
378 warn(RelaxWarning("No RDC data can be found."))
379 return
380 if not rdc_bc_data:
381 warn(RelaxWarning("No back-calculated RDC data can be found."))
382 return
383
384
385 D = dj * cdp.align_tensors[cdp.align_ids.index(align_id)].A_diag
386 Da = 1.0/3.0 * (D[2, 2] - (D[0, 0]+D[1, 1])/2.0)
387 Dr = 1.0/3.0 * (D[0, 0] - D[1, 1])
388 if Da == 0:
389 R = nan
390 else:
391 R = Dr / Da
392 norm = 2.0 * (Da)**2 * (4.0 + 3.0*R**2)/5.0
393 if Da == 0.0:
394 norm = 1e-15
395
396
397 Q = sqrt(sse / N / norm)
398 cdp.q_factors_rdc[align_id] = Q
399 cdp.q_factors_rdc_norm2[align_id] = sqrt(sse / D2_sum)
400
401
402 cdp.q_rdc = 0.0
403 cdp.q_rdc_norm2 = 0.0
404 for id in cdp.q_factors_rdc:
405 cdp.q_rdc = cdp.q_rdc + cdp.q_factors_rdc[id]**2
406 for id in cdp.q_factors_rdc_norm2:
407 cdp.q_rdc_norm2 = cdp.q_rdc_norm2 + cdp.q_factors_rdc_norm2[id]**2
408 cdp.q_rdc = sqrt(cdp.q_rdc / len(cdp.q_factors_rdc))
409 cdp.q_rdc_norm2 = sqrt(cdp.q_rdc_norm2 / len(cdp.q_factors_rdc_norm2))
410
411
412 -def read(align_id=None, file=None, dir=None, file_data=None, data_type='D', spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None, sep=None, spin_id=None, neg_g_corr=False):
413 """Read the RDC data from file.
414
415 @keyword align_id: The alignment tensor ID string.
416 @type align_id: str
417 @keyword file: The name of the file to open.
418 @type file: str
419 @keyword dir: The directory containing the file (defaults to the current directory if None).
420 @type dir: str or None
421 @keyword file_data: An alternative to opening a file, if the data already exists in the correct format. The format is a list of lists where the first index corresponds to the row and the second the column.
422 @type file_data: list of lists
423 @keyword data_type: A string which is set to 'D' means that the splitting in the aligned sample was assumed to be J + D, or if set to '2D' then the splitting was taken as J + 2D.
424 @keyword spin_id_col: The column containing the spin ID strings. If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
425 @type spin_id_col: int or None
426 @keyword mol_name_col: The column containing the molecule name information. If supplied, spin_id_col must be None.
427 @type mol_name_col: int or None
428 @keyword res_name_col: The column containing the residue name information. If supplied, spin_id_col must be None.
429 @type res_name_col: int or None
430 @keyword res_num_col: The column containing the residue number information. If supplied, spin_id_col must be None.
431 @type res_num_col: int or None
432 @keyword spin_name_col: The column containing the spin name information. If supplied, spin_id_col must be None.
433 @type spin_name_col: int or None
434 @keyword spin_num_col: The column containing the spin number information. If supplied, spin_id_col must be None.
435 @type spin_num_col: int or None
436 @keyword data_col: The column containing the RDC data in Hz.
437 @type data_col: int or None
438 @keyword error_col: The column containing the RDC errors.
439 @type error_col: int or None
440 @keyword sep: The column separator which, if None, defaults to whitespace.
441 @type sep: str or None
442 @keyword spin_id: The spin ID string used to restrict data loading to a subset of all spins.
443 @type spin_id: None or str
444 @keyword neg_g_corr: A flag which is used to correct for the negative gyromagnetic ratio of 15N. If True, a sign inversion will be applied to all RDC values to be loaded.
445 @type neg_g_corr: bool
446 """
447
448
449 pipes.test()
450
451
452 if not exists_mol_res_spin_data():
453 raise RelaxNoSequenceError
454
455
456 if data_col == None and error_col == None:
457 raise RelaxError("One of either the data or error column must be supplied.")
458
459
460 if not hasattr(cdp, 'rdc_data_types'):
461 cdp.rdc_data_types = {}
462 if not cdp.rdc_data_types.has_key(align_id):
463 cdp.rdc_data_types[align_id] = data_type
464
465
466
467
468
469 spin_ids = []
470 values = []
471 errors = []
472 for data in read_spin_data(file=file, dir=dir, file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, error_col=error_col, sep=sep, spin_id=spin_id):
473
474 if data_col and error_col:
475 id, value, error = data
476 elif data_col:
477 id, value = data
478 error = None
479 else:
480 id, error = data
481 value = None
482
483
484 if error == 0.0:
485 raise RelaxError("An invalid error value of zero has been encountered.")
486
487
488 spin = return_spin(id)
489 if spin == None:
490 warn(RelaxNoSpinWarning(id))
491 continue
492
493
494 if data_col:
495
496 if not hasattr(spin, 'rdc'):
497 spin.rdc = {}
498
499
500 value = convert(value, align_id, to_intern=True)
501
502
503 if neg_g_corr and value != None:
504 value = -value
505
506
507 spin.rdc[align_id] = value
508
509
510 if error_col:
511
512 if not hasattr(spin, 'rdc_err'):
513 spin.rdc_err = {}
514
515
516 error = convert(error, align_id, to_intern=True)
517
518
519 spin.rdc_err[align_id] = error
520
521
522 spin_ids.append(id)
523 values.append(value)
524 errors.append(error)
525
526
527 write_spin_data(file=sys.stdout, spin_ids=spin_ids, data=values, data_name='RDCs', error=errors, error_name='RDC_error')
528
529
530
531
532
533
534 if not len(values):
535 return
536
537
538 if not hasattr(cdp, 'align_ids'):
539 cdp.align_ids = []
540 if not hasattr(cdp, 'rdc_ids'):
541 cdp.rdc_ids = []
542
543
544 if align_id not in cdp.align_ids:
545 cdp.align_ids.append(align_id)
546 if align_id not in cdp.rdc_ids:
547 cdp.rdc_ids.append(align_id)
548
549
550 -def weight(align_id=None, spin_id=None, weight=1.0):
551 """Set optimisation weights on the RDC data.
552
553 @keyword align_id: The alignment tensor ID string.
554 @type align_id: str
555 @keyword spin_id: The spin ID string.
556 @type spin_id: None or str
557 @keyword weight: The optimisation weight. The higher the value, the more importance the RDC will have.
558 @type weight: float or int.
559 """
560
561
562 if not exists_mol_res_spin_data():
563 raise RelaxNoSequenceError
564
565
566 if not hasattr(cdp, 'rdc_ids') or align_id not in cdp.rdc_ids:
567 raise RelaxNoRDCError(align_id)
568
569
570 for spin in spin_loop(spin_id):
571
572 if not hasattr(spin, 'rdc_weight'):
573 spin.rdc_weight = {}
574
575
576 spin.rdc_weight[align_id] = weight
577
578
579 -def write(align_id=None, file=None, dir=None, bc=False, force=False):
580 """Display the RDC data corresponding to the alignment ID.
581
582 @keyword align_id: The alignment tensor ID string.
583 @type align_id: str
584 @keyword file: The file name or object to write to.
585 @type file: str or file object
586 @keyword dir: The name of the directory to place the file into (defaults to the current directory).
587 @type dir: str
588 @keyword bc: The back-calculation flag which if True will cause the back-calculated rather than measured data to be written.
589 @type bc: bool
590 @keyword force: A flag which if True will cause any pre-existing file to be overwritten.
591 @type force: bool
592 """
593
594
595 pipes.test()
596
597
598 if not exists_mol_res_spin_data():
599 raise RelaxNoSequenceError
600
601
602 if not hasattr(cdp, 'rdc_ids') or align_id not in cdp.rdc_ids:
603 raise RelaxNoRDCError(align_id)
604
605
606 file = open_write_file(file, dir, force)
607
608
609 mol_names = []
610 res_nums = []
611 res_names = []
612 spin_nums = []
613 spin_names = []
614 values = []
615 errors = []
616 for spin, mol_name, res_num, res_name in spin_loop(full_info=True):
617
618 if not bc and (not hasattr(spin, 'rdc') or align_id not in spin.rdc.keys()):
619 continue
620 elif bc and (not hasattr(spin, 'rdc_bc') or align_id not in spin.rdc_bc.keys()):
621 continue
622
623
624 mol_names.append(mol_name)
625 res_nums.append(res_num)
626 res_names.append(res_name)
627 spin_nums.append(spin.num)
628 spin_names.append(spin.name)
629
630
631 if bc:
632 values.append(convert(spin.rdc_bc[align_id], align_id))
633 else:
634 values.append(convert(spin.rdc[align_id], align_id))
635
636
637 if hasattr(spin, 'rdc_err') and align_id in spin.rdc_err.keys():
638 errors.append(convert(spin.rdc_err[align_id], align_id))
639 else:
640 errors.append(None)
641
642
643 write_spin_data(file=file, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names, data=values, data_name='RDCs', error=errors, error_name='RDC_error')
644