1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """Module for the manipulation of RDC data."""
25
26
27 from copy import deepcopy
28 from math import pi, sqrt
29 from numpy import float64, ones, zeros
30 from numpy.linalg import norm
31 import sys
32 from warnings import warn
33
34
35 from float import nan
36 from generic_fns import grace, pipes
37 from generic_fns.align_tensor import get_tensor_index
38 from generic_fns.mol_res_spin import exists_mol_res_spin_data, generate_spin_id, return_spin, spin_loop
39 from maths_fns.rdc import ave_rdc_tensor
40 from physical_constants import dipolar_constant, return_gyromagnetic_ratio
41 from relax_errors import RelaxError, RelaxNoRDCError, RelaxNoSequenceError, RelaxSpinTypeError
42 from relax_io import open_write_file, read_spin_data, write_spin_data
43 from relax_warnings import RelaxWarning, RelaxNoSpinWarning
44
45
47 """Back calculate the RDC from the alignment tensor and unit bond vectors.
48
49 @keyword align_id: The alignment tensor ID string.
50 @type align_id: str
51 """
52
53
54 if align_id and align_id not in cdp.align_ids:
55 raise RelaxError, "The alignment ID '%s' is not in the alignment ID list %s." % (align_id, cdp.align_ids)
56
57
58 if align_id:
59 align_ids = [align_id]
60 else:
61 align_ids = cdp.align_ids
62
63
64 for align_id in align_ids:
65
66 if not hasattr(cdp, 'rdc_ids'):
67 cdp.rdc_ids = []
68
69
70 if align_id not in cdp.rdc_ids:
71 cdp.rdc_ids.append(align_id)
72
73
74 weights = ones(cdp.N, float64) / cdp.N
75
76
77 unit_vect = zeros((cdp.N, 3), float64)
78
79
80 for spin in spin_loop():
81
82 if not hasattr(spin, 'bond_vect') and not hasattr(spin, 'xh_vect'):
83 continue
84
85
86 if not hasattr(spin, 'heteronuc_type'):
87 raise RelaxSpinTypeError
88
89
90 if hasattr(spin, 'bond_vect'):
91 vectors = spin.bond_vect
92 else:
93 vectors = spin.xh_vect
94
95
96 if type(vectors[0]) in [float, float64]:
97 vectors = [vectors]
98
99
100 gx = return_gyromagnetic_ratio(spin.heteronuc_type)
101 gh = return_gyromagnetic_ratio(spin.proton_type)
102
103
104 dj = 3.0/(2.0*pi) * dipolar_constant(gx, gh, spin.r)
105
106
107 for c in range(cdp.N):
108 unit_vect[c] = vectors[c] / norm(vectors[c])
109
110
111 if not hasattr(spin, 'rdc_bc'):
112 spin.rdc_bc = {}
113
114
115 for id in align_ids:
116 spin.rdc_bc[id] = ave_rdc_tensor(dj, unit_vect, cdp.N, cdp.align_tensors[get_tensor_index(id)].A, weights=weights)
117
118
119 -def convert(value, align_id, to_intern=False):
120 """Convert the RDC based on the 'D' or '2D' data type.
121
122 @param value: The value or error to convert.
123 @type value: float or None
124 @param align_id: The alignment tensor ID string.
125 @type align_id: str
126 @keyword to_intern: A flag which if True will convert to the internal D notation if needed, or if False will convert from the internal D notation to the external D or 2D format.
127 @type to_intern: bool
128 @return: The converted value.
129 @rtype: float or None
130 """
131
132
133 if value == None:
134 return None
135
136
137 factor = 1.0
138 if hasattr(cdp, 'rdc_data_types') and cdp.rdc_data_types.has_key(align_id) and cdp.rdc_data_types[align_id] == '2D':
139
140 if to_intern:
141 factor = 2.0
142
143
144 else:
145 factor = 0.5
146
147
148 return value * factor
149
150
151 -def corr_plot(format=None, file=None, dir=None, force=False):
152 """Generate a correlation plot of the measured vs. back-calculated RDCs.
153
154 @keyword format: The format for the plot file. The following values are accepted: 'grace', a Grace plot; None, a plain text file.
155 @type format: str or None
156 @keyword file: The file name or object to write to.
157 @type file: str or file object
158 @keyword dir: The name of the directory to place the file into (defaults to the current directory).
159 @type dir: str
160 @keyword force: A flag which if True will cause any pre-existing file to be overwritten.
161 @type force: bool
162 """
163
164
165 pipes.test()
166
167
168 if not exists_mol_res_spin_data():
169 raise RelaxNoSequenceError
170
171
172 if not hasattr(cdp, 'rdc_ids') or not cdp.rdc_ids:
173 warn(RelaxWarning("No RDC data exists, skipping file creation."))
174 return
175
176
177 file = open_write_file(file, dir, force)
178
179
180 data = []
181
182
183 data.append([[-100, -100, 0], [100, 100, 0]])
184
185
186 for align_id in cdp.rdc_ids:
187
188 data.append([])
189
190
191 err_flag = False
192 for spin in spin_loop():
193
194 if not spin.select:
195 continue
196
197
198 if hasattr(spin, 'rdc_err') and align_id in spin.rdc_err.keys():
199 err_flag = True
200 break
201
202
203 for spin, spin_id in spin_loop(return_id=True):
204
205 if not spin.select:
206 continue
207
208
209 if not hasattr(spin, 'rdc') or not hasattr(spin, 'rdc_bc') or not align_id in spin.rdc.keys() or not align_id in spin.rdc_bc.keys():
210 continue
211
212
213 data[-1].append([convert(spin.rdc_bc[align_id], align_id), convert(spin.rdc[align_id], align_id)])
214
215
216 if err_flag:
217 if hasattr(spin, 'rdc_err') and align_id in spin.rdc_err.keys():
218 data[-1][-1].append(convert(spin.rdc_err[align_id], align_id))
219 else:
220 data[-1][-1].append(None)
221
222
223 data[-1][-1].append(spin_id)
224
225
226 size = len(data)
227
228
229 data = [data]
230
231
232 if err_flag:
233 graph_type = 'xydy'
234 else:
235 graph_type = 'xy'
236
237
238 if format == 'grace':
239
240 grace.write_xy_header(file=file, title="RDC correlation plot", sets=size, set_names=[None]+cdp.rdc_ids, linestyle=[2]+[0]*size, data_type=['rdc_bc', 'rdc'], axis_min=[-10, -10], axis_max=[10, 10], legend_pos=[1, 0.5])
241
242
243 grace.write_xy_data(data=data, file=file, graph_type=graph_type)
244
245
247 """Delete the RDC data corresponding to the alignment ID.
248
249 @keyword align_id: The alignment tensor ID string. If not specified, all data will be deleted.
250 @type align_id: str or None
251 """
252
253
254 pipes.test()
255
256
257 if not exists_mol_res_spin_data():
258 raise RelaxNoSequenceError
259
260
261 if align_id and not align_id in cdp.rdc_ids:
262 raise RelaxError("The RDC alignment id '%s' does not exist" % align_id)
263
264
265 if not align_id:
266 ids = deepcopy(cdp.rdc_ids)
267 else:
268 ids = [align_id]
269
270
271 for id in ids:
272
273 cdp.rdc_ids.pop(cdp.rdc_ids.index(id))
274
275
276 if hasattr(cdp, 'rdc_data_types') and cdp.rdc_data_types.has_key(id):
277 cdp.rdc_data_types.pop(id)
278
279
280 for spin in spin_loop():
281
282 if hasattr(spin, 'rdc') and spin.rdc.has_key(id):
283 spin.rdc.pop(id)
284
285
286 if hasattr(spin, 'rdc_err') and spin.rdc_err.has_key(id):
287 spin.rdc_err.pop(id)
288
289
290 if not hasattr(cdp, 'pcs_ids') or id not in cdp.pcs_ids:
291 cdp.align_ids.pop(id)
292
293
294 -def display(align_id=None, bc=False):
295 """Display the RDC data corresponding to the alignment ID.
296
297 @keyword align_id: The alignment tensor ID string.
298 @type align_id: str
299 @keyword bc: The back-calculation flag which if True will cause the back-calculated rather than measured data to be displayed.
300 @type bc: bool
301 """
302
303
304 write(align_id=align_id, file=sys.stdout, bc=bc)
305
306
308 """Calculate the Q-factors for the RDC data.
309
310 @keyword spin_id: The spin ID string used to restrict the Q-factor calculation to a subset of all spins.
311 @type spin_id: None or str
312 """
313
314
315 if not hasattr(cdp, 'rdc_ids') or not len(cdp.rdc_ids):
316 warn(RelaxWarning("No RDC data exists, Q factors cannot be calculated."))
317 return
318
319
320 cdp.q_factors_rdc = {}
321 cdp.q_factors_rdc_norm2 = {}
322
323
324 for align_id in cdp.rdc_ids:
325
326 D2_sum = 0.0
327 sse = 0.0
328
329
330 dj = None
331 N = 0
332 spin_count = 0
333 rdc_data = False
334 rdc_bc_data = False
335 for spin in spin_loop(spin_id):
336
337 if not spin.select:
338 continue
339
340
341 spin_count += 1
342
343
344 if hasattr(spin, 'rdc') and spin.rdc.has_key(align_id):
345 rdc_data = True
346 if hasattr(spin, 'rdc_bc') and spin.rdc_bc.has_key(align_id):
347 rdc_bc_data = True
348
349
350 if not hasattr(spin, 'rdc') or not hasattr(spin, 'rdc_bc') or not spin.rdc.has_key(align_id) or spin.rdc[align_id] == None or not spin.rdc_bc.has_key(align_id) or spin.rdc_bc[align_id] == None:
351 continue
352
353
354 sse = sse + (spin.rdc[align_id] - spin.rdc_bc[align_id])**2
355
356
357 D2_sum = D2_sum + spin.rdc[align_id]**2
358
359
360 gx = return_gyromagnetic_ratio(spin.heteronuc_type)
361 gh = return_gyromagnetic_ratio(spin.proton_type)
362
363
364 dj_new = 3.0/(2.0*pi) * dipolar_constant(gx, gh, spin.r)
365 if dj and dj_new != dj:
366 raise RelaxError("All the RDCs must come from the same nucleus type.")
367 else:
368 dj = dj_new
369
370
371 N = N + 1
372
373
374 if not spin_count:
375 warn(RelaxWarning("No spins have been used in the calculation."))
376 return
377 if not rdc_data:
378 warn(RelaxWarning("No RDC data can be found."))
379 return
380 if not rdc_bc_data:
381 warn(RelaxWarning("No back-calculated RDC data can be found."))
382 return
383
384
385 D = dj * cdp.align_tensors[cdp.align_ids.index(align_id)].A_diag
386 Da = 1.0/3.0 * (D[2, 2] - (D[0, 0]+D[1, 1])/2.0)
387 Dr = 1.0/3.0 * (D[0, 0] - D[1, 1])
388 if Da == 0:
389 R = nan
390 else:
391 R = Dr / Da
392 norm = 2.0 * (Da)**2 * (4.0 + 3.0*R**2)/5.0
393 if Da == 0.0:
394 norm = 1e-15
395
396
397 Q = sqrt(sse / N / norm)
398 cdp.q_factors_rdc[align_id] = Q
399 cdp.q_factors_rdc_norm2[align_id] = sqrt(sse / D2_sum)
400
401
402 cdp.q_rdc = 0.0
403 cdp.q_rdc_norm2 = 0.0
404 for id in cdp.q_factors_rdc:
405 cdp.q_rdc = cdp.q_rdc + cdp.q_factors_rdc[id]**2
406 for id in cdp.q_factors_rdc_norm2:
407 cdp.q_rdc_norm2 = cdp.q_rdc_norm2 + cdp.q_factors_rdc_norm2[id]**2
408 cdp.q_rdc = sqrt(cdp.q_rdc / len(cdp.q_factors_rdc))
409 cdp.q_rdc_norm2 = sqrt(cdp.q_rdc_norm2 / len(cdp.q_factors_rdc_norm2))
410
411
412 -def read(align_id=None, file=None, dir=None, file_data=None, data_type='D', spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None, sep=None, spin_id=None, neg_g_corr=False):
413 """Read the RDC data from file.
414
415 @keyword align_id: The alignment tensor ID string.
416 @type align_id: str
417 @keyword file: The name of the file to open.
418 @type file: str
419 @keyword dir: The directory containing the file (defaults to the current directory if None).
420 @type dir: str or None
421 @keyword file_data: An alternative to opening a file, if the data already exists in the correct format. The format is a list of lists where the first index corresponds to the row and the second the column.
422 @type file_data: list of lists
423 @keyword data_type: A string which is set to 'D' means that the splitting in the aligned sample was assumed to be J + D, or if set to '2D' then the splitting was taken as J + 2D.
424 @keyword spin_id_col: The column containing the spin ID strings. If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be none.
425 @type spin_id_col: int or None
426 @keyword mol_name_col: The column containing the molecule name information. If supplied, spin_id_col must be None.
427 @type mol_name_col: int or None
428 @keyword res_name_col: The column containing the residue name information. If supplied, spin_id_col must be None.
429 @type res_name_col: int or None
430 @keyword res_num_col: The column containing the residue number information. If supplied, spin_id_col must be None.
431 @type res_num_col: int or None
432 @keyword spin_name_col: The column containing the spin name information. If supplied, spin_id_col must be None.
433 @type spin_name_col: int or None
434 @keyword spin_num_col: The column containing the spin number information. If supplied, spin_id_col must be None.
435 @type spin_num_col: int or None
436 @keyword data_col: The column containing the RDC data in Hz.
437 @type data_col: int or None
438 @keyword error_col: The column containing the RDC errors.
439 @type error_col: int or None
440 @keyword sep: The column separator which, if None, defaults to whitespace.
441 @type sep: str or None
442 @keyword spin_id: The spin ID string used to restrict data loading to a subset of all spins.
443 @type spin_id: None or str
444 @keyword neg_g_corr: A flag which is used to correct for the negative gyromagnetic ratio of 15N. If True, a sign inversion will be applied to all RDC values to be loaded.
445 @type neg_g_corr: bool
446 """
447
448
449 pipes.test()
450
451
452 if not exists_mol_res_spin_data():
453 raise RelaxNoSequenceError
454
455
456 if data_col == None and error_col == None:
457 raise RelaxError("One of either the data or error column must be supplied.")
458
459
460 if not hasattr(cdp, 'rdc_data_types'):
461 cdp.rdc_data_types = {}
462 if not cdp.rdc_data_types.has_key(align_id):
463 cdp.rdc_data_types[align_id] = data_type
464
465
466
467
468
469 mol_names = []
470 res_nums = []
471 res_names = []
472 spin_nums = []
473 spin_names = []
474 values = []
475 errors = []
476 for data in read_spin_data(file=file, dir=dir, file_data=file_data, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, error_col=error_col, sep=sep, spin_id=spin_id):
477
478 if data_col and error_col:
479 mol_name, res_num, res_name, spin_num, spin_name, value, error = data
480 elif data_col:
481 mol_name, res_num, res_name, spin_num, spin_name, value = data
482 error = None
483 else:
484 mol_name, res_num, res_name, spin_num, spin_name, error = data
485 value = None
486
487
488 if error == 0.0:
489 raise RelaxError("An invalid error value of zero has been encountered.")
490
491
492 id = generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name)
493 spin = return_spin(id)
494 if spin == None:
495 warn(RelaxNoSpinWarning(id))
496 continue
497
498
499 if data_col:
500
501 if not hasattr(spin, 'rdc'):
502 spin.rdc = {}
503
504
505 value = convert(value, align_id, to_intern=True)
506
507
508 if neg_g_corr and value != None:
509 value = -value
510
511
512 spin.rdc[align_id] = value
513
514
515 if error_col:
516
517 if not hasattr(spin, 'rdc_err'):
518 spin.rdc_err = {}
519
520
521 error = convert(error, align_id, to_intern=True)
522
523
524 spin.rdc_err[align_id] = error
525
526
527 mol_names.append(mol_name)
528 res_nums.append(res_num)
529 res_names.append(res_name)
530 spin_nums.append(spin_num)
531 spin_names.append(spin_name)
532 values.append(value)
533 errors.append(error)
534
535
536 write_spin_data(file=sys.stdout, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names, data=values, data_name='RDCs', error=errors, error_name='RDC_error')
537
538
539
540
541
542
543 if not len(values):
544 return
545
546
547 if not hasattr(cdp, 'align_ids'):
548 cdp.align_ids = []
549 if not hasattr(cdp, 'rdc_ids'):
550 cdp.rdc_ids = []
551
552
553 if align_id not in cdp.align_ids:
554 cdp.align_ids.append(align_id)
555 if align_id not in cdp.rdc_ids:
556 cdp.rdc_ids.append(align_id)
557
558
559 -def weight(align_id=None, spin_id=None, weight=1.0):
560 """Set optimisation weights on the RDC data.
561
562 @keyword align_id: The alignment tensor ID string.
563 @type align_id: str
564 @keyword spin_id: The spin ID string.
565 @type spin_id: None or str
566 @keyword weight: The optimisation weight. The higher the value, the more importance the RDC will have.
567 @type weight: float or int.
568 """
569
570
571 if not exists_mol_res_spin_data():
572 raise RelaxNoSequenceError
573
574
575 if not hasattr(cdp, 'rdc_ids') or align_id not in cdp.rdc_ids:
576 raise RelaxNoRDCError(align_id)
577
578
579 for spin in spin_loop(spin_id):
580
581 if not hasattr(spin, 'rdc_weight'):
582 spin.rdc_weight = {}
583
584
585 spin.rdc_weight[align_id] = weight
586
587
588 -def write(align_id=None, file=None, dir=None, bc=False, force=False):
589 """Display the RDC data corresponding to the alignment ID.
590
591 @keyword align_id: The alignment tensor ID string.
592 @type align_id: str
593 @keyword file: The file name or object to write to.
594 @type file: str or file object
595 @keyword dir: The name of the directory to place the file into (defaults to the current directory).
596 @type dir: str
597 @keyword bc: The back-calculation flag which if True will cause the back-calculated rather than measured data to be written.
598 @type bc: bool
599 @keyword force: A flag which if True will cause any pre-existing file to be overwritten.
600 @type force: bool
601 """
602
603
604 pipes.test()
605
606
607 if not exists_mol_res_spin_data():
608 raise RelaxNoSequenceError
609
610
611 if not hasattr(cdp, 'rdc_ids') or align_id not in cdp.rdc_ids:
612 raise RelaxNoRDCError(align_id)
613
614
615 file = open_write_file(file, dir, force)
616
617
618 mol_names = []
619 res_nums = []
620 res_names = []
621 spin_nums = []
622 spin_names = []
623 values = []
624 errors = []
625 for spin, mol_name, res_num, res_name in spin_loop(full_info=True):
626
627 if not bc and (not hasattr(spin, 'rdc') or align_id not in spin.rdc.keys()):
628 continue
629 elif bc and (not hasattr(spin, 'rdc_bc') or align_id not in spin.rdc_bc.keys()):
630 continue
631
632
633 mol_names.append(mol_name)
634 res_nums.append(res_num)
635 res_names.append(res_name)
636 spin_nums.append(spin.num)
637 spin_names.append(spin.name)
638
639
640 if bc:
641 values.append(convert(spin.rdc_bc[align_id], align_id))
642 else:
643 values.append(convert(spin.rdc[align_id], align_id))
644
645
646 if hasattr(spin, 'rdc_err') and align_id in spin.rdc_err.keys():
647 errors.append(convert(spin.rdc_err[align_id], align_id))
648 else:
649 errors.append(None)
650
651
652 write_spin_data(file=file, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names, data=values, data_name='RDCs', error=errors, error_name='RDC_error')
653