1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """Determination of relative stereochemistry in organic molecules.
24
25 The analysis is preformed by using multiple ensembles of structures, randomly sampled from a given
26 set of structures. The discrimination is performed by comparing the sets of ensembles using NOE
27 violations and RDC Q-factors.
28
29 This script is split into multiple stages:
30
31 1. The random sampling of the snapshots to generate the N ensembles (NUM_ENS, usually 10,000 to
32 100,000) of M members (NUM_MODELS, usually ~10). The original snapshot files are expected to be
33 named the SNAPSHOT_DIR + CONFIG + a number from SNAPSHOT_MIN to SNAPSHOT_MAX + ".pdb", e.g.
34 "snapshots/R647.pdb". The ensembles will be placed into the "ensembles" directory.
35
36 2. The NOE violation analysis.
37
38 3. The superimposition of ensembles. For each ensemble, Molmol is used for superimposition
39 using the fit to first algorithm. The superimposed ensembles will be placed into the
40 "ensembles_superimposed" directory. This stage is not necessary for the NOE analysis.
41
42 4. The RDC Q-factor analysis.
43
44 5. Generation of Grace graphs.
45
46 6. Final ordering of ensembles using the combined RDC and NOE Q-factors, whereby the NOE
47 Q-factor is defined as::
48
49 Q^2 = U / sum(NOE_i^2),
50
51 where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2. The
52 denominator is the sum of all squared NOEs - this must be given as the value of NOE_NORM. The
53 combined Q is given by::
54
55 Q_total^2 = Q_NOE^2 + Q_RDC^2.
56 """
57
58
59 import dep_check
60
61
62 from math import pi, sqrt
63 from os import F_OK, access, getcwd, sep
64 from random import randint
65 from re import search
66 if dep_check.subprocess_module:
67 from subprocess import PIPE, Popen
68 import sys
69
70
71 from pipe_control.grace import write_xy_data, write_xy_header
72 from lib.physical_constants import dipolar_constant, g1H, g13C
73 from prompt.interpreter import Interpreter
74 from lib.errors import RelaxError
75 from lib.io import mkdir_nofail
76 from status import Status; status = Status()
77
78
79
81 """Class for performing the relative stereochemistry analysis."""
82
83 - def __init__(self, stage=1, results_dir=None, num_ens=10000, num_models=10, configs=None, snapshot_dir='snapshots', snapshot_min=None, snapshot_max=None, pseudo=None, noe_file=None, noe_norm=None, rdc_name=None, rdc_file=None, rdc_spin_id1_col=None, rdc_spin_id2_col=None, rdc_data_col=None, rdc_error_col=None, bond_length=None, bond_length_file=None, log=None, bucket_num=200, lower_lim_noe=0.0, upper_lim_noe=600.0, lower_lim_rdc=0.0, upper_lim_rdc=1.0):
84 """Set up for the stereochemistry analysis.
85
86 @keyword stage: Stage of analysis (see the module docstring above for the options).
87 @type stage: int
88 @keyword results_dir: The optional directory to place all results files into.
89 @type results_dir: None or str
90 @keyword num_ens: Number of ensembles.
91 @type num_ens: int
92 @keyword num_models: Ensemble size.
93 @type num_models: int
94 @keyword configs: All the configurations.
95 @type configs: list of str
96 @keyword snapshot_dir: Snapshot directories (corresponding to the configurations).
97 @type snapshot_dir: list of str
98 @keyword snapshot_min: The number of the first snapshots (corresponding to the configurations).
99 @type snapshot_min: list of int
100 @keyword snapshot_max: The number of the last snapshots (corresponding to the configurations).
101 @type snapshot_max: list of int
102 @keyword pseudo: The list of pseudo-atoms. Each element is a list of the pseudo-atom name and a list of all those atoms forming the pseudo-atom. For example, pseudo = [["Q7", ["@H16", "@H17", "@H18"]], ["Q9", ["@H20", "@H21", "@H22"]]].
103 @type pseudo: list of list of str and list of str
104 @keyword noe_file: The name of the NOE restraint file.
105 @type noe_file: str
106 @keyword noe_norm: The NOE normalisation factor (equal to the sum of all NOEs squared).
107 @type noe_norm: float
108 @keyword rdc_name: The label for this RDC data set.
109 @type rdc_name: str
110 @keyword rdc_file: The name of the RDC file.
111 @type rdc_file: str
112 @keyword rdc_spin_id1_col: The spin ID column of the first spin in the RDC file.
113 @type rdc_spin_id1_col: None or int
114 @keyword rdc_spin_id2_col: The spin ID column of the second spin in the RDC file.
115 @type rdc_spin_id2_col: None or int
116 @keyword rdc_data_col: The data column of the RDC file.
117 @type rdc_data_col: int
118 @keyword rdc_error_col: The error column of the RDC file.
119 @type rdc_error_col: int
120 @keyword bond_length: The bond length value in meters. This overrides the bond_length_file argument.
121 @type bond_length: float or None
122 @keyword bond_length_file: The file of bond lengths for each atom pair in meters. The first and second columns must be the spin ID strings and the third column must contain the data.
123 @type bond_length_file: float or None
124 @keyword log: Log file output flag (only for certain stages).
125 @type log: bool
126 @keyword bucket_num: Number of buckets for the distribution plots.
127 @type bucket_num: int
128 @keyword lower_lim_noe: Distribution plot limits.
129 @type lower_lim_noe: int
130 @keyword upper_lim_noe: Distribution plot limits.
131 @type upper_lim_noe: int
132 @keyword lower_lim_rdc: Distribution plot limits.
133 @type lower_lim_rdc: int
134 @keyword upper_lim_rdc: Distribution plot limits.
135 @type upper_lim_rdc: int
136 """
137
138
139 status.exec_lock.acquire('auto stereochem analysis', mode='auto-analysis')
140
141
142 status.init_auto_analysis('stereochem', type='stereochem')
143 status.current_analysis = 'auto stereochem analysis'
144
145
146 self.stage = stage
147 self.results_dir = results_dir
148 self.num_ens = num_ens
149 self.num_models = num_models
150 self.configs = configs
151 self.snapshot_dir = snapshot_dir
152 self.snapshot_min = snapshot_min
153 self.snapshot_max = snapshot_max
154 self.pseudo = pseudo
155 self.noe_file = noe_file
156 self.noe_norm = noe_norm
157 self.rdc_name = rdc_name
158 self.rdc_file = rdc_file
159 self.rdc_spin_id1_col = rdc_spin_id1_col
160 self.rdc_spin_id2_col = rdc_spin_id2_col
161 self.rdc_data_col = rdc_data_col
162 self.rdc_error_col = rdc_error_col
163 self.bond_length = bond_length
164 self.bond_length_file = bond_length_file
165 self.log = log
166 self.bucket_num = bucket_num
167 self.lower_lim_noe = lower_lim_noe
168 self.upper_lim_noe = upper_lim_noe
169 self.lower_lim_rdc = lower_lim_rdc
170 self.upper_lim_rdc = upper_lim_rdc
171
172
173 self.interpreter = Interpreter(show_script=False, raise_relax_error=True)
174 self.interpreter.populate_self()
175 self.interpreter.on(verbose=False)
176
177
178 if self.results_dir:
179 mkdir_nofail(self.results_dir)
180
181
182 else:
183 self.results_dir = getcwd()
184
185
186 if self.log:
187 mkdir_nofail(self.results_dir + sep + "logs")
188
189
190 status.auto_analysis['stereochem'].fin = True
191 status.current_analysis = None
192 status.exec_lock.release()
193
194
196 """Execute the given stage of the analysis."""
197
198
199 self.stdout_orig = sys.stdout
200
201
202 if self.stage == 1:
203 self.sample()
204
205
206 elif self.stage == 2:
207 self.noe_viol()
208
209
210 elif self.stage == 3:
211 self.superimpose()
212
213
214 elif self.stage == 4:
215 self.rdc_analysis()
216
217
218 elif self.stage == 5:
219 self.grace_plots()
220
221
222 elif self.stage == 6:
223 self.combined_q()
224
225
226 else:
227 raise RelaxError("The stage number %s is unknown." % self.stage)
228
229
230 sys.stdout = self.stdout_orig
231
232
234 """Calculate the combined Q-factor.
235
236 The combined Q is defined as::
237
238 Q_total^2 = Q_NOE^2 + Q_RDC^2,
239
240 and the NOE Q-factor as::
241
242 Q^2 = U / sum(NOE_i^2),
243
244 where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.
245 """
246
247
248 if not access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
249 raise RelaxError("The NOE analysis has not been performed, cannot find the file '%s'." % self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted")
250 if not access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
251 raise RelaxError("The RDC analysis has not been performed, cannot find the file '%s'." % self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted")
252
253
254 for i in range(len(self.configs)):
255
256 print("Creating the combined Q-factor file for configuration '%s'." % self.configs[i])
257
258
259 file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
260 noe_lines = file.readlines()
261 file.close()
262
263
264 file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
265 rdc_lines = file.readlines()
266 file.close()
267
268
269 out = open(self.results_dir+sep+"Q_total_%s" % self.configs[i], 'w')
270 out_sorted = open(self.results_dir+sep+"Q_total_%s_sorted" % self.configs[i], 'w')
271
272
273 data = []
274 for j in range(1, len(noe_lines)):
275
276 ens = int(noe_lines[j].split()[0])
277 noe_viol = float(noe_lines[j].split()[1])
278 q_rdc = float(rdc_lines[j].split()[1])
279
280
281 q_noe = sqrt(noe_viol/self.noe_norm)
282
283
284 q = sqrt(q_noe**2 + q_rdc**2)
285
286
287 out.write("%-20i%20.15f\n" % (ens, q))
288
289
290 data.append([q, ens])
291
292
293 data.sort()
294
295
296 for i in range(len(data)):
297 out_sorted.write("%-20i%20.15f\n" % (data[i][1], data[i][0]))
298
299
300 out.close()
301 out_sorted.close()
302
303
305 """Create the distribution data structure."""
306
307
308 bin_width = (upper - lower)/float(inc)
309
310
311 dist = []
312 for i in range(inc):
313 dist.append([bin_width*i+lower, 0])
314
315
316 for val in values:
317
318 bin = int((val - lower)/bin_width)
319
320
321 if bin < 0 or bin >= inc:
322 print("Outside of the limits: '%s'" % val)
323 continue
324
325
326 dist[bin][1] = dist[bin][1] + 1
327
328
329 total_pr = 0.0
330 for i in range(inc):
331 dist[i][1] = dist[i][1] / float(len(values))
332 total_pr = total_pr + dist[i][1]
333
334 print("Total Pr: %s" % total_pr)
335
336
337 return dist
338
339
341 """Generate grace plots of the results."""
342
343
344 n = len(self.configs)
345
346
347 defaults = [4, 2]
348 colours = []
349 for i in range(n):
350
351 if i < len(defaults):
352 colours.append(defaults[i])
353
354
355 else:
356 colours.append(0)
357
358
359 ens_text = ''
360 dividers = [1e15, 1e12, 1e9, 1e6, 1e3, 1]
361 num_ens = self.num_ens
362 for i in range(len(dividers)):
363
364 num = int(num_ens / dividers[i])
365
366
367 if num:
368 text = repr(num)
369 elif not num and ens_text:
370 text = '000'
371 else:
372 continue
373
374
375 ens_text = ens_text + text
376
377
378 if i < len(dividers)-1:
379 ens_text = ens_text + ','
380
381
382 num_ens = num_ens - dividers[i]*num
383
384
385 subtitle = '%s ensembles of %s' % (ens_text, self.num_models)
386
387
388 if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
389
390 print("Generating NOE violation Grace plots.")
391
392
393 grace_curve = open(self.results_dir+sep+"NOE_viol_curve.agr", 'w')
394 grace_dist = open(self.results_dir+sep+"NOE_viol_dist.agr", 'w')
395
396
397 data = []
398 dist = []
399 for i in range(n):
400
401 file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i] + "_sorted")
402 lines = file.readlines()
403 file.close()
404
405
406 data.append([])
407
408
409 noe_viols = []
410 for j in range(1, len(lines)):
411
412 viol = float(lines[j].split()[1])
413 noe_viols.append(viol)
414
415
416 data[i].append([j, viol])
417
418
419 dist.append(self.generate_distribution(noe_viols, inc=self.bucket_num, upper=self.upper_lim_noe, lower=self.lower_lim_noe))
420
421
422 write_xy_header(file=grace_curve, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0]*n], axis_labels=[['Ensemble (sorted)', 'NOE violation (Angstrom\\S2\\N)']], legend_pos=[[0.3, 0.8]])
423 write_xy_header(file=grace_dist, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1]*n], symbol_sizes=[[0.5]*n], linestyle=[[3]*n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', 'Frequency']], legend_pos=[[1.1, 0.8]])
424
425
426 write_xy_data([data], file=grace_curve, graph_type='xy')
427 write_xy_data([dist], file=grace_dist, graph_type='xy')
428
429
430 grace_curve.close()
431 grace_dist.close()
432
433
434 if access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
435
436 print("Generating RDC Q-factor Grace plots.")
437
438
439 grace_curve = open(self.results_dir+sep+"RDC_%s_curve.agr" % self.rdc_name, 'w')
440 grace_dist = open(self.results_dir+sep+"RDC_%s_dist.agr" % self.rdc_name, 'w')
441
442
443 data = []
444 dist = []
445 for i in range(n):
446
447 file = open(self.results_dir+sep+"Q_factors_" + self.configs[i] + "_sorted")
448 lines = file.readlines()
449 file.close()
450
451
452 data.append([])
453
454
455 values = []
456 for j in range(1, len(lines)):
457
458 value = float(lines[j].split()[1])
459 values.append(value)
460
461
462 data[i].append([j, value])
463
464
465 dist.append(self.generate_distribution(values, inc=self.bucket_num, upper=self.upper_lim_rdc, lower=self.lower_lim_rdc))
466
467
468 write_xy_header(file=grace_curve, title='%s RDC Q-factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0]*n], axis_labels=[['Ensemble (sorted)', '%s RDC Q-factor (pales format)' % self.rdc_name]], legend_pos=[[0.3, 0.8]])
469 write_xy_header(file=grace_dist, title='%s RDC Q-factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1]*n], symbol_sizes=[[0.5]*n], linestyle=[[3]*n], axis_labels=[['%s RDC Q-factor (pales format)' % self.rdc_name, 'Frequency']], legend_pos=[[1.1, 0.8]])
470
471
472 write_xy_data([data], file=grace_curve, graph_type='xy')
473 write_xy_data([dist], file=grace_dist, graph_type='xy')
474
475
476 grace_curve.close()
477 grace_dist.close()
478
479
480 if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK) and access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
481
482 print("Generating NOE-RDC correlation Grace plots.")
483
484
485 grace_file = open(self.results_dir+sep+"correlation_plot.agr", 'w')
486 grace_file_scaled = open(self.results_dir+sep+"correlation_plot_scaled.agr", 'w')
487
488
489 data = []
490 data_scaled = []
491 for i in range(len(self.configs)):
492
493 file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
494 noe_lines = file.readlines()
495 file.close()
496
497
498 data.append([])
499 data_scaled.append([])
500
501
502 file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
503 rdc_lines = file.readlines()
504 file.close()
505
506
507 for j in range(1, len(noe_lines)):
508
509 noe_viol = float(noe_lines[j].split()[1])
510 q_factor = float(rdc_lines[j].split()[1])
511
512
513 data[i].append([noe_viol, q_factor])
514 data_scaled[i].append([sqrt(noe_viol/self.noe_norm), q_factor])
515
516
517 write_xy_header(file=grace_file, title='Correlation plot - %s RDC vs. NOE' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', '%s RDC Q-factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
518 write_xy_header(file=grace_file_scaled, title='Correlation plot - %s RDC vs. NOE Q-factor' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['Normalised NOE violation (Q = sqrt(U / \\xS\\f{}NOE\\si\\N\\S2\\N))', '%s RDC Q-factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
519 write_xy_data([data], file=grace_file, graph_type='xy')
520 write_xy_data([data_scaled], file=grace_file_scaled, graph_type='xy')
521
522
524 """NOE violation calculations."""
525
526
527 if self.log:
528 sys.stdout = open(self.results_dir+sep+"logs" + sep + "NOE_viol.log", 'w')
529
530
531 dir = self.results_dir + sep + "NOE_results"
532 mkdir_nofail(dir=dir)
533
534
535 for config in self.configs:
536
537 print("\n"*10 + "# Set up for config " + config + " #" + "\n")
538
539
540 out = open(self.results_dir+sep+"NOE_viol_" + config, 'w')
541 out_sorted = open(self.results_dir+sep+"NOE_viol_" + config + "_sorted", 'w')
542 out.write("%-20s%20s\n" % ("# Ensemble", "NOE_volation"))
543 out_sorted.write("%-20s%20s\n" % ("# Ensemble", "NOE_volation"))
544
545
546 self.interpreter.pipe.create("noe_viol_%s" % config, "N-state")
547
548
549 self.interpreter.structure.read_pdb("ensembles" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))
550
551
552 self.interpreter.structure.load_spins("@H*", ave_pos=False)
553
554
555 for i in range(len(self.pseudo)):
556 self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
557 self.interpreter.sequence.display()
558
559
560 self.interpreter.noe.read_restraints(file=self.noe_file)
561
562
563 self.interpreter.n_state_model.select_model(model="fixed")
564
565
566 print("\n"*2 + "# Set up complete #" + "\n"*10)
567
568
569 noe_viol = []
570 for ens in range(self.num_ens):
571
572 if self.log:
573 sys.stdout.write(config + repr(ens) + "\n")
574 sys.stderr.write(config + repr(ens) + "\n")
575
576
577 self.interpreter.structure.delete()
578
579
580 self.interpreter.structure.read_pdb("ensembles" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))
581
582
583 self.interpreter.structure.get_pos(ave_pos=False)
584
585
586 self.interpreter.calc()
587
588
589 cdp.sum_viol = 0.0
590 for i in range(len(cdp.ave_dist)):
591 if cdp.quad_pot[i][2]:
592 cdp.sum_viol = cdp.sum_viol + cdp.quad_pot[i][2]
593
594
595 noe_viol.append([cdp.sum_viol, ens])
596 out.write("%-20i%30.15f\n" % (ens, cdp.sum_viol))
597
598
599 self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
600
601
602 noe_viol.sort()
603
604
605 for i in range(len(noe_viol)):
606 out_sorted.write("%-20i%20.15f\n" % (noe_viol[i][1], noe_viol[i][0]))
607
608
610 """Perform the RDC part of the analysis."""
611
612
613 if self.log:
614 sys.stdout = open(self.results_dir+sep+"logs" + sep + "RDC_%s_analysis.log" % self.rdc_name, 'w')
615
616
617 d = 0.0
618 if self.bond_length != None:
619 d = 3.0 / (2.0*pi) * dipolar_constant(g13C, g1H, self.bond_length)
620
621
622 dir = self.results_dir + sep + "RDC_%s_results" % self.rdc_name
623 mkdir_nofail(dir=dir)
624
625
626 for config in self.configs:
627
628 print("\n"*10 + "# Set up for config " + config + " #" + "\n")
629
630
631 out = open(self.results_dir+sep+"Q_factors_" + config, 'w')
632 out_sorted = open(self.results_dir+sep+"Q_factors_" + config + "_sorted", 'w')
633 out.write("%-20s%20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)", "RDC_Q_factor(standard)"))
634 out_sorted.write("%-20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)"))
635
636
637 self.interpreter.pipe.create("rdc_analysis_%s" % config, "N-state")
638
639
640 self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))
641
642
643 self.interpreter.structure.load_spins(ave_pos=False)
644
645
646 for i in range(len(self.pseudo)):
647 self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
648 self.interpreter.sequence.display()
649
650
651 self.interpreter.rdc.read(align_id=self.rdc_file, file=self.rdc_file, spin_id1_col=self.rdc_spin_id1_col, spin_id2_col=self.rdc_spin_id2_col, data_col=self.rdc_data_col, error_col=self.rdc_error_col)
652
653
654 if self.bond_length != None:
655 self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
656 self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@Q*', ave_dist=self.bond_length)
657 else:
658 self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)
659
660
661 self.interpreter.spin.isotope(isotope='13C', spin_id='@C*')
662 self.interpreter.spin.isotope(isotope='1H', spin_id='@H*')
663 self.interpreter.spin.isotope(isotope='1H', spin_id='@Q*')
664
665
666 self.interpreter.n_state_model.select_model(model="fixed")
667
668
669 print("\n"*2 + "# Set up complete #" + "\n"*10)
670
671
672 q_factors = []
673 for ens in range(self.num_ens):
674
675 if self.log:
676 sys.stdout.write(config + repr(ens) + "\n")
677 sys.stderr.write(config + repr(ens) + "\n")
678
679
680 self.interpreter.structure.delete()
681
682
683 self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))
684
685
686 self.interpreter.structure.get_pos(ave_pos=False)
687 if self.bond_length != None:
688 self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
689 else:
690 self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)
691 self.interpreter.interatom.unit_vectors(ave=False)
692
693
694
695 self.interpreter.minimise("simplex", constraints=False)
696
697
698 q_factors.append([cdp.q_rdc, ens])
699 out.write("%-20i%20.15f%20.15f\n" % (ens, cdp.q_rdc, cdp.q_rdc_norm2))
700
701
702 cdp.align_tensor_Hz = d * cdp.align_tensors[0].A
703 cdp.align_tensor_Hz_5D = d * cdp.align_tensors[0].A_5D
704
705
706 self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
707
708
709 q_factors.sort()
710
711
712 for i in range(len(q_factors)):
713 out_sorted.write("%-20i%20.15f\n" % (q_factors[i][1], q_factors[i][0]))
714
715
717 """Generate the ensembles by random sampling of the snapshots."""
718
719
720 mkdir_nofail(dir=self.results_dir + sep + "ensembles")
721
722
723 for conf_index in range(len(self.configs)):
724
725 for ens in range(self.num_ens):
726
727 rand = []
728 for j in range(self.num_models):
729 rand.append(randint(self.snapshot_min[conf_index], self.snapshot_max[conf_index]))
730
731
732 print("Generating ensemble %s%s from structures %s." % (self.configs[conf_index], ens, rand))
733
734
735 file_name = "ensembles" + sep + self.configs[conf_index] + repr(ens) + ".pdb"
736
737
738 out = open(self.results_dir+sep+file_name, 'w')
739
740
741 out.write("REM Structures: " + repr(rand) + "\n")
742
743
744 for j in range(self.num_models):
745
746 rand_name = self.snapshot_dir[conf_index] + sep + self.configs[conf_index] + repr(rand[j]) + ".pdb"
747
748
749 out.write(open(rand_name).read())
750
751
752 out.close()
753
754
756 """Superimpose the ensembles using fit to first in Molmol."""
757
758
759 mkdir_nofail("ensembles_superimposed")
760
761
762 if self.log:
763 log = open(self.results_dir+sep+"logs" + sep + "superimpose_molmol.stderr", 'w')
764 sys.stdout = open(self.results_dir+sep+"logs" + sep + "superimpose.log", 'w')
765
766
767 for config in ["R", "S"]:
768
769 for ens in range(self.num_ens):
770
771 file_in = "ensembles" + sep + config + repr(ens) + ".pdb"
772 file_out = "ensembles_superimposed" + sep + config + repr(ens) + ".pdb"
773
774
775 sys.stderr.write("Superimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
776 if self.log:
777 log.write("\n\n\nSuperimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
778
779
780 if access(self.results_dir+sep+file_out, F_OK):
781 continue
782
783
784 pipe = Popen("molmol -t -f -", shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=False)
785
786
787 pipe.stdin.write("InitAll yes\n")
788
789
790 pipe.stdin.write("ReadPdb " + self.results_dir+sep+file_in + "\n")
791
792
793 pipe.stdin.write("Fit to_first 'selected'\n")
794 pipe.stdin.write("Fit to_mean 'selected'\n")
795
796
797 pipe.stdin.write("WritePdb " + self.results_dir+sep+file_out + "\n")
798
799
800 pipe.stdin.close()
801
802
803 sys.stdout.write(pipe.stdout.read())
804 if self.log:
805 log.write(pipe.stderr.read())
806
807
808 pipe.stdout.close()
809 pipe.stderr.close()
810
811
812 self.interpreter.reset()
813 self.interpreter.pipe.create('out', 'N-state')
814 self.interpreter.structure.read_pdb(file_out)
815
816
817 for model in cdp.structure.structural_data:
818
819 mol = model.mol[0]
820
821
822 for i in range(len(mol.atom_name)):
823
824 if search('H', mol.atom_name[i]):
825 mol.atom_name[i] = mol.atom_name[i][1:] + mol.atom_name[i][0]
826
827
828 self.interpreter.structure.write_pdb(config + repr(ens) + ".pdb", dir=self.results_dir+sep+"ensembles_superimposed", force=True)
829