1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """Determination of relative stereochemistry in organic molecules.
24
25 The analysis is preformed by using multiple ensembles of structures, randomly sampled from a given set of structures. The discrimination is performed by comparing the sets of ensembles using NOE violations and RDC Q factors.
26
27 This script is split into multiple stages:
28
29 1. The random sampling of the snapshots to generate the N ensembles (NUM_ENS, usually 10,000 to 100,000) of M members (NUM_MODELS, usually ~10). The original snapshot files are expected to be named the SNAPSHOT_DIR + CONFIG + a number from SNAPSHOT_MIN to SNAPSHOT_MAX + ".pdb", e.g. "snapshots/R647.pdb". The ensembles will be placed into the "ensembles" directory.
30
31 2. The NOE violation analysis.
32
33 3. The superimposition of ensembles. For each ensemble, Molmol is used for superimposition using the fit to first algorithm. The superimposed ensembles will be placed into the "ensembles_superimposed" directory. This stage is not necessary for the NOE analysis.
34
35 4. The RDC Q factor analysis.
36
37 5. Generation of Grace graphs.
38
39 6. Final ordering of ensembles using the combined RDC and NOE Q factors, whereby the NOE Q factor is defined as::
40
41 Q^2 = U / sum(NOE_i^2),
42
43 where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2. The denominator is the sum of all squared NOEs - this must be given as the value of NOE_NORM. The combined Q is given by::
44
45 Q_total^2 = Q_NOE^2 + Q_RDC^2.
46 """
47
48
49 import dep_check
50
51
52 from math import pi, sqrt
53 from os import F_OK, access, getcwd, sep
54 from random import randint
55 from re import search
56 if dep_check.subprocess_module:
57 from subprocess import PIPE, Popen
58 import sys
59
60
61 from lib.periodic_table import periodic_table
62 from lib.physical_constants import dipolar_constant
63 from lib.plotting.api import write_xy_data, write_xy_header
64 from prompt.interpreter import Interpreter
65 from lib.errors import RelaxError
66 from lib.io import mkdir_nofail
67 from status import Status; status = Status()
68
69
70
72 """Class for performing the relative stereochemistry analysis."""
73
74 - def __init__(self, stage=1, results_dir=None, num_ens=10000, num_models=10, configs=None, snapshot_dir='snapshots', snapshot_min=None, snapshot_max=None, pseudo=None, noe_file=None, noe_norm=None, rdc_name=None, rdc_file=None, rdc_spin_id1_col=None, rdc_spin_id2_col=None, rdc_data_col=None, rdc_error_col=None, bond_length=None, bond_length_file=None, log=None, bucket_num=200, lower_lim_noe=0.0, upper_lim_noe=600.0, lower_lim_rdc=0.0, upper_lim_rdc=1.0):
75 """Set up for the stereochemistry analysis.
76
77 @keyword stage: Stage of analysis (see the module docstring above for the options).
78 @type stage: int
79 @keyword results_dir: The optional directory to place all results files into.
80 @type results_dir: None or str
81 @keyword num_ens: Number of ensembles.
82 @type num_ens: int
83 @keyword num_models: Ensemble size.
84 @type num_models: int
85 @keyword configs: All the configurations.
86 @type configs: list of str
87 @keyword snapshot_dir: Snapshot directories (corresponding to the configurations).
88 @type snapshot_dir: list of str
89 @keyword snapshot_min: The number of the first snapshots (corresponding to the configurations).
90 @type snapshot_min: list of int
91 @keyword snapshot_max: The number of the last snapshots (corresponding to the configurations).
92 @type snapshot_max: list of int
93 @keyword pseudo: The list of pseudo-atoms. Each element is a list of the pseudo-atom name and a list of all those atoms forming the pseudo-atom. For example, pseudo = [["Q7", ["@H16", "@H17", "@H18"]], ["Q9", ["@H20", "@H21", "@H22"]]].
94 @type pseudo: list of list of str and list of str
95 @keyword noe_file: The name of the NOE restraint file.
96 @type noe_file: str
97 @keyword noe_norm: The NOE normalisation factor (equal to the sum of all NOEs squared).
98 @type noe_norm: float
99 @keyword rdc_name: The label for this RDC data set.
100 @type rdc_name: str
101 @keyword rdc_file: The name of the RDC file.
102 @type rdc_file: str
103 @keyword rdc_spin_id1_col: The spin ID column of the first spin in the RDC file.
104 @type rdc_spin_id1_col: None or int
105 @keyword rdc_spin_id2_col: The spin ID column of the second spin in the RDC file.
106 @type rdc_spin_id2_col: None or int
107 @keyword rdc_data_col: The data column of the RDC file.
108 @type rdc_data_col: int
109 @keyword rdc_error_col: The error column of the RDC file.
110 @type rdc_error_col: int
111 @keyword bond_length: The bond length value in meters. This overrides the bond_length_file argument.
112 @type bond_length: float or None
113 @keyword bond_length_file: The file of bond lengths for each atom pair in meters. The first and second columns must be the spin ID strings and the third column must contain the data.
114 @type bond_length_file: float or None
115 @keyword log: Log file output flag (only for certain stages).
116 @type log: bool
117 @keyword bucket_num: Number of buckets for the distribution plots.
118 @type bucket_num: int
119 @keyword lower_lim_noe: Distribution plot limits.
120 @type lower_lim_noe: int
121 @keyword upper_lim_noe: Distribution plot limits.
122 @type upper_lim_noe: int
123 @keyword lower_lim_rdc: Distribution plot limits.
124 @type lower_lim_rdc: int
125 @keyword upper_lim_rdc: Distribution plot limits.
126 @type upper_lim_rdc: int
127 """
128
129
130 status.exec_lock.acquire('auto stereochem analysis', mode='auto-analysis')
131
132
133 status.init_auto_analysis('stereochem', type='stereochem')
134 status.current_analysis = 'auto stereochem analysis'
135
136
137 self.stage = stage
138 self.results_dir = results_dir
139 self.num_ens = num_ens
140 self.num_models = num_models
141 self.configs = configs
142 self.snapshot_dir = snapshot_dir
143 self.snapshot_min = snapshot_min
144 self.snapshot_max = snapshot_max
145 self.pseudo = pseudo
146 self.noe_file = noe_file
147 self.noe_norm = noe_norm
148 self.rdc_name = rdc_name
149 self.rdc_file = rdc_file
150 self.rdc_spin_id1_col = rdc_spin_id1_col
151 self.rdc_spin_id2_col = rdc_spin_id2_col
152 self.rdc_data_col = rdc_data_col
153 self.rdc_error_col = rdc_error_col
154 self.bond_length = bond_length
155 self.bond_length_file = bond_length_file
156 self.log = log
157 self.bucket_num = bucket_num
158 self.lower_lim_noe = lower_lim_noe
159 self.upper_lim_noe = upper_lim_noe
160 self.lower_lim_rdc = lower_lim_rdc
161 self.upper_lim_rdc = upper_lim_rdc
162
163
164 self.interpreter = Interpreter(show_script=False, raise_relax_error=True)
165 self.interpreter.populate_self()
166 self.interpreter.on(verbose=False)
167
168
169 if self.results_dir:
170 mkdir_nofail(self.results_dir)
171
172
173 else:
174 self.results_dir = getcwd()
175
176
177 if self.log:
178 mkdir_nofail(self.results_dir + sep + "logs")
179
180
181 status.auto_analysis['stereochem'].fin = True
182 status.current_analysis = None
183 status.exec_lock.release()
184
185
187 """Execute the given stage of the analysis."""
188
189
190 self.stdout_orig = sys.stdout
191
192
193 if self.stage == 1:
194 self.sample()
195
196
197 elif self.stage == 2:
198 self.noe_viol()
199
200
201 elif self.stage == 3:
202 self.superimpose()
203
204
205 elif self.stage == 4:
206 self.rdc_analysis()
207
208
209 elif self.stage == 5:
210 self.grace_plots()
211
212
213 elif self.stage == 6:
214 self.combined_q()
215
216
217 else:
218 raise RelaxError("The stage number %s is unknown." % self.stage)
219
220
221 sys.stdout = self.stdout_orig
222
223
225 """Calculate the combined Q factor.
226
227 The combined Q is defined as::
228
229 Q_total^2 = Q_NOE^2 + Q_RDC^2,
230
231 and the NOE Q factor as::
232
233 Q^2 = U / sum(NOE_i^2),
234
235 where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.
236 """
237
238
239 if not access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
240 raise RelaxError("The NOE analysis has not been performed, cannot find the file '%s'." % self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted")
241 if not access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
242 raise RelaxError("The RDC analysis has not been performed, cannot find the file '%s'." % self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted")
243
244
245 for i in range(len(self.configs)):
246
247 print("Creating the combined Q factor file for configuration '%s'." % self.configs[i])
248
249
250 file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
251 noe_lines = file.readlines()
252 file.close()
253
254
255 file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
256 rdc_lines = file.readlines()
257 file.close()
258
259
260 out = open(self.results_dir+sep+"Q_total_%s" % self.configs[i], 'w')
261 out_sorted = open(self.results_dir+sep+"Q_total_%s_sorted" % self.configs[i], 'w')
262
263
264 data = []
265 for j in range(1, len(noe_lines)):
266
267 ens = int(noe_lines[j].split()[0])
268 noe_viol = float(noe_lines[j].split()[1])
269 q_rdc = float(rdc_lines[j].split()[1])
270
271
272 q_noe = sqrt(noe_viol/self.noe_norm)
273
274
275 q = sqrt(q_noe**2 + q_rdc**2)
276
277
278 out.write("%-20i%20.15f\n" % (ens, q))
279
280
281 data.append([q, ens])
282
283
284 data.sort()
285
286
287 for i in range(len(data)):
288 out_sorted.write("%-20i%20.15f\n" % (data[i][1], data[i][0]))
289
290
291 out.close()
292 out_sorted.close()
293
294
296 """Create the distribution data structure."""
297
298
299 bin_width = (upper - lower)/float(inc)
300
301
302 dist = []
303 for i in range(inc):
304 dist.append([bin_width*i+lower, 0])
305
306
307 for val in values:
308
309 bin = int((val - lower)/bin_width)
310
311
312 if bin < 0 or bin >= inc:
313 print("Outside of the limits: '%s'" % val)
314 continue
315
316
317 dist[bin][1] = dist[bin][1] + 1
318
319
320 total_pr = 0.0
321 for i in range(inc):
322 dist[i][1] = dist[i][1] / float(len(values))
323 total_pr = total_pr + dist[i][1]
324
325 print("Total Pr: %s" % total_pr)
326
327
328 return dist
329
330
332 """Generate grace plots of the results."""
333
334
335 n = len(self.configs)
336
337
338 defaults = [4, 2]
339 colours = []
340 for i in range(n):
341
342 if i < len(defaults):
343 colours.append(defaults[i])
344
345
346 else:
347 colours.append(0)
348
349
350 ens_text = ''
351 dividers = [1e15, 1e12, 1e9, 1e6, 1e3, 1]
352 num_ens = self.num_ens
353 for i in range(len(dividers)):
354
355 num = int(num_ens / dividers[i])
356
357
358 if num:
359 text = repr(num)
360 elif not num and ens_text:
361 text = '000'
362 else:
363 continue
364
365
366 ens_text = ens_text + text
367
368
369 if i < len(dividers)-1:
370 ens_text = ens_text + ','
371
372
373 num_ens = num_ens - dividers[i]*num
374
375
376 subtitle = '%s ensembles of %s' % (ens_text, self.num_models)
377
378
379 if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
380
381 print("Generating NOE violation Grace plots.")
382
383
384 grace_curve = open(self.results_dir+sep+"NOE_viol_curve.agr", 'w')
385 grace_dist = open(self.results_dir+sep+"NOE_viol_dist.agr", 'w')
386
387
388 data = []
389 dist = []
390 for i in range(n):
391
392 file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i] + "_sorted")
393 lines = file.readlines()
394 file.close()
395
396
397 data.append([])
398
399
400 noe_viols = []
401 for j in range(1, len(lines)):
402
403 viol = float(lines[j].split()[1])
404 noe_viols.append(viol)
405
406
407 data[i].append([j, viol])
408
409
410 dist.append(self.generate_distribution(noe_viols, inc=self.bucket_num, upper=self.upper_lim_noe, lower=self.lower_lim_noe))
411
412
413 write_xy_header(format='grace', file=grace_curve, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0]*n], axis_labels=[['Ensemble (sorted)', 'NOE violation (Angstrom\\S2\\N)']], legend_pos=[[0.3, 0.8]])
414 write_xy_header(format='grace', file=grace_dist, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1]*n], symbol_sizes=[[0.5]*n], linestyle=[[3]*n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', 'Frequency']], legend_pos=[[1.1, 0.8]])
415
416
417 write_xy_data(format='grace', data=[data], file=grace_curve, graph_type='xy')
418 write_xy_data(format='grace', data=[dist], file=grace_dist, graph_type='xy')
419
420
421 grace_curve.close()
422 grace_dist.close()
423
424
425 if access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
426
427 print("Generating RDC Q factor Grace plots.")
428
429
430 grace_curve = open(self.results_dir+sep+"RDC_%s_curve.agr" % self.rdc_name, 'w')
431 grace_dist = open(self.results_dir+sep+"RDC_%s_dist.agr" % self.rdc_name, 'w')
432
433
434 data = []
435 dist = []
436 for i in range(n):
437
438 file = open(self.results_dir+sep+"Q_factors_" + self.configs[i] + "_sorted")
439 lines = file.readlines()
440 file.close()
441
442
443 data.append([])
444
445
446 values = []
447 for j in range(1, len(lines)):
448
449 value = float(lines[j].split()[1])
450 values.append(value)
451
452
453 data[i].append([j, value])
454
455
456 dist.append(self.generate_distribution(values, inc=self.bucket_num, upper=self.upper_lim_rdc, lower=self.lower_lim_rdc))
457
458
459 write_xy_header(format='grace', file=grace_curve, title='%s RDC Q factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0]*n], axis_labels=[['Ensemble (sorted)', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[0.3, 0.8]])
460 write_xy_header(format='grace', file=grace_dist, title='%s RDC Q factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1]*n], symbol_sizes=[[0.5]*n], linestyle=[[3]*n], axis_labels=[['%s RDC Q factor (pales format)' % self.rdc_name, 'Frequency']], legend_pos=[[1.1, 0.8]])
461
462
463 write_xy_data(format='grace', data=[data], file=grace_curve, graph_type='xy')
464 write_xy_data(format='grace', data=[dist], file=grace_dist, graph_type='xy')
465
466
467 grace_curve.close()
468 grace_dist.close()
469
470
471 if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK) and access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
472
473 print("Generating NOE-RDC correlation Grace plots.")
474
475
476 grace_file = open(self.results_dir+sep+"correlation_plot.agr", 'w')
477 grace_file_scaled = open(self.results_dir+sep+"correlation_plot_scaled.agr", 'w')
478
479
480 data = []
481 data_scaled = []
482 for i in range(len(self.configs)):
483
484 file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
485 noe_lines = file.readlines()
486 file.close()
487
488
489 data.append([])
490 data_scaled.append([])
491
492
493 file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
494 rdc_lines = file.readlines()
495 file.close()
496
497
498 for j in range(1, len(noe_lines)):
499
500 noe_viol = float(noe_lines[j].split()[1])
501 q_factor = float(rdc_lines[j].split()[1])
502
503
504 data[i].append([noe_viol, q_factor])
505 data_scaled[i].append([sqrt(noe_viol/self.noe_norm), q_factor])
506
507
508 write_xy_header(format='grace', file=grace_file, title='Correlation plot - %s RDC vs. NOE' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
509 write_xy_header(format='grace', file=grace_file_scaled, title='Correlation plot - %s RDC vs. NOE Q factor' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['Normalised NOE violation (Q = sqrt(U / \\xS\\f{}NOE\\si\\N\\S2\\N))', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
510 write_xy_data(format='grace', data=[data], file=grace_file, graph_type='xy')
511 write_xy_data(format='grace', data=[data_scaled], file=grace_file_scaled, graph_type='xy')
512
513
515 """NOE violation calculations."""
516
517
518 if self.log:
519 sys.stdout = open(self.results_dir+sep+"logs" + sep + "NOE_viol.log", 'w')
520
521
522 dir = self.results_dir + sep + "NOE_results"
523 mkdir_nofail(dir=dir)
524
525
526 for config in self.configs:
527
528 print("\n"*10 + "# Set up for config " + config + " #" + "\n")
529
530
531 out = open(self.results_dir+sep+"NOE_viol_" + config, 'w')
532 out_sorted = open(self.results_dir+sep+"NOE_viol_" + config + "_sorted", 'w')
533 out.write("%-20s%20s\n" % ("# Ensemble", "NOE_volation"))
534 out_sorted.write("%-20s%20s\n" % ("# Ensemble", "NOE_volation"))
535
536
537 self.interpreter.pipe.create("noe_viol_%s" % config, "N-state")
538
539
540 self.interpreter.structure.read_pdb("ensembles" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))
541
542
543 self.interpreter.structure.load_spins("@H*", ave_pos=False)
544
545
546 for i in range(len(self.pseudo)):
547 self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
548 self.interpreter.sequence.display()
549
550
551 self.interpreter.noe.read_restraints(file=self.noe_file)
552
553
554 self.interpreter.n_state_model.select_model(model="fixed")
555
556
557 print("\n"*2 + "# Set up complete #" + "\n"*10)
558
559
560 noe_viol = []
561 for ens in range(self.num_ens):
562
563 if self.log:
564 sys.stdout.write(config + repr(ens) + "\n")
565 sys.stderr.write(config + repr(ens) + "\n")
566
567
568 self.interpreter.structure.delete()
569
570
571 self.interpreter.structure.read_pdb("ensembles" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))
572
573
574 self.interpreter.structure.get_pos(ave_pos=False)
575
576
577 self.interpreter.minimise.calculate()
578
579
580 cdp.sum_viol = 0.0
581 for i in range(len(cdp.ave_dist)):
582 if cdp.quad_pot[i][2]:
583 cdp.sum_viol = cdp.sum_viol + cdp.quad_pot[i][2]
584
585
586 noe_viol.append([cdp.sum_viol, ens])
587 out.write("%-20i%30.15f\n" % (ens, cdp.sum_viol))
588
589
590 self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
591
592
593 noe_viol.sort()
594
595
596 for i in range(len(noe_viol)):
597 out_sorted.write("%-20i%20.15f\n" % (noe_viol[i][1], noe_viol[i][0]))
598
599
601 """Perform the RDC part of the analysis."""
602
603
604 if self.log:
605 sys.stdout = open(self.results_dir+sep+"logs" + sep + "RDC_%s_analysis.log" % self.rdc_name, 'w')
606
607
608 d = 0.0
609 if self.bond_length != None:
610 d = 3.0 / (2.0*pi) * dipolar_constant(periodic_table.gyromagnetic_ratio('13C'), periodic_table.gyromagnetic_ratio('1H'), self.bond_length)
611
612
613 dir = self.results_dir + sep + "RDC_%s_results" % self.rdc_name
614 mkdir_nofail(dir=dir)
615
616
617 for config in self.configs:
618
619 print("\n"*10 + "# Set up for config " + config + " #" + "\n")
620
621
622 out = open(self.results_dir+sep+"Q_factors_" + config, 'w')
623 out_sorted = open(self.results_dir+sep+"Q_factors_" + config + "_sorted", 'w')
624 out.write("%-20s%20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)", "RDC_Q_factor(standard)"))
625 out_sorted.write("%-20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)"))
626
627
628 self.interpreter.pipe.create("rdc_analysis_%s" % config, "N-state")
629
630
631 self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))
632
633
634 self.interpreter.structure.load_spins(ave_pos=False)
635
636
637 for i in range(len(self.pseudo)):
638 self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
639 self.interpreter.sequence.display()
640
641
642 self.interpreter.rdc.read(align_id=self.rdc_file, file=self.rdc_file, spin_id1_col=self.rdc_spin_id1_col, spin_id2_col=self.rdc_spin_id2_col, data_col=self.rdc_data_col, error_col=self.rdc_error_col)
643
644
645 if self.bond_length != None:
646 self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
647 self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@Q*', ave_dist=self.bond_length)
648 else:
649 self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)
650
651
652 self.interpreter.spin.isotope(isotope='13C', spin_id='@C*')
653 self.interpreter.spin.isotope(isotope='1H', spin_id='@H*')
654 self.interpreter.spin.isotope(isotope='1H', spin_id='@Q*')
655
656
657 self.interpreter.n_state_model.select_model(model="fixed")
658
659
660 print("\n"*2 + "# Set up complete #" + "\n"*10)
661
662
663 q_factors = []
664 for ens in range(self.num_ens):
665
666 if self.log:
667 sys.stdout.write(config + repr(ens) + "\n")
668 sys.stderr.write(config + repr(ens) + "\n")
669
670
671 self.interpreter.structure.delete()
672
673
674 self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))
675
676
677 self.interpreter.structure.get_pos(ave_pos=False)
678 if self.bond_length != None:
679 self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
680 else:
681 self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)
682 self.interpreter.interatom.unit_vectors(ave=False)
683
684
685
686 self.interpreter.minimise.execute("simplex", constraints=False)
687
688
689 q_factors.append([cdp.q_rdc_norm_squared_sum, ens])
690 out.write("%-20i%20.15f%20.15f\n" % (ens, cdp.q_rdc_norm_squared_sum, cdp.q_rdc_norm_squared_sum))
691
692
693 cdp.align_tensor_Hz = d * cdp.align_tensors[0].A
694 cdp.align_tensor_Hz_5D = d * cdp.align_tensors[0].A_5D
695
696
697 self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
698
699
700 q_factors.sort()
701
702
703 for i in range(len(q_factors)):
704 out_sorted.write("%-20i%20.15f\n" % (q_factors[i][1], q_factors[i][0]))
705
706
708 """Generate the ensembles by random sampling of the snapshots."""
709
710
711 mkdir_nofail(dir=self.results_dir + sep + "ensembles")
712
713
714 for conf_index in range(len(self.configs)):
715
716 for ens in range(self.num_ens):
717
718 rand = []
719 for j in range(self.num_models):
720 rand.append(randint(self.snapshot_min[conf_index], self.snapshot_max[conf_index]))
721
722
723 print("Generating ensemble %s%s from structures %s." % (self.configs[conf_index], ens, rand))
724
725
726 file_name = "ensembles" + sep + self.configs[conf_index] + repr(ens) + ".pdb"
727
728
729 out = open(self.results_dir+sep+file_name, 'w')
730
731
732 out.write("REM Structures: " + repr(rand) + "\n")
733
734
735 for j in range(self.num_models):
736
737 rand_name = self.snapshot_dir[conf_index] + sep + self.configs[conf_index] + repr(rand[j]) + ".pdb"
738
739
740 out.write(open(rand_name).read())
741
742
743 out.close()
744
745
747 """Superimpose the ensembles using fit to first in Molmol."""
748
749
750 mkdir_nofail("ensembles_superimposed")
751
752
753 if self.log:
754 log = open(self.results_dir+sep+"logs" + sep + "superimpose_molmol.stderr", 'w')
755 sys.stdout = open(self.results_dir+sep+"logs" + sep + "superimpose.log", 'w')
756
757
758 for config in ["R", "S"]:
759
760 for ens in range(self.num_ens):
761
762 file_in = "ensembles" + sep + config + repr(ens) + ".pdb"
763 file_out = "ensembles_superimposed" + sep + config + repr(ens) + ".pdb"
764
765
766 sys.stderr.write("Superimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
767 if self.log:
768 log.write("\n\n\nSuperimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
769
770
771 if access(self.results_dir+sep+file_out, F_OK):
772 continue
773
774
775 pipe = Popen("molmol -t -f -", shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=False)
776
777
778 pipe.stdin.write("InitAll yes\n")
779
780
781 pipe.stdin.write("ReadPdb " + self.results_dir+sep+file_in + "\n")
782
783
784 pipe.stdin.write("Fit to_first 'selected'\n")
785 pipe.stdin.write("Fit to_mean 'selected'\n")
786
787
788 pipe.stdin.write("WritePdb " + self.results_dir+sep+file_out + "\n")
789
790
791 pipe.stdin.close()
792
793
794 sys.stdout.write(pipe.stdout.read())
795 if self.log:
796 log.write(pipe.stderr.read())
797
798
799 pipe.stdout.close()
800 pipe.stderr.close()
801
802
803 self.interpreter.reset()
804 self.interpreter.pipe.create('out', 'N-state')
805 self.interpreter.structure.read_pdb(file_out)
806
807
808 for model in cdp.structure.structural_data:
809
810 mol = model.mol[0]
811
812
813 for i in range(len(mol.atom_name)):
814
815 if search('H', mol.atom_name[i]):
816 mol.atom_name[i] = mol.atom_name[i][1:] + mol.atom_name[i][0]
817
818
819 self.interpreter.structure.write_pdb(config + repr(ens) + ".pdb", dir=self.results_dir+sep+"ensembles_superimposed", force=True)
820