1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """Determination of relative stereochemistry in organic molecules.
24
25 The analysis is preformed by using multiple ensembles of structures, randomly sampled from a given set of structures. The discrimination is performed by comparing the sets of ensembles using NOE violations and RDC Q factors.
26
27 This script is split into multiple stages:
28
29 1. The random sampling of the snapshots to generate the N ensembles (NUM_ENS, usually 10,000 to 100,000) of M members (NUM_MODELS, usually ~10). The original snapshot files are expected to be named the SNAPSHOT_DIR + CONFIG + a number from SNAPSHOT_MIN to SNAPSHOT_MAX + ".pdb", e.g. "snapshots/R647.pdb". The ensembles will be placed into the "ensembles" directory.
30
31 2. The NOE violation analysis.
32
33 3. The superimposition of ensembles. For each ensemble, Molmol is used for superimposition using the fit to first algorithm. The superimposed ensembles will be placed into the "ensembles_superimposed" directory. This stage is not necessary for the NOE analysis.
34
35 4. The RDC Q factor analysis.
36
37 5. Generation of Grace graphs.
38
39 6. Final ordering of ensembles using the combined RDC and NOE Q factors, whereby the NOE Q factor is defined as::
40
41 Q^2 = U / sum(NOE_i^2),
42
43 where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2. The denominator is the sum of all squared NOEs - this must be given as the value of NOE_NORM. The combined Q is given by::
44
45 Q_total^2 = Q_NOE^2 + Q_RDC^2.
46 """
47
48
49 import dep_check
50
51
52 from math import pi, sqrt
53 from os import F_OK, access, getcwd, sep
54 from random import randint
55 from re import search
56 if dep_check.subprocess_module:
57 from subprocess import PIPE, Popen
58 import sys
59 from time import time
60
61
62 from lib.periodic_table import periodic_table
63 from lib.physical_constants import dipolar_constant
64 from lib.plotting.api import write_xy_data, write_xy_header
65 from lib.text.sectioning import title
66 from lib.timing import print_elapsed_time
67 from prompt.interpreter import Interpreter
68 from lib.errors import RelaxError
69 from lib.io import mkdir_nofail
70 from status import Status; status = Status()
71
72
73
75 """Class for performing the relative stereochemistry analysis."""
76
77 - def __init__(self, stage=1, results_dir=None, num_ens=10000, num_models=10, configs=None, snapshot_dir='snapshots', snapshot_min=None, snapshot_max=None, pseudo=None, noe_file=None, noe_norm=None, rdc_name=None, rdc_file=None, rdc_spin_id1_col=None, rdc_spin_id2_col=None, rdc_data_col=None, rdc_error_col=None, bond_length=None, bond_length_file=None, log=None, bucket_num=200, lower_lim_noe=0.0, upper_lim_noe=600.0, lower_lim_rdc=0.0, upper_lim_rdc=1.0):
78 """Set up for the stereochemistry analysis.
79
80 @keyword stage: Stage of analysis (see the module docstring above for the options).
81 @type stage: int
82 @keyword results_dir: The optional directory to place all results files into.
83 @type results_dir: None or str
84 @keyword num_ens: Number of ensembles.
85 @type num_ens: int
86 @keyword num_models: Ensemble size.
87 @type num_models: int
88 @keyword configs: All the configurations.
89 @type configs: list of str
90 @keyword snapshot_dir: Snapshot directories (corresponding to the configurations).
91 @type snapshot_dir: list of str
92 @keyword snapshot_min: The number of the first snapshots (corresponding to the configurations).
93 @type snapshot_min: list of int
94 @keyword snapshot_max: The number of the last snapshots (corresponding to the configurations).
95 @type snapshot_max: list of int
96 @keyword pseudo: The list of pseudo-atoms. Each element is a list of the pseudo-atom name and a list of all those atoms forming the pseudo-atom. For example, pseudo = [["Q7", ["@H16", "@H17", "@H18"]], ["Q9", ["@H20", "@H21", "@H22"]]].
97 @type pseudo: list of list of str and list of str
98 @keyword noe_file: The name of the NOE restraint file.
99 @type noe_file: str
100 @keyword noe_norm: The NOE normalisation factor (equal to the sum of all NOEs squared).
101 @type noe_norm: float
102 @keyword rdc_name: The label for this RDC data set.
103 @type rdc_name: str
104 @keyword rdc_file: The name of the RDC file.
105 @type rdc_file: str
106 @keyword rdc_spin_id1_col: The spin ID column of the first spin in the RDC file.
107 @type rdc_spin_id1_col: None or int
108 @keyword rdc_spin_id2_col: The spin ID column of the second spin in the RDC file.
109 @type rdc_spin_id2_col: None or int
110 @keyword rdc_data_col: The data column of the RDC file.
111 @type rdc_data_col: int
112 @keyword rdc_error_col: The error column of the RDC file.
113 @type rdc_error_col: int
114 @keyword bond_length: The bond length value in meters. This overrides the bond_length_file argument.
115 @type bond_length: float or None
116 @keyword bond_length_file: The file of bond lengths for each atom pair in meters. The first and second columns must be the spin ID strings and the third column must contain the data.
117 @type bond_length_file: float or None
118 @keyword log: Log file output flag (only for certain stages).
119 @type log: bool
120 @keyword bucket_num: Number of buckets for the distribution plots.
121 @type bucket_num: int
122 @keyword lower_lim_noe: Distribution plot limits.
123 @type lower_lim_noe: int
124 @keyword upper_lim_noe: Distribution plot limits.
125 @type upper_lim_noe: int
126 @keyword lower_lim_rdc: Distribution plot limits.
127 @type lower_lim_rdc: int
128 @keyword upper_lim_rdc: Distribution plot limits.
129 @type upper_lim_rdc: int
130 """
131
132
133 title(file=sys.stdout, text="Stereochemistry auto-analysis")
134
135
136 try:
137
138 status.exec_lock.acquire('auto stereochem analysis', mode='auto-analysis')
139
140
141 status.init_auto_analysis('stereochem', type='stereochem')
142 status.current_analysis = 'auto stereochem analysis'
143
144
145 self.stage = stage
146 self.results_dir = results_dir
147 self.num_ens = num_ens
148 self.num_models = num_models
149 self.configs = configs
150 self.snapshot_dir = snapshot_dir
151 self.snapshot_min = snapshot_min
152 self.snapshot_max = snapshot_max
153 self.pseudo = pseudo
154 self.noe_file = noe_file
155 self.noe_norm = noe_norm
156 self.rdc_name = rdc_name
157 self.rdc_file = rdc_file
158 self.rdc_spin_id1_col = rdc_spin_id1_col
159 self.rdc_spin_id2_col = rdc_spin_id2_col
160 self.rdc_data_col = rdc_data_col
161 self.rdc_error_col = rdc_error_col
162 self.bond_length = bond_length
163 self.bond_length_file = bond_length_file
164 self.log = log
165 self.bucket_num = bucket_num
166 self.lower_lim_noe = lower_lim_noe
167 self.upper_lim_noe = upper_lim_noe
168 self.lower_lim_rdc = lower_lim_rdc
169 self.upper_lim_rdc = upper_lim_rdc
170
171
172 self.interpreter = Interpreter(show_script=False, raise_relax_error=True)
173 self.interpreter.populate_self()
174 self.interpreter.on(verbose=False)
175
176
177 if self.results_dir:
178 mkdir_nofail(self.results_dir)
179
180
181 else:
182 self.results_dir = getcwd()
183
184
185 if self.log:
186 mkdir_nofail(self.results_dir + sep + "logs")
187
188
189 finally:
190
191 title(file=sys.stdout, text="Completion of the stereochemistry auto-analysis")
192 print_elapsed_time(time() - status.start_time)
193
194
195 status.auto_analysis['stereochem'].fin = True
196 status.current_analysis = None
197 status.exec_lock.release()
198
199
201 """Execute the given stage of the analysis."""
202
203
204 self.stdout_orig = sys.stdout
205
206
207 if self.stage == 1:
208 self.sample()
209
210
211 elif self.stage == 2:
212 self.noe_viol()
213
214
215 elif self.stage == 3:
216 self.superimpose()
217
218
219 elif self.stage == 4:
220 self.rdc_analysis()
221
222
223 elif self.stage == 5:
224 self.grace_plots()
225
226
227 elif self.stage == 6:
228 self.combined_q()
229
230
231 else:
232 raise RelaxError("The stage number %s is unknown." % self.stage)
233
234
235 sys.stdout = self.stdout_orig
236
237
239 """Calculate the combined Q factor.
240
241 The combined Q is defined as::
242
243 Q_total^2 = Q_NOE^2 + Q_RDC^2,
244
245 and the NOE Q factor as::
246
247 Q^2 = U / sum(NOE_i^2),
248
249 where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.
250 """
251
252
253 if not access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
254 raise RelaxError("The NOE analysis has not been performed, cannot find the file '%s'." % self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted")
255 if not access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
256 raise RelaxError("The RDC analysis has not been performed, cannot find the file '%s'." % self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted")
257
258
259 for i in range(len(self.configs)):
260
261 print("Creating the combined Q factor file for configuration '%s'." % self.configs[i])
262
263
264 file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
265 noe_lines = file.readlines()
266 file.close()
267
268
269 file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
270 rdc_lines = file.readlines()
271 file.close()
272
273
274 out = open(self.results_dir+sep+"Q_total_%s" % self.configs[i], 'w')
275 out_sorted = open(self.results_dir+sep+"Q_total_%s_sorted" % self.configs[i], 'w')
276
277
278 data = []
279 for j in range(1, len(noe_lines)):
280
281 ens = int(noe_lines[j].split()[0])
282 noe_viol = float(noe_lines[j].split()[1])
283 q_rdc = float(rdc_lines[j].split()[1])
284
285
286 q_noe = sqrt(noe_viol/self.noe_norm)
287
288
289 q = sqrt(q_noe**2 + q_rdc**2)
290
291
292 out.write("%-20i%20.15f\n" % (ens, q))
293
294
295 data.append([q, ens])
296
297
298 data.sort()
299
300
301 for i in range(len(data)):
302 out_sorted.write("%-20i%20.15f\n" % (data[i][1], data[i][0]))
303
304
305 out.close()
306 out_sorted.close()
307
308
310 """Create the distribution data structure."""
311
312
313 bin_width = (upper - lower)/float(inc)
314
315
316 dist = []
317 for i in range(inc):
318 dist.append([bin_width*i+lower, 0])
319
320
321 for val in values:
322
323 bin = int((val - lower)/bin_width)
324
325
326 if bin < 0 or bin >= inc:
327 print("Outside of the limits: '%s'" % val)
328 continue
329
330
331 dist[bin][1] = dist[bin][1] + 1
332
333
334 total_pr = 0.0
335 for i in range(inc):
336 dist[i][1] = dist[i][1] / float(len(values))
337 total_pr = total_pr + dist[i][1]
338
339 print("Total Pr: %s" % total_pr)
340
341
342 return dist
343
344
346 """Generate grace plots of the results."""
347
348
349 n = len(self.configs)
350
351
352 defaults = [4, 2]
353 colours = []
354 for i in range(n):
355
356 if i < len(defaults):
357 colours.append(defaults[i])
358
359
360 else:
361 colours.append(0)
362
363
364 ens_text = ''
365 dividers = [1e15, 1e12, 1e9, 1e6, 1e3, 1]
366 num_ens = self.num_ens
367 for i in range(len(dividers)):
368
369 num = int(num_ens / dividers[i])
370
371
372 if num:
373 text = repr(num)
374 elif not num and ens_text:
375 text = '000'
376 else:
377 continue
378
379
380 ens_text = ens_text + text
381
382
383 if i < len(dividers)-1:
384 ens_text = ens_text + ','
385
386
387 num_ens = num_ens - dividers[i]*num
388
389
390 subtitle = '%s ensembles of %s' % (ens_text, self.num_models)
391
392
393 if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
394
395 print("Generating NOE violation Grace plots.")
396
397
398 grace_curve = open(self.results_dir+sep+"NOE_viol_curve.agr", 'w')
399 grace_dist = open(self.results_dir+sep+"NOE_viol_dist.agr", 'w')
400
401
402 data = []
403 dist = []
404 for i in range(n):
405
406 file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i] + "_sorted")
407 lines = file.readlines()
408 file.close()
409
410
411 data.append([])
412
413
414 noe_viols = []
415 for j in range(1, len(lines)):
416
417 viol = float(lines[j].split()[1])
418 noe_viols.append(viol)
419
420
421 data[i].append([j, viol])
422
423
424 dist.append(self.generate_distribution(noe_viols, inc=self.bucket_num, upper=self.upper_lim_noe, lower=self.lower_lim_noe))
425
426
427 write_xy_header(format='grace', file=grace_curve, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0]*n], axis_labels=[['Ensemble (sorted)', 'NOE violation (Angstrom\\S2\\N)']], legend_pos=[[0.3, 0.8]])
428 write_xy_header(format='grace', file=grace_dist, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1]*n], symbol_sizes=[[0.5]*n], linestyle=[[3]*n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', 'Frequency']], legend_pos=[[1.1, 0.8]])
429
430
431 write_xy_data(format='grace', data=[data], file=grace_curve, graph_type='xy')
432 write_xy_data(format='grace', data=[dist], file=grace_dist, graph_type='xy')
433
434
435 grace_curve.close()
436 grace_dist.close()
437
438
439 if access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
440
441 print("Generating RDC Q factor Grace plots.")
442
443
444 grace_curve = open(self.results_dir+sep+"RDC_%s_curve.agr" % self.rdc_name, 'w')
445 grace_dist = open(self.results_dir+sep+"RDC_%s_dist.agr" % self.rdc_name, 'w')
446
447
448 data = []
449 dist = []
450 for i in range(n):
451
452 file = open(self.results_dir+sep+"Q_factors_" + self.configs[i] + "_sorted")
453 lines = file.readlines()
454 file.close()
455
456
457 data.append([])
458
459
460 values = []
461 for j in range(1, len(lines)):
462
463 value = float(lines[j].split()[1])
464 values.append(value)
465
466
467 data[i].append([j, value])
468
469
470 dist.append(self.generate_distribution(values, inc=self.bucket_num, upper=self.upper_lim_rdc, lower=self.lower_lim_rdc))
471
472
473 write_xy_header(format='grace', file=grace_curve, title='%s RDC Q factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0]*n], axis_labels=[['Ensemble (sorted)', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[0.3, 0.8]])
474 write_xy_header(format='grace', file=grace_dist, title='%s RDC Q factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1]*n], symbol_sizes=[[0.5]*n], linestyle=[[3]*n], axis_labels=[['%s RDC Q factor (pales format)' % self.rdc_name, 'Frequency']], legend_pos=[[1.1, 0.8]])
475
476
477 write_xy_data(format='grace', data=[data], file=grace_curve, graph_type='xy')
478 write_xy_data(format='grace', data=[dist], file=grace_dist, graph_type='xy')
479
480
481 grace_curve.close()
482 grace_dist.close()
483
484
485 if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK) and access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
486
487 print("Generating NOE-RDC correlation Grace plots.")
488
489
490 grace_file = open(self.results_dir+sep+"correlation_plot.agr", 'w')
491 grace_file_scaled = open(self.results_dir+sep+"correlation_plot_scaled.agr", 'w')
492
493
494 data = []
495 data_scaled = []
496 for i in range(len(self.configs)):
497
498 file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
499 noe_lines = file.readlines()
500 file.close()
501
502
503 data.append([])
504 data_scaled.append([])
505
506
507 file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
508 rdc_lines = file.readlines()
509 file.close()
510
511
512 for j in range(1, len(noe_lines)):
513
514 noe_viol = float(noe_lines[j].split()[1])
515 q_factor = float(rdc_lines[j].split()[1])
516
517
518 data[i].append([noe_viol, q_factor])
519 data_scaled[i].append([sqrt(noe_viol/self.noe_norm), q_factor])
520
521
522 write_xy_header(format='grace', file=grace_file, title='Correlation plot - %s RDC vs. NOE' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
523 write_xy_header(format='grace', file=grace_file_scaled, title='Correlation plot - %s RDC vs. NOE Q factor' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['Normalised NOE violation (Q = sqrt(U / \\xS\\f{}NOE\\si\\N\\S2\\N))', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
524 write_xy_data(format='grace', data=[data], file=grace_file, graph_type='xy')
525 write_xy_data(format='grace', data=[data_scaled], file=grace_file_scaled, graph_type='xy')
526
527
529 """NOE violation calculations."""
530
531
532 if self.log:
533 sys.stdout = open(self.results_dir+sep+"logs" + sep + "NOE_viol.log", 'w')
534
535
536 dir = self.results_dir + sep + "NOE_results"
537 mkdir_nofail(dir=dir)
538
539
540 for config in self.configs:
541
542 print("\n"*10 + "# Set up for config " + config + " #" + "\n")
543
544
545 out = open(self.results_dir+sep+"NOE_viol_" + config, 'w')
546 out_sorted = open(self.results_dir+sep+"NOE_viol_" + config + "_sorted", 'w')
547 out.write("%-20s%20s\n" % ("# Ensemble", "NOE_volation"))
548 out_sorted.write("%-20s%20s\n" % ("# Ensemble", "NOE_volation"))
549
550
551 self.interpreter.pipe.create("noe_viol_%s" % config, "N-state")
552
553
554 self.interpreter.structure.read_pdb("ensembles" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))
555
556
557 self.interpreter.structure.load_spins("@H*", ave_pos=False)
558
559
560 for i in range(len(self.pseudo)):
561 self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
562 self.interpreter.sequence.display()
563
564
565 self.interpreter.noe.read_restraints(file=self.noe_file)
566
567
568 self.interpreter.n_state_model.select_model(model="fixed")
569
570
571 print("\n"*2 + "# Set up complete #" + "\n"*10)
572
573
574 noe_viol = []
575 for ens in range(self.num_ens):
576
577 if self.log:
578 sys.stdout.write(config + repr(ens) + "\n")
579 sys.stderr.write(config + repr(ens) + "\n")
580
581
582 self.interpreter.structure.delete()
583
584
585 self.interpreter.structure.read_pdb("ensembles" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))
586
587
588 self.interpreter.structure.get_pos(ave_pos=False)
589
590
591 self.interpreter.minimise.calculate()
592
593
594 cdp.sum_viol = 0.0
595 for i in range(len(cdp.ave_dist)):
596 if cdp.quad_pot[i][2]:
597 cdp.sum_viol = cdp.sum_viol + cdp.quad_pot[i][2]
598
599
600 noe_viol.append([cdp.sum_viol, ens])
601 out.write("%-20i%30.15f\n" % (ens, cdp.sum_viol))
602
603
604 self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
605
606
607 noe_viol.sort()
608
609
610 for i in range(len(noe_viol)):
611 out_sorted.write("%-20i%20.15f\n" % (noe_viol[i][1], noe_viol[i][0]))
612
613
615 """Perform the RDC part of the analysis."""
616
617
618 if self.log:
619 sys.stdout = open(self.results_dir+sep+"logs" + sep + "RDC_%s_analysis.log" % self.rdc_name, 'w')
620
621
622 d = 0.0
623 if self.bond_length != None:
624 d = 3.0 / (2.0*pi) * dipolar_constant(periodic_table.gyromagnetic_ratio('13C'), periodic_table.gyromagnetic_ratio('1H'), self.bond_length)
625
626
627 dir = self.results_dir + sep + "RDC_%s_results" % self.rdc_name
628 mkdir_nofail(dir=dir)
629
630
631 for config in self.configs:
632
633 print("\n"*10 + "# Set up for config " + config + " #" + "\n")
634
635
636 out = open(self.results_dir+sep+"Q_factors_" + config, 'w')
637 out_sorted = open(self.results_dir+sep+"Q_factors_" + config + "_sorted", 'w')
638 out.write("%-20s%20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)", "RDC_Q_factor(standard)"))
639 out_sorted.write("%-20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)"))
640
641
642 self.interpreter.pipe.create("rdc_analysis_%s" % config, "N-state")
643
644
645 self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))
646
647
648 self.interpreter.structure.load_spins(ave_pos=False)
649
650
651 for i in range(len(self.pseudo)):
652 self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
653 self.interpreter.sequence.display()
654
655
656 self.interpreter.rdc.read(align_id=self.rdc_file, file=self.rdc_file, spin_id1_col=self.rdc_spin_id1_col, spin_id2_col=self.rdc_spin_id2_col, data_col=self.rdc_data_col, error_col=self.rdc_error_col)
657
658
659 if self.bond_length != None:
660 self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
661 self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@Q*', ave_dist=self.bond_length)
662 else:
663 self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)
664
665
666 self.interpreter.spin.isotope(isotope='13C', spin_id='@C*')
667 self.interpreter.spin.isotope(isotope='1H', spin_id='@H*')
668 self.interpreter.spin.isotope(isotope='1H', spin_id='@Q*')
669
670
671 self.interpreter.n_state_model.select_model(model="fixed")
672
673
674 print("\n"*2 + "# Set up complete #" + "\n"*10)
675
676
677 q_factors = []
678 for ens in range(self.num_ens):
679
680 if self.log:
681 sys.stdout.write(config + repr(ens) + "\n")
682 sys.stderr.write(config + repr(ens) + "\n")
683
684
685 self.interpreter.structure.delete()
686
687
688 self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))
689
690
691 self.interpreter.structure.get_pos(ave_pos=False)
692 if self.bond_length != None:
693 self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
694 else:
695 self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)
696 self.interpreter.interatom.unit_vectors(ave=False)
697
698
699
700 self.interpreter.minimise.execute("simplex", constraints=False)
701
702
703 q_factors.append([cdp.q_rdc_norm_squared_sum, ens])
704 out.write("%-20i%20.15f%20.15f\n" % (ens, cdp.q_rdc_norm_squared_sum, cdp.q_rdc_norm_squared_sum))
705
706
707 cdp.align_tensor_Hz = d * cdp.align_tensors[0].A
708 cdp.align_tensor_Hz_5D = d * cdp.align_tensors[0].A_5D
709
710
711 self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
712
713
714 q_factors.sort()
715
716
717 for i in range(len(q_factors)):
718 out_sorted.write("%-20i%20.15f\n" % (q_factors[i][1], q_factors[i][0]))
719
720
722 """Generate the ensembles by random sampling of the snapshots."""
723
724
725 mkdir_nofail(dir=self.results_dir + sep + "ensembles")
726
727
728 for conf_index in range(len(self.configs)):
729
730 for ens in range(self.num_ens):
731
732 rand = []
733 for j in range(self.num_models):
734 rand.append(randint(self.snapshot_min[conf_index], self.snapshot_max[conf_index]))
735
736
737 print("Generating ensemble %s%s from structures %s." % (self.configs[conf_index], ens, rand))
738
739
740 file_name = "ensembles" + sep + self.configs[conf_index] + repr(ens) + ".pdb"
741
742
743 out = open(self.results_dir+sep+file_name, 'w')
744
745
746 out.write("REM Structures: " + repr(rand) + "\n")
747
748
749 for j in range(self.num_models):
750
751 rand_name = self.snapshot_dir[conf_index] + sep + self.configs[conf_index] + repr(rand[j]) + ".pdb"
752
753
754 out.write(open(rand_name).read())
755
756
757 out.close()
758
759
761 """Superimpose the ensembles using fit to first in Molmol."""
762
763
764 mkdir_nofail("ensembles_superimposed")
765
766
767 if self.log:
768 log = open(self.results_dir+sep+"logs" + sep + "superimpose_molmol.stderr", 'w')
769 sys.stdout = open(self.results_dir+sep+"logs" + sep + "superimpose.log", 'w')
770
771
772 for config in ["R", "S"]:
773
774 for ens in range(self.num_ens):
775
776 file_in = "ensembles" + sep + config + repr(ens) + ".pdb"
777 file_out = "ensembles_superimposed" + sep + config + repr(ens) + ".pdb"
778
779
780 sys.stderr.write("Superimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
781 if self.log:
782 log.write("\n\n\nSuperimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
783
784
785 if access(self.results_dir+sep+file_out, F_OK):
786 continue
787
788
789 pipe = Popen("molmol -t -f -", shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=False)
790
791
792 pipe.stdin.write("InitAll yes\n")
793
794
795 pipe.stdin.write("ReadPdb " + self.results_dir+sep+file_in + "\n")
796
797
798 pipe.stdin.write("Fit to_first 'selected'\n")
799 pipe.stdin.write("Fit to_mean 'selected'\n")
800
801
802 pipe.stdin.write("WritePdb " + self.results_dir+sep+file_out + "\n")
803
804
805 pipe.stdin.close()
806
807
808 sys.stdout.write(pipe.stdout.read())
809 if self.log:
810 log.write(pipe.stderr.read())
811
812
813 pipe.stdout.close()
814 pipe.stderr.close()
815
816
817 self.interpreter.reset()
818 self.interpreter.pipe.create('out', 'N-state')
819 self.interpreter.structure.read_pdb(file_out)
820
821
822 for model in cdp.structure.structural_data:
823
824 mol = model.mol[0]
825
826
827 for i in range(len(mol.atom_name)):
828
829 if search('H', mol.atom_name[i]):
830 mol.atom_name[i] = mol.atom_name[i][1:] + mol.atom_name[i][0]
831
832
833 self.interpreter.structure.write_pdb(config + repr(ens) + ".pdb", dir=self.results_dir+sep+"ensembles_superimposed", force=True)
834