1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """Determination of relative stereochemistry in organic molecules.
25
26 The analysis is preformed by using multiple ensembles of structures, randomly sampled from a given
27 set of structures. The discrimination is performed by comparing the sets of ensembles using NOE
28 violations and RDC Q-factors.
29
30 This script is split into multiple stages:
31
32 1. The random sampling of the snapshots to generate the N ensembles (NUM_ENS, usually 10,000 to
33 100,000) of M members (NUM_MODELS, usually ~10). The original snapshot files are expected to be
34 named the SNAPSHOT_DIR + CONFIG + a number from SNAPSHOT_MIN to SNAPSHOT_MAX + ".pdb", e.g.
35 "snapshots/R647.pdb". The ensembles will be placed into the "ensembles" directory.
36
37 2. The NOE violation analysis.
38
39 3. The superimposition of ensembles. For each ensemble, Molmol is used for superimposition
40 using the fit to first algorithm. The superimposed ensembles will be placed into the
41 "ensembles_superimposed" directory. This stage is not necessary for the NOE analysis.
42
43 4. The RDC Q-factor analysis.
44
45 5. Generation of Grace graphs.
46
47 6. Final ordering of ensembles using the combined RDC and NOE Q-factors, whereby the NOE
48 Q-factor is defined as::
49
50 Q^2 = U / sum(NOE_i^2),
51
52 where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2. The
53 denominator is the sum of all squared NOEs - this must be given as the value of NOE_NORM. The
54 combined Q is given by::
55
56 Q_total^2 = Q_NOE^2 + Q_RDC^2.
57 """
58
59
60 from math import pi, sqrt
61 from os import F_OK, access, getcwd, popen3, sep
62 from random import randint
63 from re import search
64 from string import split
65 import sys
66
67
68 from generic_fns import pipes
69 from generic_fns.grace import write_xy_data, write_xy_header
70 from generic_fns.selection import spin_loop
71 from physical_constants import dipolar_constant, g1H, g13C
72 from prompt.interpreter import Interpreter
73 from relax_errors import RelaxError
74 from relax_io import mkdir_nofail
75 from status import Status; status = Status()
76
77
78
80 """Class for performing the relative stereochemistry analysis."""
81
82 - def __init__(self, stage=1, results_dir=None, num_ens=10000, num_models=10, configs=None, snapshot_dir='snapshots', snapshot_min=None, snapshot_max=None, pseudo=None, noe_file=None, noe_norm=None, rdc_name=None, rdc_file=None, rdc_spin_id_col=None, rdc_mol_name_col=None, rdc_res_num_col=None, rdc_res_name_col=None, rdc_spin_num_col=None, rdc_spin_name_col=None, rdc_data_col=None, rdc_error_col=None, bond_length=None, log=None, bucket_num=200, lower_lim_noe=0.0, upper_lim_noe=600.0, lower_lim_rdc=0.0, upper_lim_rdc=1.0):
83 """Set up for the stereochemistry analysis.
84
85 @keyword stage: Stage of analysis (see the module docstring above for the options).
86 @type stage: int
87 @keyword results_dir: The optional directory to place all results files into.
88 @type results_dir: None or str
89 @keyword num_ens: Number of ensembles.
90 @type num_ens: int
91 @keyword num_models: Ensemble size.
92 @type num_models: int
93 @keyword configs: All the configurations.
94 @type configs: list of str
95 @keyword snapshot_dir: Snapshot directories (corresponding to the configurations).
96 @type snapshot_dir: list of str
97 @keyword snapshot_min: The number of the first snapshots (corresponding to the configurations).
98 @type snapshot_min: list of int
99 @keyword snapshot_max: The number of the last snapshots (corresponding to the configurations).
100 @type snapshot_max: list of int
101 @keyword pseudo: The list of pseudo-atoms. Each element is a list of the pseudo-atom name and a list of all those atoms forming the pseudo-atom. For example, pseudo = [["Q7", ["@H16", "@H17", "@H18"]], ["Q9", ["@H20", "@H21", "@H22"]]].
102 @type pseudo: list of list of str and list of str
103 @keyword noe_file: The name of the NOE restraint file.
104 @type noe_file: str
105 @keyword noe_norm: The NOE normalisation factor (equal to the sum of all NOEs squared).
106 @type noe_norm: float
107 @keyword rdc_name: The label for this RDC data set.
108 @type rdc_name: str
109 @keyword rdc_file: The name of the RDC file.
110 @type rdc_file: str
111 @keyword rdc_spin_id_col: The spin ID column of the RDC file.
112 @type rdc_spin_id_col: None or int
113 @keyword rdc_mol_name_col: The molecule name column of the RDC file.
114 @type rdc_mol_name_col: None or int
115 @keyword rdc_res_num_col: The residue number column of the RDC file.
116 @type rdc_res_num_col: None or int
117 @keyword rdc_res_name_col: The residue name column of the RDC file.
118 @type rdc_res_name_col: None or int
119 @keyword rdc_spin_num_col: The spin number column of the RDC file.
120 @type rdc_spin_num_col: None or int
121 @keyword rdc_spin_name_col: The spin name column of the RDC file.
122 @type rdc_spin_name_col: None or int
123 @keyword rdc_data_col: The data column of the RDC file.
124 @type rdc_data_col: int
125 @keyword rdc_error_col: The error column of the RDC file.
126 @type rdc_error_col: int
127 @keyword bond_length: The bond length value in meters.
128 @type bond_length: float
129 @keyword log: Log file output flag (only for certain stages).
130 @type log: bool
131 @keyword bucket_num: Number of buckets for the distribution plots.
132 @type bucket_num: int
133 @keyword lower_lim_noe: Distribution plot limits.
134 @type lower_lim_noe: int
135 @keyword upper_lim_noe: Distribution plot limits.
136 @type upper_lim_noe: int
137 @keyword lower_lim_rdc: Distribution plot limits.
138 @type lower_lim_rdc: int
139 @keyword upper_lim_rdc: Distribution plot limits.
140 @type upper_lim_rdc: int
141 """
142
143
144 status.exec_lock.acquire('auto stereochem analysis', mode='auto-analysis')
145
146
147 status.init_auto_analysis('stereochem', type='stereochem')
148 status.current_analysis = 'auto stereochem analysis'
149
150
151 self.stage = stage
152 self.results_dir = results_dir
153 self.num_ens = num_ens
154 self.num_models = num_models
155 self.configs = configs
156 self.snapshot_dir = snapshot_dir
157 self.snapshot_min = snapshot_min
158 self.snapshot_max = snapshot_max
159 self.pseudo = pseudo
160 self.noe_file = noe_file
161 self.noe_norm = noe_norm
162 self.rdc_name = rdc_name
163 self.rdc_file = rdc_file
164 self.rdc_spin_id_col = rdc_spin_id_col
165 self.rdc_mol_name_col = rdc_mol_name_col
166 self.rdc_res_num_col = rdc_res_num_col
167 self.rdc_res_name_col = rdc_res_name_col
168 self.rdc_spin_num_col = rdc_spin_num_col
169 self.rdc_spin_name_col = rdc_spin_name_col
170 self.rdc_data_col = rdc_data_col
171 self.rdc_error_col = rdc_error_col
172 self.bond_length = bond_length
173 self.log = log
174 self.bucket_num = bucket_num
175 self.lower_lim_noe = lower_lim_noe
176 self.upper_lim_noe = upper_lim_noe
177 self.lower_lim_rdc = lower_lim_rdc
178 self.upper_lim_rdc = upper_lim_rdc
179
180
181 self.interpreter = Interpreter(show_script=False, quit=False, raise_relax_error=True)
182 self.interpreter.populate_self()
183 self.interpreter.on(verbose=False)
184
185
186 if self.results_dir:
187 mkdir_nofail(self.results_dir)
188
189
190 else:
191 self.results_dir = getcwd()
192
193
194 if self.log:
195 mkdir_nofail(self.results_dir + sep + "logs")
196
197
198 status.auto_analysis['stereochem'].fin = True
199 status.current_analysis = None
200 status.exec_lock.release()
201
202
204 """Execute the given stage of the analysis."""
205
206
207 self.stdout_orig = sys.stdout
208
209
210 if self.stage == 1:
211 self.sample()
212
213
214 elif self.stage == 2:
215 self.noe_viol()
216
217
218 elif self.stage == 3:
219 self.superimpose()
220
221
222 elif self.stage == 4:
223 self.rdc_analysis()
224
225
226 elif self.stage == 5:
227 self.grace_plots()
228
229
230 elif self.stage == 6:
231 self.combined_q()
232
233
234 else:
235 raise RelaxError("The stage number %s is unknown." % self.stage)
236
237
238 sys.stdout = self.stdout_orig
239
240
242 """Calculate the combined Q-factor.
243
244 The combined Q is defined as::
245
246 Q_total^2 = Q_NOE^2 + Q_RDC^2,
247
248 and the NOE Q-factor as::
249
250 Q^2 = U / sum(NOE_i^2),
251
252 where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.
253 """
254
255
256 if not access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
257 raise RelaxError("The NOE analysis has not been performed, cannot find the file '%s'." % self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted")
258 if not access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
259 raise RelaxError("The RDC analysis has not been performed, cannot find the file '%s'." % self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted")
260
261
262 for i in range(len(self.configs)):
263
264 print("Creating the combined Q-factor file for configuration '%s'." % self.configs[i])
265
266
267 file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
268 noe_lines = file.readlines()
269 file.close()
270
271
272 file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
273 rdc_lines = file.readlines()
274 file.close()
275
276
277 out = open(self.results_dir+sep+"Q_total_%s" % self.configs[i], 'w')
278 out_sorted = open(self.results_dir+sep+"Q_total_%s_sorted" % self.configs[i], 'w')
279
280
281 data = []
282 for j in range(1, len(noe_lines)):
283
284 ens = int(split(noe_lines[j])[0])
285 noe_viol = float(split(noe_lines[j])[1])
286 q_rdc = float(split(rdc_lines[j])[1])
287
288
289 q_noe = sqrt(noe_viol/self.noe_norm)
290
291
292 q = sqrt(q_noe**2 + q_rdc**2)
293
294
295 out.write("%-20i%20.15f\n" % (ens, q))
296
297
298 data.append([q, ens])
299
300
301 data.sort()
302
303
304 for i in range(len(data)):
305 out_sorted.write("%-20i%20.15f\n" % (data[i][1], data[i][0]))
306
307
308 out.close()
309 out_sorted.close()
310
311
313 """Create the distribution data structure."""
314
315
316 bin_width = (upper - lower)/float(inc)
317
318
319 dist = []
320 for i in range(inc):
321 dist.append([bin_width*i+lower, 0])
322
323
324 for val in values:
325
326 bin = int((val - lower)/bin_width)
327
328
329 if bin < 0 or bin >= inc:
330 print("Outside of the limits: '%s'" % val)
331 continue
332
333
334 dist[bin][1] = dist[bin][1] + 1
335
336
337 total_pr = 0.0
338 for i in range(inc):
339 dist[i][1] = dist[i][1] / float(len(values))
340 total_pr = total_pr + dist[i][1]
341
342 print("Total Pr: %s" % total_pr)
343
344
345 return dist
346
347
349 """Generate grace plots of the results."""
350
351
352 n = len(self.configs)
353
354
355 defaults = [4, 2]
356 colours = []
357 for i in range(n):
358
359 if i < len(defaults):
360 colours.append(defaults[i])
361
362
363 else:
364 colours.append(0)
365
366
367 ens_text = ''
368 dividers = [1e15, 1e12, 1e9, 1e6, 1e3, 1]
369 num_ens = self.num_ens
370 for i in range(len(dividers)):
371
372 num = int(num_ens / dividers[i])
373
374
375 if num:
376 text = repr(num)
377 elif not num and ens_text:
378 text = '000'
379 else:
380 continue
381
382
383 ens_text = ens_text + text
384
385
386 if i < len(dividers)-1:
387 ens_text = ens_text + ','
388
389
390 num_ens = num_ens - dividers[i]*num
391
392
393 subtitle = '%s ensembles of %s' % (ens_text, self.num_models)
394
395
396 if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
397
398 print("Generating NOE violation Grace plots.")
399
400
401 grace_curve = open(self.results_dir+sep+"NOE_viol_curve.agr", 'w')
402 grace_dist = open(self.results_dir+sep+"NOE_viol_dist.agr", 'w')
403
404
405 data = []
406 dist = []
407 for i in range(n):
408
409 file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i] + "_sorted")
410 lines = file.readlines()
411 file.close()
412
413
414 data.append([])
415
416
417 noe_viols = []
418 for j in range(1, len(lines)):
419
420 viol = float(split(lines[j])[1])
421 noe_viols.append(viol)
422
423
424 data[i].append([j, viol])
425
426
427 dist.append(self.generate_distribution(noe_viols, inc=self.bucket_num, upper=self.upper_lim_noe, lower=self.lower_lim_noe))
428
429
430 write_xy_header(file=grace_curve, title='NOE violation comparison', subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[0]*n, axis_labels=['Ensemble (sorted)', 'NOE violation (Angstrom\S2\N)'], axis_min=[0, 0], axis_max=[self.num_ens, 200], legend_pos=[0.3, 0.8])
431 write_xy_header(file=grace_dist, title='NOE violation comparison', subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[1]*n, symbol_sizes=[0.5]*n, linestyle=[3]*n, axis_labels=['NOE violation (Angstrom\S2\N)', 'Frequency'], axis_min=[0, 0], axis_max=[200, 0.2], legend_pos=[1.1, 0.8])
432
433
434 write_xy_data([data], file=grace_curve, graph_type='xy')
435 write_xy_data([dist], file=grace_dist, graph_type='xy')
436
437
438 grace_curve.close()
439 grace_dist.close()
440
441
442 if access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
443
444 print("Generating RDC Q-factor Grace plots.")
445
446
447 grace_curve = open(self.results_dir+sep+"RDC_%s_curve.agr" % self.rdc_name, 'w')
448 grace_dist = open(self.results_dir+sep+"RDC_%s_dist.agr" % self.rdc_name, 'w')
449
450
451 data = []
452 dist = []
453 for i in range(n):
454
455 file = open(self.results_dir+sep+"Q_factors_" + self.configs[i] + "_sorted")
456 lines = file.readlines()
457 file.close()
458
459
460 data.append([])
461
462
463 values = []
464 for j in range(1, len(lines)):
465
466 value = float(split(lines[j])[1])
467 values.append(value)
468
469
470 data[i].append([j, value])
471
472
473 dist.append(self.generate_distribution(values, inc=self.bucket_num, upper=self.upper_lim_rdc, lower=self.lower_lim_rdc))
474
475
476 write_xy_header(file=grace_curve, title='%s RDC Q-factor comparison' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[0]*n, axis_labels=['Ensemble (sorted)', '%s RDC Q-factor (pales format)' % self.rdc_name], axis_min=[0, 0], axis_max=[self.num_ens, 2], legend_pos=[0.3, 0.8])
477 write_xy_header(file=grace_dist, title='%s RDC Q-factor comparison' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[1]*n, symbol_sizes=[0.5]*n, linestyle=[3]*n, axis_labels=['%s RDC Q-factor (pales format)' % self.rdc_name, 'Frequency'], axis_min=[0, 0], axis_max=[2, 0.2], legend_pos=[1.1, 0.8])
478
479
480 write_xy_data([data], file=grace_curve, graph_type='xy')
481 write_xy_data([dist], file=grace_dist, graph_type='xy')
482
483
484 grace_curve.close()
485 grace_dist.close()
486
487
488 if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK) and access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
489
490 print("Generating NOE-RDC correlation Grace plots.")
491
492
493 grace_file = open(self.results_dir+sep+"correlation_plot.agr", 'w')
494 grace_file_scaled = open(self.results_dir+sep+"correlation_plot_scaled.agr", 'w')
495
496
497 data = []
498 data_scaled = []
499 for i in range(len(self.configs)):
500
501 file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
502 noe_lines = file.readlines()
503 file.close()
504
505
506 data.append([])
507 data_scaled.append([])
508
509
510 file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
511 rdc_lines = file.readlines()
512 file.close()
513
514
515 for j in range(1, len(noe_lines)):
516
517 noe_viol = float(split(noe_lines[j])[1])
518 q_factor = float(split(rdc_lines[j])[1])
519
520
521 data[i].append([noe_viol, q_factor])
522 data_scaled[i].append([sqrt(noe_viol/self.noe_norm), q_factor])
523
524
525 write_xy_header(file=grace_file, title='Correlation plot - %s RDC vs. NOE' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[9]*n, symbol_sizes=[0.24]*n, linetype=[0]*n, axis_labels=['NOE violation (Angstrom\S2\N)', '%s RDC Q-factor (pales format)' % self.rdc_name], axis_min=[0, 0], axis_max=[noe_viols[-1]+10, values[-1]+0.1], legend_pos=[1.1, 0.8])
526 write_xy_header(file=grace_file_scaled, title='Correlation plot - %s RDC vs. NOE Q-factor' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[9]*n, symbol_sizes=[0.24]*n, linetype=[0]*n, axis_labels=['Normalised NOE violation (Q = sqrt(U / \\xS\\f{}NOE\\si\\N\\S2\\N))', '%s RDC Q-factor (pales format)' % self.rdc_name], axis_min=[0, 0], axis_max=[1, 1], legend_pos=[1.1, 0.8])
527 write_xy_data([data], file=grace_file, graph_type='xy')
528 write_xy_data([data_scaled], file=grace_file_scaled, graph_type='xy')
529
530
532 """NOE violation calculations."""
533
534
535 if self.log:
536 sys.stdout = open(self.results_dir+sep+"logs" + sep + "NOE_viol.log", 'w')
537
538
539 dir = self.results_dir + sep + "NOE_results"
540 mkdir_nofail(dir=dir)
541
542
543 for config in self.configs:
544
545 print("\n"*10 + "# Set up for config " + config + " #" + "\n")
546
547
548 out = open(self.results_dir+sep+"NOE_viol_" + config, 'w')
549 out_sorted = open(self.results_dir+sep+"NOE_viol_" + config + "_sorted", 'w')
550 out.write("%-20s%20s\n" % ("# Ensemble", "NOE_volation"))
551 out_sorted.write("%-20s%20s\n" % ("# Ensemble", "NOE_volation"))
552
553
554 self.interpreter.pipe.create("noe_viol_%s" % config, "N-state")
555
556
557 self.interpreter.structure.read_pdb("ensembles" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=range(1, self.num_models+1), parser="internal")
558
559
560 self.interpreter.structure.load_spins("@H*", ave_pos=False)
561
562
563 for i in range(len(self.pseudo)):
564 self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
565 self.interpreter.sequence.display()
566
567
568 self.interpreter.noe.read_restraints(file=self.noe_file)
569
570
571 self.interpreter.n_state_model.select_model(model="fixed")
572
573
574 print("\n"*2 + "# Set up complete #" + "\n"*10)
575
576
577 noe_viol = []
578 for ens in range(self.num_ens):
579
580 if self.log:
581 sys.stdout.write(config + repr(ens) + "\n")
582 sys.stderr.write(config + repr(ens) + "\n")
583
584
585 self.interpreter.structure.delete()
586
587
588 self.interpreter.structure.read_pdb("ensembles" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=range(1, self.num_models+1), parser="internal")
589
590
591 self.interpreter.structure.get_pos(ave_pos=False)
592
593
594 self.interpreter.calc()
595
596
597 cdp.sum_viol = 0.0
598 for i in range(len(cdp.ave_dist)):
599 if cdp.quad_pot[i][2]:
600 cdp.sum_viol = cdp.sum_viol + cdp.quad_pot[i][2]
601
602
603 noe_viol.append([cdp.sum_viol, ens])
604 out.write("%-20i%30.15f\n" % (ens, cdp.sum_viol))
605
606
607 self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
608
609
610 noe_viol.sort()
611
612
613 for i in range(len(noe_viol)):
614 out_sorted.write("%-20i%20.15f\n" % (noe_viol[i][1], noe_viol[i][0]))
615
616
618 """Perform the RDC part of the analysis."""
619
620
621 if self.log:
622 sys.stdout = open(self.results_dir+sep+"logs" + sep + "RDC_%s_analysis.log" % self.rdc_name, 'w')
623
624
625 d = 3.0 / (2.0*pi) * dipolar_constant(g13C, g1H, self.bond_length)
626
627
628 dir = self.results_dir + sep + "RDC_%s_results" % self.rdc_name
629 mkdir_nofail(dir=dir)
630
631
632 for config in self.configs:
633
634 print("\n"*10 + "# Set up for config " + config + " #" + "\n")
635
636
637 out = open(self.results_dir+sep+"Q_factors_" + config, 'w')
638 out_sorted = open(self.results_dir+sep+"Q_factors_" + config + "_sorted", 'w')
639 out.write("%-20s%20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)", "RDC_Q_factor(standard)"))
640 out_sorted.write("%-20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)"))
641
642
643 self.interpreter.pipe.create("rdc_analysis_%s" % config, "N-state")
644
645
646 self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=range(1, self.num_models+1), parser="internal")
647
648
649 self.interpreter.structure.load_spins("@H*", ave_pos=False)
650
651
652 for i in range(len(self.pseudo)):
653 self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
654 self.interpreter.sequence.display()
655
656
657 self.interpreter.rdc.read(align_id=self.rdc_file, file=self.rdc_file, spin_id_col=self.rdc_spin_id_col, mol_name_col=self.rdc_mol_name_col, res_num_col=self.rdc_res_num_col, res_name_col=self.rdc_res_name_col, spin_num_col=self.rdc_spin_num_col, spin_name_col=self.rdc_spin_name_col, data_col=self.rdc_data_col, error_col=self.rdc_error_col)
658
659
660 self.interpreter.value.set(self.bond_length, "r", spin_id="@H*")
661 self.interpreter.value.set(self.bond_length, "r", spin_id="@Q*")
662 self.interpreter.value.set("13C", "heteronuc_type", spin_id="@H*")
663 self.interpreter.value.set("13C", "heteronuc_type", spin_id="@Q*")
664 self.interpreter.value.set("1H", "proton_type", spin_id="@H*")
665 self.interpreter.value.set("1H", "proton_type", spin_id="@Q*")
666
667
668 self.interpreter.n_state_model.select_model(model="fixed")
669
670
671 print("\n"*2 + "# Set up complete #" + "\n"*10)
672
673
674 q_factors = []
675 for ens in range(self.num_ens):
676
677 if self.log:
678 sys.stdout.write(config + repr(ens) + "\n")
679 sys.stderr.write(config + repr(ens) + "\n")
680
681
682 self.interpreter.structure.delete()
683
684
685 self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=range(1, self.num_models+1), parser="internal")
686
687
688 self.interpreter.structure.vectors(spin_id="@H*", attached="*C*", ave=False)
689
690
691
692 self.interpreter.minimise("simplex", constraints=False)
693
694
695 q_factors.append([cdp.q_rdc, ens])
696 out.write("%-20i%20.15f%20.15f\n" % (ens, cdp.q_rdc, cdp.q_rdc_norm2))
697
698
699 cdp.align_tensor_Hz = d * cdp.align_tensors[0].A
700 cdp.align_tensor_Hz_5D = d * cdp.align_tensors[0].A_5D
701
702
703 self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
704
705
706 q_factors.sort()
707
708
709 for i in range(len(q_factors)):
710 out_sorted.write("%-20i%20.15f\n" % (q_factors[i][1], q_factors[i][0]))
711
712
714 """Generate the ensembles by random sampling of the snapshots."""
715
716
717 mkdir_nofail(dir=self.results_dir + sep + "ensembles")
718
719
720 for conf_index in range(len(self.configs)):
721
722 for ens in range(self.num_ens):
723
724 rand = []
725 for j in range(self.num_models):
726 rand.append(randint(self.snapshot_min[conf_index], self.snapshot_max[conf_index]))
727
728
729 print("Generating ensemble %s%s from structures %s." % (self.configs[conf_index], ens, rand))
730
731
732 file_name = "ensembles" + sep + self.configs[conf_index] + repr(ens) + ".pdb"
733
734
735 out = open(self.results_dir+sep+file_name, 'w')
736
737
738 out.write("REM Structures: " + repr(rand) + "\n")
739
740
741 for j in range(self.num_models):
742
743 rand_name = self.snapshot_dir[conf_index] + sep + self.configs[conf_index] + repr(rand[j]) + ".pdb"
744
745
746 out.write(open(rand_name).read())
747
748
749 out.close()
750
751
753 """Superimpose the ensembles using fit to first in Molmol."""
754
755
756 mkdir_nofail("ensembles_superimposed")
757
758
759 if self.log:
760 log = open(self.results_dir+sep+"logs" + sep + "superimpose_molmol.stderr", 'w')
761 sys.stdout = open(self.results_dir+sep+"logs" + sep + "superimpose.log", 'w')
762
763
764 for config in ["R", "S"]:
765
766 for ens in range(self.num_ens):
767
768 file_in = "ensembles" + sep + config + repr(ens) + ".pdb"
769 file_out = "ensembles_superimposed" + sep + config + repr(ens) + ".pdb"
770
771
772 sys.stderr.write("Superimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
773 if self.log:
774 log.write("\n\n\nSuperimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
775
776
777 if access(self.results_dir+sep+file_out, F_OK):
778 continue
779
780
781 stdin, stdout, stderr = popen3("molmol -t -f -", 'w', 0)
782
783
784 stdin.write("InitAll yes\n")
785
786
787 stdin.write("ReadPdb " + self.results_dir+sep+file_in + "\n")
788
789
790 stdin.write("Fit to_first 'selected'\n")
791 stdin.write("Fit to_mean 'selected'\n")
792
793
794 stdin.write("WritePdb " + self.results_dir+sep+file_out + "\n")
795
796
797 stdin.close()
798
799
800 sys.stdout.write(stdout.read())
801 if self.log:
802 log.write(stderr.read())
803
804
805 stdout.close()
806 stderr.close()
807
808
809 self.interpreter.reset()
810 self.interpreter.pipe.create('out', 'N-state')
811 self.interpreter.structure.read_pdb(file_out, parser="internal")
812
813
814 for model in cdp.structure.structural_data:
815
816 mol = model.mol[0]
817
818
819 for i in range(len(mol.atom_name)):
820
821 if search('H', mol.atom_name[i]):
822 mol.atom_name[i] = mol.atom_name[i][1:] + mol.atom_name[i][0]
823
824
825 self.interpreter.structure.write_pdb(config + repr(ens) + ".pdb", dir=self.results_dir+sep+"ensembles_superimposed", force=True)
826