
Source Code for Module auto_analyses.stereochem_analysis

  1  ############################################################################### 
  2  #                                                                             # 
  3  # Copyright (C) 2010-2012 Edward d'Auvergne                                   # 
  4  #                                                                             # 
  5  # This file is part of the program relax (http://www.nmr-relax.com).          # 
  6  #                                                                             # 
  7  # This program is free software: you can redistribute it and/or modify        # 
  8  # it under the terms of the GNU General Public License as published by        # 
  9  # the Free Software Foundation, either version 3 of the License, or           # 
 10  # (at your option) any later version.                                         # 
 11  #                                                                             # 
 12  # This program is distributed in the hope that it will be useful,             # 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 15  # GNU General Public License for more details.                                # 
 16  #                                                                             # 
 17  # You should have received a copy of the GNU General Public License           # 
 18  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
 19  #                                                                             # 
 20  ############################################################################### 
 21   
 22  # Module docstring. 
 23  """Determination of relative stereochemistry in organic molecules. 
 24   
  25  The analysis is performed using multiple ensembles of structures, randomly sampled from a given 
 26  set of structures.  The discrimination is performed by comparing the sets of ensembles using NOE 
 27  violations and RDC Q-factors. 
 28   
 29  This script is split into multiple stages: 
 30   
 31      1.  The random sampling of the snapshots to generate the N ensembles (NUM_ENS, usually 10,000 to 
 32      100,000) of M members (NUM_MODELS, usually ~10).  The original snapshot files are expected to be 
 33      named the SNAPSHOT_DIR + CONFIG + a number from SNAPSHOT_MIN to SNAPSHOT_MAX + ".pdb", e.g. 
 34      "snapshots/R647.pdb".  The ensembles will be placed into the "ensembles" directory. 
 35   
 36      2.  The NOE violation analysis. 
 37   
 38      3.  The superimposition of ensembles.  For each ensemble, Molmol is used for superimposition 
 39      using the fit to first algorithm.  The superimposed ensembles will be placed into the 
 40      "ensembles_superimposed" directory.  This stage is not necessary for the NOE analysis. 
 41   
 42      4.  The RDC Q-factor analysis. 
 43   
 44      5.  Generation of Grace graphs. 
 45   
 46      6.  Final ordering of ensembles using the combined RDC and NOE Q-factors, whereby the NOE 
 47      Q-factor is defined as:: 
 48   
 49          Q^2 = U / sum(NOE_i^2), 
 50   
 51      where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.  The 
 52      denominator is the sum of all squared NOEs - this must be given as the value of NOE_NORM.  The 
 53      combined Q is given by:: 
 54   
 55          Q_total^2 = Q_NOE^2 + Q_RDC^2. 
 56  """ 
 57   
 58  # Dependencies. 
 59  import dep_check 
 60   
 61  # Python module imports. 
 62  from math import pi, sqrt 
 63  from os import F_OK, access, getcwd, sep 
 64  from random import randint 
 65  from re import search 
 66  if dep_check.subprocess_module: 
 67      from subprocess import PIPE, Popen 
 68  import sys 
 69   
 70  # relax module imports. 
 71  from generic_fns import pipes 
 72  from generic_fns.grace import write_xy_data, write_xy_header 
 73  from generic_fns.selection import spin_loop 
 74  from physical_constants import dipolar_constant, g1H, g13C 
 75  from prompt.interpreter import Interpreter 
 76  from relax_errors import RelaxError 
 77  from relax_io import mkdir_nofail 
 78  from status import Status; status = Status() 
 79   
 80   
 81   
 82  class Stereochem_analysis:
 83      """Class for performing the relative stereochemistry analysis."""
 84
 85      def __init__(self, stage=1, results_dir=None, num_ens=10000, num_models=10, configs=None, snapshot_dir='snapshots', snapshot_min=None, snapshot_max=None, pseudo=None, noe_file=None, noe_norm=None, rdc_name=None, rdc_file=None, rdc_spin_id1_col=None, rdc_spin_id2_col=None, rdc_data_col=None, rdc_error_col=None, bond_length=None, log=None, bucket_num=200, lower_lim_noe=0.0, upper_lim_noe=600.0, lower_lim_rdc=0.0, upper_lim_rdc=1.0):
 86          """Set up for the stereochemistry analysis.
 87
 88          @keyword stage: Stage of analysis (see the module docstring above for the options).
 89          @type stage: int
 90          @keyword results_dir: The optional directory to place all results files into.
 91          @type results_dir: None or str
 92          @keyword num_ens: Number of ensembles.
 93          @type num_ens: int
 94          @keyword num_models: Ensemble size.
 95          @type num_models: int
 96          @keyword configs: All the configurations.
 97          @type configs: list of str
 98          @keyword snapshot_dir: Snapshot directories (corresponding to the configurations).
 99          @type snapshot_dir: list of str
100          @keyword snapshot_min: The number of the first snapshots (corresponding to the configurations).
101          @type snapshot_min: list of int
102          @keyword snapshot_max: The number of the last snapshots (corresponding to the configurations).
103          @type snapshot_max: list of int
104          @keyword pseudo: The list of pseudo-atoms.  Each element is a list of the pseudo-atom name and a list of all those atoms forming the pseudo-atom.  For example, pseudo = [["Q7", ["@H16", "@H17", "@H18"]], ["Q9", ["@H20", "@H21", "@H22"]]].
105          @type pseudo: list of list of str and list of str
106          @keyword noe_file: The name of the NOE restraint file.
107          @type noe_file: str
108          @keyword noe_norm: The NOE normalisation factor (equal to the sum of all NOEs squared).
109          @type noe_norm: float
110          @keyword rdc_name: The label for this RDC data set.
111          @type rdc_name: str
112          @keyword rdc_file: The name of the RDC file.
113          @type rdc_file: str
114          @keyword rdc_spin_id1_col: The spin ID column of the first spin in the RDC file.
115          @type rdc_spin_id1_col: None or int
116          @keyword rdc_spin_id2_col: The spin ID column of the second spin in the RDC file.
117          @type rdc_spin_id2_col: None or int
118          @keyword rdc_data_col: The data column of the RDC file.
119          @type rdc_data_col: int
120          @keyword rdc_error_col: The error column of the RDC file.
121          @type rdc_error_col: int
122          @keyword bond_length: The bond length value in meters.
123          @type bond_length: float
124          @keyword log: Log file output flag (only for certain stages).
125          @type log: bool
126          @keyword bucket_num: Number of buckets for the distribution plots.
127          @type bucket_num: int
128          @keyword lower_lim_noe: Distribution plot limits.
129          @type lower_lim_noe: int
130          @keyword upper_lim_noe: Distribution plot limits.
131          @type upper_lim_noe: int
132          @keyword lower_lim_rdc: Distribution plot limits.
133          @type lower_lim_rdc: int
134          @keyword upper_lim_rdc: Distribution plot limits.
135          @type upper_lim_rdc: int
136          """
137
138          # Execution lock.
139          status.exec_lock.acquire('auto stereochem analysis', mode='auto-analysis')
140
141          # Set up the analysis status object.
142          status.init_auto_analysis('stereochem', type='stereochem')
143          status.current_analysis = 'auto stereochem analysis'
144
145          # Store all the args.
146          self.stage = stage
147          self.results_dir = results_dir
148          self.num_ens = num_ens
149          self.num_models = num_models
150          self.configs = configs
151          self.snapshot_dir = snapshot_dir
152          self.snapshot_min = snapshot_min
153          self.snapshot_max = snapshot_max
154          self.pseudo = pseudo
155          self.noe_file = noe_file
156          self.noe_norm = noe_norm
157          self.rdc_name = rdc_name
158          self.rdc_file = rdc_file
159          self.rdc_spin_id1_col = rdc_spin_id1_col
160          self.rdc_spin_id2_col = rdc_spin_id2_col
161          self.rdc_data_col = rdc_data_col
162          self.rdc_error_col = rdc_error_col
163          self.bond_length = bond_length
164          self.log = log
165          self.bucket_num = bucket_num
166          self.lower_lim_noe = lower_lim_noe
167          self.upper_lim_noe = upper_lim_noe
168          self.lower_lim_rdc = lower_lim_rdc
169          self.upper_lim_rdc = upper_lim_rdc
170
171          # Load the interpreter.
172          self.interpreter = Interpreter(show_script=False, quit=False, raise_relax_error=True)
173          self.interpreter.populate_self()
174          self.interpreter.on(verbose=False)
175
176          # Create the results directory.
177          if self.results_dir:
178              mkdir_nofail(self.results_dir)
179
180          # Or use the current working directory.
181          else:
182              self.results_dir = getcwd()
183
184          # Create a directory for log files.
185          if self.log:
186              mkdir_nofail(self.results_dir + sep + "logs")
187
188          # Finish and unlock execution.
189          status.auto_analysis['stereochem'].fin = True
190          status.current_analysis = None
191          status.exec_lock.release()
192
193
194      def run(self):
195          """Execute the given stage of the analysis."""
196
197          # Store the original STDOUT.
198          self.stdout_orig = sys.stdout
199
200          # Sampling of snapshots.
201          if self.stage == 1:
202              self.sample()
203
204          # NOE violation analysis.
205          elif self.stage == 2:
206              self.noe_viol()
207
208          # Ensemble superimposition.
209          elif self.stage == 3:
210              self.superimpose()
211
212          # RDC Q-factor analysis.
213          elif self.stage == 4:
214              self.rdc_analysis()
215
216          # Grace plot creation.
217          elif self.stage == 5:
218              self.grace_plots()
219
220          # Final combined Q ordering.
221          elif self.stage == 6:
222              self.combined_q()
223
224          # Unknown stage.
225          else:
226              raise RelaxError("The stage number %s is unknown." % self.stage)
227
228          # Restore STDOUT.
229          sys.stdout = self.stdout_orig
230
231
232      def combined_q(self):
233          """Calculate the combined Q-factor.
234
235          The combined Q is defined as::
236
237              Q_total^2 = Q_NOE^2 + Q_RDC^2,
238
239          and the NOE Q-factor as::
240
241              Q^2 = U / sum(NOE_i^2),
242
243          where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.
244          """
245
246          # Checks.
247          if not access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
248              raise RelaxError("The NOE analysis has not been performed, cannot find the file '%s'." % (self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted"))
249          if not access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
250              raise RelaxError("The RDC analysis has not been performed, cannot find the file '%s'." % (self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted"))
251
252          # Loop over the configurations.
253          for i in range(len(self.configs)):
254              # Print out.
255              print("Creating the combined Q-factor file for configuration '%s'." % self.configs[i])
256
257              # Open the NOE results file and read the data.
258              file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
259              noe_lines = file.readlines()
260              file.close()
261
262              # Open the RDC results file and read the data.
263              file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
264              rdc_lines = file.readlines()
265              file.close()
266
267              # The combined Q-factor file.
268              out = open(self.results_dir+sep+"Q_total_%s" % self.configs[i], 'w')
269              out_sorted = open(self.results_dir+sep+"Q_total_%s_sorted" % self.configs[i], 'w')
270
271              # Loop over the data (skipping the header line).
272              data = []
273              for j in range(1, len(noe_lines)):
274                  # Split the lines.
275                  ens = int(noe_lines[j].split()[0])
276                  noe_viol = float(noe_lines[j].split()[1])
277                  q_rdc = float(rdc_lines[j].split()[1])
278
279                  # The NOE Q-factor.
280                  q_noe = sqrt(noe_viol/self.noe_norm)
281
282                  # Combined Q.
283                  q = sqrt(q_noe**2 + q_rdc**2)
284
285                  # Write out the unsorted list.
286                  out.write("%-20i%20.15f\n" % (ens, q))
287
288                  # Store the values.
289                  data.append([q, ens])
290
291              # Sort the combined Q.
292              data.sort()
293
294              # Write the data.
295              for i in range(len(data)):
296                  out_sorted.write("%-20i%20.15f\n" % (data[i][1], data[i][0]))
297
298              # Close the files.
299              out.close()
300              out_sorted.close()
301
302
303      def generate_distribution(self, values, lower=0.0, upper=200.0, inc=None):
304          """Create the distribution data structure."""
305
306          # The bin width.
307          bin_width = (upper - lower)/float(inc)
308
309          # Init the dist object.
310          dist = []
311          for i in range(inc):
312              dist.append([bin_width*i+lower, 0])
313
314          # Loop over the values.
315          for val in values:
316              # The bin.
317              bin = int((val - lower)/bin_width)
318
319              # Outside of the limits.
320              if bin < 0 or bin >= inc:
321                  print("Outside of the limits: '%s'" % val)
322                  continue
323
324              # Increment the count.
325              dist[bin][1] = dist[bin][1] + 1
326
327          # Convert the counts to frequencies.
328          total_pr = 0.0
329          for i in range(inc):
330              dist[i][1] = dist[i][1] / float(len(values))
331              total_pr = total_pr + dist[i][1]
332
333          print("Total Pr: %s" % total_pr)
334
335          # Return the dist.
336          return dist
337
338
339      def grace_plots(self):
340          """Generate grace plots of the results."""
341
342          # The number of configs.
343          n = len(self.configs)
344
345          # The colours for the different configs.
346          defaults = [4, 2]  # Blue and red.
347          colours = []
348          for i in range(n):
349              # Default colours.
350              if i < len(defaults):
351                  colours.append(defaults[i])
352
353              # Otherwise black!
354              else:
355                  colours.append(0)
356
357          # The ensemble number text.
358          ens_text = ''
359          dividers = [1e15, 1e12, 1e9, 1e6, 1e3, 1]
360          num_ens = self.num_ens
361          for i in range(len(dividers)):
362              # The number.
363              num = int(num_ens / dividers[i])
364
365              # The text.
366              if num:
367                  text = repr(num)
368              elif not num and ens_text:
369                  text = '000'
370              else:
371                  continue
372
373              # Update the text.
374              ens_text = ens_text + text
375
376              # A comma.
377              if i < len(dividers)-1:
378                  ens_text = ens_text + ','
379
380              # Remove the front part of the number.
381              num_ens = num_ens - dividers[i]*num
382
383          # Subtitle for all graphs.
384          subtitle = '%s ensembles of %s' % (ens_text, self.num_models)
385
386          # NOE violations.
387          if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
388              # Print out.
389              print("Generating NOE violation Grace plots.")
390
391              # Open the output files.
392              grace_curve = open(self.results_dir+sep+"NOE_viol_curve.agr", 'w')
393              grace_dist = open(self.results_dir+sep+"NOE_viol_dist.agr", 'w')
394
395              # Loop over the configurations.
396              data = []
397              dist = []
398              for i in range(n):
399                  # Open the results file and read the data.
400                  file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i] + "_sorted")
401                  lines = file.readlines()
402                  file.close()
403
404                  # Add a new graph set.
405                  data.append([])
406
407                  # Loop over the ensembles and extract the NOE violation.
408                  noe_viols = []
409                  for j in range(1, len(lines)):
410                      # Extract the violation.
411                      viol = float(lines[j].split()[1])
412                      noe_viols.append(viol)
413
414                      # Add to the data structure.
415                      data[i].append([j, viol])
416
417                  # Calculate the distribution.
418                  dist.append(self.generate_distribution(noe_viols, inc=self.bucket_num, upper=self.upper_lim_noe, lower=self.lower_lim_noe))
419
420              # Headers.
421              write_xy_header(file=grace_curve, title='NOE violation comparison', subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[0]*n, axis_labels=['Ensemble (sorted)', 'NOE violation (Angstrom\\S2\\N)'], axis_min=[0, 0], axis_max=[self.num_ens, 200], legend_pos=[0.3, 0.8])
422              write_xy_header(file=grace_dist, title='NOE violation comparison', subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[1]*n, symbol_sizes=[0.5]*n, linestyle=[3]*n, axis_labels=['NOE violation (Angstrom\\S2\\N)', 'Frequency'], axis_min=[0, 0], axis_max=[200, 0.2], legend_pos=[1.1, 0.8])
423
424              # Write the data.
425              write_xy_data([data], file=grace_curve, graph_type='xy')
426              write_xy_data([dist], file=grace_dist, graph_type='xy')
427
428              # Close the files.
429              grace_curve.close()
430              grace_dist.close()
431
432          # RDC Q-factors.
433          if access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
434              # Print out.
435              print("Generating RDC Q-factor Grace plots.")
436
437              # Open the Grace output files.
438              grace_curve = open(self.results_dir+sep+"RDC_%s_curve.agr" % self.rdc_name, 'w')
439              grace_dist = open(self.results_dir+sep+"RDC_%s_dist.agr" % self.rdc_name, 'w')
440
441              # Loop over the configurations.
442              data = []
443              dist = []
444              for i in range(n):
445                  # Open the results file and read the data.
446                  file = open(self.results_dir+sep+"Q_factors_" + self.configs[i] + "_sorted")
447                  lines = file.readlines()
448                  file.close()
449
450                  # Add a new graph set.
451                  data.append([])
452
453                  # Loop over the Q-factors.
454                  values = []
455                  for j in range(1, len(lines)):
456                      # Extract the Q-factor.
457                      value = float(lines[j].split()[1])
458                      values.append(value)
459
460                      # Add to the data structure.
461                      data[i].append([j, value])
462
463                  # Calculate the distribution.
464                  dist.append(self.generate_distribution(values, inc=self.bucket_num, upper=self.upper_lim_rdc, lower=self.lower_lim_rdc))
465
466              # Headers.
467              write_xy_header(file=grace_curve, title='%s RDC Q-factor comparison' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[0]*n, axis_labels=['Ensemble (sorted)', '%s RDC Q-factor (pales format)' % self.rdc_name], axis_min=[0, 0], axis_max=[self.num_ens, 2], legend_pos=[0.3, 0.8])
468              write_xy_header(file=grace_dist, title='%s RDC Q-factor comparison' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[1]*n, symbol_sizes=[0.5]*n, linestyle=[3]*n, axis_labels=['%s RDC Q-factor (pales format)' % self.rdc_name, 'Frequency'], axis_min=[0, 0], axis_max=[2, 0.2], legend_pos=[1.1, 0.8])
469
470              # Write the data.
471              write_xy_data([data], file=grace_curve, graph_type='xy')
472              write_xy_data([dist], file=grace_dist, graph_type='xy')
473
474              # Close the files.
475              grace_curve.close()
476              grace_dist.close()
477
478          # NOE-RDC correlation plots.
479          if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK) and access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
480              # Print out.
481              print("Generating NOE-RDC correlation Grace plots.")
482
483              # Open the Grace output files.
484              grace_file = open(self.results_dir+sep+"correlation_plot.agr", 'w')
485              grace_file_scaled = open(self.results_dir+sep+"correlation_plot_scaled.agr", 'w')
486
487              # Grace data.
488              data = []
489              data_scaled = []
490              for i in range(len(self.configs)):
491                  # Open the NOE results file and read the data.
492                  file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
493                  noe_lines = file.readlines()
494                  file.close()
495
496                  # Add a new graph set.
497                  data.append([])
498                  data_scaled.append([])
499
500                  # Open the RDC results file and read the data.
501                  file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
502                  rdc_lines = file.readlines()
503                  file.close()
504
505                  # Loop over the data.
506                  for j in range(1, len(noe_lines)):
507                      # Split the lines.
508                      noe_viol = float(noe_lines[j].split()[1])
509                      q_factor = float(rdc_lines[j].split()[1])
510
511                      # Add the xy pair.
512                      data[i].append([noe_viol, q_factor])
513                      data_scaled[i].append([sqrt(noe_viol/self.noe_norm), q_factor])
514
515              # Write the data.
516              write_xy_header(file=grace_file, title='Correlation plot - %s RDC vs. NOE' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[9]*n, symbol_sizes=[0.24]*n, linetype=[0]*n, axis_labels=['NOE violation (Angstrom\\S2\\N)', '%s RDC Q-factor (pales format)' % self.rdc_name], axis_min=[0, 0], axis_max=[noe_viols[-1]+10, values[-1]+0.1], legend_pos=[1.1, 0.8])
517              write_xy_header(file=grace_file_scaled, title='Correlation plot - %s RDC vs. NOE Q-factor' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[9]*n, symbol_sizes=[0.24]*n, linetype=[0]*n, axis_labels=['Normalised NOE violation (Q = sqrt(U / \\xS\\f{}NOE\\si\\N\\S2\\N))', '%s RDC Q-factor (pales format)' % self.rdc_name], axis_min=[0, 0], axis_max=[1, 1], legend_pos=[1.1, 0.8])
518              write_xy_data([data], file=grace_file, graph_type='xy')
519              write_xy_data([data_scaled], file=grace_file_scaled, graph_type='xy')
520
521
522      def noe_viol(self):
523          """NOE violation calculations."""
524
525          # Redirect STDOUT to a log file.
526          if self.log:
527              sys.stdout = open(self.results_dir+sep+"logs" + sep + "NOE_viol.log", 'w')
528
529          # Create a directory for the save files.
530          dir = self.results_dir + sep + "NOE_results"
531          mkdir_nofail(dir=dir)
532
533          # Loop over the configurations.
534          for config in self.configs:
535              # Print out.
536              print("\n"*10 + "# Set up for config " + config + " #" + "\n")
537
538              # Open the results file.
539              out = open(self.results_dir+sep+"NOE_viol_" + config, 'w')
540              out_sorted = open(self.results_dir+sep+"NOE_viol_" + config + "_sorted", 'w')
541              out.write("%-20s%20s\n" % ("# Ensemble", "NOE_violation"))
542              out_sorted.write("%-20s%20s\n" % ("# Ensemble", "NOE_violation"))
543
544              # Create the data pipe.
545              self.interpreter.pipe.create("noe_viol_%s" % config, "N-state")
546
547              # Read the first structure.
548              self.interpreter.structure.read_pdb("ensembles" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)), parser="internal")
549
550              # Load all protons as the sequence.
551              self.interpreter.structure.load_spins("@H*", ave_pos=False)
552
553              # Create the pseudo-atoms.
554              for i in range(len(self.pseudo)):
555                  self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
556              self.interpreter.sequence.display()
557
558              # Read the NOE list.
559              self.interpreter.noe.read_restraints(file=self.noe_file)
560
561              # Set up the N-state model.
562              self.interpreter.n_state_model.select_model(model="fixed")
563
564              # Print out.
565              print("\n"*2 + "# Set up complete #" + "\n"*10)
566
567              # Loop over each ensemble.
568              noe_viol = []
569              for ens in range(self.num_ens):
570                  # Print out the ensemble to both the log and screen.
571                  if self.log:
572                      sys.stdout.write(config + repr(ens) + "\n")
573                  sys.stderr.write(config + repr(ens) + "\n")
574
575                  # Delete the old structures.
576                  self.interpreter.structure.delete()
577
578                  # Read the ensemble.
579                  self.interpreter.structure.read_pdb("ensembles" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)), parser="internal")
580
581                  # Get the atomic positions.
582                  self.interpreter.structure.get_pos(ave_pos=False)
583
584                  # Calculate the average NOE potential.
585                  self.interpreter.calc()
586
587                  # Sum the violations.
588                  cdp.sum_viol = 0.0
589                  for i in range(len(cdp.ave_dist)):
590                      if cdp.quad_pot[i][2]:
591                          cdp.sum_viol = cdp.sum_viol + cdp.quad_pot[i][2]
592
593                  # Write out the NOE violation.
594                  noe_viol.append([cdp.sum_viol, ens])
595                  out.write("%-20i%30.15f\n" % (ens, cdp.sum_viol))
596
597                  # Save the state.
598                  self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
599
600              # Sort the NOE violations.
601              noe_viol.sort()
602
603              # Write the data.
604              for i in range(len(noe_viol)):
605                  out_sorted.write("%-20i%20.15f\n" % (noe_viol[i][1], noe_viol[i][0]))
606
607
608      def rdc_analysis(self):
609          """Perform the RDC part of the analysis."""
610
611          # Redirect STDOUT to a log file.
612          if self.log:
613              sys.stdout = open(self.results_dir+sep+"logs" + sep + "RDC_%s_analysis.log" % self.rdc_name, 'w')
614
615          # The dipolar constant.
616          d = 3.0 / (2.0*pi) * dipolar_constant(g13C, g1H, self.bond_length)
617
618          # Create a directory for the save files.
619          dir = self.results_dir + sep + "RDC_%s_results" % self.rdc_name
620          mkdir_nofail(dir=dir)
621
622          # Loop over the configurations.
623          for config in self.configs:
624              # Print out.
625              print("\n"*10 + "# Set up for config " + config + " #" + "\n")
626
627              # Open the results files.
628              out = open(self.results_dir+sep+"Q_factors_" + config, 'w')
629              out_sorted = open(self.results_dir+sep+"Q_factors_" + config + "_sorted", 'w')
630              out.write("%-20s%20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)", "RDC_Q_factor(standard)"))
631              out_sorted.write("%-20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)"))
632
633              # Create the data pipe.
634              self.interpreter.pipe.create("rdc_analysis_%s" % config, "N-state")
635
636              # Read the first structure.
637              self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)), parser="internal")
638
639              # Load all spins as the sequence.
640              self.interpreter.structure.load_spins(ave_pos=False)
641
642              # Create the pseudo-atoms.
643              for i in range(len(self.pseudo)):
644                  self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
645              self.interpreter.sequence.display()
646
647              # Read the RDC data.
648              self.interpreter.rdc.read(align_id=self.rdc_file, file=self.rdc_file, spin_id1_col=self.rdc_spin_id1_col, spin_id2_col=self.rdc_spin_id2_col, data_col=self.rdc_data_col, error_col=self.rdc_error_col)
649
650              # Define the magnetic dipole-dipole relaxation interaction.
651              self.interpreter.dipole_pair.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
652              self.interpreter.dipole_pair.set_dist(spin_id1='@C*', spin_id2='@Q*', ave_dist=self.bond_length)
653
654              # Set the nuclear isotope.
655              self.interpreter.spin.isotope(isotope='13C', spin_id='@C*')
656              self.interpreter.spin.isotope(isotope='1H', spin_id='@H*')
657              self.interpreter.spin.isotope(isotope='1H', spin_id='@Q*')
658
659              # Set up the model.
660              self.interpreter.n_state_model.select_model(model="fixed")
661
662              # Print out.
663              print("\n"*2 + "# Set up complete #" + "\n"*10)
664
665              # Loop over each ensemble.
666              q_factors = []
667              for ens in range(self.num_ens):
668                  # Print out the ensemble to both the log and screen.
669                  if self.log:
670                      sys.stdout.write(config + repr(ens) + "\n")
671                  sys.stderr.write(config + repr(ens) + "\n")
672
673                  # Delete the old structures.
674                  self.interpreter.structure.delete()
675
676                  # Read the ensemble.
677                  self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)), parser="internal")
678
679                  # Get the positional information, then load the CH vectors.
680                  self.interpreter.structure.get_pos(ave_pos=False)
681                  self.interpreter.dipole_pair.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
682                  self.interpreter.dipole_pair.unit_vectors(ave=False)
683
684                  # Minimisation.
685                  #grid_search(inc=4)
686                  self.interpreter.minimise("simplex", constraints=False)
687
688                  # Store and write out the Q-factors.
689                  q_factors.append([cdp.q_rdc, ens])
690                  out.write("%-20i%20.15f%20.15f\n" % (ens, cdp.q_rdc, cdp.q_rdc_norm2))
691
692                  # Calculate the alignment tensor in Hz, and store it for reference.
693                  cdp.align_tensor_Hz = d * cdp.align_tensors[0].A
694                  cdp.align_tensor_Hz_5D = d * cdp.align_tensors[0].A_5D
695
696                  # Save the state.
697                  self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
698
699              # Sort the Q-factors.
700              q_factors.sort()
701
702              # Write the data.
703              for i in range(len(q_factors)):
704                  out_sorted.write("%-20i%20.15f\n" % (q_factors[i][1], q_factors[i][0]))
705
706
707      def sample(self):
708          """Generate the ensembles by random sampling of the snapshots."""
709
710          # Create the directory for the ensembles, if needed.
711          mkdir_nofail(dir=self.results_dir + sep + "ensembles")
712
713          # Loop over the configurations.
714          for conf_index in range(len(self.configs)):
715              # Loop over each ensemble.
716              for ens in range(self.num_ens):
717                  # Random sampling.
718                  rand = []
719                  for j in range(self.num_models):
720                      rand.append(randint(self.snapshot_min[conf_index], self.snapshot_max[conf_index]))
721
722                  # Print out.
723                  print("Generating ensemble %s%s from structures %s." % (self.configs[conf_index], ens, rand))
724
725                  # The file name.
726                  file_name = "ensembles" + sep + self.configs[conf_index] + repr(ens) + ".pdb"
727
728                  # Open the output file.
729                  out = open(self.results_dir+sep+file_name, 'w')
730
731                  # Header.
732                  out.write("REM Structures: " + repr(rand) + "\n")
733
734                  # Concatenate the files.
735                  for j in range(self.num_models):
736                      # The random file.
737                      rand_name = self.snapshot_dir[conf_index] + sep + self.configs[conf_index] + repr(rand[j]) + ".pdb"
738
739                      # Append the file.
740                      out.write(open(rand_name).read())
741
742                  # Close the file.
743                  out.close()
744
745
746      def superimpose(self):
747          """Superimpose the ensembles using fit to first in Molmol."""
748
749          # Create the output directory.
750          mkdir_nofail(dir=self.results_dir + sep + "ensembles_superimposed")
751
752          # Logging turned on.
753          if self.log:
754              log = open(self.results_dir+sep+"logs" + sep + "superimpose_molmol.stderr", 'w')
755              sys.stdout = open(self.results_dir+sep+"logs" + sep + "superimpose.log", 'w')
756
757          # Loop over S and R.
758          for config in ["R", "S"]:
759              # Loop over each ensemble.
760              for ens in range(self.num_ens):
761                  # The file names.
762                  file_in = "ensembles" + sep + config + repr(ens) + ".pdb"
763                  file_out = "ensembles_superimposed" + sep + config + repr(ens) + ".pdb"
764
765                  # Print out.
766                  sys.stderr.write("Superimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
767                  if self.log:
768                      log.write("\n\n\nSuperimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
769
770                  # Failure handling (if a failure occurred and this is rerun, skip all existing files).
771                  if access(self.results_dir+sep+file_out, F_OK):
772                      continue
773
774                  # Open the Molmol pipe.
775                  pipe = Popen("molmol -t -f -", shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=False)
776
777                  # Init all.
778                  pipe.stdin.write("InitAll yes\n")
779
780                  # Read the PDB.
781                  pipe.stdin.write("ReadPdb " + self.results_dir+sep+file_in + "\n")
782
783                  # Fit to the first structure, then to the mean.
784                  pipe.stdin.write("Fit to_first 'selected'\n")
785                  pipe.stdin.write("Fit to_mean 'selected'\n")
786
787                  # Write the result.
788                  pipe.stdin.write("WritePdb " + self.results_dir+sep+file_out + "\n")
789
790                  # End Molmol.
791                  pipe.stdin.close()
792
793                  # Get STDOUT and STDERR.
794                  sys.stdout.write(pipe.stdout.read())
795                  if self.log:
796                      log.write(pipe.stderr.read())
797
798                  # Close the pipe.
799                  pipe.stdout.close()
800                  pipe.stderr.close()
801
802                  # Open the superimposed file in relax.
803                  self.interpreter.reset()
804                  self.interpreter.pipe.create('out', 'N-state')
805                  self.interpreter.structure.read_pdb(file_out, dir=self.results_dir, parser="internal")
806
807                  # Fix the Molmol proton naming by shifting the leading character to the end (e.g. '1HB' -> 'HB1').
808                  for model in cdp.structure.structural_data:
809                      # Alias.
810                      mol = model.mol[0]
811
812                      # Loop over all atoms.
813                      for i in range(len(mol.atom_name)):
814                          # A proton.
815                          if search('H', mol.atom_name[i]):
816                              mol.atom_name[i] = mol.atom_name[i][1:] + mol.atom_name[i][0]
817
818                  # Replace the superimposed file.
819                  self.interpreter.structure.write_pdb(config + repr(ens) + ".pdb", dir=self.results_dir+sep+"ensembles_superimposed", force=True)
820
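
The module above is normally driven one stage at a time from a short user script that instantiates the class and calls run(). The sketch below is not part of the module; it is a minimal illustration of how Stereochem_analysis might be invoked, assuming hypothetical input files (the "snapshots" directories, "noe_restraints.txt", "rdcs.txt") and purely illustrative numeric values - see the relax documentation for values appropriate to a real system. Stage 6 then combines the per-ensemble NOE violation U and RDC Q-factor as Q_total = sqrt(U/NOE_NORM + Q_RDC^2), as defined in the module docstring.

    # Hypothetical driver script for the auto-analysis above.  All file names and
    # numeric values are illustrative assumptions, not part of the module.
    from auto_analyses.stereochem_analysis import Stereochem_analysis

    # The stage to execute (1-6, see the module docstring).
    STAGE = 1

    # Set up the analysis, mirroring the keyword arguments documented in __init__().
    analysis = Stereochem_analysis(
        stage=STAGE,
        results_dir="results",                      # Assumed output directory.
        num_ens=10000,                              # N, the number of ensembles.
        num_models=10,                              # M, the ensemble size.
        configs=["R", "S"],                         # The two configurations.
        snapshot_dir=["snapshots", "snapshots"],    # One snapshot directory per configuration.
        snapshot_min=[0, 0],                        # First snapshot numbers.
        snapshot_max=[1000, 1000],                  # Last snapshot numbers.
        pseudo=[["Q7", ["@H16", "@H17", "@H18"]]],  # Pseudo-atom example from the docstring.
        noe_file="noe_restraints.txt",              # Hypothetical NOE restraint file.
        noe_norm=50.0,                              # NOE_NORM = sum(NOE_i^2), illustrative value.
        rdc_name="MEDIA1",                          # Label for the RDC data set.
        rdc_file="rdcs.txt",                        # Hypothetical RDC file.
        rdc_spin_id1_col=1,
        rdc_spin_id2_col=2,
        rdc_data_col=3,
        rdc_error_col=4,
        bond_length=1.10e-10,                       # C-H bond length in meters (illustrative).
        log=True)

    # Execute the requested stage (rerun with STAGE = 2, 3, ... for the later stages).
    analysis.run()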