Package auto_analyses :: Module stereochem_analysis

Source Code for Module auto_analyses.stereochem_analysis

  1  ############################################################################### 
  2  #                                                                             # 
  3  # Copyright (C) 2010-2012 Edward d'Auvergne                                   # 
  4  #                                                                             # 
  5  # This file is part of the program relax.                                     # 
  6  #                                                                             # 
  7  # relax is free software; you can redistribute it and/or modify               # 
  8  # it under the terms of the GNU General Public License as published by        # 
  9  # the Free Software Foundation; either version 2 of the License, or           # 
 10  # (at your option) any later version.                                         # 
 11  #                                                                             # 
 12  # relax is distributed in the hope that it will be useful,                    # 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 15  # GNU General Public License for more details.                                # 
 16  #                                                                             # 
 17  # You should have received a copy of the GNU General Public License           # 
 18  # along with relax; if not, write to the Free Software                        # 
 19  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA   # 
 20  #                                                                             # 
 21  ############################################################################### 
 22   
 23  # Module docstring. 
 24  """Determination of relative stereochemistry in organic molecules. 
 25   
  26  The analysis is performed by using multiple ensembles of structures, randomly sampled from a given 
 27  set of structures.  The discrimination is performed by comparing the sets of ensembles using NOE 
 28  violations and RDC Q-factors. 
 29   
 30  This script is split into multiple stages: 
 31   
 32      1.  The random sampling of the snapshots to generate the N ensembles (NUM_ENS, usually 10,000 to 
 33      100,000) of M members (NUM_MODELS, usually ~10).  The original snapshot files are expected to be 
 34      named the SNAPSHOT_DIR + CONFIG + a number from SNAPSHOT_MIN to SNAPSHOT_MAX + ".pdb", e.g. 
 35      "snapshots/R647.pdb".  The ensembles will be placed into the "ensembles" directory. 
 36   
 37      2.  The NOE violation analysis. 
 38   
 39      3.  The superimposition of ensembles.  For each ensemble, Molmol is used for superimposition 
 40      using the fit to first algorithm.  The superimposed ensembles will be placed into the 
 41      "ensembles_superimposed" directory.  This stage is not necessary for the NOE analysis. 
 42   
 43      4.  The RDC Q-factor analysis. 
 44   
 45      5.  Generation of Grace graphs. 
 46   
 47      6.  Final ordering of ensembles using the combined RDC and NOE Q-factors, whereby the NOE 
 48      Q-factor is defined as:: 
 49   
 50          Q^2 = U / sum(NOE_i^2), 
 51   
 52      where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.  The 
 53      denominator is the sum of all squared NOEs - this must be given as the value of NOE_NORM.  The 
 54      combined Q is given by:: 
 55   
 56          Q_total^2 = Q_NOE^2 + Q_RDC^2. 
 57  """ 
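# --- Illustrative example (not part of the module source) -------------------
# A minimal worked example of the Q-factor definitions given in the docstring
# above, using made-up numbers: a summed NOE violation U of 25 Angstrom^2, an
# NOE normalisation factor NOE_NORM (the sum of all squared NOEs) of
# 10000 Angstrom^2, and an assumed RDC Q-factor of 0.12.
#
#     from math import sqrt
#     U = 25.0                               # Quadratic flat bottom well potential (Angstrom^2).
#     noe_norm = 10000.0                     # NOE_NORM, the sum of all NOEs squared.
#     q_noe = sqrt(U / noe_norm)             # Q_NOE = 0.05.
#     q_rdc = 0.12                           # Assumed RDC Q-factor from stage 4.
#     q_total = sqrt(q_noe**2 + q_rdc**2)    # Q_total = 0.13.
# -----------------------------------------------------------------------------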
 58   
 59  # Python module imports. 
 60  from math import pi, sqrt 
 61  from os import F_OK, access, getcwd, popen3, sep 
 62  from random import randint 
 63  from re import search 
 64  from string import split 
 65  import sys 
 66   
 67  # relax module imports. 
 68  from generic_fns import pipes 
 69  from generic_fns.grace import write_xy_data, write_xy_header 
 70  from generic_fns.selection import spin_loop 
 71  from physical_constants import dipolar_constant, g1H, g13C 
 72  from prompt.interpreter import Interpreter 
 73  from relax_errors import RelaxError 
 74  from relax_io import mkdir_nofail 
 75  from status import Status; status = Status() 
 76   
 77   
 78   
 79  class Stereochem_analysis:
 80      """Class for performing the relative stereochemistry analysis."""
 81  
 82      def __init__(self, stage=1, results_dir=None, num_ens=10000, num_models=10, configs=None, snapshot_dir='snapshots', snapshot_min=None, snapshot_max=None, pseudo=None, noe_file=None, noe_norm=None, rdc_name=None, rdc_file=None, rdc_spin_id_col=None, rdc_mol_name_col=None, rdc_res_num_col=None, rdc_res_name_col=None, rdc_spin_num_col=None, rdc_spin_name_col=None, rdc_data_col=None, rdc_error_col=None, bond_length=None, log=None, bucket_num=200, lower_lim_noe=0.0, upper_lim_noe=600.0, lower_lim_rdc=0.0, upper_lim_rdc=1.0):
 83          """Set up for the stereochemistry analysis.
 84  
 85          @keyword stage: Stage of analysis (see the module docstring above for the options).
 86          @type stage: int
 87          @keyword results_dir: The optional directory to place all results files into.
 88          @type results_dir: None or str
 89          @keyword num_ens: Number of ensembles.
 90          @type num_ens: int
 91          @keyword num_models: Ensemble size.
 92          @type num_models: int
 93          @keyword configs: All the configurations.
 94          @type configs: list of str
 95          @keyword snapshot_dir: Snapshot directories (corresponding to the configurations).
 96          @type snapshot_dir: list of str
 97          @keyword snapshot_min: The number of the first snapshots (corresponding to the configurations).
 98          @type snapshot_min: list of int
 99          @keyword snapshot_max: The number of the last snapshots (corresponding to the configurations).
100          @type snapshot_max: list of int
101          @keyword pseudo: The list of pseudo-atoms.  Each element is a list of the pseudo-atom name and a list of all those atoms forming the pseudo-atom.  For example, pseudo = [["Q7", ["@H16", "@H17", "@H18"]], ["Q9", ["@H20", "@H21", "@H22"]]].
102          @type pseudo: list of list of str and list of str
103          @keyword noe_file: The name of the NOE restraint file.
104          @type noe_file: str
105          @keyword noe_norm: The NOE normalisation factor (equal to the sum of all NOEs squared).
106          @type noe_norm: float
107          @keyword rdc_name: The label for this RDC data set.
108          @type rdc_name: str
109          @keyword rdc_file: The name of the RDC file.
110          @type rdc_file: str
111          @keyword rdc_spin_id_col: The spin ID column of the RDC file.
112          @type rdc_spin_id_col: None or int
113          @keyword rdc_mol_name_col: The molecule name column of the RDC file.
114          @type rdc_mol_name_col: None or int
115          @keyword rdc_res_num_col: The residue number column of the RDC file.
116          @type rdc_res_num_col: None or int
117          @keyword rdc_res_name_col: The residue name column of the RDC file.
118          @type rdc_res_name_col: None or int
119          @keyword rdc_spin_num_col: The spin number column of the RDC file.
120          @type rdc_spin_num_col: None or int
121          @keyword rdc_spin_name_col: The spin name column of the RDC file.
122          @type rdc_spin_name_col: None or int
123          @keyword rdc_data_col: The data column of the RDC file.
124          @type rdc_data_col: int
125          @keyword rdc_error_col: The error column of the RDC file.
126          @type rdc_error_col: int
127          @keyword bond_length: The bond length value in meters.
128          @type bond_length: float
129          @keyword log: Log file output flag (only for certain stages).
130          @type log: bool
131          @keyword bucket_num: Number of buckets for the distribution plots.
132          @type bucket_num: int
133          @keyword lower_lim_noe: Distribution plot limits.
134          @type lower_lim_noe: int
135          @keyword upper_lim_noe: Distribution plot limits.
136          @type upper_lim_noe: int
137          @keyword lower_lim_rdc: Distribution plot limits.
138          @type lower_lim_rdc: int
139          @keyword upper_lim_rdc: Distribution plot limits.
140          @type upper_lim_rdc: int
141          """
142  
143          # Execution lock.
144          status.exec_lock.acquire('auto stereochem analysis', mode='auto-analysis')
145  
146          # Set up the analysis status object.
147          status.init_auto_analysis('stereochem', type='stereochem')
148          status.current_analysis = 'auto stereochem analysis'
149  
150          # Store all the args.
151          self.stage = stage
152          self.results_dir = results_dir
153          self.num_ens = num_ens
154          self.num_models = num_models
155          self.configs = configs
156          self.snapshot_dir = snapshot_dir
157          self.snapshot_min = snapshot_min
158          self.snapshot_max = snapshot_max
159          self.pseudo = pseudo
160          self.noe_file = noe_file
161          self.noe_norm = noe_norm
162          self.rdc_name = rdc_name
163          self.rdc_file = rdc_file
164          self.rdc_spin_id_col = rdc_spin_id_col
165          self.rdc_mol_name_col = rdc_mol_name_col
166          self.rdc_res_num_col = rdc_res_num_col
167          self.rdc_res_name_col = rdc_res_name_col
168          self.rdc_spin_num_col = rdc_spin_num_col
169          self.rdc_spin_name_col = rdc_spin_name_col
170          self.rdc_data_col = rdc_data_col
171          self.rdc_error_col = rdc_error_col
172          self.bond_length = bond_length
173          self.log = log
174          self.bucket_num = bucket_num
175          self.lower_lim_noe = lower_lim_noe
176          self.upper_lim_noe = upper_lim_noe
177          self.lower_lim_rdc = lower_lim_rdc
178          self.upper_lim_rdc = upper_lim_rdc
179  
180          # Load the interpreter.
181          self.interpreter = Interpreter(show_script=False, quit=False, raise_relax_error=True)
182          self.interpreter.populate_self()
183          self.interpreter.on(verbose=False)
184  
185          # Create the results directory.
186          if self.results_dir:
187              mkdir_nofail(self.results_dir)
188  
189          # Or use the current working directory.
190          else:
191              self.results_dir = getcwd()
192  
193          # Create a directory for log files.
194          if self.log:
195              mkdir_nofail(self.results_dir + sep + "logs")
196  
197          # Finish and unlock execution.
198          status.auto_analysis['stereochem'].fin = True
199          status.current_analysis = None
200          status.exec_lock.release()
201  
202  
203      def run(self):
204          """Execute the given stage of the analysis."""
205  
206          # Store the original STDOUT.
207          self.stdout_orig = sys.stdout
208  
209          # Sampling of snapshots.
210          if self.stage == 1:
211              self.sample()
212  
213          # NOE violation analysis.
214          elif self.stage == 2:
215              self.noe_viol()
216  
217          # Ensemble superimposition.
218          elif self.stage == 3:
219              self.superimpose()
220  
221          # RDC Q-factor analysis.
222          elif self.stage == 4:
223              self.rdc_analysis()
224  
225          # Grace plot creation.
226          elif self.stage == 5:
227              self.grace_plots()
228  
229          # Final combined Q ordering.
230          elif self.stage == 6:
231              self.combined_q()
232  
233          # Unknown stage.
234          else:
235              raise RelaxError("The stage number %s is unknown." % self.stage)
236  
237          # Restore STDOUT.
238          sys.stdout = self.stdout_orig
239  
240  
241      def combined_q(self):
242          """Calculate the combined Q-factor.
243  
244          The combined Q is defined as::
245  
246              Q_total^2 = Q_NOE^2 + Q_RDC^2,
247  
248          and the NOE Q-factor as::
249  
250              Q^2 = U / sum(NOE_i^2),
251  
252          where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.
253          """
254  
255          # Checks.
256          if not access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
257              raise RelaxError("The NOE analysis has not been performed, cannot find the file '%s'." % self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted")
258          if not access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
259              raise RelaxError("The RDC analysis has not been performed, cannot find the file '%s'." % self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted")
260  
261          # Loop over the configurations.
262          for i in range(len(self.configs)):
263              # Print out.
264              print("Creating the combined Q-factor file for configuration '%s'." % self.configs[i])
265  
266              # Open the NOE results file and read the data.
267              file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
268              noe_lines = file.readlines()
269              file.close()
270  
271              # Open the RDC results file and read the data.
272              file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
273              rdc_lines = file.readlines()
274              file.close()
275  
276              # The combined Q-factor file.
277              out = open(self.results_dir+sep+"Q_total_%s" % self.configs[i], 'w')
278              out_sorted = open(self.results_dir+sep+"Q_total_%s_sorted" % self.configs[i], 'w')
279  
280              # Loop over the data (skipping the header line).
281              data = []
282              for j in range(1, len(noe_lines)):
283                  # Split the lines.
284                  ens = int(split(noe_lines[j])[0])
285                  noe_viol = float(split(noe_lines[j])[1])
286                  q_rdc = float(split(rdc_lines[j])[1])
287  
288                  # The NOE Q-factor.
289                  q_noe = sqrt(noe_viol/self.noe_norm)
290  
291                  # Combined Q.
292                  q = sqrt(q_noe**2 + q_rdc**2)
293  
294                  # Write out the unsorted list.
295                  out.write("%-20i%20.15f\n" % (ens, q))
296  
297                  # Store the values.
298                  data.append([q, ens])
299  
300              # Sort the combined Q.
301              data.sort()
302  
303              # Write the data.
304              for i in range(len(data)):
305                  out_sorted.write("%-20i%20.15f\n" % (data[i][1], data[i][0]))
306  
307              # Close the files.
308              out.close()
309              out_sorted.close()
310  
311  
312      def generate_distribution(self, values, lower=0.0, upper=200.0, inc=None):
313          """Create the distribution data structure."""
314  
315          # The bin width.
316          bin_width = (upper - lower)/float(inc)
317  
318          # Init the dist object.
319          dist = []
320          for i in range(inc):
321              dist.append([bin_width*i+lower, 0])
322  
323          # Loop over the values.
324          for val in values:
325              # The bin.
326              bin = int((val - lower)/bin_width)
327  
328              # Outside of the limits.
329              if bin < 0 or bin >= inc:
330                  print("Outside of the limits: '%s'" % val)
331                  continue
332  
333              # Increment the count.
334              dist[bin][1] = dist[bin][1] + 1
335  
336          # Convert the counts to frequencies.
337          total_pr = 0.0
338          for i in range(inc):
339              dist[i][1] = dist[i][1] / float(len(values))
340              total_pr = total_pr + dist[i][1]
341  
342          print("Total Pr: %s" % total_pr)
343  
344          # Return the dist.
345          return dist
346  
347  
348      def grace_plots(self):
349          """Generate grace plots of the results."""
350  
351          # The number of configs.
352          n = len(self.configs)
353  
354          # The colours for the different configs.
355          defaults = [4, 2]    # Blue and red.
356          colours = []
357          for i in range(n):
358              # Default colours.
359              if i < len(defaults):
360                  colours.append(defaults[i])
361  
362              # Otherwise black!
363              else:
364                  colours.append(0)
365  
366          # The ensemble number text.
367          ens_text = ''
368          dividers = [1e15, 1e12, 1e9, 1e6, 1e3, 1]
369          num_ens = self.num_ens
370          for i in range(len(dividers)):
371              # The number.
372              num = int(num_ens / dividers[i])
373  
374              # The text.
375              if num:
376                  text = repr(num)
377              elif not num and ens_text:
378                  text = '000'
379              else:
380                  continue
381  
382              # Update the text.
383              ens_text = ens_text + text
384  
385              # A comma.
386              if i < len(dividers)-1:
387                  ens_text = ens_text + ','
388  
389              # Remove the front part of the number.
390              num_ens = num_ens - dividers[i]*num
391  
392          # Subtitle for all graphs.
393          subtitle = '%s ensembles of %s' % (ens_text, self.num_models)
394  
395          # NOE violations.
396          if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
397              # Print out.
398              print("Generating NOE violation Grace plots.")
399  
400              # Open the output files.
401              grace_curve = open(self.results_dir+sep+"NOE_viol_curve.agr", 'w')
402              grace_dist = open(self.results_dir+sep+"NOE_viol_dist.agr", 'w')
403  
404              # Loop over the configurations.
405              data = []
406              dist = []
407              for i in range(n):
408                  # Open the results file and read the data.
409                  file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i] + "_sorted")
410                  lines = file.readlines()
411                  file.close()
412  
413                  # Add a new graph set.
414                  data.append([])
415  
416                  # Loop over the ensembles and extract the NOE violation.
417                  noe_viols = []
418                  for j in range(1, len(lines)):
419                      # Extract the violation.
420                      viol = float(split(lines[j])[1])
421                      noe_viols.append(viol)
422  
423                      # Add to the data structure.
424                      data[i].append([j, viol])
425  
426                  # Calculate the R distribution.
427                  dist.append(self.generate_distribution(noe_viols, inc=self.bucket_num, upper=self.upper_lim_noe, lower=self.lower_lim_noe))
428  
429              # Headers.
430              write_xy_header(file=grace_curve, title='NOE violation comparison', subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[0]*n, axis_labels=['Ensemble (sorted)', 'NOE violation (Angstrom\S2\N)'], axis_min=[0, 0], axis_max=[self.num_ens, 200], legend_pos=[0.3, 0.8])
431              write_xy_header(file=grace_dist, title='NOE violation comparison', subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[1]*n, symbol_sizes=[0.5]*n, linestyle=[3]*n, axis_labels=['NOE violation (Angstrom\S2\N)', 'Frequency'], axis_min=[0, 0], axis_max=[200, 0.2], legend_pos=[1.1, 0.8])
432  
433              # Write the data.
434              write_xy_data([data], file=grace_curve, graph_type='xy')
435              write_xy_data([dist], file=grace_dist, graph_type='xy')
436  
437              # Close the files.
438              grace_curve.close()
439              grace_dist.close()
440  
441          # RDC Q-factors.
442          if access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
443              # Print out.
444              print("Generating RDC Q-factor Grace plots.")
445  
446              # Open the Grace output files.
447              grace_curve = open(self.results_dir+sep+"RDC_%s_curve.agr" % self.rdc_name, 'w')
448              grace_dist = open(self.results_dir+sep+"RDC_%s_dist.agr" % self.rdc_name, 'w')
449  
450              # Loop over the configurations.
451              data = []
452              dist = []
453              for i in range(n):
454                  # Open the results file and read the data.
455                  file = open(self.results_dir+sep+"Q_factors_" + self.configs[i] + "_sorted")
456                  lines = file.readlines()
457                  file.close()
458  
459                  # Add a new graph set.
460                  data.append([])
461  
462                  # Loop over the Q-factors.
463                  values = []
464                  for j in range(1, len(lines)):
465                      # Extract the violation.
466                      value = float(split(lines[j])[1])
467                      values.append(value)
468  
469                      # Add to the data structure.
470                      data[i].append([j, value])
471  
472                  # Calculate the R distribution.
473                  dist.append(self.generate_distribution(values, inc=self.bucket_num, upper=self.upper_lim_rdc, lower=self.lower_lim_rdc))
474  
475              # Headers.
476              write_xy_header(file=grace_curve, title='%s RDC Q-factor comparison' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[0]*n, axis_labels=['Ensemble (sorted)', '%s RDC Q-factor (pales format)' % self.rdc_name], axis_min=[0, 0], axis_max=[self.num_ens, 2], legend_pos=[0.3, 0.8])
477              write_xy_header(file=grace_dist, title='%s RDC Q-factor comparison' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[1]*n, symbol_sizes=[0.5]*n, linestyle=[3]*n, axis_labels=['%s RDC Q-factor (pales format)' % self.rdc_name, 'Frequency'], axis_min=[0, 0], axis_max=[2, 0.2], legend_pos=[1.1, 0.8])
478  
479              # Write the data.
480              write_xy_data([data], file=grace_curve, graph_type='xy')
481              write_xy_data([dist], file=grace_dist, graph_type='xy')
482  
483              # Close the files.
484              grace_curve.close()
485              grace_dist.close()
486  
487          # NOE-RDC correlation plots.
488          if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK) and access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
489              # Print out.
490              print("Generating NOE-RDC correlation Grace plots.")
491  
492              # Open the Grace output files.
493              grace_file = open(self.results_dir+sep+"correlation_plot.agr", 'w')
494              grace_file_scaled = open(self.results_dir+sep+"correlation_plot_scaled.agr", 'w')
495  
496              # Grace data.
497              data = []
498              data_scaled = []
499              for i in range(len(self.configs)):
500                  # Open the NOE results file and read the data.
501                  file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
502                  noe_lines = file.readlines()
503                  file.close()
504  
505                  # Add a new graph set.
506                  data.append([])
507                  data_scaled.append([])
508  
509                  # Open the RDC results file and read the data.
510                  file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
511                  rdc_lines = file.readlines()
512                  file.close()
513  
514                  # Loop over the data.
515                  for j in range(1, len(noe_lines)):
516                      # Split the lines.
517                      noe_viol = float(split(noe_lines[j])[1])
518                      q_factor = float(split(rdc_lines[j])[1])
519  
520                      # Add the xy pair.
521                      data[i].append([noe_viol, q_factor])
522                      data_scaled[i].append([sqrt(noe_viol/self.noe_norm), q_factor])
523  
524              # Write the data.
525              write_xy_header(file=grace_file, title='Correlation plot - %s RDC vs. NOE' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[9]*n, symbol_sizes=[0.24]*n, linetype=[0]*n, axis_labels=['NOE violation (Angstrom\S2\N)', '%s RDC Q-factor (pales format)' % self.rdc_name], axis_min=[0, 0], axis_max=[noe_viols[-1]+10, values[-1]+0.1], legend_pos=[1.1, 0.8])
526              write_xy_header(file=grace_file_scaled, title='Correlation plot - %s RDC vs. NOE Q-factor' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[9]*n, symbol_sizes=[0.24]*n, linetype=[0]*n, axis_labels=['Normalised NOE violation (Q = sqrt(U / \\xS\\f{}NOE\\si\\N\\S2\\N))', '%s RDC Q-factor (pales format)' % self.rdc_name], axis_min=[0, 0], axis_max=[1, 1], legend_pos=[1.1, 0.8])
527              write_xy_data([data], file=grace_file, graph_type='xy')
528              write_xy_data([data_scaled], file=grace_file_scaled, graph_type='xy')
529  
530  
531      def noe_viol(self):
532          """NOE violation calculations."""
533  
534          # Redirect STDOUT to a log file.
535          if self.log:
536              sys.stdout = open(self.results_dir+sep+"logs" + sep + "NOE_viol.log", 'w')
537  
538          # Create a directory for the save files.
539          dir = self.results_dir + sep + "NOE_results"
540          mkdir_nofail(dir=dir)
541  
542          # Loop over the configurations.
543          for config in self.configs:
544              # Print out.
545              print("\n"*10 + "# Set up for config " + config + " #" + "\n")
546  
547              # Open the results file.
548              out = open(self.results_dir+sep+"NOE_viol_" + config, 'w')
549              out_sorted = open(self.results_dir+sep+"NOE_viol_" + config + "_sorted", 'w')
550              out.write("%-20s%20s\n" % ("# Ensemble", "NOE_violation"))
551              out_sorted.write("%-20s%20s\n" % ("# Ensemble", "NOE_violation"))
552  
553              # Create the data pipe.
554              self.interpreter.pipe.create("noe_viol_%s" % config, "N-state")
555  
556              # Read the first structure.
557              self.interpreter.structure.read_pdb("ensembles" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=range(1, self.num_models+1), parser="internal")
558  
559              # Load all protons as the sequence.
560              self.interpreter.structure.load_spins("@H*", ave_pos=False)
561  
562              # Create the pseudo-atoms.
563              for i in range(len(self.pseudo)):
564                  self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
565              self.interpreter.sequence.display()
566  
567              # Read the NOE list.
568              self.interpreter.noe.read_restraints(file=self.noe_file)
569  
570              # Set up the N-state model.
571              self.interpreter.n_state_model.select_model(model="fixed")
572  
573              # Print out.
574              print("\n"*2 + "# Set up complete #" + "\n"*10)
575  
576              # Loop over each ensemble.
577              noe_viol = []
578              for ens in range(self.num_ens):
579                  # Print out the ensemble to both the log and screen.
580                  if self.log:
581                      sys.stdout.write(config + repr(ens) + "\n")
582                  sys.stderr.write(config + repr(ens) + "\n")
583  
584                  # Delete the old structures and rename the molecule.
585                  self.interpreter.structure.delete()
586  
587                  # Read the ensemble.
588                  self.interpreter.structure.read_pdb("ensembles" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=range(1, self.num_models+1), parser="internal")
589  
590                  # Get the atomic positions.
591                  self.interpreter.structure.get_pos(ave_pos=False)
592  
593                  # Calculate the average NOE potential.
594                  self.interpreter.calc()
595  
596                  # Sum the violations.
597                  cdp.sum_viol = 0.0
598                  for i in range(len(cdp.ave_dist)):
599                      if cdp.quad_pot[i][2]:
600                          cdp.sum_viol = cdp.sum_viol + cdp.quad_pot[i][2]
601  
602                  # Write out the NOE violation.
603                  noe_viol.append([cdp.sum_viol, ens])
604                  out.write("%-20i%30.15f\n" % (ens, cdp.sum_viol))
605  
606                  # Save the state.
607                  self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
608  
609              # Sort the NOE violations.
610              noe_viol.sort()
611  
612              # Write the data.
613              for i in range(len(noe_viol)):
614                  out_sorted.write("%-20i%20.15f\n" % (noe_viol[i][1], noe_viol[i][0]))
615  
616  
617      def rdc_analysis(self):
618          """Perform the RDC part of the analysis."""
619  
620          # Redirect STDOUT to a log file.
621          if self.log:
622              sys.stdout = open(self.results_dir+sep+"logs" + sep + "RDC_%s_analysis.log" % self.rdc_name, 'w')
623  
624          # The dipolar constant.
625          d = 3.0 / (2.0*pi) * dipolar_constant(g13C, g1H, self.bond_length)
626  
627          # Create a directory for the save files.
628          dir = self.results_dir + sep + "RDC_%s_results" % self.rdc_name
629          mkdir_nofail(dir=dir)
630  
631          # Loop over the configurations.
632          for config in self.configs:
633              # Print out.
634              print("\n"*10 + "# Set up for config " + config + " #" + "\n")
635  
636              # Open the results files.
637              out = open(self.results_dir+sep+"Q_factors_" + config, 'w')
638              out_sorted = open(self.results_dir+sep+"Q_factors_" + config + "_sorted", 'w')
639              out.write("%-20s%20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)", "RDC_Q_factor(standard)"))
640              out_sorted.write("%-20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)"))
641  
642              # Create the data pipe.
643              self.interpreter.pipe.create("rdc_analysis_%s" % config, "N-state")
644  
645              # Read the first structure.
646              self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=range(1, self.num_models+1), parser="internal")
647  
648              # Load all protons as the sequence.
649              self.interpreter.structure.load_spins("@H*", ave_pos=False)
650  
651              # Create the pseudo-atoms.
652              for i in range(len(self.pseudo)):
653                  self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
654              self.interpreter.sequence.display()
655  
656              # Read the RDC data.
657              self.interpreter.rdc.read(align_id=self.rdc_file, file=self.rdc_file, spin_id_col=self.rdc_spin_id_col, mol_name_col=self.rdc_mol_name_col, res_num_col=self.rdc_res_num_col, res_name_col=self.rdc_res_name_col, spin_num_col=self.rdc_spin_num_col, spin_name_col=self.rdc_spin_name_col, data_col=self.rdc_data_col, error_col=self.rdc_error_col)
658  
659              # Set the values needed to calculate the dipolar constant.
660              self.interpreter.value.set(self.bond_length, "r", spin_id="@H*")
661              self.interpreter.value.set(self.bond_length, "r", spin_id="@Q*")
662              self.interpreter.value.set("13C", "heteronuc_type", spin_id="@H*")
663              self.interpreter.value.set("13C", "heteronuc_type", spin_id="@Q*")
664              self.interpreter.value.set("1H", "proton_type", spin_id="@H*")
665              self.interpreter.value.set("1H", "proton_type", spin_id="@Q*")
666  
667              # Set up the model.
668              self.interpreter.n_state_model.select_model(model="fixed")
669  
670              # Print out.
671              print("\n"*2 + "# Set up complete #" + "\n"*10)
672  
673              # Loop over each ensemble.
674              q_factors = []
675              for ens in range(self.num_ens):
676                  # Print out the ensemble to both the log and screen.
677                  if self.log:
678                      sys.stdout.write(config + repr(ens) + "\n")
679                  sys.stderr.write(config + repr(ens) + "\n")
680  
681                  # Delete the old structures.
682                  self.interpreter.structure.delete()
683  
684                  # Read the ensemble.
685                  self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=range(1, self.num_models+1), parser="internal")
686  
687                  # Load the CH vectors for the H atoms.
688                  self.interpreter.structure.vectors(spin_id="@H*", attached="*C*", ave=False)
689  
690                  # Minimisation.
691                  #grid_search(inc=4)
692                  self.interpreter.minimise("simplex", constraints=False)
693  
694                  # Store and write out the Q-factors.
695                  q_factors.append([cdp.q_rdc, ens])
696                  out.write("%-20i%20.15f%20.15f\n" % (ens, cdp.q_rdc, cdp.q_rdc_norm2))
697  
698                  # Calculate the alignment tensor in Hz, and store it for reference.
699                  cdp.align_tensor_Hz = d * cdp.align_tensors[0].A
700                  cdp.align_tensor_Hz_5D = d * cdp.align_tensors[0].A_5D
701  
702                  # Save the state.
703                  self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
704  
705              # Sort the Q-factors.
706              q_factors.sort()
707  
708              # Write the data.
709              for i in range(len(q_factors)):
710                  out_sorted.write("%-20i%20.15f\n" % (q_factors[i][1], q_factors[i][0]))
711  
712  
713      def sample(self):
714          """Generate the ensembles by random sampling of the snapshots."""
715  
716          # Create the directory for the ensembles, if needed.
717          mkdir_nofail(dir=self.results_dir + sep + "ensembles")
718  
719          # Loop over the configurations.
720          for conf_index in range(len(self.configs)):
721              # Loop over each ensemble.
722              for ens in range(self.num_ens):
723                  # Random sampling.
724                  rand = []
725                  for j in range(self.num_models):
726                      rand.append(randint(self.snapshot_min[conf_index], self.snapshot_max[conf_index]))
727  
728                  # Print out.
729                  print("Generating ensemble %s%s from structures %s." % (self.configs[conf_index], ens, rand))
730  
731                  # The file name.
732                  file_name = "ensembles" + sep + self.configs[conf_index] + repr(ens) + ".pdb"
733  
734                  # Open the output file.
735                  out = open(self.results_dir+sep+file_name, 'w')
736  
737                  # Header.
738                  out.write("REM Structures: " + repr(rand) + "\n")
739  
740                  # Concatenate the files.
741                  for j in range(self.num_models):
742                      # The random file.
743                      rand_name = self.snapshot_dir[conf_index] + sep + self.configs[conf_index] + repr(rand[j]) + ".pdb"
744  
745                      # Append the file.
746                      out.write(open(rand_name).read())
747  
748                  # Close the file.
749                  out.close()
750  
751  
752      def superimpose(self):
753          """Superimpose the ensembles using fit to first in Molmol."""
754  
755          # Create the output directory.
756          mkdir_nofail("ensembles_superimposed")
757  
758          # Logging turned on.
759          if self.log:
760              log = open(self.results_dir+sep+"logs" + sep + "superimpose_molmol.stderr", 'w')
761              sys.stdout = open(self.results_dir+sep+"logs" + sep + "superimpose.log", 'w')
762  
763          # Loop over S and R.
764          for config in ["R", "S"]:
765              # Loop over each ensemble.
766              for ens in range(self.num_ens):
767                  # The file names.
768                  file_in = "ensembles" + sep + config + repr(ens) + ".pdb"
769                  file_out = "ensembles_superimposed" + sep + config + repr(ens) + ".pdb"
770  
771                  # Print out.
772                  sys.stderr.write("Superimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
773                  if self.log:
774                      log.write("\n\n\nSuperimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
775  
776                  # Failure handling (if a failure occurred and this is rerun, skip all existing files).
777                  if access(self.results_dir+sep+file_out, F_OK):
778                      continue
779  
780                  # Open the Molmol pipe.
781                  stdin, stdout, stderr = popen3("molmol -t -f -", 'w', 0)
782  
783                  # Init all.
784                  stdin.write("InitAll yes\n")
785  
786                  # Read the PDB.
787                  stdin.write("ReadPdb " + self.results_dir+sep+file_in + "\n")
788  
789                  # Fitting to mean.
790                  stdin.write("Fit to_first 'selected'\n")
791                  stdin.write("Fit to_mean 'selected'\n")
792  
793                  # Write the result.
794                  stdin.write("WritePdb " + self.results_dir+sep+file_out + "\n")
795  
796                  # End Molmol.
797                  stdin.close()
798  
799                  # Get STDOUT and STDERR.
800                  sys.stdout.write(stdout.read())
801                  if self.log:
802                      log.write(stderr.read())
803  
804                  # Close the pipe.
805                  stdout.close()
806                  stderr.close()
807  
808                  # Open the superimposed file in relax.
809                  self.interpreter.reset()
810                  self.interpreter.pipe.create('out', 'N-state')
811                  self.interpreter.structure.read_pdb(file_out, parser="internal")
812  
813                  # Fix the non-standard MOLMOL proton naming.
814                  for model in cdp.structure.structural_data:
815                      # Alias.
816                      mol = model.mol[0]
817  
818                      # Loop over all atoms.
819                      for i in range(len(mol.atom_name)):
820                          # A proton.
821                          if search('H', mol.atom_name[i]):
822                              mol.atom_name[i] = mol.atom_name[i][1:] + mol.atom_name[i][0]
823  
824                  # Replace the superimposed file.
825                  self.interpreter.structure.write_pdb(config + repr(ens) + ".pdb", dir=self.results_dir+sep+"ensembles_superimposed", force=True)
826  
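
A minimal driver sketch for this class is given below.  Only the keyword arguments are taken from the __init__() signature above; every value (file names, directories, column numbers, bond length, ensemble counts, the rdc_name label) is an illustrative placeholder that would need to be replaced with the real data for a given system.  The stages would normally be run one at a time rather than in a single loop, and such a script would typically be executed through the relax program rather than directly with Python.

    # Hypothetical driver script - all values are illustrative only.
    from auto_analyses.stereochem_analysis import Stereochem_analysis

    # Run the six stages in order (sampling, NOE violations, superimposition,
    # RDC Q-factors, Grace plots, combined Q ordering).
    for stage in range(1, 7):
        analysis = Stereochem_analysis(
            stage=stage,
            results_dir="results",                      # Output directory (assumed).
            num_ens=10000,                              # Number of ensembles.
            num_models=10,                              # Ensemble size.
            configs=["R", "S"],                         # The two configurations.
            snapshot_dir=["snapshots", "snapshots"],    # One snapshot directory per configuration.
            snapshot_min=[0, 0],                        # First snapshot numbers.
            snapshot_max=[1000, 1000],                  # Last snapshot numbers.
            pseudo=[["Q7", ["@H16", "@H17", "@H18"]]],  # Pseudo-atom definitions (assumed).
            noe_file="noe_restraints.txt",              # NOE restraint file (assumed).
            noe_norm=10000.0,                           # Sum of all squared NOEs (assumed).
            rdc_name="PAN",                             # RDC data set label (assumed).
            rdc_file="rdcs.txt",                        # RDC file (assumed).
            rdc_spin_id_col=1,                          # Column layout of the RDC file (assumed).
            rdc_data_col=2,
            rdc_error_col=3,
            bond_length=1.117e-10,                      # C-H bond length in metres (assumed).
            log=True,                                   # Write log files.
        )
        analysis.run()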