Package auto_analyses :: Module stereochem_analysis
[hide private]
[frames] | no frames]

Source Code for Module auto_analyses.stereochem_analysis

  1  ############################################################################### 
  2  #                                                                             # 
  3  # Copyright (C) 2010,2012-2015,2017 Edward d'Auvergne                         # 
  4  #                                                                             # 
  5  # This file is part of the program relax (http://www.nmr-relax.com).          # 
  6  #                                                                             # 
  7  # This program is free software: you can redistribute it and/or modify        # 
  8  # it under the terms of the GNU General Public License as published by        # 
  9  # the Free Software Foundation, either version 3 of the License, or           # 
 10  # (at your option) any later version.                                         # 
 11  #                                                                             # 
 12  # This program is distributed in the hope that it will be useful,             # 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 15  # GNU General Public License for more details.                                # 
 16  #                                                                             # 
 17  # You should have received a copy of the GNU General Public License           # 
 18  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
 19  #                                                                             # 
 20  ############################################################################### 
 21   
 22  # Module docstring. 
 23  """Determination of relative stereochemistry in organic molecules. 
 24   
 25  The analysis is preformed by using multiple ensembles of structures, randomly sampled from a given set of structures.  The discrimination is performed by comparing the sets of ensembles using NOE violations and RDC Q factors. 
 26   
 27  This script is split into multiple stages: 
 28   
 29      1.  The random sampling of the snapshots to generate the N ensembles (NUM_ENS, usually 10,000 to 100,000) of M members (NUM_MODELS, usually ~10).  The original snapshot files are expected to be named the SNAPSHOT_DIR + CONFIG + a number from SNAPSHOT_MIN to SNAPSHOT_MAX + ".pdb", e.g. "snapshots/R647.pdb".  The ensembles will be placed into the "ensembles" directory. 
 30   
 31      2.  The NOE violation analysis. 
 32   
 33      3.  The superimposition of ensembles.  For each ensemble, Molmol is used for superimposition using the fit to first algorithm.  The superimposed ensembles will be placed into the "ensembles_superimposed" directory.  This stage is not necessary for the NOE analysis. 
 34   
 35      4.  The RDC Q factor analysis. 
 36   
 37      5.  Generation of Grace graphs. 
 38   
 39      6.  Final ordering of ensembles using the combined RDC and NOE Q factors, whereby the NOE Q factor is defined as:: 
 40   
 41          Q^2 = U / sum(NOE_i^2), 
 42   
 43      where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.  The denominator is the sum of all squared NOEs - this must be given as the value of NOE_NORM.  The combined Q is given by:: 
 44   
 45          Q_total^2 = Q_NOE^2 + Q_RDC^2. 
 46  """ 
 47   
 48  # Dependencies. 
 49  import dep_check 
 50   
 51  # Python module imports. 
 52  from math import pi, sqrt 
 53  from os import F_OK, access, getcwd, sep 
 54  from random import randint 
 55  from re import search 
 56  if dep_check.subprocess_module: 
 57      from subprocess import PIPE, Popen 
 58  import sys 
 59  from time import time 
 60   
 61  # relax module imports. 
 62  from lib.periodic_table import periodic_table 
 63  from lib.physical_constants import dipolar_constant 
 64  from lib.plotting.api import write_xy_data, write_xy_header 
 65  from lib.text.sectioning import title 
 66  from lib.timing import print_elapsed_time 
 67  from prompt.interpreter import Interpreter 
 68  from lib.errors import RelaxError 
 69  from lib.io import mkdir_nofail 
 70  from status import Status; status = Status() 
 71   
 72   
 73   
74 -class Stereochem_analysis:
75 """Class for performing the relative stereochemistry analysis.""" 76
77 - def __init__(self, stage=1, results_dir=None, num_ens=10000, num_models=10, configs=None, snapshot_dir='snapshots', snapshot_min=None, snapshot_max=None, pseudo=None, noe_file=None, noe_norm=None, rdc_name=None, rdc_file=None, rdc_spin_id1_col=None, rdc_spin_id2_col=None, rdc_data_col=None, rdc_error_col=None, bond_length=None, bond_length_file=None, log=None, bucket_num=200, lower_lim_noe=0.0, upper_lim_noe=600.0, lower_lim_rdc=0.0, upper_lim_rdc=1.0):
78 """Set up for the stereochemistry analysis. 79 80 @keyword stage: Stage of analysis (see the module docstring above for the options). 81 @type stage: int 82 @keyword results_dir: The optional directory to place all results files into. 83 @type results_dir: None or str 84 @keyword num_ens: Number of ensembles. 85 @type num_ens: int 86 @keyword num_models: Ensemble size. 87 @type num_models: int 88 @keyword configs: All the configurations. 89 @type configs: list of str 90 @keyword snapshot_dir: Snapshot directories (corresponding to the configurations). 91 @type snapshot_dir: list of str 92 @keyword snapshot_min: The number of the first snapshots (corresponding to the configurations). 93 @type snapshot_min: list of int 94 @keyword snapshot_max: The number of the last snapshots (corresponding to the configurations). 95 @type snapshot_max: list of int 96 @keyword pseudo: The list of pseudo-atoms. Each element is a list of the pseudo-atom name and a list of all those atoms forming the pseudo-atom. For example, pseudo = [["Q7", ["@H16", "@H17", "@H18"]], ["Q9", ["@H20", "@H21", "@H22"]]]. 97 @type pseudo: list of list of str and list of str 98 @keyword noe_file: The name of the NOE restraint file. 99 @type noe_file: str 100 @keyword noe_norm: The NOE normalisation factor (equal to the sum of all NOEs squared). 101 @type noe_norm: float 102 @keyword rdc_name: The label for this RDC data set. 103 @type rdc_name: str 104 @keyword rdc_file: The name of the RDC file. 105 @type rdc_file: str 106 @keyword rdc_spin_id1_col: The spin ID column of the first spin in the RDC file. 107 @type rdc_spin_id1_col: None or int 108 @keyword rdc_spin_id2_col: The spin ID column of the second spin in the RDC file. 109 @type rdc_spin_id2_col: None or int 110 @keyword rdc_data_col: The data column of the RDC file. 111 @type rdc_data_col: int 112 @keyword rdc_error_col: The error column of the RDC file. 113 @type rdc_error_col: int 114 @keyword bond_length: The bond length value in meters. This overrides the bond_length_file argument. 115 @type bond_length: float or None 116 @keyword bond_length_file: The file of bond lengths for each atom pair in meters. The first and second columns must be the spin ID strings and the third column must contain the data. 117 @type bond_length_file: float or None 118 @keyword log: Log file output flag (only for certain stages). 119 @type log: bool 120 @keyword bucket_num: Number of buckets for the distribution plots. 121 @type bucket_num: int 122 @keyword lower_lim_noe: Distribution plot limits. 123 @type lower_lim_noe: int 124 @keyword upper_lim_noe: Distribution plot limits. 125 @type upper_lim_noe: int 126 @keyword lower_lim_rdc: Distribution plot limits. 127 @type lower_lim_rdc: int 128 @keyword upper_lim_rdc: Distribution plot limits. 129 @type upper_lim_rdc: int 130 """ 131 132 # Initial printout. 133 title(file=sys.stdout, text="Stereochemistry auto-analysis") 134 135 # Safely execute the full protocol. 136 try: 137 # Execution lock. 138 status.exec_lock.acquire('auto stereochem analysis', mode='auto-analysis') 139 140 # Set up the analysis status object. 141 status.init_auto_analysis('stereochem', type='stereochem') 142 status.current_analysis = 'auto stereochem analysis' 143 144 # Store all the args. 145 self.stage = stage 146 self.results_dir = results_dir 147 self.num_ens = num_ens 148 self.num_models = num_models 149 self.configs = configs 150 self.snapshot_dir = snapshot_dir 151 self.snapshot_min = snapshot_min 152 self.snapshot_max = snapshot_max 153 self.pseudo = pseudo 154 self.noe_file = noe_file 155 self.noe_norm = noe_norm 156 self.rdc_name = rdc_name 157 self.rdc_file = rdc_file 158 self.rdc_spin_id1_col = rdc_spin_id1_col 159 self.rdc_spin_id2_col = rdc_spin_id2_col 160 self.rdc_data_col = rdc_data_col 161 self.rdc_error_col = rdc_error_col 162 self.bond_length = bond_length 163 self.bond_length_file = bond_length_file 164 self.log = log 165 self.bucket_num = bucket_num 166 self.lower_lim_noe = lower_lim_noe 167 self.upper_lim_noe = upper_lim_noe 168 self.lower_lim_rdc = lower_lim_rdc 169 self.upper_lim_rdc = upper_lim_rdc 170 171 # Load the interpreter. 172 self.interpreter = Interpreter(show_script=False, raise_relax_error=True) 173 self.interpreter.populate_self() 174 self.interpreter.on(verbose=False) 175 176 # Create the results directory. 177 if self.results_dir: 178 mkdir_nofail(self.results_dir) 179 180 # Or use the current working directory. 181 else: 182 self.results_dir = getcwd() 183 184 # Create a directory for log files. 185 if self.log: 186 mkdir_nofail(self.results_dir + sep + "logs") 187 188 # Clean up. 189 finally: 190 # Final printout. 191 title(file=sys.stdout, text="Completion of the stereochemistry auto-analysis") 192 print_elapsed_time(time() - status.start_time) 193 194 # Finish and unlock execution. 195 status.auto_analysis['stereochem'].fin = True 196 status.current_analysis = None 197 status.exec_lock.release()
198 199
200 - def run(self):
201 """Execute the given stage of the analysis.""" 202 203 # Store the original STDOUT. 204 self.stdout_orig = sys.stdout 205 206 # Sampling of snapshots. 207 if self.stage == 1: 208 self.sample() 209 210 # NOE violation analysis. 211 elif self.stage == 2: 212 self.noe_viol() 213 214 # Ensemble superimposition. 215 elif self.stage == 3: 216 self.superimpose() 217 218 # RDC Q factor analysis. 219 elif self.stage == 4: 220 self.rdc_analysis() 221 222 # Grace plot creation. 223 elif self.stage == 5: 224 self.grace_plots() 225 226 # Final combined Q ordering. 227 elif self.stage == 6: 228 self.combined_q() 229 230 # Unknown stage. 231 else: 232 raise RelaxError("The stage number %s is unknown." % self.stage) 233 234 # Restore STDOUT. 235 sys.stdout = self.stdout_orig
236 237
238 - def combined_q(self):
239 """Calculate the combined Q factor. 240 241 The combined Q is defined as:: 242 243 Q_total^2 = Q_NOE^2 + Q_RDC^2, 244 245 and the NOE Q factor as:: 246 247 Q^2 = U / sum(NOE_i^2), 248 249 where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2. 250 """ 251 252 # Checks. 253 if not access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK): 254 raise RelaxError("The NOE analysis has not been performed, cannot find the file '%s'." % self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted") 255 if not access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK): 256 raise RelaxError("The RDC analysis has not been performed, cannot find the file '%s'." % self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted") 257 258 # Loop over the configurations. 259 for i in range(len(self.configs)): 260 # Print out. 261 print("Creating the combined Q factor file for configuration '%s'." % self.configs[i]) 262 263 # Open the NOE results file and read the data. 264 file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i]) 265 noe_lines = file.readlines() 266 file.close() 267 268 # Open the RDC results file and read the data. 269 file = open(self.results_dir+sep+"Q_factors_" + self.configs[i]) 270 rdc_lines = file.readlines() 271 file.close() 272 273 # The combined Q factor file. 274 out = open(self.results_dir+sep+"Q_total_%s" % self.configs[i], 'w') 275 out_sorted = open(self.results_dir+sep+"Q_total_%s_sorted" % self.configs[i], 'w') 276 277 # Loop over the data (skipping the header line). 278 data = [] 279 for j in range(1, len(noe_lines)): 280 # Split the lines. 281 ens = int(noe_lines[j].split()[0]) 282 noe_viol = float(noe_lines[j].split()[1]) 283 q_rdc = float(rdc_lines[j].split()[1]) 284 285 # The NOE Q factor. 286 q_noe = sqrt(noe_viol/self.noe_norm) 287 288 # Combined Q. 289 q = sqrt(q_noe**2 + q_rdc**2) 290 291 # Write out the unsorted list. 292 out.write("%-20i%20.15f\n" % (ens, q)) 293 294 # Store the values. 295 data.append([q, ens]) 296 297 # Sort the combined Q. 298 data.sort() 299 300 # Write the data. 301 for i in range(len(data)): 302 out_sorted.write("%-20i%20.15f\n" % (data[i][1], data[i][0])) 303 304 # Close the files. 305 out.close() 306 out_sorted.close()
307 308
309 - def generate_distribution(self, values, lower=0.0, upper=200.0, inc=None):
310 """Create the distribution data structure.""" 311 312 # The bin width. 313 bin_width = (upper - lower)/float(inc) 314 315 # Init the dist object. 316 dist = [] 317 for i in range(inc): 318 dist.append([bin_width*i+lower, 0]) 319 320 # Loop over the values. 321 for val in values: 322 # The bin. 323 bin = int((val - lower)/bin_width) 324 325 # Outside of the limits. 326 if bin < 0 or bin >= inc: 327 print("Outside of the limits: '%s'" % val) 328 continue 329 330 # Increment the count. 331 dist[bin][1] = dist[bin][1] + 1 332 333 # Convert the counts to frequencies. 334 total_pr = 0.0 335 for i in range(inc): 336 dist[i][1] = dist[i][1] / float(len(values)) 337 total_pr = total_pr + dist[i][1] 338 339 print("Total Pr: %s" % total_pr) 340 341 # Return the dist. 342 return dist
343 344
345 - def grace_plots(self):
346 """Generate grace plots of the results.""" 347 348 # The number of configs. 349 n = len(self.configs) 350 351 # The colours for the different configs. 352 defaults = [4, 2] # Blue and red. 353 colours = [] 354 for i in range(n): 355 # Default colours. 356 if i < len(defaults): 357 colours.append(defaults[i]) 358 359 # Otherwise black! 360 else: 361 colours.append(0) 362 363 # The ensemble number text. 364 ens_text = '' 365 dividers = [1e15, 1e12, 1e9, 1e6, 1e3, 1] 366 num_ens = self.num_ens 367 for i in range(len(dividers)): 368 # The number. 369 num = int(num_ens / dividers[i]) 370 371 # The text. 372 if num: 373 text = repr(num) 374 elif not num and ens_text: 375 text = '000' 376 else: 377 continue 378 379 # Update the text. 380 ens_text = ens_text + text 381 382 # A comma. 383 if i < len(dividers)-1: 384 ens_text = ens_text + ',' 385 386 # Remove the front part of the number. 387 num_ens = num_ens - dividers[i]*num 388 389 # Subtitle for all graphs. 390 subtitle = '%s ensembles of %s' % (ens_text, self.num_models) 391 392 # NOE violations. 393 if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK): 394 # Print out. 395 print("Generating NOE violation Grace plots.") 396 397 # Open the output files. 398 grace_curve = open(self.results_dir+sep+"NOE_viol_curve.agr", 'w') 399 grace_dist = open(self.results_dir+sep+"NOE_viol_dist.agr", 'w') 400 401 # Loop over the configurations. 402 data = [] 403 dist = [] 404 for i in range(n): 405 # Open the results file and read the data. 406 file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i] + "_sorted") 407 lines = file.readlines() 408 file.close() 409 410 # Add a new graph set. 411 data.append([]) 412 413 # Loop over the ensembles and extract the NOE violation. 414 noe_viols = [] 415 for j in range(1, len(lines)): 416 # Extract the violation. 417 viol = float(lines[j].split()[1]) 418 noe_viols.append(viol) 419 420 # Add to the data structure. 421 data[i].append([j, viol]) 422 423 # Calculate the R distribution. 424 dist.append(self.generate_distribution(noe_viols, inc=self.bucket_num, upper=self.upper_lim_noe, lower=self.lower_lim_noe)) 425 426 # Headers. 427 write_xy_header(format='grace', file=grace_curve, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0]*n], axis_labels=[['Ensemble (sorted)', 'NOE violation (Angstrom\\S2\\N)']], legend_pos=[[0.3, 0.8]]) 428 write_xy_header(format='grace', file=grace_dist, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1]*n], symbol_sizes=[[0.5]*n], linestyle=[[3]*n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', 'Frequency']], legend_pos=[[1.1, 0.8]]) 429 430 # Write the data. 431 write_xy_data(format='grace', data=[data], file=grace_curve, graph_type='xy') 432 write_xy_data(format='grace', data=[dist], file=grace_dist, graph_type='xy') 433 434 # Close the files. 435 grace_curve.close() 436 grace_dist.close() 437 438 # RDC Q factors. 439 if access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK): 440 # Print out. 441 print("Generating RDC Q factor Grace plots.") 442 443 # Open the Grace output files. 444 grace_curve = open(self.results_dir+sep+"RDC_%s_curve.agr" % self.rdc_name, 'w') 445 grace_dist = open(self.results_dir+sep+"RDC_%s_dist.agr" % self.rdc_name, 'w') 446 447 # Loop over the configurations. 448 data = [] 449 dist = [] 450 for i in range(n): 451 # Open the results file and read the data. 452 file = open(self.results_dir+sep+"Q_factors_" + self.configs[i] + "_sorted") 453 lines = file.readlines() 454 file.close() 455 456 # Add a new graph set. 457 data.append([]) 458 459 # Loop over the Q factors. 460 values = [] 461 for j in range(1, len(lines)): 462 # Extract the violation. 463 value = float(lines[j].split()[1]) 464 values.append(value) 465 466 # Add to the data structure. 467 data[i].append([j, value]) 468 469 # Calculate the R distribution. 470 dist.append(self.generate_distribution(values, inc=self.bucket_num, upper=self.upper_lim_rdc, lower=self.lower_lim_rdc)) 471 472 # Headers. 473 write_xy_header(format='grace', file=grace_curve, title='%s RDC Q factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0]*n], axis_labels=[['Ensemble (sorted)', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[0.3, 0.8]]) 474 write_xy_header(format='grace', file=grace_dist, title='%s RDC Q factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1]*n], symbol_sizes=[[0.5]*n], linestyle=[[3]*n], axis_labels=[['%s RDC Q factor (pales format)' % self.rdc_name, 'Frequency']], legend_pos=[[1.1, 0.8]]) 475 476 # Write the data. 477 write_xy_data(format='grace', data=[data], file=grace_curve, graph_type='xy') 478 write_xy_data(format='grace', data=[dist], file=grace_dist, graph_type='xy') 479 480 # Close the files. 481 grace_curve.close() 482 grace_dist.close() 483 484 # NOE-RDC correlation plots. 485 if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK) and access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK): 486 # Print out. 487 print("Generating NOE-RDC correlation Grace plots.") 488 489 # Open the Grace output files. 490 grace_file = open(self.results_dir+sep+"correlation_plot.agr", 'w') 491 grace_file_scaled = open(self.results_dir+sep+"correlation_plot_scaled.agr", 'w') 492 493 # Grace data. 494 data = [] 495 data_scaled = [] 496 for i in range(len(self.configs)): 497 # Open the NOE results file and read the data. 498 file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i]) 499 noe_lines = file.readlines() 500 file.close() 501 502 # Add a new graph set. 503 data.append([]) 504 data_scaled.append([]) 505 506 # Open the RDC results file and read the data. 507 file = open(self.results_dir+sep+"Q_factors_" + self.configs[i]) 508 rdc_lines = file.readlines() 509 file.close() 510 511 # Loop over the data. 512 for j in range(1, len(noe_lines)): 513 # Split the lines. 514 noe_viol = float(noe_lines[j].split()[1]) 515 q_factor = float(rdc_lines[j].split()[1]) 516 517 # Add the xy pair. 518 data[i].append([noe_viol, q_factor]) 519 data_scaled[i].append([sqrt(noe_viol/self.noe_norm), q_factor]) 520 521 # Write the data. 522 write_xy_header(format='grace', file=grace_file, title='Correlation plot - %s RDC vs. NOE' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]]) 523 write_xy_header(format='grace', file=grace_file_scaled, title='Correlation plot - %s RDC vs. NOE Q factor' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['Normalised NOE violation (Q = sqrt(U / \\xS\\f{}NOE\\si\\N\\S2\\N))', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]]) 524 write_xy_data(format='grace', data=[data], file=grace_file, graph_type='xy') 525 write_xy_data(format='grace', data=[data_scaled], file=grace_file_scaled, graph_type='xy')
526 527
528 - def noe_viol(self):
529 """NOE violation calculations.""" 530 531 # Redirect STDOUT to a log file. 532 if self.log: 533 sys.stdout = open(self.results_dir+sep+"logs" + sep + "NOE_viol.log", 'w') 534 535 # Create a directory for the save files. 536 dir = self.results_dir + sep + "NOE_results" 537 mkdir_nofail(dir=dir) 538 539 # Loop over the configurations. 540 for config in self.configs: 541 # Print out. 542 print("\n"*10 + "# Set up for config " + config + " #" + "\n") 543 544 # Open the results file. 545 out = open(self.results_dir+sep+"NOE_viol_" + config, 'w') 546 out_sorted = open(self.results_dir+sep+"NOE_viol_" + config + "_sorted", 'w') 547 out.write("%-20s%20s\n" % ("# Ensemble", "NOE_volation")) 548 out_sorted.write("%-20s%20s\n" % ("# Ensemble", "NOE_volation")) 549 550 # Create the data pipe. 551 self.interpreter.pipe.create("noe_viol_%s" % config, "N-state") 552 553 # Read the first structure. 554 self.interpreter.structure.read_pdb("ensembles" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1))) 555 556 # Load all protons as the sequence. 557 self.interpreter.structure.load_spins("@H*", ave_pos=False) 558 559 # Create the pseudo-atoms. 560 for i in range(len(self.pseudo)): 561 self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear") 562 self.interpreter.sequence.display() 563 564 # Read the NOE list. 565 self.interpreter.noe.read_restraints(file=self.noe_file) 566 567 # Set up the N-state model. 568 self.interpreter.n_state_model.select_model(model="fixed") 569 570 # Print out. 571 print("\n"*2 + "# Set up complete #" + "\n"*10) 572 573 # Loop over each ensemble. 574 noe_viol = [] 575 for ens in range(self.num_ens): 576 # Print out the ensemble to both the log and screen. 577 if self.log: 578 sys.stdout.write(config + repr(ens) + "\n") 579 sys.stderr.write(config + repr(ens) + "\n") 580 581 # Delete the old structures and rename the molecule. 582 self.interpreter.structure.delete() 583 584 # Read the ensemble. 585 self.interpreter.structure.read_pdb("ensembles" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1))) 586 587 # Get the atomic positions. 588 self.interpreter.structure.get_pos(ave_pos=False) 589 590 # Calculate the average NOE potential. 591 self.interpreter.minimise.calculate() 592 593 # Sum the violations. 594 cdp.sum_viol = 0.0 595 for i in range(len(cdp.ave_dist)): 596 if cdp.quad_pot[i][2]: 597 cdp.sum_viol = cdp.sum_viol + cdp.quad_pot[i][2] 598 599 # Write out the NOE violation. 600 noe_viol.append([cdp.sum_viol, ens]) 601 out.write("%-20i%30.15f\n" % (ens, cdp.sum_viol)) 602 603 # Save the state. 604 self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True) 605 606 # Sort the NOE violations. 607 noe_viol.sort() 608 609 # Write the data. 610 for i in range(len(noe_viol)): 611 out_sorted.write("%-20i%20.15f\n" % (noe_viol[i][1], noe_viol[i][0]))
612 613
614 - def rdc_analysis(self):
615 """Perform the RDC part of the analysis.""" 616 617 # Redirect STDOUT to a log file. 618 if self.log: 619 sys.stdout = open(self.results_dir+sep+"logs" + sep + "RDC_%s_analysis.log" % self.rdc_name, 'w') 620 621 # The dipolar constant. 622 d = 0.0 623 if self.bond_length != None: 624 d = 3.0 / (2.0*pi) * dipolar_constant(periodic_table.gyromagnetic_ratio('13C'), periodic_table.gyromagnetic_ratio('1H'), self.bond_length) 625 626 # Create a directory for the save files. 627 dir = self.results_dir + sep + "RDC_%s_results" % self.rdc_name 628 mkdir_nofail(dir=dir) 629 630 # Loop over the configurations. 631 for config in self.configs: 632 # Print out. 633 print("\n"*10 + "# Set up for config " + config + " #" + "\n") 634 635 # Open the results files. 636 out = open(self.results_dir+sep+"Q_factors_" + config, 'w') 637 out_sorted = open(self.results_dir+sep+"Q_factors_" + config + "_sorted", 'w') 638 out.write("%-20s%20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)", "RDC_Q_factor(standard)")) 639 out_sorted.write("%-20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)")) 640 641 # Create the data pipe. 642 self.interpreter.pipe.create("rdc_analysis_%s" % config, "N-state") 643 644 # Read the first structure. 645 self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1))) 646 647 # Load all spins as the sequence. 648 self.interpreter.structure.load_spins(ave_pos=False) 649 650 # Create the pseudo-atoms. 651 for i in range(len(self.pseudo)): 652 self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear") 653 self.interpreter.sequence.display() 654 655 # Read the RDC data. 656 self.interpreter.rdc.read(align_id=self.rdc_file, file=self.rdc_file, spin_id1_col=self.rdc_spin_id1_col, spin_id2_col=self.rdc_spin_id2_col, data_col=self.rdc_data_col, error_col=self.rdc_error_col) 657 658 # Define the magnetic dipole-dipole relaxation interaction. 659 if self.bond_length != None: 660 self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length) 661 self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@Q*', ave_dist=self.bond_length) 662 else: 663 self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3) 664 665 # Set the nuclear isotope. 666 self.interpreter.spin.isotope(isotope='13C', spin_id='@C*') 667 self.interpreter.spin.isotope(isotope='1H', spin_id='@H*') 668 self.interpreter.spin.isotope(isotope='1H', spin_id='@Q*') 669 670 # Set up the model. 671 self.interpreter.n_state_model.select_model(model="fixed") 672 673 # Print out. 674 print("\n"*2 + "# Set up complete #" + "\n"*10) 675 676 # Loop over each ensemble. 677 q_factors = [] 678 for ens in range(self.num_ens): 679 # Print out the ensemble to both the log and screen. 680 if self.log: 681 sys.stdout.write(config + repr(ens) + "\n") 682 sys.stderr.write(config + repr(ens) + "\n") 683 684 # Delete the old structures. 685 self.interpreter.structure.delete() 686 687 # Read the ensemble. 688 self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1))) 689 690 # Get the positional information, then load the CH vectors. 691 self.interpreter.structure.get_pos(ave_pos=False) 692 if self.bond_length != None: 693 self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length) 694 else: 695 self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3) 696 self.interpreter.interatom.unit_vectors(ave=False) 697 698 # Minimisation. 699 #minimise.grid_search(inc=4) 700 self.interpreter.minimise.execute("simplex", constraints=False) 701 702 # Store and write out the Q factors. 703 q_factors.append([cdp.q_rdc_norm_squared_sum, ens]) 704 out.write("%-20i%20.15f%20.15f\n" % (ens, cdp.q_rdc_norm_squared_sum, cdp.q_rdc_norm_squared_sum)) 705 706 # Calculate the alignment tensor in Hz, and store it for reference. 707 cdp.align_tensor_Hz = d * cdp.align_tensors[0].A 708 cdp.align_tensor_Hz_5D = d * cdp.align_tensors[0].A_5D 709 710 # Save the state. 711 self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True) 712 713 # Sort the NOE violations. 714 q_factors.sort() 715 716 # Write the data. 717 for i in range(len(q_factors)): 718 out_sorted.write("%-20i%20.15f\n" % (q_factors[i][1], q_factors[i][0]))
719 720
721 - def sample(self):
722 """Generate the ensembles by random sampling of the snapshots.""" 723 724 # Create the directory for the ensembles, if needed. 725 mkdir_nofail(dir=self.results_dir + sep + "ensembles") 726 727 # Loop over the configurations. 728 for conf_index in range(len(self.configs)): 729 # Loop over each ensemble. 730 for ens in range(self.num_ens): 731 # Random sampling. 732 rand = [] 733 for j in range(self.num_models): 734 rand.append(randint(self.snapshot_min[conf_index], self.snapshot_max[conf_index])) 735 736 # Print out. 737 print("Generating ensemble %s%s from structures %s." % (self.configs[conf_index], ens, rand)) 738 739 # The file name. 740 file_name = "ensembles" + sep + self.configs[conf_index] + repr(ens) + ".pdb" 741 742 # Open the output file. 743 out = open(self.results_dir+sep+file_name, 'w') 744 745 # Header. 746 out.write("REM Structures: " + repr(rand) + "\n") 747 748 # Concatenation the files. 749 for j in range(self.num_models): 750 # The random file. 751 rand_name = self.snapshot_dir[conf_index] + sep + self.configs[conf_index] + repr(rand[j]) + ".pdb" 752 753 # Append the file. 754 out.write(open(rand_name).read()) 755 756 # Close the file. 757 out.close()
758 759
760 - def superimpose(self):
761 """Superimpose the ensembles using fit to first in Molmol.""" 762 763 # Create the output directory. 764 mkdir_nofail("ensembles_superimposed") 765 766 # Logging turned on. 767 if self.log: 768 log = open(self.results_dir+sep+"logs" + sep + "superimpose_molmol.stderr", 'w') 769 sys.stdout = open(self.results_dir+sep+"logs" + sep + "superimpose.log", 'w') 770 771 # Loop over S and R. 772 for config in ["R", "S"]: 773 # Loop over each ensemble. 774 for ens in range(self.num_ens): 775 # The file names. 776 file_in = "ensembles" + sep + config + repr(ens) + ".pdb" 777 file_out = "ensembles_superimposed" + sep + config + repr(ens) + ".pdb" 778 779 # Print out. 780 sys.stderr.write("Superimposing %s with Molmol, output to %s.\n" % (file_in, file_out)) 781 if self.log: 782 log.write("\n\n\nSuperimposing %s with Molmol, output to %s.\n" % (file_in, file_out)) 783 784 # Failure handling (if a failure occurred and this is rerun, skip all existing files). 785 if access(self.results_dir+sep+file_out, F_OK): 786 continue 787 788 # Open the Molmol pipe. 789 pipe = Popen("molmol -t -f -", shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=False) 790 791 # Init all. 792 pipe.stdin.write("InitAll yes\n") 793 794 # Read the PDB. 795 pipe.stdin.write("ReadPdb " + self.results_dir+sep+file_in + "\n") 796 797 # Fitting to mean. 798 pipe.stdin.write("Fit to_first 'selected'\n") 799 pipe.stdin.write("Fit to_mean 'selected'\n") 800 801 # Write the result. 802 pipe.stdin.write("WritePdb " + self.results_dir+sep+file_out + "\n") 803 804 # End Molmol. 805 pipe.stdin.close() 806 807 # Get STDOUT and STDERR. 808 sys.stdout.write(pipe.stdout.read()) 809 if self.log: 810 log.write(pipe.stderr.read()) 811 812 # Close the pipe. 813 pipe.stdout.close() 814 pipe.stderr.close() 815 816 # Open the superimposed file in relax. 817 self.interpreter.reset() 818 self.interpreter.pipe.create('out', 'N-state') 819 self.interpreter.structure.read_pdb(file_out) 820 821 # Fix the retarded MOLMOL proton naming. 822 for model in cdp.structure.structural_data: 823 # Alias. 824 mol = model.mol[0] 825 826 # Loop over all atoms. 827 for i in range(len(mol.atom_name)): 828 # A proton. 829 if search('H', mol.atom_name[i]): 830 mol.atom_name[i] = mol.atom_name[i][1:] + mol.atom_name[i][0] 831 832 # Replace the superimposed file. 833 self.interpreter.structure.write_pdb(config + repr(ens) + ".pdb", dir=self.results_dir+sep+"ensembles_superimposed", force=True)
834