Package auto_analyses :: Module stereochem_analysis

Source Code for Module auto_analyses.stereochem_analysis

###############################################################################
#                                                                             #
# Copyright (C) 2010-2014 Edward d'Auvergne                                   #
#                                                                             #
# This file is part of the program relax (http://www.nmr-relax.com).          #
#                                                                             #
# This program is free software: you can redistribute it and/or modify        #
# it under the terms of the GNU General Public License as published by        #
# the Free Software Foundation, either version 3 of the License, or           #
# (at your option) any later version.                                         #
#                                                                             #
# This program is distributed in the hope that it will be useful,             #
# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
# GNU General Public License for more details.                                #
#                                                                             #
# You should have received a copy of the GNU General Public License           #
# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
#                                                                             #
###############################################################################

# Module docstring.
"""Determination of relative stereochemistry in organic molecules.

The analysis is performed using multiple ensembles of structures, randomly sampled from a given
set of structures.  The discrimination is performed by comparing the sets of ensembles using NOE
violations and RDC Q-factors.

The analysis is split into multiple stages:

    1.  The random sampling of the snapshots to generate the N ensembles (NUM_ENS, usually 10,000 to
    100,000) of M members (NUM_MODELS, usually ~10).  The original snapshot files are expected to be
    named as SNAPSHOT_DIR + CONFIG + a number from SNAPSHOT_MIN to SNAPSHOT_MAX + ".pdb", e.g.
    "snapshots/R647.pdb".  The ensembles will be placed into the "ensembles" directory.

    2.  The NOE violation analysis.

    3.  The superimposition of ensembles.  For each ensemble, Molmol is used for superimposition
    using the fit to first algorithm.  The superimposed ensembles will be placed into the
    "ensembles_superimposed" directory.  This stage is not necessary for the NOE analysis.

    4.  The RDC Q-factor analysis.

    5.  Generation of Grace graphs.

    6.  Final ordering of ensembles using the combined RDC and NOE Q-factors, whereby the NOE
    Q-factor is defined as::

        Q^2 = U / sum(NOE_i^2),

    where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.  The
    denominator is the sum of all squared NOEs - this must be given as the value of NOE_NORM.  The
    combined Q is given by::

        Q_total^2 = Q_NOE^2 + Q_RDC^2.
"""

# Dependencies.
import dep_check

# Python module imports.
from math import pi, sqrt
from os import F_OK, access, getcwd, sep
from random import randint
from re import search
if dep_check.subprocess_module:
    from subprocess import PIPE, Popen
import sys

# relax module imports.
from pipe_control.grace import write_xy_data, write_xy_header
from lib.physical_constants import dipolar_constant, g1H, g13C
from prompt.interpreter import Interpreter
from lib.errors import RelaxError
from lib.io import mkdir_nofail
from status import Status; status = Status()


class Stereochem_analysis:
    """Class for performing the relative stereochemistry analysis."""

    def __init__(self, stage=1, results_dir=None, num_ens=10000, num_models=10, configs=None, snapshot_dir='snapshots', snapshot_min=None, snapshot_max=None, pseudo=None, noe_file=None, noe_norm=None, rdc_name=None, rdc_file=None, rdc_spin_id1_col=None, rdc_spin_id2_col=None, rdc_data_col=None, rdc_error_col=None, bond_length=None, bond_length_file=None, log=None, bucket_num=200, lower_lim_noe=0.0, upper_lim_noe=600.0, lower_lim_rdc=0.0, upper_lim_rdc=1.0):
        """Set up for the stereochemistry analysis.

        @keyword stage:                 Stage of analysis (see the module docstring above for the options).
        @type stage:                    int
        @keyword results_dir:           The optional directory to place all results files into.
        @type results_dir:              None or str
        @keyword num_ens:               Number of ensembles.
        @type num_ens:                  int
        @keyword num_models:            Ensemble size.
        @type num_models:               int
        @keyword configs:               All the configurations.
        @type configs:                  list of str
        @keyword snapshot_dir:          Snapshot directories (corresponding to the configurations).
        @type snapshot_dir:             list of str
        @keyword snapshot_min:          The number of the first snapshots (corresponding to the configurations).
        @type snapshot_min:             list of int
        @keyword snapshot_max:          The number of the last snapshots (corresponding to the configurations).
        @type snapshot_max:             list of int
        @keyword pseudo:                The list of pseudo-atoms.  Each element is a list of the pseudo-atom name and a list of all those atoms forming the pseudo-atom.  For example, pseudo = [["Q7", ["@H16", "@H17", "@H18"]], ["Q9", ["@H20", "@H21", "@H22"]]].
        @type pseudo:                   list of list of str and list of str
        @keyword noe_file:              The name of the NOE restraint file.
        @type noe_file:                 str
        @keyword noe_norm:              The NOE normalisation factor (equal to the sum of all NOEs squared).
        @type noe_norm:                 float
        @keyword rdc_name:              The label for this RDC data set.
        @type rdc_name:                 str
        @keyword rdc_file:              The name of the RDC file.
        @type rdc_file:                 str
        @keyword rdc_spin_id1_col:      The spin ID column of the first spin in the RDC file.
        @type rdc_spin_id1_col:         None or int
        @keyword rdc_spin_id2_col:      The spin ID column of the second spin in the RDC file.
        @type rdc_spin_id2_col:         None or int
        @keyword rdc_data_col:          The data column of the RDC file.
        @type rdc_data_col:             int
        @keyword rdc_error_col:         The error column of the RDC file.
        @type rdc_error_col:            int
        @keyword bond_length:           The bond length value in meters.  This overrides the bond_length_file argument.
        @type bond_length:              float or None
        @keyword bond_length_file:      The file of bond lengths for each atom pair in meters.  The first and second columns must be the spin ID strings and the third column must contain the data.
        @type bond_length_file:         str or None
        @keyword log:                   Log file output flag (only for certain stages).
        @type log:                      bool
        @keyword bucket_num:            Number of buckets for the distribution plots.
        @type bucket_num:               int
        @keyword lower_lim_noe:         Distribution plot limits.
        @type lower_lim_noe:            int
        @keyword upper_lim_noe:         Distribution plot limits.
        @type upper_lim_noe:            int
        @keyword lower_lim_rdc:         Distribution plot limits.
        @type lower_lim_rdc:            int
        @keyword upper_lim_rdc:         Distribution plot limits.
        @type upper_lim_rdc:            int
        """

        # Execution lock.
        status.exec_lock.acquire('auto stereochem analysis', mode='auto-analysis')

        # Set up the analysis status object.
        status.init_auto_analysis('stereochem', type='stereochem')
        status.current_analysis = 'auto stereochem analysis'

        # Store all the args.
        self.stage = stage
        self.results_dir = results_dir
        self.num_ens = num_ens
        self.num_models = num_models
        self.configs = configs
        self.snapshot_dir = snapshot_dir
        self.snapshot_min = snapshot_min
        self.snapshot_max = snapshot_max
        self.pseudo = pseudo
        self.noe_file = noe_file
        self.noe_norm = noe_norm
        self.rdc_name = rdc_name
        self.rdc_file = rdc_file
        self.rdc_spin_id1_col = rdc_spin_id1_col
        self.rdc_spin_id2_col = rdc_spin_id2_col
        self.rdc_data_col = rdc_data_col
        self.rdc_error_col = rdc_error_col
        self.bond_length = bond_length
        self.bond_length_file = bond_length_file
        self.log = log
        self.bucket_num = bucket_num
        self.lower_lim_noe = lower_lim_noe
        self.upper_lim_noe = upper_lim_noe
        self.lower_lim_rdc = lower_lim_rdc
        self.upper_lim_rdc = upper_lim_rdc

        # Load the interpreter.
        self.interpreter = Interpreter(show_script=False, raise_relax_error=True)
        self.interpreter.populate_self()
        self.interpreter.on(verbose=False)

        # Create the results directory.
        if self.results_dir:
            mkdir_nofail(self.results_dir)

        # Or use the current working directory.
        else:
            self.results_dir = getcwd()

        # Create a directory for log files.
        if self.log:
            mkdir_nofail(self.results_dir + sep + "logs")

        # Finish and unlock execution.
        status.auto_analysis['stereochem'].fin = True
        status.current_analysis = None
        status.exec_lock.release()


    def run(self):
        """Execute the given stage of the analysis."""

        # Store the original STDOUT.
        self.stdout_orig = sys.stdout

        # Sampling of snapshots.
        if self.stage == 1:
            self.sample()

        # NOE violation analysis.
        elif self.stage == 2:
            self.noe_viol()

        # Ensemble superimposition.
        elif self.stage == 3:
            self.superimpose()

        # RDC Q-factor analysis.
        elif self.stage == 4:
            self.rdc_analysis()

        # Grace plot creation.
        elif self.stage == 5:
            self.grace_plots()

        # Final combined Q ordering.
        elif self.stage == 6:
            self.combined_q()

        # Unknown stage.
        else:
            raise RelaxError("The stage number %s is unknown." % self.stage)

        # Restore STDOUT.
        sys.stdout = self.stdout_orig


    def combined_q(self):
        """Calculate the combined Q-factor.

        The combined Q is defined as::

            Q_total^2 = Q_NOE^2 + Q_RDC^2,

        and the NOE Q-factor as::

            Q^2 = U / sum(NOE_i^2),

        where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.
        """

        # Checks.
        if not access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
            raise RelaxError("The NOE analysis has not been performed, cannot find the file '%s'." % (self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted"))
        if not access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
            raise RelaxError("The RDC analysis has not been performed, cannot find the file '%s'." % (self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted"))

        # Loop over the configurations.
        for i in range(len(self.configs)):
            # Print out.
            print("Creating the combined Q-factor file for configuration '%s'." % self.configs[i])

            # Open the NOE results file and read the data.
            file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
            noe_lines = file.readlines()
            file.close()

            # Open the RDC results file and read the data.
            file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
            rdc_lines = file.readlines()
            file.close()

            # The combined Q-factor file.
            out = open(self.results_dir+sep+"Q_total_%s" % self.configs[i], 'w')
            out_sorted = open(self.results_dir+sep+"Q_total_%s_sorted" % self.configs[i], 'w')

            # Loop over the data (skipping the header line).
            data = []
            for j in range(1, len(noe_lines)):
                # Split the lines.
                ens = int(noe_lines[j].split()[0])
                noe_viol = float(noe_lines[j].split()[1])
                q_rdc = float(rdc_lines[j].split()[1])

                # The NOE Q-factor.
                q_noe = sqrt(noe_viol/self.noe_norm)

                # Combined Q.
                q = sqrt(q_noe**2 + q_rdc**2)

                # Write out the unsorted list.
                out.write("%-20i%20.15f\n" % (ens, q))

                # Store the values.
                data.append([q, ens])

            # Sort the combined Q.
            data.sort()

            # Write the data.
            for i in range(len(data)):
                out_sorted.write("%-20i%20.15f\n" % (data[i][1], data[i][0]))

            # Close the files.
            out.close()
            out_sorted.close()


    def generate_distribution(self, values, lower=0.0, upper=200.0, inc=None):
        """Create the distribution data structure."""

        # The bin width.
        bin_width = (upper - lower)/float(inc)

        # Init the dist object.
        dist = []
        for i in range(inc):
            dist.append([bin_width*i+lower, 0])

        # Loop over the values.
        for val in values:
            # The bin.
            bin = int((val - lower)/bin_width)

            # Outside of the limits.
            if bin < 0 or bin >= inc:
                print("Outside of the limits: '%s'" % val)
                continue

            # Increment the count.
            dist[bin][1] = dist[bin][1] + 1

        # Convert the counts to frequencies.
        total_pr = 0.0
        for i in range(inc):
            dist[i][1] = dist[i][1] / float(len(values))
            total_pr = total_pr + dist[i][1]

        print("Total Pr: %s" % total_pr)

        # Return the dist.
        return dist


    def grace_plots(self):
        """Generate grace plots of the results."""

        # The number of configs.
        n = len(self.configs)

        # The colours for the different configs.
        defaults = [4, 2]    # Blue and red.
        colours = []
        for i in range(n):
            # Default colours.
            if i < len(defaults):
                colours.append(defaults[i])

            # Otherwise black!
            else:
                colours.append(0)

        # The ensemble number text.
        ens_text = ''
        dividers = [1e15, 1e12, 1e9, 1e6, 1e3, 1]
        num_ens = self.num_ens
        for i in range(len(dividers)):
            # The number.
            num = int(num_ens / dividers[i])

            # The text.
            if num:
                text = repr(num)
            elif not num and ens_text:
                text = '000'
            else:
                continue

            # Update the text.
            ens_text = ens_text + text

            # A comma.
            if i < len(dividers)-1:
                ens_text = ens_text + ','

            # Remove the front part of the number.
            num_ens = num_ens - dividers[i]*num

        # Subtitle for all graphs.
        subtitle = '%s ensembles of %s' % (ens_text, self.num_models)

        # NOE violations.
        if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
            # Print out.
            print("Generating NOE violation Grace plots.")

            # Open the output files.
            grace_curve = open(self.results_dir+sep+"NOE_viol_curve.agr", 'w')
            grace_dist = open(self.results_dir+sep+"NOE_viol_dist.agr", 'w')

            # Loop over the configurations.
            data = []
            dist = []
            for i in range(n):
                # Open the results file and read the data.
                file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i] + "_sorted")
                lines = file.readlines()
                file.close()

                # Add a new graph set.
                data.append([])

                # Loop over the ensembles and extract the NOE violation.
                noe_viols = []
                for j in range(1, len(lines)):
                    # Extract the violation.
                    viol = float(lines[j].split()[1])
                    noe_viols.append(viol)

                    # Add to the data structure.
                    data[i].append([j, viol])

                # Calculate the violation distribution.
                dist.append(self.generate_distribution(noe_viols, inc=self.bucket_num, upper=self.upper_lim_noe, lower=self.lower_lim_noe))

            # Headers.
            write_xy_header(file=grace_curve, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0]*n], axis_labels=[['Ensemble (sorted)', 'NOE violation (Angstrom\\S2\\N)']], legend_pos=[[0.3, 0.8]])
            write_xy_header(file=grace_dist, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1]*n], symbol_sizes=[[0.5]*n], linestyle=[[3]*n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', 'Frequency']], legend_pos=[[1.1, 0.8]])

            # Write the data.
            write_xy_data([data], file=grace_curve, graph_type='xy')
            write_xy_data([dist], file=grace_dist, graph_type='xy')

            # Close the files.
            grace_curve.close()
            grace_dist.close()

        # RDC Q-factors.
        if access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
            # Print out.
            print("Generating RDC Q-factor Grace plots.")

            # Open the Grace output files.
            grace_curve = open(self.results_dir+sep+"RDC_%s_curve.agr" % self.rdc_name, 'w')
            grace_dist = open(self.results_dir+sep+"RDC_%s_dist.agr" % self.rdc_name, 'w')

            # Loop over the configurations.
            data = []
            dist = []
            for i in range(n):
                # Open the results file and read the data.
                file = open(self.results_dir+sep+"Q_factors_" + self.configs[i] + "_sorted")
                lines = file.readlines()
                file.close()

                # Add a new graph set.
                data.append([])

                # Loop over the Q-factors.
                values = []
                for j in range(1, len(lines)):
                    # Extract the Q-factor.
                    value = float(lines[j].split()[1])
                    values.append(value)

                    # Add to the data structure.
                    data[i].append([j, value])

                # Calculate the Q-factor distribution.
                dist.append(self.generate_distribution(values, inc=self.bucket_num, upper=self.upper_lim_rdc, lower=self.lower_lim_rdc))

            # Headers.
            write_xy_header(file=grace_curve, title='%s RDC Q-factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0]*n], axis_labels=[['Ensemble (sorted)', '%s RDC Q-factor (pales format)' % self.rdc_name]], legend_pos=[[0.3, 0.8]])
            write_xy_header(file=grace_dist, title='%s RDC Q-factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1]*n], symbol_sizes=[[0.5]*n], linestyle=[[3]*n], axis_labels=[['%s RDC Q-factor (pales format)' % self.rdc_name, 'Frequency']], legend_pos=[[1.1, 0.8]])

            # Write the data.
            write_xy_data([data], file=grace_curve, graph_type='xy')
            write_xy_data([dist], file=grace_dist, graph_type='xy')

            # Close the files.
            grace_curve.close()
            grace_dist.close()

        # NOE-RDC correlation plots.
        if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK) and access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
            # Print out.
            print("Generating NOE-RDC correlation Grace plots.")

            # Open the Grace output files.
            grace_file = open(self.results_dir+sep+"correlation_plot.agr", 'w')
            grace_file_scaled = open(self.results_dir+sep+"correlation_plot_scaled.agr", 'w')

            # Grace data.
            data = []
            data_scaled = []
            for i in range(len(self.configs)):
                # Open the NOE results file and read the data.
                file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
                noe_lines = file.readlines()
                file.close()

                # Add a new graph set.
                data.append([])
                data_scaled.append([])

                # Open the RDC results file and read the data.
                file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
                rdc_lines = file.readlines()
                file.close()

                # Loop over the data.
                for j in range(1, len(noe_lines)):
                    # Split the lines.
                    noe_viol = float(noe_lines[j].split()[1])
                    q_factor = float(rdc_lines[j].split()[1])

                    # Add the xy pair.
                    data[i].append([noe_viol, q_factor])
                    data_scaled[i].append([sqrt(noe_viol/self.noe_norm), q_factor])

            # Write the data.
            write_xy_header(file=grace_file, title='Correlation plot - %s RDC vs. NOE' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', '%s RDC Q-factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
            write_xy_header(file=grace_file_scaled, title='Correlation plot - %s RDC vs. NOE Q-factor' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['Normalised NOE violation (Q = sqrt(U / \\xS\\f{}NOE\\si\\N\\S2\\N))', '%s RDC Q-factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
            write_xy_data([data], file=grace_file, graph_type='xy')
            write_xy_data([data_scaled], file=grace_file_scaled, graph_type='xy')


    def noe_viol(self):
        """NOE violation calculations."""

        # Redirect STDOUT to a log file.
        if self.log:
            sys.stdout = open(self.results_dir+sep+"logs" + sep + "NOE_viol.log", 'w')

        # Create a directory for the save files.
        dir = self.results_dir + sep + "NOE_results"
        mkdir_nofail(dir=dir)

        # Loop over the configurations.
        for config in self.configs:
            # Print out.
            print("\n"*10 + "# Set up for config " + config + " #" + "\n")

            # Open the results file.
            out = open(self.results_dir+sep+"NOE_viol_" + config, 'w')
            out_sorted = open(self.results_dir+sep+"NOE_viol_" + config + "_sorted", 'w')
            out.write("%-20s%20s\n" % ("# Ensemble", "NOE_violation"))
            out_sorted.write("%-20s%20s\n" % ("# Ensemble", "NOE_violation"))

            # Create the data pipe.
            self.interpreter.pipe.create("noe_viol_%s" % config, "N-state")

            # Read the first structure.
            self.interpreter.structure.read_pdb("ensembles" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))

            # Load all protons as the sequence.
            self.interpreter.structure.load_spins("@H*", ave_pos=False)

            # Create the pseudo-atoms.
            for i in range(len(self.pseudo)):
                self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
            self.interpreter.sequence.display()

            # Read the NOE list.
            self.interpreter.noe.read_restraints(file=self.noe_file)

            # Set up the N-state model.
            self.interpreter.n_state_model.select_model(model="fixed")

            # Print out.
            print("\n"*2 + "# Set up complete #" + "\n"*10)

            # Loop over each ensemble.
            noe_viol = []
            for ens in range(self.num_ens):
                # Print out the ensemble to both the log and screen.
                if self.log:
                    sys.stdout.write(config + repr(ens) + "\n")
                sys.stderr.write(config + repr(ens) + "\n")

                # Delete the old structures and rename the molecule.
                self.interpreter.structure.delete()

                # Read the ensemble.
                self.interpreter.structure.read_pdb("ensembles" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))

                # Get the atomic positions.
                self.interpreter.structure.get_pos(ave_pos=False)

                # Calculate the average NOE potential.
                self.interpreter.calc()

                # Sum the violations.
                cdp.sum_viol = 0.0
                for i in range(len(cdp.ave_dist)):
                    if cdp.quad_pot[i][2]:
                        cdp.sum_viol = cdp.sum_viol + cdp.quad_pot[i][2]

                # Write out the NOE violation.
                noe_viol.append([cdp.sum_viol, ens])
                out.write("%-20i%30.15f\n" % (ens, cdp.sum_viol))

                # Save the state.
                self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)

            # Sort the NOE violations.
            noe_viol.sort()

            # Write the data.
            for i in range(len(noe_viol)):
                out_sorted.write("%-20i%20.15f\n" % (noe_viol[i][1], noe_viol[i][0]))


    def rdc_analysis(self):
        """Perform the RDC part of the analysis."""

        # Redirect STDOUT to a log file.
        if self.log:
            sys.stdout = open(self.results_dir+sep+"logs" + sep + "RDC_%s_analysis.log" % self.rdc_name, 'w')

        # The dipolar constant.
        d = 0.0
        if self.bond_length != None:
            d = 3.0 / (2.0*pi) * dipolar_constant(g13C, g1H, self.bond_length)

        # Create a directory for the save files.
        dir = self.results_dir + sep + "RDC_%s_results" % self.rdc_name
        mkdir_nofail(dir=dir)

        # Loop over the configurations.
        for config in self.configs:
            # Print out.
            print("\n"*10 + "# Set up for config " + config + " #" + "\n")

            # Open the results files.
            out = open(self.results_dir+sep+"Q_factors_" + config, 'w')
            out_sorted = open(self.results_dir+sep+"Q_factors_" + config + "_sorted", 'w')
            out.write("%-20s%20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)", "RDC_Q_factor(standard)"))
            out_sorted.write("%-20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)"))

            # Create the data pipe.
            self.interpreter.pipe.create("rdc_analysis_%s" % config, "N-state")

            # Read the first structure.
            self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))

            # Load all spins as the sequence.
            self.interpreter.structure.load_spins(ave_pos=False)

            # Create the pseudo-atoms.
            for i in range(len(self.pseudo)):
                self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
            self.interpreter.sequence.display()

            # Read the RDC data.
            self.interpreter.rdc.read(align_id=self.rdc_file, file=self.rdc_file, spin_id1_col=self.rdc_spin_id1_col, spin_id2_col=self.rdc_spin_id2_col, data_col=self.rdc_data_col, error_col=self.rdc_error_col)

            # Define the magnetic dipole-dipole relaxation interaction.
            if self.bond_length != None:
                self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
                self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@Q*', ave_dist=self.bond_length)
            else:
                self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)

            # Set the nuclear isotope.
            self.interpreter.spin.isotope(isotope='13C', spin_id='@C*')
            self.interpreter.spin.isotope(isotope='1H', spin_id='@H*')
            self.interpreter.spin.isotope(isotope='1H', spin_id='@Q*')

            # Set up the model.
            self.interpreter.n_state_model.select_model(model="fixed")

            # Print out.
            print("\n"*2 + "# Set up complete #" + "\n"*10)

            # Loop over each ensemble.
            q_factors = []
            for ens in range(self.num_ens):
                # Print out the ensemble to both the log and screen.
                if self.log:
                    sys.stdout.write(config + repr(ens) + "\n")
                sys.stderr.write(config + repr(ens) + "\n")

                # Delete the old structures.
                self.interpreter.structure.delete()

                # Read the ensemble.
                self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))

                # Get the positional information, then load the CH vectors.
                self.interpreter.structure.get_pos(ave_pos=False)
                if self.bond_length != None:
                    self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
                else:
                    self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)
                self.interpreter.interatom.unit_vectors(ave=False)

                # Minimisation.
                #grid_search(inc=4)
                self.interpreter.minimise("simplex", constraints=False)

                # Store and write out the Q-factors.
                q_factors.append([cdp.q_rdc, ens])
                out.write("%-20i%20.15f%20.15f\n" % (ens, cdp.q_rdc, cdp.q_rdc_norm2))

                # Calculate the alignment tensor in Hz, and store it for reference.
                cdp.align_tensor_Hz = d * cdp.align_tensors[0].A
                cdp.align_tensor_Hz_5D = d * cdp.align_tensors[0].A_5D

                # Save the state.
                self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)

            # Sort the Q-factors.
            q_factors.sort()

            # Write the data.
            for i in range(len(q_factors)):
                out_sorted.write("%-20i%20.15f\n" % (q_factors[i][1], q_factors[i][0]))


    def sample(self):
        """Generate the ensembles by random sampling of the snapshots."""

        # Create the directory for the ensembles, if needed.
        mkdir_nofail(dir=self.results_dir + sep + "ensembles")

        # Loop over the configurations.
        for conf_index in range(len(self.configs)):
            # Loop over each ensemble.
            for ens in range(self.num_ens):
                # Random sampling.
                rand = []
                for j in range(self.num_models):
                    rand.append(randint(self.snapshot_min[conf_index], self.snapshot_max[conf_index]))

                # Print out.
                print("Generating ensemble %s%s from structures %s." % (self.configs[conf_index], ens, rand))

                # The file name.
                file_name = "ensembles" + sep + self.configs[conf_index] + repr(ens) + ".pdb"

                # Open the output file.
                out = open(self.results_dir+sep+file_name, 'w')

                # Header.
                out.write("REM Structures: " + repr(rand) + "\n")

                # Concatenate the files.
                for j in range(self.num_models):
                    # The random file.
                    rand_name = self.snapshot_dir[conf_index] + sep + self.configs[conf_index] + repr(rand[j]) + ".pdb"

                    # Append the file.
                    out.write(open(rand_name).read())

                # Close the file.
                out.close()


    def superimpose(self):
        """Superimpose the ensembles using fit to first in Molmol."""

        # Create the output directory.
        mkdir_nofail("ensembles_superimposed")

        # Logging turned on.
        if self.log:
            log = open(self.results_dir+sep+"logs" + sep + "superimpose_molmol.stderr", 'w')
            sys.stdout = open(self.results_dir+sep+"logs" + sep + "superimpose.log", 'w')

        # Loop over S and R.
        for config in ["R", "S"]:
            # Loop over each ensemble.
            for ens in range(self.num_ens):
                # The file names.
                file_in = "ensembles" + sep + config + repr(ens) + ".pdb"
                file_out = "ensembles_superimposed" + sep + config + repr(ens) + ".pdb"

                # Print out.
                sys.stderr.write("Superimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
                if self.log:
                    log.write("\n\n\nSuperimposing %s with Molmol, output to %s.\n" % (file_in, file_out))

                # Failure handling (if a failure occurred and this is rerun, skip all existing files).
                if access(self.results_dir+sep+file_out, F_OK):
                    continue

                # Open the Molmol pipe.
                pipe = Popen("molmol -t -f -", shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=False)

                # Init all.
                pipe.stdin.write("InitAll yes\n")

                # Read the PDB.
                pipe.stdin.write("ReadPdb " + self.results_dir+sep+file_in + "\n")

                # Fit to the first structure, then to the mean.
                pipe.stdin.write("Fit to_first 'selected'\n")
                pipe.stdin.write("Fit to_mean 'selected'\n")

                # Write the result.
                pipe.stdin.write("WritePdb " + self.results_dir+sep+file_out + "\n")

                # End Molmol.
                pipe.stdin.close()

                # Get STDOUT and STDERR.
                sys.stdout.write(pipe.stdout.read())
                if self.log:
                    log.write(pipe.stderr.read())

                # Close the pipe.
                pipe.stdout.close()
                pipe.stderr.close()

                # Open the superimposed file in relax.
                self.interpreter.reset()
                self.interpreter.pipe.create('out', 'N-state')
                self.interpreter.structure.read_pdb(file_out)

                # Fix the non-standard MOLMOL proton naming.
                for model in cdp.structure.structural_data:
                    # Alias.
                    mol = model.mol[0]

                    # Loop over all atoms.
                    for i in range(len(mol.atom_name)):
                        # A proton.
                        if search('H', mol.atom_name[i]):
                            mol.atom_name[i] = mol.atom_name[i][1:] + mol.atom_name[i][0]

                # Replace the superimposed file.
                self.interpreter.structure.write_pdb(config + repr(ens) + ".pdb", dir=self.results_dir+sep+"ensembles_superimposed", force=True)
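
A minimal usage sketch follows, assuming hypothetical directory names, data files, column numbers and parameter values.  Such a driver script passes its settings to the class constructor and then executes the chosen stage, and it would normally be run through relax itself (which supplies the user function interpreter and the cdp data store) rather than through plain Python.

# Driver script sketch for the Stereochem_analysis class (all values are hypothetical placeholders).
from auto_analyses.stereochem_analysis import Stereochem_analysis

# The stage to execute (1-6, see the module docstring); the script is rerun once per stage.
STAGE = 1

Stereochem_analysis(
    stage=STAGE,
    results_dir="results",                        # Where all output files are placed.
    num_ens=10000,                                # The number N of randomly sampled ensembles.
    num_models=10,                                # The ensemble size M.
    configs=["R", "S"],                           # The configurations to discriminate.
    snapshot_dir=["snapshots", "snapshots"],      # One snapshot directory per configuration.
    snapshot_min=[0, 0],                          # First snapshot number per configuration.
    snapshot_max=[1000, 1000],                    # Last snapshot number per configuration.
    pseudo=[["Q7", ["@H16", "@H17", "@H18"]]],    # Pseudo-atom definitions.
    noe_file="noe_restraints.txt",                # The NOE restraint file (hypothetical name).
    noe_norm=2500.0,                              # NOE_NORM = sum(NOE_i^2).
    rdc_name="PAN",                               # A label for the RDC data set.
    rdc_file="rdcs.txt",                          # The RDC file (hypothetical name).
    rdc_spin_id1_col=1,
    rdc_spin_id2_col=2,
    rdc_data_col=3,
    rdc_error_col=4,
    bond_length=1.117e-10,                        # The C-H bond length in metres.
    log=True                                      # Write per-stage log files.
).run()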