
Source Code for Module auto_analyses.stereochem_analysis

  1  ############################################################################### 
  2  #                                                                             # 
  3  # Copyright (C) 2010-2012 Edward d'Auvergne                                   # 
  4  #                                                                             # 
  5  # This file is part of the program relax (http://www.nmr-relax.com).          # 
  6  #                                                                             # 
  7  # This program is free software: you can redistribute it and/or modify        # 
  8  # it under the terms of the GNU General Public License as published by        # 
  9  # the Free Software Foundation, either version 3 of the License, or           # 
 10  # (at your option) any later version.                                         # 
 11  #                                                                             # 
 12  # This program is distributed in the hope that it will be useful,             # 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 15  # GNU General Public License for more details.                                # 
 16  #                                                                             # 
 17  # You should have received a copy of the GNU General Public License           # 
 18  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
 19  #                                                                             # 
 20  ############################################################################### 
 21   
 22  # Module docstring. 
 23  """Determination of relative stereochemistry in organic molecules. 
 24   
  25  The analysis is performed using multiple ensembles of structures, randomly sampled from a given 
 26  set of structures.  The discrimination is performed by comparing the sets of ensembles using NOE 
 27  violations and RDC Q-factors. 
 28   
 29  This script is split into multiple stages: 
 30   
 31      1.  The random sampling of the snapshots to generate the N ensembles (NUM_ENS, usually 10,000 to 
 32      100,000) of M members (NUM_MODELS, usually ~10).  The original snapshot files are expected to be 
 33      named the SNAPSHOT_DIR + CONFIG + a number from SNAPSHOT_MIN to SNAPSHOT_MAX + ".pdb", e.g. 
 34      "snapshots/R647.pdb".  The ensembles will be placed into the "ensembles" directory. 
 35   
 36      2.  The NOE violation analysis. 
 37   
 38      3.  The superimposition of ensembles.  For each ensemble, Molmol is used for superimposition 
 39      using the fit to first algorithm.  The superimposed ensembles will be placed into the 
 40      "ensembles_superimposed" directory.  This stage is not necessary for the NOE analysis. 
 41   
 42      4.  The RDC Q-factor analysis. 
 43   
 44      5.  Generation of Grace graphs. 
 45   
 46      6.  Final ordering of ensembles using the combined RDC and NOE Q-factors, whereby the NOE 
 47      Q-factor is defined as:: 
 48   
 49          Q^2 = U / sum(NOE_i^2), 
 50   
 51      where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.  The 
 52      denominator is the sum of all squared NOEs - this must be given as the value of NOE_NORM.  The 
 53      combined Q is given by:: 
 54   
 55          Q_total^2 = Q_NOE^2 + Q_RDC^2. 
 56  """ 
 57   
 58  # Dependencies. 
 59  import dep_check 
 60   
 61  # Python module imports. 
 62  from math import pi, sqrt 
 63  from os import F_OK, access, getcwd, sep 
 64  from random import randint 
 65  from re import search 
 66  if dep_check.subprocess_module: 
 67      from subprocess import PIPE, Popen 
 68  import sys 
 69   
 70  # relax module imports. 
 71  from generic_fns import pipes 
 72  from generic_fns.grace import write_xy_data, write_xy_header 
 73  from generic_fns.selection import spin_loop 
 74  from physical_constants import dipolar_constant, g1H, g13C 
 75  from prompt.interpreter import Interpreter 
 76  from relax_errors import RelaxError 
 77  from relax_io import mkdir_nofail 
 78  from status import Status; status = Status() 
 79   
 80   
 81   
 82  class Stereochem_analysis:
 83      """Class for performing the relative stereochemistry analysis."""
 84
 85      def __init__(self, stage=1, results_dir=None, num_ens=10000, num_models=10, configs=None, snapshot_dir='snapshots', snapshot_min=None, snapshot_max=None, pseudo=None, noe_file=None, noe_norm=None, rdc_name=None, rdc_file=None, rdc_spin_id1_col=None, rdc_spin_id2_col=None, rdc_data_col=None, rdc_error_col=None, bond_length=None, log=None, bucket_num=200, lower_lim_noe=0.0, upper_lim_noe=600.0, lower_lim_rdc=0.0, upper_lim_rdc=1.0):
 86          """Set up for the stereochemistry analysis.
 87
 88          @keyword stage: Stage of analysis (see the module docstring above for the options).
 89          @type stage: int
 90          @keyword results_dir: The optional directory to place all results files into.
 91          @type results_dir: None or str
 92          @keyword num_ens: Number of ensembles.
 93          @type num_ens: int
 94          @keyword num_models: Ensemble size.
 95          @type num_models: int
 96          @keyword configs: All the configurations.
 97          @type configs: list of str
 98          @keyword snapshot_dir: Snapshot directories (corresponding to the configurations).
 99          @type snapshot_dir: list of str
100          @keyword snapshot_min: The number of the first snapshots (corresponding to the configurations).
101          @type snapshot_min: list of int
102          @keyword snapshot_max: The number of the last snapshots (corresponding to the configurations).
103          @type snapshot_max: list of int
104          @keyword pseudo: The list of pseudo-atoms.  Each element is a list of the pseudo-atom name and a list of all those atoms forming the pseudo-atom.  For example, pseudo = [["Q7", ["@H16", "@H17", "@H18"]], ["Q9", ["@H20", "@H21", "@H22"]]].
105          @type pseudo: list of list of str and list of str
106          @keyword noe_file: The name of the NOE restraint file.
107          @type noe_file: str
108          @keyword noe_norm: The NOE normalisation factor (equal to the sum of all NOEs squared).
109          @type noe_norm: float
110          @keyword rdc_name: The label for this RDC data set.
111          @type rdc_name: str
112          @keyword rdc_file: The name of the RDC file.
113          @type rdc_file: str
114          @keyword rdc_spin_id1_col: The spin ID column of the first spin in the RDC file.
115          @type rdc_spin_id1_col: None or int
116          @keyword rdc_spin_id2_col: The spin ID column of the second spin in the RDC file.
117          @type rdc_spin_id2_col: None or int
118          @keyword rdc_data_col: The data column of the RDC file.
119          @type rdc_data_col: int
120          @keyword rdc_error_col: The error column of the RDC file.
121          @type rdc_error_col: int
122          @keyword bond_length: The bond length value in meters.
123          @type bond_length: float
124          @keyword log: Log file output flag (only for certain stages).
125          @type log: bool
126          @keyword bucket_num: Number of buckets for the distribution plots.
127          @type bucket_num: int
128          @keyword lower_lim_noe: Distribution plot limits.
129          @type lower_lim_noe: int
130          @keyword upper_lim_noe: Distribution plot limits.
131          @type upper_lim_noe: int
132          @keyword lower_lim_rdc: Distribution plot limits.
133          @type lower_lim_rdc: int
134          @keyword upper_lim_rdc: Distribution plot limits.
135          @type upper_lim_rdc: int
136          """
137
138          # Execution lock.
139          status.exec_lock.acquire('auto stereochem analysis', mode='auto-analysis')
140
141          # Set up the analysis status object.
142          status.init_auto_analysis('stereochem', type='stereochem')
143          status.current_analysis = 'auto stereochem analysis'
144
145          # Store all the args.
146          self.stage = stage
147          self.results_dir = results_dir
148          self.num_ens = num_ens
149          self.num_models = num_models
150          self.configs = configs
151          self.snapshot_dir = snapshot_dir
152          self.snapshot_min = snapshot_min
153          self.snapshot_max = snapshot_max
154          self.pseudo = pseudo
155          self.noe_file = noe_file
156          self.noe_norm = noe_norm
157          self.rdc_name = rdc_name
158          self.rdc_file = rdc_file
159          self.rdc_spin_id1_col = rdc_spin_id1_col
160          self.rdc_spin_id2_col = rdc_spin_id2_col
161          self.rdc_data_col = rdc_data_col
162          self.rdc_error_col = rdc_error_col
163          self.bond_length = bond_length
164          self.log = log
165          self.bucket_num = bucket_num
166          self.lower_lim_noe = lower_lim_noe
167          self.upper_lim_noe = upper_lim_noe
168          self.lower_lim_rdc = lower_lim_rdc
169          self.upper_lim_rdc = upper_lim_rdc
170
171          # Load the interpreter.
172          self.interpreter = Interpreter(show_script=False, quit=False, raise_relax_error=True)
173          self.interpreter.populate_self()
174          self.interpreter.on(verbose=False)
175
176          # Create the results directory.
177          if self.results_dir:
178              mkdir_nofail(self.results_dir)
179
180          # Or use the current working directory.
181          else:
182              self.results_dir = getcwd()
183
184          # Create a directory for log files.
185          if self.log:
186              mkdir_nofail(self.results_dir + sep + "logs")
187
188          # Finish and unlock execution.
189          status.auto_analysis['stereochem'].fin = True
190          status.current_analysis = None
191          status.exec_lock.release()
192
193
194      def run(self):
195          """Execute the given stage of the analysis."""
196
197          # Store the original STDOUT.
198          self.stdout_orig = sys.stdout
199
200          # Sampling of snapshots.
201          if self.stage == 1:
202              self.sample()
203
204          # NOE violation analysis.
205          elif self.stage == 2:
206              self.noe_viol()
207
208          # Ensemble superimposition.
209          elif self.stage == 3:
210              self.superimpose()
211
212          # RDC Q-factor analysis.
213          elif self.stage == 4:
214              self.rdc_analysis()
215
216          # Grace plot creation.
217          elif self.stage == 5:
218              self.grace_plots()
219
220          # Final combined Q ordering.
221          elif self.stage == 6:
222              self.combined_q()
223
224          # Unknown stage.
225          else:
226              raise RelaxError("The stage number %s is unknown." % self.stage)
227
228          # Restore STDOUT.
229          sys.stdout = self.stdout_orig
230
231
232      def combined_q(self):
233          """Calculate the combined Q-factor.
234
235          The combined Q is defined as::
236
237              Q_total^2 = Q_NOE^2 + Q_RDC^2,
238
239          and the NOE Q-factor as::
240
241              Q^2 = U / sum(NOE_i^2),
242
243          where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.
244          """
245
246          # Checks.
247          if not access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
248              raise RelaxError("The NOE analysis has not been performed, cannot find the file '%s'." % (self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted"))
249          if not access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
250              raise RelaxError("The RDC analysis has not been performed, cannot find the file '%s'." % (self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted"))
251
252          # Loop over the configurations.
253          for i in range(len(self.configs)):
254              # Print out.
255              print("Creating the combined Q-factor file for configuration '%s'." % self.configs[i])
256
257              # Open the NOE results file and read the data.
258              file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
259              noe_lines = file.readlines()
260              file.close()
261
262              # Open the RDC results file and read the data.
263              file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
264              rdc_lines = file.readlines()
265              file.close()
266
267              # The combined Q-factor file.
268              out = open(self.results_dir+sep+"Q_total_%s" % self.configs[i], 'w')
269              out_sorted = open(self.results_dir+sep+"Q_total_%s_sorted" % self.configs[i], 'w')
270
271              # Loop over the data (skipping the header line).
272              data = []
273              for j in range(1, len(noe_lines)):
274                  # Split the lines.
275                  ens = int(noe_lines[j].split()[0])
276                  noe_viol = float(noe_lines[j].split()[1])
277                  q_rdc = float(rdc_lines[j].split()[1])
278
279                  # The NOE Q-factor.
280                  q_noe = sqrt(noe_viol/self.noe_norm)
281
282                  # Combined Q.
283                  q = sqrt(q_noe**2 + q_rdc**2)
284
285                  # Write out the unsorted list.
286                  out.write("%-20i%20.15f\n" % (ens, q))
287
288                  # Store the values.
289                  data.append([q, ens])
290
291              # Sort the combined Q.
292              data.sort()
293
294              # Write the data.
295              for i in range(len(data)):
296                  out_sorted.write("%-20i%20.15f\n" % (data[i][1], data[i][0]))
297
298              # Close the files.
299              out.close()
300              out_sorted.close()
301
302
303      def generate_distribution(self, values, lower=0.0, upper=200.0, inc=None):
304          """Create the distribution data structure."""
305
306          # The bin width.
307          bin_width = (upper - lower)/float(inc)
308
309          # Init the dist object.
310          dist = []
311          for i in range(inc):
312              dist.append([bin_width*i+lower, 0])
313
314          # Loop over the values.
315          for val in values:
316              # The bin.
317              bin = int((val - lower)/bin_width)
318
319              # Outside of the limits.
320              if bin < 0 or bin >= inc:
321                  print("Outside of the limits: '%s'" % val)
322                  continue
323
324              # Increment the count.
325              dist[bin][1] = dist[bin][1] + 1
326
327          # Convert the counts to frequencies.
328          total_pr = 0.0
329          for i in range(inc):
330              dist[i][1] = dist[i][1] / float(len(values))
331              total_pr = total_pr + dist[i][1]
332
333          print("Total Pr: %s" % total_pr)
334
335          # Return the dist.
336          return dist
337
338
339      def grace_plots(self):
340          """Generate grace plots of the results."""
341
342          # The number of configs.
343          n = len(self.configs)
344
345          # The colours for the different configs.
346          defaults = [4, 2]  # Blue and red.
347          colours = []
348          for i in range(n):
349              # Default colours.
350              if i < len(defaults):
351                  colours.append(defaults[i])
352
353              # Otherwise black!
354              else:
355                  colours.append(0)
356
357          # The ensemble number text.
358          ens_text = ''
359          dividers = [1e15, 1e12, 1e9, 1e6, 1e3, 1]
360          num_ens = self.num_ens
361          for i in range(len(dividers)):
362              # The number.
363              num = int(num_ens / dividers[i])
364
365              # The text.
366              if num:
367                  text = repr(num)
368              elif not num and ens_text:
369                  text = '000'
370              else:
371                  continue
372
373              # Update the text.
374              ens_text = ens_text + text
375
376              # A comma.
377              if i < len(dividers)-1:
378                  ens_text = ens_text + ','
379
380              # Remove the front part of the number.
381              num_ens = num_ens - dividers[i]*num
382
383          # Subtitle for all graphs.
384          subtitle = '%s ensembles of %s' % (ens_text, self.num_models)
385
386          # NOE violations.
387          if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
388              # Print out.
389              print("Generating NOE violation Grace plots.")
390
391              # Open the output files.
392              grace_curve = open(self.results_dir+sep+"NOE_viol_curve.agr", 'w')
393              grace_dist = open(self.results_dir+sep+"NOE_viol_dist.agr", 'w')
394
395              # Loop over the configurations.
396              data = []
397              dist = []
398              for i in range(n):
399                  # Open the results file and read the data.
400                  file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i] + "_sorted")
401                  lines = file.readlines()
402                  file.close()
403
404                  # Add a new graph set.
405                  data.append([])
406
407                  # Loop over the ensembles and extract the NOE violation.
408                  noe_viols = []
409                  for j in range(1, len(lines)):
410                      # Extract the violation.
411                      viol = float(lines[j].split()[1])
412                      noe_viols.append(viol)
413
414                      # Add to the data structure.
415                      data[i].append([j, viol])
416
417                  # Calculate the distribution.
418                  dist.append(self.generate_distribution(noe_viols, inc=self.bucket_num, upper=self.upper_lim_noe, lower=self.lower_lim_noe))
419
420              # Headers.
421              write_xy_header(file=grace_curve, title='NOE violation comparison', subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[0]*n, axis_labels=['Ensemble (sorted)', 'NOE violation (Angstrom\\S2\\N)'], axis_min=[0, 0], axis_max=[self.num_ens, 200], legend_pos=[0.3, 0.8])
422              write_xy_header(file=grace_dist, title='NOE violation comparison', subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[1]*n, symbol_sizes=[0.5]*n, linestyle=[3]*n, axis_labels=['NOE violation (Angstrom\\S2\\N)', 'Frequency'], axis_min=[0, 0], axis_max=[200, 0.2], legend_pos=[1.1, 0.8])
423
424              # Write the data.
425              write_xy_data([data], file=grace_curve, graph_type='xy')
426              write_xy_data([dist], file=grace_dist, graph_type='xy')
427
428              # Close the files.
429              grace_curve.close()
430              grace_dist.close()
431
432          # RDC Q-factors.
433          if access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
434              # Print out.
435              print("Generating RDC Q-factor Grace plots.")
436
437              # Open the Grace output files.
438              grace_curve = open(self.results_dir+sep+"RDC_%s_curve.agr" % self.rdc_name, 'w')
439              grace_dist = open(self.results_dir+sep+"RDC_%s_dist.agr" % self.rdc_name, 'w')
440
441              # Loop over the configurations.
442              data = []
443              dist = []
444              for i in range(n):
445                  # Open the results file and read the data.
446                  file = open(self.results_dir+sep+"Q_factors_" + self.configs[i] + "_sorted")
447                  lines = file.readlines()
448                  file.close()
449
450                  # Add a new graph set.
451                  data.append([])
452
453                  # Loop over the Q-factors.
454                  values = []
455                  for j in range(1, len(lines)):
456                      # Extract the Q-factor.
457                      value = float(lines[j].split()[1])
458                      values.append(value)
459
460                      # Add to the data structure.
461                      data[i].append([j, value])
462
463                  # Calculate the distribution.
464                  dist.append(self.generate_distribution(values, inc=self.bucket_num, upper=self.upper_lim_rdc, lower=self.lower_lim_rdc))
465
466              # Headers.
467              write_xy_header(file=grace_curve, title='%s RDC Q-factor comparison' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[0]*n, axis_labels=['Ensemble (sorted)', '%s RDC Q-factor (pales format)' % self.rdc_name], axis_min=[0, 0], axis_max=[self.num_ens, 2], legend_pos=[0.3, 0.8])
468              write_xy_header(file=grace_dist, title='%s RDC Q-factor comparison' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[1]*n, symbol_sizes=[0.5]*n, linestyle=[3]*n, axis_labels=['%s RDC Q-factor (pales format)' % self.rdc_name, 'Frequency'], axis_min=[0, 0], axis_max=[2, 0.2], legend_pos=[1.1, 0.8])
469
470              # Write the data.
471              write_xy_data([data], file=grace_curve, graph_type='xy')
472              write_xy_data([dist], file=grace_dist, graph_type='xy')
473
474              # Close the files.
475              grace_curve.close()
476              grace_dist.close()
477
478          # NOE-RDC correlation plots.
479          if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK) and access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
480              # Print out.
481              print("Generating NOE-RDC correlation Grace plots.")
482
483              # Open the Grace output files.
484              grace_file = open(self.results_dir+sep+"correlation_plot.agr", 'w')
485              grace_file_scaled = open(self.results_dir+sep+"correlation_plot_scaled.agr", 'w')
486
487              # Grace data.
488              data = []
489              data_scaled = []
490              for i in range(len(self.configs)):
491                  # Open the NOE results file and read the data.
492                  file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
493                  noe_lines = file.readlines()
494                  file.close()
495
496                  # Add a new graph set.
497                  data.append([])
498                  data_scaled.append([])
499
500                  # Open the RDC results file and read the data.
501                  file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
502                  rdc_lines = file.readlines()
503                  file.close()
504
505                  # Loop over the data.
506                  for j in range(1, len(noe_lines)):
507                      # Split the lines.
508                      noe_viol = float(noe_lines[j].split()[1])
509                      q_factor = float(rdc_lines[j].split()[1])
510
511                      # Add the xy pair.
512                      data[i].append([noe_viol, q_factor])
513                      data_scaled[i].append([sqrt(noe_viol/self.noe_norm), q_factor])
514
515              # Write the data.
516              write_xy_header(file=grace_file, title='Correlation plot - %s RDC vs. NOE' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[9]*n, symbol_sizes=[0.24]*n, linetype=[0]*n, axis_labels=['NOE violation (Angstrom\\S2\\N)', '%s RDC Q-factor (pales format)' % self.rdc_name], axis_min=[0, 0], axis_max=[noe_viols[-1]+10, values[-1]+0.1], legend_pos=[1.1, 0.8])
517              write_xy_header(file=grace_file_scaled, title='Correlation plot - %s RDC vs. NOE Q-factor' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[9]*n, symbol_sizes=[0.24]*n, linetype=[0]*n, axis_labels=['Normalised NOE violation (Q = sqrt(U / \\xS\\f{}NOE\\si\\N\\S2\\N))', '%s RDC Q-factor (pales format)' % self.rdc_name], axis_min=[0, 0], axis_max=[1, 1], legend_pos=[1.1, 0.8])
518              write_xy_data([data], file=grace_file, graph_type='xy')
519              write_xy_data([data_scaled], file=grace_file_scaled, graph_type='xy')
520
521
522      def noe_viol(self):
523          """NOE violation calculations."""
524
525          # Redirect STDOUT to a log file.
526          if self.log:
527              sys.stdout = open(self.results_dir+sep+"logs" + sep + "NOE_viol.log", 'w')
528
529          # Create a directory for the save files.
530          dir = self.results_dir + sep + "NOE_results"
531          mkdir_nofail(dir=dir)
532
533          # Loop over the configurations.
534          for config in self.configs:
535              # Print out.
536              print("\n"*10 + "# Set up for config " + config + " #" + "\n")
537
538              # Open the results file.
539              out = open(self.results_dir+sep+"NOE_viol_" + config, 'w')
540              out_sorted = open(self.results_dir+sep+"NOE_viol_" + config + "_sorted", 'w')
541              out.write("%-20s%20s\n" % ("# Ensemble", "NOE_violation"))
542              out_sorted.write("%-20s%20s\n" % ("# Ensemble", "NOE_violation"))
543
544              # Create the data pipe.
545              self.interpreter.pipe.create("noe_viol_%s" % config, "N-state")
546
547              # Read the first structure.
548              self.interpreter.structure.read_pdb("ensembles" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)), parser="internal")
549
550              # Load all protons as the sequence.
551              self.interpreter.structure.load_spins("@H*", ave_pos=False)
552
553              # Create the pseudo-atoms.
554              for i in range(len(self.pseudo)):
555                  self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
556              self.interpreter.sequence.display()
557
558              # Read the NOE list.
559              self.interpreter.noe.read_restraints(file=self.noe_file)
560
561              # Set up the N-state model.
562              self.interpreter.n_state_model.select_model(model="fixed")
563
564              # Print out.
565              print("\n"*2 + "# Set up complete #" + "\n"*10)
566
567              # Loop over each ensemble.
568              noe_viol = []
569              for ens in range(self.num_ens):
570                  # Print out the ensemble to both the log and screen.
571                  if self.log:
572                      sys.stdout.write(config + repr(ens) + "\n")
573                  sys.stderr.write(config + repr(ens) + "\n")
574
575                  # Delete the old structures.
576                  self.interpreter.structure.delete()
577
578                  # Read the ensemble.
579                  self.interpreter.structure.read_pdb("ensembles" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)), parser="internal")
580
581                  # Get the atomic positions.
582                  self.interpreter.structure.get_pos(ave_pos=False)
583
584                  # Calculate the average NOE potential.
585                  self.interpreter.calc()
586
587                  # Sum the violations.
588                  cdp.sum_viol = 0.0
589                  for i in range(len(cdp.ave_dist)):
590                      if cdp.quad_pot[i][2]:
591                          cdp.sum_viol = cdp.sum_viol + cdp.quad_pot[i][2]
592
593                  # Write out the NOE violation.
594                  noe_viol.append([cdp.sum_viol, ens])
595                  out.write("%-20i%30.15f\n" % (ens, cdp.sum_viol))
596
597                  # Save the state.
598                  self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
599
600              # Sort the NOE violations.
601              noe_viol.sort()
602
603              # Write the data.
604              for i in range(len(noe_viol)):
605                  out_sorted.write("%-20i%20.15f\n" % (noe_viol[i][1], noe_viol[i][0]))
606
607
608      def rdc_analysis(self):
609          """Perform the RDC part of the analysis."""
610
611          # Redirect STDOUT to a log file.
612          if self.log:
613              sys.stdout = open(self.results_dir+sep+"logs" + sep + "RDC_%s_analysis.log" % self.rdc_name, 'w')
614
615          # The dipolar constant.
616          d = 3.0 / (2.0*pi) * dipolar_constant(g13C, g1H, self.bond_length)
617
618          # Create a directory for the save files.
619          dir = self.results_dir + sep + "RDC_%s_results" % self.rdc_name
620          mkdir_nofail(dir=dir)
621
622          # Loop over the configurations.
623          for config in self.configs:
624              # Print out.
625              print("\n"*10 + "# Set up for config " + config + " #" + "\n")
626
627              # Open the results files.
628              out = open(self.results_dir+sep+"Q_factors_" + config, 'w')
629              out_sorted = open(self.results_dir+sep+"Q_factors_" + config + "_sorted", 'w')
630              out.write("%-20s%20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)", "RDC_Q_factor(standard)"))
631              out_sorted.write("%-20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)"))
632
633              # Create the data pipe.
634              self.interpreter.pipe.create("rdc_analysis_%s" % config, "N-state")
635
636              # Read the first structure.
637              self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)), parser="internal")
638
639              # Load all spins as the sequence.
640              self.interpreter.structure.load_spins(ave_pos=False)
641
642              # Create the pseudo-atoms.
643              for i in range(len(self.pseudo)):
644                  self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
645              self.interpreter.sequence.display()
646
647              # Read the RDC data.
648              self.interpreter.rdc.read(align_id=self.rdc_file, file=self.rdc_file, spin_id1_col=self.rdc_spin_id1_col, spin_id2_col=self.rdc_spin_id2_col, data_col=self.rdc_data_col, error_col=self.rdc_error_col)
649
650              # Define the magnetic dipole-dipole relaxation interaction.
651              self.interpreter.dipole_pair.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
652              self.interpreter.dipole_pair.set_dist(spin_id1='@C*', spin_id2='@Q*', ave_dist=self.bond_length)
653
654              # Set the nuclear isotope.
655              self.interpreter.spin.isotope(isotope='13C', spin_id='@C*')
656              self.interpreter.spin.isotope(isotope='1H', spin_id='@H*')
657              self.interpreter.spin.isotope(isotope='1H', spin_id='@Q*')
658
659              # Set up the model.
660              self.interpreter.n_state_model.select_model(model="fixed")
661
662              # Print out.
663              print("\n"*2 + "# Set up complete #" + "\n"*10)
664
665              # Loop over each ensemble.
666              q_factors = []
667              for ens in range(self.num_ens):
668                  # Print out the ensemble to both the log and screen.
669                  if self.log:
670                      sys.stdout.write(config + repr(ens) + "\n")
671                  sys.stderr.write(config + repr(ens) + "\n")
672
673                  # Delete the old structures.
674                  self.interpreter.structure.delete()
675
676                  # Read the ensemble.
677                  self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)), parser="internal")
678
679                  # Get the positional information, then load the CH vectors.
680                  self.interpreter.structure.get_pos(ave_pos=False)
681                  self.interpreter.dipole_pair.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
682                  self.interpreter.dipole_pair.unit_vectors(ave=False)
683
684                  # Minimisation.
685                  #grid_search(inc=4)
686                  self.interpreter.minimise("simplex", constraints=False)
687
688                  # Store and write out the Q-factors.
689                  q_factors.append([cdp.q_rdc, ens])
690                  out.write("%-20i%20.15f%20.15f\n" % (ens, cdp.q_rdc, cdp.q_rdc_norm2))
691
692                  # Calculate the alignment tensor in Hz, and store it for reference.
693                  cdp.align_tensor_Hz = d * cdp.align_tensors[0].A
694                  cdp.align_tensor_Hz_5D = d * cdp.align_tensors[0].A_5D
695
696                  # Save the state.
697                  self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
698
699              # Sort the Q-factors.
700              q_factors.sort()
701
702              # Write the data.
703              for i in range(len(q_factors)):
704                  out_sorted.write("%-20i%20.15f\n" % (q_factors[i][1], q_factors[i][0]))
705
706
707      def sample(self):
708          """Generate the ensembles by random sampling of the snapshots."""
709
710          # Create the directory for the ensembles, if needed.
711          mkdir_nofail(dir=self.results_dir + sep + "ensembles")
712
713          # Loop over the configurations.
714          for conf_index in range(len(self.configs)):
715              # Loop over each ensemble.
716              for ens in range(self.num_ens):
717                  # Random sampling.
718                  rand = []
719                  for j in range(self.num_models):
720                      rand.append(randint(self.snapshot_min[conf_index], self.snapshot_max[conf_index]))
721
722                  # Print out.
723                  print("Generating ensemble %s%s from structures %s." % (self.configs[conf_index], ens, rand))
724
725                  # The file name.
726                  file_name = "ensembles" + sep + self.configs[conf_index] + repr(ens) + ".pdb"
727
728                  # Open the output file.
729                  out = open(self.results_dir+sep+file_name, 'w')
730
731                  # Header.
732                  out.write("REM Structures: " + repr(rand) + "\n")
733
734                  # Concatenate the files.
735                  for j in range(self.num_models):
736                      # The random file.
737                      rand_name = self.snapshot_dir[conf_index] + sep + self.configs[conf_index] + repr(rand[j]) + ".pdb"
738
739                      # Append the file.
740                      out.write(open(rand_name).read())
741
742                  # Close the file.
743                  out.close()
744
745
746      def superimpose(self):
747          """Superimpose the ensembles using fit to first in Molmol."""
748
749          # Create the output directory.
750          mkdir_nofail(dir=self.results_dir + sep + "ensembles_superimposed")
751
752          # Logging turned on.
753          if self.log:
754              log = open(self.results_dir+sep+"logs" + sep + "superimpose_molmol.stderr", 'w')
755              sys.stdout = open(self.results_dir+sep+"logs" + sep + "superimpose.log", 'w')
756
757          # Loop over S and R.
758          for config in ["R", "S"]:
759              # Loop over each ensemble.
760              for ens in range(self.num_ens):
761                  # The file names.
762                  file_in = "ensembles" + sep + config + repr(ens) + ".pdb"
763                  file_out = "ensembles_superimposed" + sep + config + repr(ens) + ".pdb"
764
765                  # Print out.
766                  sys.stderr.write("Superimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
767                  if self.log:
768                      log.write("\n\n\nSuperimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
769
770                  # Failure handling (if a failure occurred and this is rerun, skip all existing files).
771                  if access(self.results_dir+sep+file_out, F_OK):
772                      continue
773
774                  # Open the Molmol pipe.
775                  pipe = Popen("molmol -t -f -", shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=False)
776
777                  # Init all.
778                  pipe.stdin.write("InitAll yes\n")
779
780                  # Read the PDB.
781                  pipe.stdin.write("ReadPdb " + self.results_dir+sep+file_in + "\n")
782
783                  # Fit to the first structure, then to the mean.
784                  pipe.stdin.write("Fit to_first 'selected'\n")
785                  pipe.stdin.write("Fit to_mean 'selected'\n")
786
787                  # Write the result.
788                  pipe.stdin.write("WritePdb " + self.results_dir+sep+file_out + "\n")
789
790                  # End Molmol.
791                  pipe.stdin.close()
792
793                  # Get STDOUT and STDERR.
794                  sys.stdout.write(pipe.stdout.read())
795                  if self.log:
796                      log.write(pipe.stderr.read())
797
798                  # Close the pipe.
799                  pipe.stdout.close()
800                  pipe.stderr.close()
801
802                  # Open the superimposed file in relax.
803                  self.interpreter.reset()
804                  self.interpreter.pipe.create('out', 'N-state')
805                  self.interpreter.structure.read_pdb(file_out, dir=self.results_dir, parser="internal")
806
807                  # Fix the Molmol proton naming by shifting the leading character to the end (e.g. '1HB' -> 'HB1').
808                  for model in cdp.structure.structural_data:
809                      # Alias.
810                      mol = model.mol[0]
811
812                      # Loop over all atoms.
813                      for i in range(len(mol.atom_name)):
814                          # A proton.
815                          if search('H', mol.atom_name[i]):
816                              mol.atom_name[i] = mol.atom_name[i][1:] + mol.atom_name[i][0]
817
818                  # Replace the superimposed file.
819                  self.interpreter.structure.write_pdb(config + repr(ens) + ".pdb", dir=self.results_dir+sep+"ensembles_superimposed", force=True)
820
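
The module above is normally driven one stage at a time from a short user script that instantiates the class and calls run(). The sketch below is not part of the module; it is a minimal illustration of how Stereochem_analysis might be invoked, assuming hypothetical input files (the "snapshots" directories, "noe_restraints.txt", "rdcs.txt") and purely illustrative numeric values - see the relax documentation for values appropriate to a real system. Stage 6 then combines the per-ensemble NOE violation U and RDC Q-factor as Q_total = sqrt(U/NOE_NORM + Q_RDC^2), as defined in the module docstring.

    # Hypothetical driver script for the auto-analysis above.  All file names and
    # numeric values are illustrative assumptions, not part of the module.
    from auto_analyses.stereochem_analysis import Stereochem_analysis

    # The stage to execute (1-6, see the module docstring).
    STAGE = 1

    # Set up the analysis, mirroring the keyword arguments documented in __init__().
    analysis = Stereochem_analysis(
        stage=STAGE,
        results_dir="results",                      # Assumed output directory.
        num_ens=10000,                              # N, the number of ensembles.
        num_models=10,                              # M, the ensemble size.
        configs=["R", "S"],                         # The two configurations.
        snapshot_dir=["snapshots", "snapshots"],    # One snapshot directory per configuration.
        snapshot_min=[0, 0],                        # First snapshot numbers.
        snapshot_max=[1000, 1000],                  # Last snapshot numbers.
        pseudo=[["Q7", ["@H16", "@H17", "@H18"]]],  # Pseudo-atom example from the docstring.
        noe_file="noe_restraints.txt",              # Hypothetical NOE restraint file.
        noe_norm=50.0,                              # NOE_NORM = sum(NOE_i^2), illustrative value.
        rdc_name="MEDIA1",                          # Label for the RDC data set.
        rdc_file="rdcs.txt",                        # Hypothetical RDC file.
        rdc_spin_id1_col=1,
        rdc_spin_id2_col=2,
        rdc_data_col=3,
        rdc_error_col=4,
        bond_length=1.10e-10,                       # C-H bond length in meters (illustrative).
        log=True)

    # Execute the requested stage (rerun with STAGE = 2, 3, ... for the later stages).
    analysis.run()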