Package auto_analyses :: Module stereochem_analysis

Source Code for Module auto_analyses.stereochem_analysis

  1  ############################################################################### 
  2  #                                                                             # 
  3  # Copyright (C) 2010-2015 Edward d'Auvergne                                   # 
  4  #                                                                             # 
  5  # This file is part of the program relax (http://www.nmr-relax.com).          # 
  6  #                                                                             # 
  7  # This program is free software: you can redistribute it and/or modify        # 
  8  # it under the terms of the GNU General Public License as published by        # 
  9  # the Free Software Foundation, either version 3 of the License, or           # 
 10  # (at your option) any later version.                                         # 
 11  #                                                                             # 
 12  # This program is distributed in the hope that it will be useful,             # 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 15  # GNU General Public License for more details.                                # 
 16  #                                                                             # 
 17  # You should have received a copy of the GNU General Public License           # 
 18  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
 19  #                                                                             # 
 20  ############################################################################### 
 21   
 22  # Module docstring. 
 23  """Determination of relative stereochemistry in organic molecules. 
 24   
 25  The analysis is performed using multiple ensembles of structures, randomly sampled from a given set of structures.  The discrimination is performed by comparing the sets of ensembles using NOE violations and RDC Q factors. 
 26   
 27  This script is split into multiple stages: 
 28   
 29      1.  The random sampling of the snapshots to generate the N ensembles (NUM_ENS, usually 10,000 to 100,000) of M members (NUM_MODELS, usually ~10).  The original snapshot files are expected to be named as SNAPSHOT_DIR + CONFIG + a number from SNAPSHOT_MIN to SNAPSHOT_MAX + ".pdb", e.g. "snapshots/R647.pdb".  The ensembles will be placed into the "ensembles" directory. 
 30   
 31      2.  The NOE violation analysis. 
 32   
 33      3.  The superimposition of ensembles.  For each ensemble, Molmol is used for superimposition using the fit to first algorithm.  The superimposed ensembles will be placed into the "ensembles_superimposed" directory.  This stage is not necessary for the NOE analysis. 
 34   
 35      4.  The RDC Q factor analysis. 
 36   
 37      5.  Generation of Grace graphs. 
 38   
 39      6.  Final ordering of ensembles using the combined RDC and NOE Q factors, whereby the NOE Q factor is defined as:: 
 40   
 41          Q^2 = U / sum(NOE_i^2), 
 42   
 43      where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.  The denominator is the sum of all squared NOEs - this must be given as the value of NOE_NORM.  The combined Q is given by:: 
 44   
 45          Q_total^2 = Q_NOE^2 + Q_RDC^2. 
 46  """ 
 47   
 48  # Dependencies. 
 49  import dep_check 
 50   
 51  # Python module imports. 
 52  from math import pi, sqrt 
 53  from os import F_OK, access, getcwd, sep 
 54  from random import randint 
 55  from re import search 
 56  if dep_check.subprocess_module: 
 57      from subprocess import PIPE, Popen 
 58  import sys 
 59   
 60  # relax module imports. 
 61  from lib.periodic_table import periodic_table 
 62  from lib.physical_constants import dipolar_constant 
 63  from lib.plotting.api import write_xy_data, write_xy_header 
 64  from prompt.interpreter import Interpreter 
 65  from lib.errors import RelaxError 
 66  from lib.io import mkdir_nofail 
 67  from status import Status; status = Status() 
 68   
 69   
 70   
 71  class Stereochem_analysis:
 72      """Class for performing the relative stereochemistry analysis."""
 73  
 74      def __init__(self, stage=1, results_dir=None, num_ens=10000, num_models=10, configs=None, snapshot_dir='snapshots', snapshot_min=None, snapshot_max=None, pseudo=None, noe_file=None, noe_norm=None, rdc_name=None, rdc_file=None, rdc_spin_id1_col=None, rdc_spin_id2_col=None, rdc_data_col=None, rdc_error_col=None, bond_length=None, bond_length_file=None, log=None, bucket_num=200, lower_lim_noe=0.0, upper_lim_noe=600.0, lower_lim_rdc=0.0, upper_lim_rdc=1.0):
 75          """Set up for the stereochemistry analysis.
 76  
 77          @keyword stage: Stage of analysis (see the module docstring above for the options).
 78          @type stage: int
 79          @keyword results_dir: The optional directory to place all results files into.
 80          @type results_dir: None or str
 81          @keyword num_ens: Number of ensembles.
 82          @type num_ens: int
 83          @keyword num_models: Ensemble size.
 84          @type num_models: int
 85          @keyword configs: All the configurations.
 86          @type configs: list of str
 87          @keyword snapshot_dir: Snapshot directories (corresponding to the configurations).
 88          @type snapshot_dir: list of str
 89          @keyword snapshot_min: The number of the first snapshots (corresponding to the configurations).
 90          @type snapshot_min: list of int
 91          @keyword snapshot_max: The number of the last snapshots (corresponding to the configurations).
 92          @type snapshot_max: list of int
 93          @keyword pseudo: The list of pseudo-atoms.  Each element is a list of the pseudo-atom name and a list of all those atoms forming the pseudo-atom.  For example, pseudo = [["Q7", ["@H16", "@H17", "@H18"]], ["Q9", ["@H20", "@H21", "@H22"]]].
 94          @type pseudo: list of list of str and list of str
 95          @keyword noe_file: The name of the NOE restraint file.
 96          @type noe_file: str
 97          @keyword noe_norm: The NOE normalisation factor (equal to the sum of all NOEs squared).
 98          @type noe_norm: float
 99          @keyword rdc_name: The label for this RDC data set.
100          @type rdc_name: str
101          @keyword rdc_file: The name of the RDC file.
102          @type rdc_file: str
103          @keyword rdc_spin_id1_col: The spin ID column of the first spin in the RDC file.
104          @type rdc_spin_id1_col: None or int
105          @keyword rdc_spin_id2_col: The spin ID column of the second spin in the RDC file.
106          @type rdc_spin_id2_col: None or int
107          @keyword rdc_data_col: The data column of the RDC file.
108          @type rdc_data_col: int
109          @keyword rdc_error_col: The error column of the RDC file.
110          @type rdc_error_col: int
111          @keyword bond_length: The bond length value in meters.  This overrides the bond_length_file argument.
112          @type bond_length: float or None
113          @keyword bond_length_file: The file of bond lengths for each atom pair in meters.  The first and second columns must be the spin ID strings and the third column must contain the data.
114          @type bond_length_file: str or None
115          @keyword log: Log file output flag (only for certain stages).
116          @type log: bool
117          @keyword bucket_num: Number of buckets for the distribution plots.
118          @type bucket_num: int
119          @keyword lower_lim_noe: Distribution plot limits.
120          @type lower_lim_noe: int
121          @keyword upper_lim_noe: Distribution plot limits.
122          @type upper_lim_noe: int
123          @keyword lower_lim_rdc: Distribution plot limits.
124          @type lower_lim_rdc: int
125          @keyword upper_lim_rdc: Distribution plot limits.
126          @type upper_lim_rdc: int
127          """
128  
129          # Execution lock.
130          status.exec_lock.acquire('auto stereochem analysis', mode='auto-analysis')
131  
132          # Set up the analysis status object.
133          status.init_auto_analysis('stereochem', type='stereochem')
134          status.current_analysis = 'auto stereochem analysis'
135  
136          # Store all the args.
137          self.stage = stage
138          self.results_dir = results_dir
139          self.num_ens = num_ens
140          self.num_models = num_models
141          self.configs = configs
142          self.snapshot_dir = snapshot_dir
143          self.snapshot_min = snapshot_min
144          self.snapshot_max = snapshot_max
145          self.pseudo = pseudo
146          self.noe_file = noe_file
147          self.noe_norm = noe_norm
148          self.rdc_name = rdc_name
149          self.rdc_file = rdc_file
150          self.rdc_spin_id1_col = rdc_spin_id1_col
151          self.rdc_spin_id2_col = rdc_spin_id2_col
152          self.rdc_data_col = rdc_data_col
153          self.rdc_error_col = rdc_error_col
154          self.bond_length = bond_length
155          self.bond_length_file = bond_length_file
156          self.log = log
157          self.bucket_num = bucket_num
158          self.lower_lim_noe = lower_lim_noe
159          self.upper_lim_noe = upper_lim_noe
160          self.lower_lim_rdc = lower_lim_rdc
161          self.upper_lim_rdc = upper_lim_rdc
162  
163          # Load the interpreter.
164          self.interpreter = Interpreter(show_script=False, raise_relax_error=True)
165          self.interpreter.populate_self()
166          self.interpreter.on(verbose=False)
167  
168          # Create the results directory.
169          if self.results_dir:
170              mkdir_nofail(self.results_dir)
171  
172          # Or use the current working directory.
173          else:
174              self.results_dir = getcwd()
175  
176          # Create a directory for log files.
177          if self.log:
178              mkdir_nofail(self.results_dir + sep + "logs")
179  
180          # Finish and unlock execution.
181          status.auto_analysis['stereochem'].fin = True
182          status.current_analysis = None
183          status.exec_lock.release()
184  
185  
186      def run(self):
187          """Execute the given stage of the analysis."""
188  
189          # Store the original STDOUT.
190          self.stdout_orig = sys.stdout
191  
192          # Sampling of snapshots.
193          if self.stage == 1:
194              self.sample()
195  
196          # NOE violation analysis.
197          elif self.stage == 2:
198              self.noe_viol()
199  
200          # Ensemble superimposition.
201          elif self.stage == 3:
202              self.superimpose()
203  
204          # RDC Q factor analysis.
205          elif self.stage == 4:
206              self.rdc_analysis()
207  
208          # Grace plot creation.
209          elif self.stage == 5:
210              self.grace_plots()
211  
212          # Final combined Q ordering.
213          elif self.stage == 6:
214              self.combined_q()
215  
216          # Unknown stage.
217          else:
218              raise RelaxError("The stage number %s is unknown." % self.stage)
219  
220          # Restore STDOUT.
221          sys.stdout = self.stdout_orig
222  
223  
224      def combined_q(self):
225          """Calculate the combined Q factor.
226  
227          The combined Q is defined as::
228  
229              Q_total^2 = Q_NOE^2 + Q_RDC^2,
230  
231          and the NOE Q factor as::
232  
233              Q^2 = U / sum(NOE_i^2),
234  
235          where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.
236          """
237  
238          # Checks.
239          if not access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
240              raise RelaxError("The NOE analysis has not been performed, cannot find the file '%s'." % (self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted"))
241          if not access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
242              raise RelaxError("The RDC analysis has not been performed, cannot find the file '%s'." % (self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted"))
243  
244          # Loop over the configurations.
245          for i in range(len(self.configs)):
246              # Print out.
247              print("Creating the combined Q factor file for configuration '%s'." % self.configs[i])
248  
249              # Open the NOE results file and read the data.
250              file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
251              noe_lines = file.readlines()
252              file.close()
253  
254              # Open the RDC results file and read the data.
255              file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
256              rdc_lines = file.readlines()
257              file.close()
258  
259              # The combined Q factor file.
260              out = open(self.results_dir+sep+"Q_total_%s" % self.configs[i], 'w')
261              out_sorted = open(self.results_dir+sep+"Q_total_%s_sorted" % self.configs[i], 'w')
262  
263              # Loop over the data (skipping the header line).
264              data = []
265              for j in range(1, len(noe_lines)):
266                  # Split the lines.
267                  ens = int(noe_lines[j].split()[0])
268                  noe_viol = float(noe_lines[j].split()[1])
269                  q_rdc = float(rdc_lines[j].split()[1])
270  
271                  # The NOE Q factor.
272                  q_noe = sqrt(noe_viol/self.noe_norm)
273  
274                  # Combined Q.
275                  q = sqrt(q_noe**2 + q_rdc**2)
276  
277                  # Write out the unsorted list.
278                  out.write("%-20i%20.15f\n" % (ens, q))
279  
280                  # Store the values.
281                  data.append([q, ens])
282  
283              # Sort the combined Q.
284              data.sort()
285  
286              # Write the data.
287              for i in range(len(data)):
288                  out_sorted.write("%-20i%20.15f\n" % (data[i][1], data[i][0]))
289  
290              # Close the files.
291              out.close()
292              out_sorted.close()
293  
294  
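The "Q_total_<config>" and "Q_total_<config>_sorted" files written by combined_q() contain two whitespace-separated columns, the ensemble number and the combined Q factor, with the sorted file ordered from lowest (best) to highest Q.  A minimal sketch for pulling out the best ensemble, assuming a configuration named "R" and the current directory as the results directory:

    # Read the best (first) line of the sorted combined Q factor file.
    with open("Q_total_R_sorted") as file:
        best_ens, best_q = file.readline().split()
    print("Best ensemble: %s (Q_total = %s)" % (best_ens, best_q))
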
295      def generate_distribution(self, values, lower=0.0, upper=200.0, inc=None):
296          """Create the distribution data structure."""
297  
298          # The bin width.
299          bin_width = (upper - lower)/float(inc)
300  
301          # Init the dist object.
302          dist = []
303          for i in range(inc):
304              dist.append([bin_width*i+lower, 0])
305  
306          # Loop over the values.
307          for val in values:
308              # The bin.
309              bin = int((val - lower)/bin_width)
310  
311              # Outside of the limits.
312              if bin < 0 or bin >= inc:
313                  print("Outside of the limits: '%s'" % val)
314                  continue
315  
316              # Increment the count.
317              dist[bin][1] = dist[bin][1] + 1
318  
319          # Convert the counts to frequencies.
320          total_pr = 0.0
321          for i in range(inc):
322              dist[i][1] = dist[i][1] / float(len(values))
323              total_pr = total_pr + dist[i][1]
324  
325          print("Total Pr: %s" % total_pr)
326  
327          # Return the dist.
328          return dist
329  
330  
331      def grace_plots(self):
332          """Generate grace plots of the results."""
333  
334          # The number of configs.
335          n = len(self.configs)
336  
337          # The colours for the different configs.
338          defaults = [4, 2]    # Blue and red.
339          colours = []
340          for i in range(n):
341              # Default colours.
342              if i < len(defaults):
343                  colours.append(defaults[i])
344  
345              # Otherwise black!
346              else:
347                  colours.append(0)
348  
349          # The ensemble number text.
350          ens_text = ''
351          dividers = [1e15, 1e12, 1e9, 1e6, 1e3, 1]
352          num_ens = self.num_ens
353          for i in range(len(dividers)):
354              # The number.
355              num = int(num_ens / dividers[i])
356  
357              # The text.
358              if num:
359                  text = repr(num)
360              elif not num and ens_text:
361                  text = '000'
362              else:
363                  continue
364  
365              # Update the text.
366              ens_text = ens_text + text
367  
368              # A comma.
369              if i < len(dividers)-1:
370                  ens_text = ens_text + ','
371  
372              # Remove the front part of the number.
373              num_ens = num_ens - dividers[i]*num
374  
375          # Subtitle for all graphs.
376          subtitle = '%s ensembles of %s' % (ens_text, self.num_models)
377  
378          # NOE violations.
379          if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
380              # Print out.
381              print("Generating NOE violation Grace plots.")
382  
383              # Open the output files.
384              grace_curve = open(self.results_dir+sep+"NOE_viol_curve.agr", 'w')
385              grace_dist = open(self.results_dir+sep+"NOE_viol_dist.agr", 'w')
386  
387              # Loop over the configurations.
388              data = []
389              dist = []
390              for i in range(n):
391                  # Open the results file and read the data.
392                  file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i] + "_sorted")
393                  lines = file.readlines()
394                  file.close()
395  
396                  # Add a new graph set.
397                  data.append([])
398  
399                  # Loop over the ensembles and extract the NOE violation.
400                  noe_viols = []
401                  for j in range(1, len(lines)):
402                      # Extract the violation.
403                      viol = float(lines[j].split()[1])
404                      noe_viols.append(viol)
405  
406                      # Add to the data structure.
407                      data[i].append([j, viol])
408  
409                  # Calculate the R distribution.
410                  dist.append(self.generate_distribution(noe_viols, inc=self.bucket_num, upper=self.upper_lim_noe, lower=self.lower_lim_noe))
411  
412              # Headers.
413              write_xy_header(format='grace', file=grace_curve, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0]*n], axis_labels=[['Ensemble (sorted)', 'NOE violation (Angstrom\\S2\\N)']], legend_pos=[[0.3, 0.8]])
414              write_xy_header(format='grace', file=grace_dist, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1]*n], symbol_sizes=[[0.5]*n], linestyle=[[3]*n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', 'Frequency']], legend_pos=[[1.1, 0.8]])
415  
416              # Write the data.
417              write_xy_data(format='grace', data=[data], file=grace_curve, graph_type='xy')
418              write_xy_data(format='grace', data=[dist], file=grace_dist, graph_type='xy')
419  
420              # Close the files.
421              grace_curve.close()
422              grace_dist.close()
423  
424          # RDC Q factors.
425          if access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
426              # Print out.
427              print("Generating RDC Q factor Grace plots.")
428  
429              # Open the Grace output files.
430              grace_curve = open(self.results_dir+sep+"RDC_%s_curve.agr" % self.rdc_name, 'w')
431              grace_dist = open(self.results_dir+sep+"RDC_%s_dist.agr" % self.rdc_name, 'w')
432  
433              # Loop over the configurations.
434              data = []
435              dist = []
436              for i in range(n):
437                  # Open the results file and read the data.
438                  file = open(self.results_dir+sep+"Q_factors_" + self.configs[i] + "_sorted")
439                  lines = file.readlines()
440                  file.close()
441  
442                  # Add a new graph set.
443                  data.append([])
444  
445                  # Loop over the Q factors.
446                  values = []
447                  for j in range(1, len(lines)):
448                      # Extract the violation.
449                      value = float(lines[j].split()[1])
450                      values.append(value)
451  
452                      # Add to the data structure.
453                      data[i].append([j, value])
454  
455                  # Calculate the R distribution.
456                  dist.append(self.generate_distribution(values, inc=self.bucket_num, upper=self.upper_lim_rdc, lower=self.lower_lim_rdc))
457  
458              # Headers.
459              write_xy_header(format='grace', file=grace_curve, title='%s RDC Q factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0]*n], axis_labels=[['Ensemble (sorted)', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[0.3, 0.8]])
460              write_xy_header(format='grace', file=grace_dist, title='%s RDC Q factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1]*n], symbol_sizes=[[0.5]*n], linestyle=[[3]*n], axis_labels=[['%s RDC Q factor (pales format)' % self.rdc_name, 'Frequency']], legend_pos=[[1.1, 0.8]])
461  
462              # Write the data.
463              write_xy_data(format='grace', data=[data], file=grace_curve, graph_type='xy')
464              write_xy_data(format='grace', data=[dist], file=grace_dist, graph_type='xy')
465  
466              # Close the files.
467              grace_curve.close()
468              grace_dist.close()
469  
470          # NOE-RDC correlation plots.
471          if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK) and access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
472              # Print out.
473              print("Generating NOE-RDC correlation Grace plots.")
474  
475              # Open the Grace output files.
476              grace_file = open(self.results_dir+sep+"correlation_plot.agr", 'w')
477              grace_file_scaled = open(self.results_dir+sep+"correlation_plot_scaled.agr", 'w')
478  
479              # Grace data.
480              data = []
481              data_scaled = []
482              for i in range(len(self.configs)):
483                  # Open the NOE results file and read the data.
484                  file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
485                  noe_lines = file.readlines()
486                  file.close()
487  
488                  # Add a new graph set.
489                  data.append([])
490                  data_scaled.append([])
491  
492                  # Open the RDC results file and read the data.
493                  file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
494                  rdc_lines = file.readlines()
495                  file.close()
496  
497                  # Loop over the data.
498                  for j in range(1, len(noe_lines)):
499                      # Split the lines.
500                      noe_viol = float(noe_lines[j].split()[1])
501                      q_factor = float(rdc_lines[j].split()[1])
502  
503                      # Add the xy pair.
504                      data[i].append([noe_viol, q_factor])
505                      data_scaled[i].append([sqrt(noe_viol/self.noe_norm), q_factor])
506  
507              # Write the data.
508              write_xy_header(format='grace', file=grace_file, title='Correlation plot - %s RDC vs. NOE' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
509              write_xy_header(format='grace', file=grace_file_scaled, title='Correlation plot - %s RDC vs. NOE Q factor' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['Normalised NOE violation (Q = sqrt(U / \\xS\\f{}NOE\\si\\N\\S2\\N))', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
510              write_xy_data(format='grace', data=[data], file=grace_file, graph_type='xy')
511              write_xy_data(format='grace', data=[data_scaled], file=grace_file_scaled, graph_type='xy')
512  
513  
514      def noe_viol(self):
515          """NOE violation calculations."""
516  
517          # Redirect STDOUT to a log file.
518          if self.log:
519              sys.stdout = open(self.results_dir+sep+"logs" + sep + "NOE_viol.log", 'w')
520  
521          # Create a directory for the save files.
522          dir = self.results_dir + sep + "NOE_results"
523          mkdir_nofail(dir=dir)
524  
525          # Loop over the configurations.
526          for config in self.configs:
527              # Print out.
528              print("\n"*10 + "# Set up for config " + config + " #" + "\n")
529  
530              # Open the results file.
531              out = open(self.results_dir+sep+"NOE_viol_" + config, 'w')
532              out_sorted = open(self.results_dir+sep+"NOE_viol_" + config + "_sorted", 'w')
533              out.write("%-20s%20s\n" % ("# Ensemble", "NOE_violation"))
534              out_sorted.write("%-20s%20s\n" % ("# Ensemble", "NOE_violation"))
535  
536              # Create the data pipe.
537              self.interpreter.pipe.create("noe_viol_%s" % config, "N-state")
538  
539              # Read the first structure.
540              self.interpreter.structure.read_pdb("ensembles" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))
541  
542              # Load all protons as the sequence.
543              self.interpreter.structure.load_spins("@H*", ave_pos=False)
544  
545              # Create the pseudo-atoms.
546              for i in range(len(self.pseudo)):
547                  self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
548              self.interpreter.sequence.display()
549  
550              # Read the NOE list.
551              self.interpreter.noe.read_restraints(file=self.noe_file)
552  
553              # Set up the N-state model.
554              self.interpreter.n_state_model.select_model(model="fixed")
555  
556              # Print out.
557              print("\n"*2 + "# Set up complete #" + "\n"*10)
558  
559              # Loop over each ensemble.
560              noe_viol = []
561              for ens in range(self.num_ens):
562                  # Print out the ensemble to both the log and screen.
563                  if self.log:
564                      sys.stdout.write(config + repr(ens) + "\n")
565                      sys.stderr.write(config + repr(ens) + "\n")
566  
567                  # Delete the old structures and rename the molecule.
568                  self.interpreter.structure.delete()
569  
570                  # Read the ensemble.
571                  self.interpreter.structure.read_pdb("ensembles" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))
572  
573                  # Get the atomic positions.
574                  self.interpreter.structure.get_pos(ave_pos=False)
575  
576                  # Calculate the average NOE potential.
577                  self.interpreter.minimise.calculate()
578  
579                  # Sum the violations.
580                  cdp.sum_viol = 0.0
581                  for i in range(len(cdp.ave_dist)):
582                      if cdp.quad_pot[i][2]:
583                          cdp.sum_viol = cdp.sum_viol + cdp.quad_pot[i][2]
584  
585                  # Write out the NOE violation.
586                  noe_viol.append([cdp.sum_viol, ens])
587                  out.write("%-20i%30.15f\n" % (ens, cdp.sum_viol))
588  
589                  # Save the state.
590                  self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
591  
592              # Sort the NOE violations.
593              noe_viol.sort()
594  
595              # Write the data.
596              for i in range(len(noe_viol)):
597                  out_sorted.write("%-20i%20.15f\n" % (noe_viol[i][1], noe_viol[i][0]))
598  
599  
600      def rdc_analysis(self):
601          """Perform the RDC part of the analysis."""
602  
603          # Redirect STDOUT to a log file.
604          if self.log:
605              sys.stdout = open(self.results_dir+sep+"logs" + sep + "RDC_%s_analysis.log" % self.rdc_name, 'w')
606  
607          # The dipolar constant.
608          d = 0.0
609          if self.bond_length != None:
610              d = 3.0 / (2.0*pi) * dipolar_constant(periodic_table.gyromagnetic_ratio('13C'), periodic_table.gyromagnetic_ratio('1H'), self.bond_length)
611  
612          # Create a directory for the save files.
613          dir = self.results_dir + sep + "RDC_%s_results" % self.rdc_name
614          mkdir_nofail(dir=dir)
615  
616          # Loop over the configurations.
617          for config in self.configs:
618              # Print out.
619              print("\n"*10 + "# Set up for config " + config + " #" + "\n")
620  
621              # Open the results files.
622              out = open(self.results_dir+sep+"Q_factors_" + config, 'w')
623              out_sorted = open(self.results_dir+sep+"Q_factors_" + config + "_sorted", 'w')
624              out.write("%-20s%20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)", "RDC_Q_factor(standard)"))
625              out_sorted.write("%-20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)"))
626  
627              # Create the data pipe.
628              self.interpreter.pipe.create("rdc_analysis_%s" % config, "N-state")
629  
630              # Read the first structure.
631              self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))
632  
633              # Load all spins as the sequence.
634              self.interpreter.structure.load_spins(ave_pos=False)
635  
636              # Create the pseudo-atoms.
637              for i in range(len(self.pseudo)):
638                  self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
639              self.interpreter.sequence.display()
640  
641              # Read the RDC data.
642              self.interpreter.rdc.read(align_id=self.rdc_file, file=self.rdc_file, spin_id1_col=self.rdc_spin_id1_col, spin_id2_col=self.rdc_spin_id2_col, data_col=self.rdc_data_col, error_col=self.rdc_error_col)
643  
644              # Define the magnetic dipole-dipole relaxation interaction.
645              if self.bond_length != None:
646                  self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
647                  self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@Q*', ave_dist=self.bond_length)
648              else:
649                  self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)
650  
651              # Set the nuclear isotope.
652              self.interpreter.spin.isotope(isotope='13C', spin_id='@C*')
653              self.interpreter.spin.isotope(isotope='1H', spin_id='@H*')
654              self.interpreter.spin.isotope(isotope='1H', spin_id='@Q*')
655  
656              # Set up the model.
657              self.interpreter.n_state_model.select_model(model="fixed")
658  
659              # Print out.
660              print("\n"*2 + "# Set up complete #" + "\n"*10)
661  
662              # Loop over each ensemble.
663              q_factors = []
664              for ens in range(self.num_ens):
665                  # Print out the ensemble to both the log and screen.
666                  if self.log:
667                      sys.stdout.write(config + repr(ens) + "\n")
668                      sys.stderr.write(config + repr(ens) + "\n")
669  
670                  # Delete the old structures.
671                  self.interpreter.structure.delete()
672  
673                  # Read the ensemble.
674                  self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))
675  
676                  # Get the positional information, then load the CH vectors.
677                  self.interpreter.structure.get_pos(ave_pos=False)
678                  if self.bond_length != None:
679                      self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
680                  else:
681                      self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)
682                  self.interpreter.interatom.unit_vectors(ave=False)
683  
684                  # Minimisation.
685                  #minimise.grid_search(inc=4)
686                  self.interpreter.minimise.execute("simplex", constraints=False)
687  
688                  # Store and write out the Q factors.
689                  q_factors.append([cdp.q_rdc_norm_squared_sum, ens])
690                  out.write("%-20i%20.15f%20.15f\n" % (ens, cdp.q_rdc_norm_squared_sum, cdp.q_rdc_norm_squared_sum))
691  
692                  # Calculate the alignment tensor in Hz, and store it for reference.
693                  cdp.align_tensor_Hz = d * cdp.align_tensors[0].A
694                  cdp.align_tensor_Hz_5D = d * cdp.align_tensors[0].A_5D
695  
696                  # Save the state.
697                  self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
698  
699              # Sort the Q factors.
700              q_factors.sort()
701  
702              # Write the data.
703              for i in range(len(q_factors)):
704                  out_sorted.write("%-20i%20.15f\n" % (q_factors[i][1], q_factors[i][0]))
705  
706  
707      def sample(self):
708          """Generate the ensembles by random sampling of the snapshots."""
709  
710          # Create the directory for the ensembles, if needed.
711          mkdir_nofail(dir=self.results_dir + sep + "ensembles")
712  
713          # Loop over the configurations.
714          for conf_index in range(len(self.configs)):
715              # Loop over each ensemble.
716              for ens in range(self.num_ens):
717                  # Random sampling.
718                  rand = []
719                  for j in range(self.num_models):
720                      rand.append(randint(self.snapshot_min[conf_index], self.snapshot_max[conf_index]))
721  
722                  # Print out.
723                  print("Generating ensemble %s%s from structures %s." % (self.configs[conf_index], ens, rand))
724  
725                  # The file name.
726                  file_name = "ensembles" + sep + self.configs[conf_index] + repr(ens) + ".pdb"
727  
728                  # Open the output file.
729                  out = open(self.results_dir+sep+file_name, 'w')
730  
731                  # Header.
732                  out.write("REM Structures: " + repr(rand) + "\n")
733  
734                  # Concatenate the files.
735                  for j in range(self.num_models):
736                      # The random file.
737                      rand_name = self.snapshot_dir[conf_index] + sep + self.configs[conf_index] + repr(rand[j]) + ".pdb"
738  
739                      # Append the file.
740                      out.write(open(rand_name).read())
741  
742                  # Close the file.
743                  out.close()
744  
745  
746      def superimpose(self):
747          """Superimpose the ensembles using fit to first in Molmol."""
748  
749          # Create the output directory.
750          mkdir_nofail("ensembles_superimposed")
751  
752          # Logging turned on.
753          if self.log:
754              log = open(self.results_dir+sep+"logs" + sep + "superimpose_molmol.stderr", 'w')
755              sys.stdout = open(self.results_dir+sep+"logs" + sep + "superimpose.log", 'w')
756  
757          # Loop over S and R.
758          for config in ["R", "S"]:
759              # Loop over each ensemble.
760              for ens in range(self.num_ens):
761                  # The file names.
762                  file_in = "ensembles" + sep + config + repr(ens) + ".pdb"
763                  file_out = "ensembles_superimposed" + sep + config + repr(ens) + ".pdb"
764  
765                  # Print out.
766                  sys.stderr.write("Superimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
767                  if self.log:
768                      log.write("\n\n\nSuperimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
769  
770                  # Failure handling (if a failure occurred and this is rerun, skip all existing files).
771                  if access(self.results_dir+sep+file_out, F_OK):
772                      continue
773  
774                  # Open the Molmol pipe.
775                  pipe = Popen("molmol -t -f -", shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=False)
776  
777                  # Init all.
778                  pipe.stdin.write("InitAll yes\n")
779  
780                  # Read the PDB.
781                  pipe.stdin.write("ReadPdb " + self.results_dir+sep+file_in + "\n")
782  
783                  # Fitting to mean.
784                  pipe.stdin.write("Fit to_first 'selected'\n")
785                  pipe.stdin.write("Fit to_mean 'selected'\n")
786  
787                  # Write the result.
788                  pipe.stdin.write("WritePdb " + self.results_dir+sep+file_out + "\n")
789  
790                  # End Molmol.
791                  pipe.stdin.close()
792  
793                  # Get STDOUT and STDERR.
794                  sys.stdout.write(pipe.stdout.read())
795                  if self.log:
796                      log.write(pipe.stderr.read())
797  
798                  # Close the pipe.
799                  pipe.stdout.close()
800                  pipe.stderr.close()
801  
802                  # Open the superimposed file in relax.
803                  self.interpreter.reset()
804                  self.interpreter.pipe.create('out', 'N-state')
805                  self.interpreter.structure.read_pdb(file_out)
806  
807                  # Fix the non-standard Molmol proton naming.
808                  for model in cdp.structure.structural_data:
809                      # Alias.
810                      mol = model.mol[0]
811  
812                      # Loop over all atoms.
813                      for i in range(len(mol.atom_name)):
814                          # A proton.
815                          if search('H', mol.atom_name[i]):
816                              mol.atom_name[i] = mol.atom_name[i][1:] + mol.atom_name[i][0]
817  
818                  # Replace the superimposed file.
819                  self.interpreter.structure.write_pdb(config + repr(ens) + ".pdb", dir=self.results_dir+sep+"ensembles_superimposed", force=True)
820
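
A minimal usage sketch for the class above, assuming the six stages are run in order as described in the module docstring.  All directory names, file names, column numbers, and numeric values below are illustrative placeholders only:

    from auto_analyses.stereochem_analysis import Stereochem_analysis

    # Each stage builds on the output of the previous one.
    for stage in range(1, 7):
        analysis = Stereochem_analysis(
            stage=stage,
            results_dir="results",
            num_ens=10000,
            num_models=10,
            configs=["R", "S"],
            snapshot_dir=["snapshots_R", "snapshots_S"],
            snapshot_min=[0, 0],
            snapshot_max=[1000, 1000],
            pseudo=[["Q7", ["@H16", "@H17", "@H18"]], ["Q9", ["@H20", "@H21", "@H22"]]],
            noe_file="noes.txt",
            noe_norm=400.0,
            rdc_name="PhDy",
            rdc_file="rdcs.txt",
            rdc_spin_id1_col=1,
            rdc_spin_id2_col=2,
            rdc_data_col=3,
            rdc_error_col=4,
            bond_length=1.09e-10,
            log=True
        )
        analysis.run()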