
Source Code for Module auto_analyses.stereochem_analysis

###############################################################################
#                                                                             #
# Copyright (C) 2010-2014 Edward d'Auvergne                                   #
#                                                                             #
# This file is part of the program relax (http://www.nmr-relax.com).          #
#                                                                             #
# This program is free software: you can redistribute it and/or modify        #
# it under the terms of the GNU General Public License as published by        #
# the Free Software Foundation, either version 3 of the License, or           #
# (at your option) any later version.                                         #
#                                                                             #
# This program is distributed in the hope that it will be useful,             #
# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
# GNU General Public License for more details.                                #
#                                                                             #
# You should have received a copy of the GNU General Public License           #
# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
#                                                                             #
###############################################################################

# Module docstring.
"""Determination of relative stereochemistry in organic molecules.

The analysis is performed using multiple ensembles of structures, randomly sampled from a given set of structures.  The discrimination is performed by comparing the sets of ensembles using NOE violations and RDC Q factors.

This script is split into multiple stages:

    1.  The random sampling of the snapshots to generate the N ensembles (NUM_ENS, usually 10,000 to 100,000) of M members (NUM_MODELS, usually ~10).  The original snapshot files are expected to be named as SNAPSHOT_DIR + CONFIG + a number from SNAPSHOT_MIN to SNAPSHOT_MAX + ".pdb", e.g. "snapshots/R647.pdb".  The ensembles will be placed into the "ensembles" directory.

    2.  The NOE violation analysis.

    3.  The superimposition of ensembles.  For each ensemble, Molmol is used for superimposition using the fit to first algorithm.  The superimposed ensembles will be placed into the "ensembles_superimposed" directory.  This stage is not necessary for the NOE analysis.

    4.  The RDC Q factor analysis.

    5.  Generation of Grace graphs.

    6.  Final ordering of ensembles using the combined RDC and NOE Q factors, whereby the NOE Q factor is defined as::

        Q^2 = U / sum(NOE_i^2),

    where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.  The denominator is the sum of all squared NOEs - this must be given as the value of NOE_NORM.  The combined Q is given by::

        Q_total^2 = Q_NOE^2 + Q_RDC^2.
"""
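
# Example usage (an illustrative sketch only -- the directory names, file names, column
# numbers and normalisation value below are hypothetical and must be replaced with values
# matching the real data):
#
#     from auto_analyses.stereochem_analysis import Stereochem_analysis
#
#     # Run all six stages in order.
#     for stage in range(1, 7):
#         Stereochem_analysis(
#             stage=stage,
#             results_dir="results",
#             num_ens=10000,
#             num_models=10,
#             configs=["R", "S"],
#             snapshot_dir=["snapshots", "snapshots"],
#             snapshot_min=[0, 0],
#             snapshot_max=[999, 999],
#             pseudo=[["Q7", ["@H16", "@H17", "@H18"]], ["Q9", ["@H20", "@H21", "@H22"]]],
#             noe_file="noes",
#             noe_norm=50.0,
#             rdc_name="PAN",
#             rdc_file="rdcs",
#             rdc_spin_id1_col=1,
#             rdc_spin_id2_col=2,
#             rdc_data_col=3,
#             rdc_error_col=4,
#             bond_length=1.117e-10,
#             log=True
#         ).run()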

# Dependencies.
import dep_check

# Python module imports.
from math import pi, sqrt
from os import F_OK, access, getcwd, sep
from random import randint
from re import search
if dep_check.subprocess_module:
    from subprocess import PIPE, Popen
import sys

# relax module imports.
from pipe_control.grace import write_xy_data, write_xy_header
from lib.physical_constants import dipolar_constant, g1H, g13C
from prompt.interpreter import Interpreter
from lib.errors import RelaxError
from lib.io import mkdir_nofail
from status import Status; status = Status()


class Stereochem_analysis:
    """Class for performing the relative stereochemistry analysis."""

    def __init__(self, stage=1, results_dir=None, num_ens=10000, num_models=10, configs=None, snapshot_dir='snapshots', snapshot_min=None, snapshot_max=None, pseudo=None, noe_file=None, noe_norm=None, rdc_name=None, rdc_file=None, rdc_spin_id1_col=None, rdc_spin_id2_col=None, rdc_data_col=None, rdc_error_col=None, bond_length=None, bond_length_file=None, log=None, bucket_num=200, lower_lim_noe=0.0, upper_lim_noe=600.0, lower_lim_rdc=0.0, upper_lim_rdc=1.0):
        """Set up for the stereochemistry analysis.

        @keyword stage: Stage of analysis (see the module docstring above for the options).
        @type stage: int
        @keyword results_dir: The optional directory to place all results files into.
        @type results_dir: None or str
        @keyword num_ens: Number of ensembles.
        @type num_ens: int
        @keyword num_models: Ensemble size.
        @type num_models: int
        @keyword configs: All the configurations.
        @type configs: list of str
        @keyword snapshot_dir: Snapshot directories (corresponding to the configurations).
        @type snapshot_dir: list of str
        @keyword snapshot_min: The number of the first snapshots (corresponding to the configurations).
        @type snapshot_min: list of int
        @keyword snapshot_max: The number of the last snapshots (corresponding to the configurations).
        @type snapshot_max: list of int
        @keyword pseudo: The list of pseudo-atoms.  Each element is a list of the pseudo-atom name and a list of all those atoms forming the pseudo-atom.  For example, pseudo = [["Q7", ["@H16", "@H17", "@H18"]], ["Q9", ["@H20", "@H21", "@H22"]]].
        @type pseudo: list of list of str and list of str
        @keyword noe_file: The name of the NOE restraint file.
        @type noe_file: str
        @keyword noe_norm: The NOE normalisation factor (equal to the sum of all NOEs squared).
        @type noe_norm: float
        @keyword rdc_name: The label for this RDC data set.
        @type rdc_name: str
        @keyword rdc_file: The name of the RDC file.
        @type rdc_file: str
        @keyword rdc_spin_id1_col: The spin ID column of the first spin in the RDC file.
        @type rdc_spin_id1_col: None or int
        @keyword rdc_spin_id2_col: The spin ID column of the second spin in the RDC file.
        @type rdc_spin_id2_col: None or int
        @keyword rdc_data_col: The data column of the RDC file.
        @type rdc_data_col: int
        @keyword rdc_error_col: The error column of the RDC file.
        @type rdc_error_col: int
        @keyword bond_length: The bond length value in meters.  This overrides the bond_length_file argument.
        @type bond_length: float or None
        @keyword bond_length_file: The file of bond lengths for each atom pair in meters.  The first and second columns must be the spin ID strings and the third column must contain the data.
        @type bond_length_file: str or None
        @keyword log: Log file output flag (only for certain stages).
        @type log: bool
        @keyword bucket_num: Number of buckets for the distribution plots.
        @type bucket_num: int
        @keyword lower_lim_noe: Lower limit of the NOE violation distribution plots.
        @type lower_lim_noe: float
        @keyword upper_lim_noe: Upper limit of the NOE violation distribution plots.
        @type upper_lim_noe: float
        @keyword lower_lim_rdc: Lower limit of the RDC Q factor distribution plots.
        @type lower_lim_rdc: float
        @keyword upper_lim_rdc: Upper limit of the RDC Q factor distribution plots.
        @type upper_lim_rdc: float
        """

        # Execution lock.
        status.exec_lock.acquire('auto stereochem analysis', mode='auto-analysis')

        # Set up the analysis status object.
        status.init_auto_analysis('stereochem', type='stereochem')
        status.current_analysis = 'auto stereochem analysis'

        # Store all the args.
        self.stage = stage
        self.results_dir = results_dir
        self.num_ens = num_ens
        self.num_models = num_models
        self.configs = configs
        self.snapshot_dir = snapshot_dir
        self.snapshot_min = snapshot_min
        self.snapshot_max = snapshot_max
        self.pseudo = pseudo
        self.noe_file = noe_file
        self.noe_norm = noe_norm
        self.rdc_name = rdc_name
        self.rdc_file = rdc_file
        self.rdc_spin_id1_col = rdc_spin_id1_col
        self.rdc_spin_id2_col = rdc_spin_id2_col
        self.rdc_data_col = rdc_data_col
        self.rdc_error_col = rdc_error_col
        self.bond_length = bond_length
        self.bond_length_file = bond_length_file
        self.log = log
        self.bucket_num = bucket_num
        self.lower_lim_noe = lower_lim_noe
        self.upper_lim_noe = upper_lim_noe
        self.lower_lim_rdc = lower_lim_rdc
        self.upper_lim_rdc = upper_lim_rdc

        # Load the interpreter.
        self.interpreter = Interpreter(show_script=False, raise_relax_error=True)
        self.interpreter.populate_self()
        self.interpreter.on(verbose=False)

        # Create the results directory.
        if self.results_dir:
            mkdir_nofail(self.results_dir)

        # Or use the current working directory.
        else:
            self.results_dir = getcwd()

        # Create a directory for log files.
        if self.log:
            mkdir_nofail(self.results_dir + sep + "logs")

        # Finish and unlock execution.
        status.auto_analysis['stereochem'].fin = True
        status.current_analysis = None
        status.exec_lock.release()


    def run(self):
        """Execute the given stage of the analysis."""

        # Store the original STDOUT.
        self.stdout_orig = sys.stdout

        # Sampling of snapshots.
        if self.stage == 1:
            self.sample()

        # NOE violation analysis.
        elif self.stage == 2:
            self.noe_viol()

        # Ensemble superimposition.
        elif self.stage == 3:
            self.superimpose()

        # RDC Q factor analysis.
        elif self.stage == 4:
            self.rdc_analysis()

        # Grace plot creation.
        elif self.stage == 5:
            self.grace_plots()

        # Final combined Q ordering.
        elif self.stage == 6:
            self.combined_q()

        # Unknown stage.
        else:
            raise RelaxError("The stage number %s is unknown." % self.stage)

        # Restore STDOUT.
        sys.stdout = self.stdout_orig


    def combined_q(self):
        """Calculate the combined Q factor.

        The combined Q is defined as::

            Q_total^2 = Q_NOE^2 + Q_RDC^2,

        and the NOE Q factor as::

            Q^2 = U / sum(NOE_i^2),

        where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.
        """

        # Checks.
        if not access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
            raise RelaxError("The NOE analysis has not been performed, cannot find the file '%s'." % self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted")
        if not access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
            raise RelaxError("The RDC analysis has not been performed, cannot find the file '%s'." % self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted")

        # Loop over the configurations.
        for i in range(len(self.configs)):
            # Print out.
            print("Creating the combined Q factor file for configuration '%s'." % self.configs[i])

            # Open the NOE results file and read the data.
            file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
            noe_lines = file.readlines()
            file.close()

            # Open the RDC results file and read the data.
            file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
            rdc_lines = file.readlines()
            file.close()

            # The combined Q factor file.
            out = open(self.results_dir+sep+"Q_total_%s" % self.configs[i], 'w')
            out_sorted = open(self.results_dir+sep+"Q_total_%s_sorted" % self.configs[i], 'w')

            # Loop over the data (skipping the header line).
            data = []
            for j in range(1, len(noe_lines)):
                # Split the lines.
                ens = int(noe_lines[j].split()[0])
                noe_viol = float(noe_lines[j].split()[1])
                q_rdc = float(rdc_lines[j].split()[1])

                # The NOE Q factor.
                q_noe = sqrt(noe_viol/self.noe_norm)

                # Combined Q.
                q = sqrt(q_noe**2 + q_rdc**2)

                # Write out the unsorted list.
                out.write("%-20i%20.15f\n" % (ens, q))

                # Store the values.
                data.append([q, ens])

            # Sort the combined Q.
            data.sort()

            # Write the data.
            for i in range(len(data)):
                out_sorted.write("%-20i%20.15f\n" % (data[i][1], data[i][0]))

            # Close the files.
            out.close()
            out_sorted.close()


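    # A worked example of the combined Q factor above (hypothetical numbers): for a summed
    # NOE violation of U = 200 Angstrom^2 and a normalisation factor sum(NOE_i^2) = 5000
    # Angstrom^2, the NOE Q factor is Q_NOE = sqrt(200/5000) = 0.2.  Combined with an RDC
    # Q factor of Q_RDC = 0.15, this gives Q_total = sqrt(0.2^2 + 0.15^2) = 0.25.
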
    def generate_distribution(self, values, lower=0.0, upper=200.0, inc=None):
        """Create the distribution data structure."""

        # The bin width.
        bin_width = (upper - lower)/float(inc)

        # Init the dist object.
        dist = []
        for i in range(inc):
            dist.append([bin_width*i+lower, 0])

        # Loop over the values.
        for val in values:
            # The bin.
            bin = int((val - lower)/bin_width)

            # Outside of the limits.
            if bin < 0 or bin >= inc:
                print("Outside of the limits: '%s'" % val)
                continue

            # Increment the count.
            dist[bin][1] = dist[bin][1] + 1

        # Convert the counts to frequencies.
        total_pr = 0.0
        for i in range(inc):
            dist[i][1] = dist[i][1] / float(len(values))
            total_pr = total_pr + dist[i][1]

        print("Total Pr: %s" % total_pr)

        # Return the dist.
        return dist


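    # For example (an illustrative sketch only), generate_distribution([0.05, 0.25, 0.25, 0.35],
    # lower=0.0, upper=0.4, inc=4) uses a bin width of 0.1 and returns
    # [[0.0, 0.25], [0.1, 0.0], [0.2, 0.5], [0.3, 0.25]], i.e. [bin start, frequency] pairs.
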
    def grace_plots(self):
        """Generate grace plots of the results."""

        # The number of configs.
        n = len(self.configs)

        # The colours for the different configs.
        defaults = [4, 2]    # Blue and red.
        colours = []
        for i in range(n):
            # Default colours.
            if i < len(defaults):
                colours.append(defaults[i])

            # Otherwise black!
            else:
                colours.append(0)

        # The ensemble number text.
        ens_text = ''
        dividers = [1e15, 1e12, 1e9, 1e6, 1e3, 1]
        num_ens = self.num_ens
        for i in range(len(dividers)):
            # The number.
            num = int(num_ens / dividers[i])

            # The text.
            if num:
                text = repr(num)
            elif not num and ens_text:
                text = '000'
            else:
                continue

            # Update the text.
            ens_text = ens_text + text

            # A comma.
            if i < len(dividers)-1:
                ens_text = ens_text + ','

            # Remove the front part of the number.
            num_ens = num_ens - dividers[i]*num

        # Subtitle for all graphs.
        subtitle = '%s ensembles of %s' % (ens_text, self.num_models)

        # NOE violations.
        if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
            # Print out.
            print("Generating NOE violation Grace plots.")

            # Open the output files.
            grace_curve = open(self.results_dir+sep+"NOE_viol_curve.agr", 'w')
            grace_dist = open(self.results_dir+sep+"NOE_viol_dist.agr", 'w')

            # Loop over the configurations.
            data = []
            dist = []
            for i in range(n):
                # Open the results file and read the data.
                file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i] + "_sorted")
                lines = file.readlines()
                file.close()

                # Add a new graph set.
                data.append([])

                # Loop over the ensembles and extract the NOE violation.
                noe_viols = []
                for j in range(1, len(lines)):
                    # Extract the violation.
                    viol = float(lines[j].split()[1])
                    noe_viols.append(viol)

                    # Add to the data structure.
                    data[i].append([j, viol])

                # Calculate the NOE violation distribution.
                dist.append(self.generate_distribution(noe_viols, inc=self.bucket_num, upper=self.upper_lim_noe, lower=self.lower_lim_noe))

            # Headers.
            write_xy_header(file=grace_curve, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0]*n], axis_labels=[['Ensemble (sorted)', 'NOE violation (Angstrom\\S2\\N)']], legend_pos=[[0.3, 0.8]])
            write_xy_header(file=grace_dist, title='NOE violation comparison', subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1]*n], symbol_sizes=[[0.5]*n], linestyle=[[3]*n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', 'Frequency']], legend_pos=[[1.1, 0.8]])

            # Write the data.
            write_xy_data([data], file=grace_curve, graph_type='xy')
            write_xy_data([dist], file=grace_dist, graph_type='xy')

            # Close the files.
            grace_curve.close()
            grace_dist.close()

        # RDC Q factors.
        if access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
            # Print out.
            print("Generating RDC Q factor Grace plots.")

            # Open the Grace output files.
            grace_curve = open(self.results_dir+sep+"RDC_%s_curve.agr" % self.rdc_name, 'w')
            grace_dist = open(self.results_dir+sep+"RDC_%s_dist.agr" % self.rdc_name, 'w')

            # Loop over the configurations.
            data = []
            dist = []
            for i in range(n):
                # Open the results file and read the data.
                file = open(self.results_dir+sep+"Q_factors_" + self.configs[i] + "_sorted")
                lines = file.readlines()
                file.close()

                # Add a new graph set.
                data.append([])

                # Loop over the Q factors.
                values = []
                for j in range(1, len(lines)):
                    # Extract the Q factor.
                    value = float(lines[j].split()[1])
                    values.append(value)

                    # Add to the data structure.
                    data[i].append([j, value])

                # Calculate the Q factor distribution.
                dist.append(self.generate_distribution(values, inc=self.bucket_num, upper=self.upper_lim_rdc, lower=self.lower_lim_rdc))

            # Headers.
            write_xy_header(file=grace_curve, title='%s RDC Q factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[0]*n], axis_labels=[['Ensemble (sorted)', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[0.3, 0.8]])
            write_xy_header(file=grace_dist, title='%s RDC Q factor comparison' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[1]*n], symbol_sizes=[[0.5]*n], linestyle=[[3]*n], axis_labels=[['%s RDC Q factor (pales format)' % self.rdc_name, 'Frequency']], legend_pos=[[1.1, 0.8]])

            # Write the data.
            write_xy_data([data], file=grace_curve, graph_type='xy')
            write_xy_data([dist], file=grace_dist, graph_type='xy')

            # Close the files.
            grace_curve.close()
            grace_dist.close()

        # NOE-RDC correlation plots.
        if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK) and access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
            # Print out.
            print("Generating NOE-RDC correlation Grace plots.")

            # Open the Grace output files.
            grace_file = open(self.results_dir+sep+"correlation_plot.agr", 'w')
            grace_file_scaled = open(self.results_dir+sep+"correlation_plot_scaled.agr", 'w')

            # Grace data.
            data = []
            data_scaled = []
            for i in range(len(self.configs)):
                # Open the NOE results file and read the data.
                file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
                noe_lines = file.readlines()
                file.close()

                # Add a new graph set.
                data.append([])
                data_scaled.append([])

                # Open the RDC results file and read the data.
                file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
                rdc_lines = file.readlines()
                file.close()

                # Loop over the data.
                for j in range(1, len(noe_lines)):
                    # Split the lines.
                    noe_viol = float(noe_lines[j].split()[1])
                    q_factor = float(rdc_lines[j].split()[1])

                    # Add the xy pair.
                    data[i].append([noe_viol, q_factor])
                    data_scaled[i].append([sqrt(noe_viol/self.noe_norm), q_factor])

            # Write the data.
            write_xy_header(file=grace_file, title='Correlation plot - %s RDC vs. NOE' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['NOE violation (Angstrom\\S2\\N)', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
            write_xy_header(file=grace_file_scaled, title='Correlation plot - %s RDC vs. NOE Q factor' % self.rdc_name, subtitle=subtitle, sets=[n], set_names=[self.configs], set_colours=[colours], symbols=[[9]*n], symbol_sizes=[[0.24]*n], linetype=[[0]*n], axis_labels=[['Normalised NOE violation (Q = sqrt(U / \\xS\\f{}NOE\\si\\N\\S2\\N))', '%s RDC Q factor (pales format)' % self.rdc_name]], legend_pos=[[1.1, 0.8]])
            write_xy_data([data], file=grace_file, graph_type='xy')
            write_xy_data([data_scaled], file=grace_file_scaled, graph_type='xy')


    def noe_viol(self):
        """NOE violation calculations."""

        # Redirect STDOUT to a log file.
        if self.log:
            sys.stdout = open(self.results_dir+sep+"logs" + sep + "NOE_viol.log", 'w')

        # Create a directory for the save files.
        dir = self.results_dir + sep + "NOE_results"
        mkdir_nofail(dir=dir)

        # Loop over the configurations.
        for config in self.configs:
            # Print out.
            print("\n"*10 + "# Set up for config " + config + " #" + "\n")

            # Open the results file.
            out = open(self.results_dir+sep+"NOE_viol_" + config, 'w')
            out_sorted = open(self.results_dir+sep+"NOE_viol_" + config + "_sorted", 'w')
            out.write("%-20s%20s\n" % ("# Ensemble", "NOE_violation"))
            out_sorted.write("%-20s%20s\n" % ("# Ensemble", "NOE_violation"))

            # Create the data pipe.
            self.interpreter.pipe.create("noe_viol_%s" % config, "N-state")

            # Read the first structure.
            self.interpreter.structure.read_pdb("ensembles" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))

            # Load all protons as the sequence.
            self.interpreter.structure.load_spins("@H*", ave_pos=False)

            # Create the pseudo-atoms.
            for i in range(len(self.pseudo)):
                self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
            self.interpreter.sequence.display()

            # Read the NOE list.
            self.interpreter.noe.read_restraints(file=self.noe_file)

            # Set up the N-state model.
            self.interpreter.n_state_model.select_model(model="fixed")

            # Print out.
            print("\n"*2 + "# Set up complete #" + "\n"*10)

            # Loop over each ensemble.
            noe_viol = []
            for ens in range(self.num_ens):
                # Print out the ensemble to both the log and screen.
                if self.log:
                    sys.stdout.write(config + repr(ens) + "\n")
                    sys.stderr.write(config + repr(ens) + "\n")

                # Delete the old structures and rename the molecule.
                self.interpreter.structure.delete()

                # Read the ensemble.
                self.interpreter.structure.read_pdb("ensembles" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))

                # Get the atomic positions.
                self.interpreter.structure.get_pos(ave_pos=False)

                # Calculate the average NOE potential.
                self.interpreter.calc()

                # Sum the violations.
                cdp.sum_viol = 0.0
                for i in range(len(cdp.ave_dist)):
                    if cdp.quad_pot[i][2]:
                        cdp.sum_viol = cdp.sum_viol + cdp.quad_pot[i][2]

                # Write out the NOE violation.
                noe_viol.append([cdp.sum_viol, ens])
                out.write("%-20i%30.15f\n" % (ens, cdp.sum_viol))

                # Save the state.
                self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)

            # Sort the NOE violations.
            noe_viol.sort()

            # Write the data.
            for i in range(len(noe_viol)):
                out_sorted.write("%-20i%20.15f\n" % (noe_viol[i][1], noe_viol[i][0]))


    def rdc_analysis(self):
        """Perform the RDC part of the analysis."""

        # Redirect STDOUT to a log file.
        if self.log:
            sys.stdout = open(self.results_dir+sep+"logs" + sep + "RDC_%s_analysis.log" % self.rdc_name, 'w')

        # The dipolar constant.
        d = 0.0
        if self.bond_length != None:
            d = 3.0 / (2.0*pi) * dipolar_constant(g13C, g1H, self.bond_length)

        # Create a directory for the save files.
        dir = self.results_dir + sep + "RDC_%s_results" % self.rdc_name
        mkdir_nofail(dir=dir)

        # Loop over the configurations.
        for config in self.configs:
            # Print out.
            print("\n"*10 + "# Set up for config " + config + " #" + "\n")

            # Open the results files.
            out = open(self.results_dir+sep+"Q_factors_" + config, 'w')
            out_sorted = open(self.results_dir+sep+"Q_factors_" + config + "_sorted", 'w')
            out.write("%-20s%20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)", "RDC_Q_factor(standard)"))
            out_sorted.write("%-20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)"))

            # Create the data pipe.
            self.interpreter.pipe.create("rdc_analysis_%s" % config, "N-state")

            # Read the first structure.
            self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))

            # Load all spins as the sequence.
            self.interpreter.structure.load_spins(ave_pos=False)

            # Create the pseudo-atoms.
            for i in range(len(self.pseudo)):
                self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
            self.interpreter.sequence.display()

            # Read the RDC data.
            self.interpreter.rdc.read(align_id=self.rdc_file, file=self.rdc_file, spin_id1_col=self.rdc_spin_id1_col, spin_id2_col=self.rdc_spin_id2_col, data_col=self.rdc_data_col, error_col=self.rdc_error_col)

            # Define the magnetic dipole-dipole relaxation interaction.
            if self.bond_length != None:
                self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
                self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@Q*', ave_dist=self.bond_length)
            else:
                self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)

            # Set the nuclear isotope.
            self.interpreter.spin.isotope(isotope='13C', spin_id='@C*')
            self.interpreter.spin.isotope(isotope='1H', spin_id='@H*')
            self.interpreter.spin.isotope(isotope='1H', spin_id='@Q*')

            # Set up the model.
            self.interpreter.n_state_model.select_model(model="fixed")

            # Print out.
            print("\n"*2 + "# Set up complete #" + "\n"*10)

            # Loop over each ensemble.
            q_factors = []
            for ens in range(self.num_ens):
                # Print out the ensemble to both the log and screen.
                if self.log:
                    sys.stdout.write(config + repr(ens) + "\n")
                    sys.stderr.write(config + repr(ens) + "\n")

                # Delete the old structures.
                self.interpreter.structure.delete()

                # Read the ensemble.
                self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=list(range(1, self.num_models+1)))

                # Get the positional information, then load the CH vectors.
                self.interpreter.structure.get_pos(ave_pos=False)
                if self.bond_length != None:
                    self.interpreter.interatom.set_dist(spin_id1='@C*', spin_id2='@H*', ave_dist=self.bond_length)
                else:
                    self.interpreter.interatom.read_dist(file=self.bond_length_file, spin_id1_col=1, spin_id2_col=2, data_col=3)
                self.interpreter.interatom.unit_vectors(ave=False)

                # Minimisation.
                #grid_search(inc=4)
                self.interpreter.minimise("simplex", constraints=False)

                # Store and write out the Q factors.
                q_factors.append([cdp.q_rdc, ens])
                out.write("%-20i%20.15f%20.15f\n" % (ens, cdp.q_rdc, cdp.q_rdc_norm2))

                # Calculate the alignment tensor in Hz, and store it for reference.
                cdp.align_tensor_Hz = d * cdp.align_tensors[0].A
                cdp.align_tensor_Hz_5D = d * cdp.align_tensors[0].A_5D

                # Save the state.
                self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)

            # Sort the Q factors.
            q_factors.sort()

            # Write the data.
            for i in range(len(q_factors)):
                out_sorted.write("%-20i%20.15f\n" % (q_factors[i][1], q_factors[i][0]))


    def sample(self):
        """Generate the ensembles by random sampling of the snapshots."""

        # Create the directory for the ensembles, if needed.
        mkdir_nofail(dir=self.results_dir + sep + "ensembles")

        # Loop over the configurations.
        for conf_index in range(len(self.configs)):
            # Loop over each ensemble.
            for ens in range(self.num_ens):
                # Random sampling.
                rand = []
                for j in range(self.num_models):
                    rand.append(randint(self.snapshot_min[conf_index], self.snapshot_max[conf_index]))

                # Print out.
                print("Generating ensemble %s%s from structures %s." % (self.configs[conf_index], ens, rand))

                # The file name.
                file_name = "ensembles" + sep + self.configs[conf_index] + repr(ens) + ".pdb"

                # Open the output file.
                out = open(self.results_dir+sep+file_name, 'w')

                # Header.
                out.write("REM Structures: " + repr(rand) + "\n")

                # Concatenate the files.
                for j in range(self.num_models):
                    # The random file.
                    rand_name = self.snapshot_dir[conf_index] + sep + self.configs[conf_index] + repr(rand[j]) + ".pdb"

                    # Append the file.
                    out.write(open(rand_name).read())

                # Close the file.
                out.close()


    def superimpose(self):
        """Superimpose the ensembles using fit to first in Molmol."""

        # Create the output directory.
        mkdir_nofail(dir=self.results_dir + sep + "ensembles_superimposed")

        # Logging turned on.
        if self.log:
            log = open(self.results_dir+sep+"logs" + sep + "superimpose_molmol.stderr", 'w')
            sys.stdout = open(self.results_dir+sep+"logs" + sep + "superimpose.log", 'w')

        # Loop over the R and S configurations.
        for config in ["R", "S"]:
            # Loop over each ensemble.
            for ens in range(self.num_ens):
                # The file names.
                file_in = "ensembles" + sep + config + repr(ens) + ".pdb"
                file_out = "ensembles_superimposed" + sep + config + repr(ens) + ".pdb"

                # Print out.
                sys.stderr.write("Superimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
                if self.log:
                    log.write("\n\n\nSuperimposing %s with Molmol, output to %s.\n" % (file_in, file_out))

                # Failure handling (if a failure occurred and this is rerun, skip all existing files).
                if access(self.results_dir+sep+file_out, F_OK):
                    continue

                # Open the Molmol pipe.
                pipe = Popen("molmol -t -f -", shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=False)

                # Init all.
                pipe.stdin.write("InitAll yes\n")

                # Read the PDB.
                pipe.stdin.write("ReadPdb " + self.results_dir+sep+file_in + "\n")

                # Fit to the first model, then to the mean.
                pipe.stdin.write("Fit to_first 'selected'\n")
                pipe.stdin.write("Fit to_mean 'selected'\n")

                # Write the result.
                pipe.stdin.write("WritePdb " + self.results_dir+sep+file_out + "\n")

                # End Molmol.
                pipe.stdin.close()

                # Get STDOUT and STDERR.
                sys.stdout.write(pipe.stdout.read())
                if self.log:
                    log.write(pipe.stderr.read())

                # Close the pipe.
                pipe.stdout.close()
                pipe.stderr.close()

                # Open the superimposed file in relax.
                self.interpreter.reset()
                self.interpreter.pipe.create('out', 'N-state')
                self.interpreter.structure.read_pdb(file_out)

                # Fix the non-standard Molmol proton naming.
                for model in cdp.structure.structural_data:
                    # Alias.
                    mol = model.mol[0]

                    # Loop over all atoms.
                    for i in range(len(mol.atom_name)):
                        # A proton.
                        if search('H', mol.atom_name[i]):
                            mol.atom_name[i] = mol.atom_name[i][1:] + mol.atom_name[i][0]

                # Replace the superimposed file.
                self.interpreter.structure.write_pdb(config + repr(ens) + ".pdb", dir=self.results_dir+sep+"ensembles_superimposed", force=True)