Package auto_analyses :: Module stereochem_analysis

Source Code for Module auto_analyses.stereochem_analysis

  1  ############################################################################### 
  2  #                                                                             # 
  3  # Copyright (C) 2010-2012 Edward d'Auvergne                                   # 
  4  #                                                                             # 
  5  # This file is part of the program relax.                                     # 
  6  #                                                                             # 
  7  # relax is free software; you can redistribute it and/or modify               # 
  8  # it under the terms of the GNU General Public License as published by        # 
  9  # the Free Software Foundation; either version 2 of the License, or           # 
 10  # (at your option) any later version.                                         # 
 11  #                                                                             # 
 12  # relax is distributed in the hope that it will be useful,                    # 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 15  # GNU General Public License for more details.                                # 
 16  #                                                                             # 
 17  # You should have received a copy of the GNU General Public License           # 
 18  # along with relax; if not, write to the Free Software                        # 
 19  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA   # 
 20  #                                                                             # 
 21  ############################################################################### 
 22   
 23  # Module docstring. 
 24  """Determination of relative stereochemistry in organic molecules. 
 25   
  26  The analysis is performed by using multiple ensembles of structures, randomly sampled from a given 
 27  set of structures.  The discrimination is performed by comparing the sets of ensembles using NOE 
 28  violations and RDC Q-factors. 
 29   
 30  This script is split into multiple stages: 
 31   
 32      1.  The random sampling of the snapshots to generate the N ensembles (NUM_ENS, usually 10,000 to 
 33      100,000) of M members (NUM_MODELS, usually ~10).  The original snapshot files are expected to be 
 34      named the SNAPSHOT_DIR + CONFIG + a number from SNAPSHOT_MIN to SNAPSHOT_MAX + ".pdb", e.g. 
 35      "snapshots/R647.pdb".  The ensembles will be placed into the "ensembles" directory. 
 36   
 37      2.  The NOE violation analysis. 
 38   
 39      3.  The superimposition of ensembles.  For each ensemble, Molmol is used for superimposition 
 40      using the fit to first algorithm.  The superimposed ensembles will be placed into the 
 41      "ensembles_superimposed" directory.  This stage is not necessary for the NOE analysis. 
 42   
 43      4.  The RDC Q-factor analysis. 
 44   
 45      5.  Generation of Grace graphs. 
 46   
 47      6.  Final ordering of ensembles using the combined RDC and NOE Q-factors, whereby the NOE 
 48      Q-factor is defined as:: 
 49   
 50          Q^2 = U / sum(NOE_i^2), 
 51   
 52      where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.  The 
 53      denominator is the sum of all squared NOEs - this must be given as the value of NOE_NORM.  The 
 54      combined Q is given by:: 
 55   
 56          Q_total^2 = Q_NOE^2 + Q_RDC^2. 
 57  """ 
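# --- Illustrative example (not part of the module source) -------------------
# A minimal worked example of the Q-factor definitions given in the docstring
# above, using made-up numbers: a summed NOE violation U of 25 Angstrom^2, an
# NOE normalisation factor NOE_NORM (the sum of all squared NOEs) of
# 10000 Angstrom^2, and an assumed RDC Q-factor of 0.12.
#
#     from math import sqrt
#     U = 25.0                               # Quadratic flat bottom well potential (Angstrom^2).
#     noe_norm = 10000.0                     # NOE_NORM, the sum of all NOEs squared.
#     q_noe = sqrt(U / noe_norm)             # Q_NOE = 0.05.
#     q_rdc = 0.12                           # Assumed RDC Q-factor from stage 4.
#     q_total = sqrt(q_noe**2 + q_rdc**2)    # Q_total = 0.13.
# -----------------------------------------------------------------------------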
 58   
 59  # Python module imports. 
 60  from math import pi, sqrt 
 61  from os import F_OK, access, getcwd, popen3, sep 
 62  from random import randint 
 63  from re import search 
 64  from string import split 
 65  import sys 
 66   
 67  # relax module imports. 
 68  from generic_fns import pipes 
 69  from generic_fns.grace import write_xy_data, write_xy_header 
 70  from generic_fns.selection import spin_loop 
 71  from physical_constants import dipolar_constant, g1H, g13C 
 72  from prompt.interpreter import Interpreter 
 73  from relax_errors import RelaxError 
 74  from relax_io import mkdir_nofail 
 75  from status import Status; status = Status() 
 76   
 77   
 78   
 79  class Stereochem_analysis:
 80      """Class for performing the relative stereochemistry analysis."""
 81  
 82      def __init__(self, stage=1, results_dir=None, num_ens=10000, num_models=10, configs=None, snapshot_dir='snapshots', snapshot_min=None, snapshot_max=None, pseudo=None, noe_file=None, noe_norm=None, rdc_name=None, rdc_file=None, rdc_spin_id_col=None, rdc_mol_name_col=None, rdc_res_num_col=None, rdc_res_name_col=None, rdc_spin_num_col=None, rdc_spin_name_col=None, rdc_data_col=None, rdc_error_col=None, bond_length=None, log=None, bucket_num=200, lower_lim_noe=0.0, upper_lim_noe=600.0, lower_lim_rdc=0.0, upper_lim_rdc=1.0):
 83          """Set up for the stereochemistry analysis.
 84  
 85          @keyword stage: Stage of analysis (see the module docstring above for the options).
 86          @type stage: int
 87          @keyword results_dir: The optional directory to place all results files into.
 88          @type results_dir: None or str
 89          @keyword num_ens: Number of ensembles.
 90          @type num_ens: int
 91          @keyword num_models: Ensemble size.
 92          @type num_models: int
 93          @keyword configs: All the configurations.
 94          @type configs: list of str
 95          @keyword snapshot_dir: Snapshot directories (corresponding to the configurations).
 96          @type snapshot_dir: list of str
 97          @keyword snapshot_min: The number of the first snapshots (corresponding to the configurations).
 98          @type snapshot_min: list of int
 99          @keyword snapshot_max: The number of the last snapshots (corresponding to the configurations).
100          @type snapshot_max: list of int
101          @keyword pseudo: The list of pseudo-atoms.  Each element is a list of the pseudo-atom name and a list of all those atoms forming the pseudo-atom.  For example, pseudo = [["Q7", ["@H16", "@H17", "@H18"]], ["Q9", ["@H20", "@H21", "@H22"]]].
102          @type pseudo: list of list of str and list of str
103          @keyword noe_file: The name of the NOE restraint file.
104          @type noe_file: str
105          @keyword noe_norm: The NOE normalisation factor (equal to the sum of all NOEs squared).
106          @type noe_norm: float
107          @keyword rdc_name: The label for this RDC data set.
108          @type rdc_name: str
109          @keyword rdc_file: The name of the RDC file.
110          @type rdc_file: str
111          @keyword rdc_spin_id_col: The spin ID column of the RDC file.
112          @type rdc_spin_id_col: None or int
113          @keyword rdc_mol_name_col: The molecule name column of the RDC file.
114          @type rdc_mol_name_col: None or int
115          @keyword rdc_res_num_col: The residue number column of the RDC file.
116          @type rdc_res_num_col: None or int
117          @keyword rdc_res_name_col: The residue name column of the RDC file.
118          @type rdc_res_name_col: None or int
119          @keyword rdc_spin_num_col: The spin number column of the RDC file.
120          @type rdc_spin_num_col: None or int
121          @keyword rdc_spin_name_col: The spin name column of the RDC file.
122          @type rdc_spin_name_col: None or int
123          @keyword rdc_data_col: The data column of the RDC file.
124          @type rdc_data_col: int
125          @keyword rdc_error_col: The error column of the RDC file.
126          @type rdc_error_col: int
127          @keyword bond_length: The bond length value in meters.
128          @type bond_length: float
129          @keyword log: Log file output flag (only for certain stages).
130          @type log: bool
131          @keyword bucket_num: Number of buckets for the distribution plots.
132          @type bucket_num: int
133          @keyword lower_lim_noe: Distribution plot limits.
134          @type lower_lim_noe: int
135          @keyword upper_lim_noe: Distribution plot limits.
136          @type upper_lim_noe: int
137          @keyword lower_lim_rdc: Distribution plot limits.
138          @type lower_lim_rdc: int
139          @keyword upper_lim_rdc: Distribution plot limits.
140          @type upper_lim_rdc: int
141          """
142  
143          # Execution lock.
144          status.exec_lock.acquire('auto stereochem analysis', mode='auto-analysis')
145  
146          # Set up the analysis status object.
147          status.init_auto_analysis('stereochem', type='stereochem')
148          status.current_analysis = 'auto stereochem analysis'
149  
150          # Store all the args.
151          self.stage = stage
152          self.results_dir = results_dir
153          self.num_ens = num_ens
154          self.num_models = num_models
155          self.configs = configs
156          self.snapshot_dir = snapshot_dir
157          self.snapshot_min = snapshot_min
158          self.snapshot_max = snapshot_max
159          self.pseudo = pseudo
160          self.noe_file = noe_file
161          self.noe_norm = noe_norm
162          self.rdc_name = rdc_name
163          self.rdc_file = rdc_file
164          self.rdc_spin_id_col = rdc_spin_id_col
165          self.rdc_mol_name_col = rdc_mol_name_col
166          self.rdc_res_num_col = rdc_res_num_col
167          self.rdc_res_name_col = rdc_res_name_col
168          self.rdc_spin_num_col = rdc_spin_num_col
169          self.rdc_spin_name_col = rdc_spin_name_col
170          self.rdc_data_col = rdc_data_col
171          self.rdc_error_col = rdc_error_col
172          self.bond_length = bond_length
173          self.log = log
174          self.bucket_num = bucket_num
175          self.lower_lim_noe = lower_lim_noe
176          self.upper_lim_noe = upper_lim_noe
177          self.lower_lim_rdc = lower_lim_rdc
178          self.upper_lim_rdc = upper_lim_rdc
179  
180          # Load the interpreter.
181          self.interpreter = Interpreter(show_script=False, quit=False, raise_relax_error=True)
182          self.interpreter.populate_self()
183          self.interpreter.on(verbose=False)
184  
185          # Create the results directory.
186          if self.results_dir:
187              mkdir_nofail(self.results_dir)
188  
189          # Or use the current working directory.
190          else:
191              self.results_dir = getcwd()
192  
193          # Create a directory for log files.
194          if self.log:
195              mkdir_nofail(self.results_dir + sep + "logs")
196  
197          # Finish and unlock execution.
198          status.auto_analysis['stereochem'].fin = True
199          status.current_analysis = None
200          status.exec_lock.release()
201  
202  
203      def run(self):
204          """Execute the given stage of the analysis."""
205  
206          # Store the original STDOUT.
207          self.stdout_orig = sys.stdout
208  
209          # Sampling of snapshots.
210          if self.stage == 1:
211              self.sample()
212  
213          # NOE violation analysis.
214          elif self.stage == 2:
215              self.noe_viol()
216  
217          # Ensemble superimposition.
218          elif self.stage == 3:
219              self.superimpose()
220  
221          # RDC Q-factor analysis.
222          elif self.stage == 4:
223              self.rdc_analysis()
224  
225          # Grace plot creation.
226          elif self.stage == 5:
227              self.grace_plots()
228  
229          # Final combined Q ordering.
230          elif self.stage == 6:
231              self.combined_q()
232  
233          # Unknown stage.
234          else:
235              raise RelaxError("The stage number %s is unknown." % self.stage)
236  
237          # Restore STDOUT.
238          sys.stdout = self.stdout_orig
239  
240  
241      def combined_q(self):
242          """Calculate the combined Q-factor.
243  
244          The combined Q is defined as::
245  
246              Q_total^2 = Q_NOE^2 + Q_RDC^2,
247  
248          and the NOE Q-factor as::
249  
250              Q^2 = U / sum(NOE_i^2),
251  
252          where U is the quadratic flat bottom well potential - the NOE violation in Angstrom^2.
253          """
254  
255          # Checks.
256          if not access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
257              raise RelaxError("The NOE analysis has not been performed, cannot find the file '%s'." % self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted")
258          if not access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
259              raise RelaxError("The RDC analysis has not been performed, cannot find the file '%s'." % self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted")
260  
261          # Loop over the configurations.
262          for i in range(len(self.configs)):
263              # Print out.
264              print("Creating the combined Q-factor file for configuration '%s'." % self.configs[i])
265  
266              # Open the NOE results file and read the data.
267              file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
268              noe_lines = file.readlines()
269              file.close()
270  
271              # Open the RDC results file and read the data.
272              file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
273              rdc_lines = file.readlines()
274              file.close()
275  
276              # The combined Q-factor file.
277              out = open(self.results_dir+sep+"Q_total_%s" % self.configs[i], 'w')
278              out_sorted = open(self.results_dir+sep+"Q_total_%s_sorted" % self.configs[i], 'w')
279  
280              # Loop over the data (skipping the header line).
281              data = []
282              for j in range(1, len(noe_lines)):
283                  # Split the lines.
284                  ens = int(split(noe_lines[j])[0])
285                  noe_viol = float(split(noe_lines[j])[1])
286                  q_rdc = float(split(rdc_lines[j])[1])
287  
288                  # The NOE Q-factor.
289                  q_noe = sqrt(noe_viol/self.noe_norm)
290  
291                  # Combined Q.
292                  q = sqrt(q_noe**2 + q_rdc**2)
293  
294                  # Write out the unsorted list.
295                  out.write("%-20i%20.15f\n" % (ens, q))
296  
297                  # Store the values.
298                  data.append([q, ens])
299  
300              # Sort the combined Q.
301              data.sort()
302  
303              # Write the data.
304              for i in range(len(data)):
305                  out_sorted.write("%-20i%20.15f\n" % (data[i][1], data[i][0]))
306  
307              # Close the files.
308              out.close()
309              out_sorted.close()
310  
311  
312      def generate_distribution(self, values, lower=0.0, upper=200.0, inc=None):
313          """Create the distribution data structure."""
314  
315          # The bin width.
316          bin_width = (upper - lower)/float(inc)
317  
318          # Init the dist object.
319          dist = []
320          for i in range(inc):
321              dist.append([bin_width*i+lower, 0])
322  
323          # Loop over the values.
324          for val in values:
325              # The bin.
326              bin = int((val - lower)/bin_width)
327  
328              # Outside of the limits.
329              if bin < 0 or bin >= inc:
330                  print("Outside of the limits: '%s'" % val)
331                  continue
332  
333              # Increment the count.
334              dist[bin][1] = dist[bin][1] + 1
335  
336          # Convert the counts to frequencies.
337          total_pr = 0.0
338          for i in range(inc):
339              dist[i][1] = dist[i][1] / float(len(values))
340              total_pr = total_pr + dist[i][1]
341  
342          print("Total Pr: %s" % total_pr)
343  
344          # Return the dist.
345          return dist
346  
347  
348      def grace_plots(self):
349          """Generate grace plots of the results."""
350  
351          # The number of configs.
352          n = len(self.configs)
353  
354          # The colours for the different configs.
355          defaults = [4, 2]    # Blue and red.
356          colours = []
357          for i in range(n):
358              # Default colours.
359              if i < len(defaults):
360                  colours.append(defaults[i])
361  
362              # Otherwise black!
363              else:
364                  colours.append(0)
365  
366          # The ensemble number text.
367          ens_text = ''
368          dividers = [1e15, 1e12, 1e9, 1e6, 1e3, 1]
369          num_ens = self.num_ens
370          for i in range(len(dividers)):
371              # The number.
372              num = int(num_ens / dividers[i])
373  
374              # The text.
375              if num:
376                  text = repr(num)
377              elif not num and ens_text:
378                  text = '000'
379              else:
380                  continue
381  
382              # Update the text.
383              ens_text = ens_text + text
384  
385              # A comma.
386              if i < len(dividers)-1:
387                  ens_text = ens_text + ','
388  
389              # Remove the front part of the number.
390              num_ens = num_ens - dividers[i]*num
391  
392          # Subtitle for all graphs.
393          subtitle = '%s ensembles of %s' % (ens_text, self.num_models)
394  
395          # NOE violations.
396          if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK):
397              # Print out.
398              print("Generating NOE violation Grace plots.")
399  
400              # Open the output files.
401              grace_curve = open(self.results_dir+sep+"NOE_viol_curve.agr", 'w')
402              grace_dist = open(self.results_dir+sep+"NOE_viol_dist.agr", 'w')
403  
404              # Loop over the configurations.
405              data = []
406              dist = []
407              for i in range(n):
408                  # Open the results file and read the data.
409                  file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i] + "_sorted")
410                  lines = file.readlines()
411                  file.close()
412  
413                  # Add a new graph set.
414                  data.append([])
415  
416                  # Loop over the ensembles and extract the NOE violation.
417                  noe_viols = []
418                  for j in range(1, len(lines)):
419                      # Extract the violation.
420                      viol = float(split(lines[j])[1])
421                      noe_viols.append(viol)
422  
423                      # Add to the data structure.
424                      data[i].append([j, viol])
425  
426                  # Calculate the R distribution.
427                  dist.append(self.generate_distribution(noe_viols, inc=self.bucket_num, upper=self.upper_lim_noe, lower=self.lower_lim_noe))
428  
429              # Headers.
430              write_xy_header(file=grace_curve, title='NOE violation comparison', subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[0]*n, axis_labels=['Ensemble (sorted)', 'NOE violation (Angstrom\S2\N)'], axis_min=[0, 0], axis_max=[self.num_ens, 200], legend_pos=[0.3, 0.8])
431              write_xy_header(file=grace_dist, title='NOE violation comparison', subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[1]*n, symbol_sizes=[0.5]*n, linestyle=[3]*n, axis_labels=['NOE violation (Angstrom\S2\N)', 'Frequency'], axis_min=[0, 0], axis_max=[200, 0.2], legend_pos=[1.1, 0.8])
432  
433              # Write the data.
434              write_xy_data([data], file=grace_curve, graph_type='xy')
435              write_xy_data([dist], file=grace_dist, graph_type='xy')
436  
437              # Close the files.
438              grace_curve.close()
439              grace_dist.close()
440  
441          # RDC Q-factors.
442          if access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
443              # Print out.
444              print("Generating RDC Q-factor Grace plots.")
445  
446              # Open the Grace output files.
447              grace_curve = open(self.results_dir+sep+"RDC_%s_curve.agr" % self.rdc_name, 'w')
448              grace_dist = open(self.results_dir+sep+"RDC_%s_dist.agr" % self.rdc_name, 'w')
449  
450              # Loop over the configurations.
451              data = []
452              dist = []
453              for i in range(n):
454                  # Open the results file and read the data.
455                  file = open(self.results_dir+sep+"Q_factors_" + self.configs[i] + "_sorted")
456                  lines = file.readlines()
457                  file.close()
458  
459                  # Add a new graph set.
460                  data.append([])
461  
462                  # Loop over the Q-factors.
463                  values = []
464                  for j in range(1, len(lines)):
465                      # Extract the violation.
466                      value = float(split(lines[j])[1])
467                      values.append(value)
468  
469                      # Add to the data structure.
470                      data[i].append([j, value])
471  
472                  # Calculate the R distribution.
473                  dist.append(self.generate_distribution(values, inc=self.bucket_num, upper=self.upper_lim_rdc, lower=self.lower_lim_rdc))
474  
475              # Headers.
476              write_xy_header(file=grace_curve, title='%s RDC Q-factor comparison' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[0]*n, axis_labels=['Ensemble (sorted)', '%s RDC Q-factor (pales format)' % self.rdc_name], axis_min=[0, 0], axis_max=[self.num_ens, 2], legend_pos=[0.3, 0.8])
477              write_xy_header(file=grace_dist, title='%s RDC Q-factor comparison' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[1]*n, symbol_sizes=[0.5]*n, linestyle=[3]*n, axis_labels=['%s RDC Q-factor (pales format)' % self.rdc_name, 'Frequency'], axis_min=[0, 0], axis_max=[2, 0.2], legend_pos=[1.1, 0.8])
478  
479              # Write the data.
480              write_xy_data([data], file=grace_curve, graph_type='xy')
481              write_xy_data([dist], file=grace_dist, graph_type='xy')
482  
483              # Close the files.
484              grace_curve.close()
485              grace_dist.close()
486  
487          # NOE-RDC correlation plots.
488          if access(self.results_dir+sep+"NOE_viol_" + self.configs[0] + "_sorted", F_OK) and access(self.results_dir+sep+"Q_factors_" + self.configs[0] + "_sorted", F_OK):
489              # Print out.
490              print("Generating NOE-RDC correlation Grace plots.")
491  
492              # Open the Grace output files.
493              grace_file = open(self.results_dir+sep+"correlation_plot.agr", 'w')
494              grace_file_scaled = open(self.results_dir+sep+"correlation_plot_scaled.agr", 'w')
495  
496              # Grace data.
497              data = []
498              data_scaled = []
499              for i in range(len(self.configs)):
500                  # Open the NOE results file and read the data.
501                  file = open(self.results_dir+sep+"NOE_viol_" + self.configs[i])
502                  noe_lines = file.readlines()
503                  file.close()
504  
505                  # Add a new graph set.
506                  data.append([])
507                  data_scaled.append([])
508  
509                  # Open the RDC results file and read the data.
510                  file = open(self.results_dir+sep+"Q_factors_" + self.configs[i])
511                  rdc_lines = file.readlines()
512                  file.close()
513  
514                  # Loop over the data.
515                  for j in range(1, len(noe_lines)):
516                      # Split the lines.
517                      noe_viol = float(split(noe_lines[j])[1])
518                      q_factor = float(split(rdc_lines[j])[1])
519  
520                      # Add the xy pair.
521                      data[i].append([noe_viol, q_factor])
522                      data_scaled[i].append([sqrt(noe_viol/self.noe_norm), q_factor])
523  
524              # Write the data.
525              write_xy_header(file=grace_file, title='Correlation plot - %s RDC vs. NOE' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[9]*n, symbol_sizes=[0.24]*n, linetype=[0]*n, axis_labels=['NOE violation (Angstrom\S2\N)', '%s RDC Q-factor (pales format)' % self.rdc_name], axis_min=[0, 0], axis_max=[noe_viols[-1]+10, values[-1]+0.1], legend_pos=[1.1, 0.8])
526              write_xy_header(file=grace_file_scaled, title='Correlation plot - %s RDC vs. NOE Q-factor' % self.rdc_name, subtitle=subtitle, sets=n, set_names=self.configs, set_colours=colours, symbols=[9]*n, symbol_sizes=[0.24]*n, linetype=[0]*n, axis_labels=['Normalised NOE violation (Q = sqrt(U / \\xS\\f{}NOE\\si\\N\\S2\\N))', '%s RDC Q-factor (pales format)' % self.rdc_name], axis_min=[0, 0], axis_max=[1, 1], legend_pos=[1.1, 0.8])
527              write_xy_data([data], file=grace_file, graph_type='xy')
528              write_xy_data([data_scaled], file=grace_file_scaled, graph_type='xy')
529  
530  
531      def noe_viol(self):
532          """NOE violation calculations."""
533  
534          # Redirect STDOUT to a log file.
535          if self.log:
536              sys.stdout = open(self.results_dir+sep+"logs" + sep + "NOE_viol.log", 'w')
537  
538          # Create a directory for the save files.
539          dir = self.results_dir + sep + "NOE_results"
540          mkdir_nofail(dir=dir)
541  
542          # Loop over the configurations.
543          for config in self.configs:
544              # Print out.
545              print("\n"*10 + "# Set up for config " + config + " #" + "\n")
546  
547              # Open the results file.
548              out = open(self.results_dir+sep+"NOE_viol_" + config, 'w')
549              out_sorted = open(self.results_dir+sep+"NOE_viol_" + config + "_sorted", 'w')
550              out.write("%-20s%20s\n" % ("# Ensemble", "NOE_violation"))
551              out_sorted.write("%-20s%20s\n" % ("# Ensemble", "NOE_violation"))
552  
553              # Create the data pipe.
554              self.interpreter.pipe.create("noe_viol_%s" % config, "N-state")
555  
556              # Read the first structure.
557              self.interpreter.structure.read_pdb("ensembles" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=range(1, self.num_models+1), parser="internal")
558  
559              # Load all protons as the sequence.
560              self.interpreter.structure.load_spins("@H*", ave_pos=False)
561  
562              # Create the pseudo-atoms.
563              for i in range(len(self.pseudo)):
564                  self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
565              self.interpreter.sequence.display()
566  
567              # Read the NOE list.
568              self.interpreter.noe.read_restraints(file=self.noe_file)
569  
570              # Set up the N-state model.
571              self.interpreter.n_state_model.select_model(model="fixed")
572  
573              # Print out.
574              print("\n"*2 + "# Set up complete #" + "\n"*10)
575  
576              # Loop over each ensemble.
577              noe_viol = []
578              for ens in range(self.num_ens):
579                  # Print out the ensemble to both the log and screen.
580                  if self.log:
581                      sys.stdout.write(config + repr(ens) + "\n")
582                  sys.stderr.write(config + repr(ens) + "\n")
583  
584                  # Delete the old structures and rename the molecule.
585                  self.interpreter.structure.delete()
586  
587                  # Read the ensemble.
588                  self.interpreter.structure.read_pdb("ensembles" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=range(1, self.num_models+1), parser="internal")
589  
590                  # Get the atomic positions.
591                  self.interpreter.structure.get_pos(ave_pos=False)
592  
593                  # Calculate the average NOE potential.
594                  self.interpreter.calc()
595  
596                  # Sum the violations.
597                  cdp.sum_viol = 0.0
598                  for i in range(len(cdp.ave_dist)):
599                      if cdp.quad_pot[i][2]:
600                          cdp.sum_viol = cdp.sum_viol + cdp.quad_pot[i][2]
601  
602                  # Write out the NOE violation.
603                  noe_viol.append([cdp.sum_viol, ens])
604                  out.write("%-20i%30.15f\n" % (ens, cdp.sum_viol))
605  
606                  # Save the state.
607                  self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
608  
609              # Sort the NOE violations.
610              noe_viol.sort()
611  
612              # Write the data.
613              for i in range(len(noe_viol)):
614                  out_sorted.write("%-20i%20.15f\n" % (noe_viol[i][1], noe_viol[i][0]))
615  
616  
617      def rdc_analysis(self):
618          """Perform the RDC part of the analysis."""
619  
620          # Redirect STDOUT to a log file.
621          if self.log:
622              sys.stdout = open(self.results_dir+sep+"logs" + sep + "RDC_%s_analysis.log" % self.rdc_name, 'w')
623  
624          # The dipolar constant.
625          d = 3.0 / (2.0*pi) * dipolar_constant(g13C, g1H, self.bond_length)
626  
627          # Create a directory for the save files.
628          dir = self.results_dir + sep + "RDC_%s_results" % self.rdc_name
629          mkdir_nofail(dir=dir)
630  
631          # Loop over the configurations.
632          for config in self.configs:
633              # Print out.
634              print("\n"*10 + "# Set up for config " + config + " #" + "\n")
635  
636              # Open the results files.
637              out = open(self.results_dir+sep+"Q_factors_" + config, 'w')
638              out_sorted = open(self.results_dir+sep+"Q_factors_" + config + "_sorted", 'w')
639              out.write("%-20s%20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)", "RDC_Q_factor(standard)"))
640              out_sorted.write("%-20s%20s\n" % ("# Ensemble", "RDC_Q_factor(pales)"))
641  
642              # Create the data pipe.
643              self.interpreter.pipe.create("rdc_analysis_%s" % config, "N-state")
644  
645              # Read the first structure.
646              self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + "0.pdb", dir=self.results_dir, set_mol_name=config, set_model_num=range(1, self.num_models+1), parser="internal")
647  
648              # Load all protons as the sequence.
649              self.interpreter.structure.load_spins("@H*", ave_pos=False)
650  
651              # Create the pseudo-atoms.
652              for i in range(len(self.pseudo)):
653                  self.interpreter.spin.create_pseudo(spin_name=self.pseudo[i][0], members=self.pseudo[i][1], averaging="linear")
654              self.interpreter.sequence.display()
655  
656              # Read the RDC data.
657              self.interpreter.rdc.read(align_id=self.rdc_file, file=self.rdc_file, spin_id_col=self.rdc_spin_id_col, mol_name_col=self.rdc_mol_name_col, res_num_col=self.rdc_res_num_col, res_name_col=self.rdc_res_name_col, spin_num_col=self.rdc_spin_num_col, spin_name_col=self.rdc_spin_name_col, data_col=self.rdc_data_col, error_col=self.rdc_error_col)
658  
659              # Set the values needed to calculate the dipolar constant.
660              self.interpreter.value.set(self.bond_length, "r", spin_id="@H*")
661              self.interpreter.value.set(self.bond_length, "r", spin_id="@Q*")
662              self.interpreter.value.set("13C", "heteronuc_type", spin_id="@H*")
663              self.interpreter.value.set("13C", "heteronuc_type", spin_id="@Q*")
664              self.interpreter.value.set("1H", "proton_type", spin_id="@H*")
665              self.interpreter.value.set("1H", "proton_type", spin_id="@Q*")
666  
667              # Set up the model.
668              self.interpreter.n_state_model.select_model(model="fixed")
669  
670              # Print out.
671              print("\n"*2 + "# Set up complete #" + "\n"*10)
672  
673              # Loop over each ensemble.
674              q_factors = []
675              for ens in range(self.num_ens):
676                  # Print out the ensemble to both the log and screen.
677                  if self.log:
678                      sys.stdout.write(config + repr(ens) + "\n")
679                  sys.stderr.write(config + repr(ens) + "\n")
680  
681                  # Delete the old structures.
682                  self.interpreter.structure.delete()
683  
684                  # Read the ensemble.
685                  self.interpreter.structure.read_pdb("ensembles_superimposed" + sep + config + repr(ens) + ".pdb", dir=self.results_dir, set_mol_name=config, set_model_num=range(1, self.num_models+1), parser="internal")
686  
687                  # Load the CH vectors for the H atoms.
688                  self.interpreter.structure.vectors(spin_id="@H*", attached="*C*", ave=False)
689  
690                  # Minimisation.
691                  #grid_search(inc=4)
692                  self.interpreter.minimise("simplex", constraints=False)
693  
694                  # Store and write out the Q-factors.
695                  q_factors.append([cdp.q_rdc, ens])
696                  out.write("%-20i%20.15f%20.15f\n" % (ens, cdp.q_rdc, cdp.q_rdc_norm2))
697  
698                  # Calculate the alignment tensor in Hz, and store it for reference.
699                  cdp.align_tensor_Hz = d * cdp.align_tensors[0].A
700                  cdp.align_tensor_Hz_5D = d * cdp.align_tensors[0].A_5D
701  
702                  # Save the state.
703                  self.interpreter.results.write(file="%s_results_%s" % (config, ens), dir=dir, force=True)
704  
705              # Sort the Q-factors.
706              q_factors.sort()
707  
708              # Write the data.
709              for i in range(len(q_factors)):
710                  out_sorted.write("%-20i%20.15f\n" % (q_factors[i][1], q_factors[i][0]))
711  
712  
713      def sample(self):
714          """Generate the ensembles by random sampling of the snapshots."""
715  
716          # Create the directory for the ensembles, if needed.
717          mkdir_nofail(dir=self.results_dir + sep + "ensembles")
718  
719          # Loop over the configurations.
720          for conf_index in range(len(self.configs)):
721              # Loop over each ensemble.
722              for ens in range(self.num_ens):
723                  # Random sampling.
724                  rand = []
725                  for j in range(self.num_models):
726                      rand.append(randint(self.snapshot_min[conf_index], self.snapshot_max[conf_index]))
727  
728                  # Print out.
729                  print("Generating ensemble %s%s from structures %s." % (self.configs[conf_index], ens, rand))
730  
731                  # The file name.
732                  file_name = "ensembles" + sep + self.configs[conf_index] + repr(ens) + ".pdb"
733  
734                  # Open the output file.
735                  out = open(self.results_dir+sep+file_name, 'w')
736  
737                  # Header.
738                  out.write("REM Structures: " + repr(rand) + "\n")
739  
740                  # Concatenate the files.
741                  for j in range(self.num_models):
742                      # The random file.
743                      rand_name = self.snapshot_dir[conf_index] + sep + self.configs[conf_index] + repr(rand[j]) + ".pdb"
744  
745                      # Append the file.
746                      out.write(open(rand_name).read())
747  
748                  # Close the file.
749                  out.close()
750  
751  
752      def superimpose(self):
753          """Superimpose the ensembles using fit to first in Molmol."""
754  
755          # Create the output directory.
756          mkdir_nofail("ensembles_superimposed")
757  
758          # Logging turned on.
759          if self.log:
760              log = open(self.results_dir+sep+"logs" + sep + "superimpose_molmol.stderr", 'w')
761              sys.stdout = open(self.results_dir+sep+"logs" + sep + "superimpose.log", 'w')
762  
763          # Loop over S and R.
764          for config in ["R", "S"]:
765              # Loop over each ensemble.
766              for ens in range(self.num_ens):
767                  # The file names.
768                  file_in = "ensembles" + sep + config + repr(ens) + ".pdb"
769                  file_out = "ensembles_superimposed" + sep + config + repr(ens) + ".pdb"
770  
771                  # Print out.
772                  sys.stderr.write("Superimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
773                  if self.log:
774                      log.write("\n\n\nSuperimposing %s with Molmol, output to %s.\n" % (file_in, file_out))
775  
776                  # Failure handling (if a failure occurred and this is rerun, skip all existing files).
777                  if access(self.results_dir+sep+file_out, F_OK):
778                      continue
779  
780                  # Open the Molmol pipe.
781                  stdin, stdout, stderr = popen3("molmol -t -f -", 'w', 0)
782  
783                  # Init all.
784                  stdin.write("InitAll yes\n")
785  
786                  # Read the PDB.
787                  stdin.write("ReadPdb " + self.results_dir+sep+file_in + "\n")
788  
789                  # Fitting to mean.
790                  stdin.write("Fit to_first 'selected'\n")
791                  stdin.write("Fit to_mean 'selected'\n")
792  
793                  # Write the result.
794                  stdin.write("WritePdb " + self.results_dir+sep+file_out + "\n")
795  
796                  # End Molmol.
797                  stdin.close()
798  
799                  # Get STDOUT and STDERR.
800                  sys.stdout.write(stdout.read())
801                  if self.log:
802                      log.write(stderr.read())
803  
804                  # Close the pipe.
805                  stdout.close()
806                  stderr.close()
807  
808                  # Open the superimposed file in relax.
809                  self.interpreter.reset()
810                  self.interpreter.pipe.create('out', 'N-state')
811                  self.interpreter.structure.read_pdb(file_out, parser="internal")
812  
813                  # Fix the non-standard MOLMOL proton naming.
814                  for model in cdp.structure.structural_data:
815                      # Alias.
816                      mol = model.mol[0]
817  
818                      # Loop over all atoms.
819                      for i in range(len(mol.atom_name)):
820                          # A proton.
821                          if search('H', mol.atom_name[i]):
822                              mol.atom_name[i] = mol.atom_name[i][1:] + mol.atom_name[i][0]
823  
824                  # Replace the superimposed file.
825                  self.interpreter.structure.write_pdb(config + repr(ens) + ".pdb", dir=self.results_dir+sep+"ensembles_superimposed", force=True)
826  
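
A minimal driver sketch for this class is given below.  Only the keyword arguments are taken from the __init__() signature above; every value (file names, directories, column numbers, bond length, ensemble counts, the rdc_name label) is an illustrative placeholder that would need to be replaced with the real data for a given system.  The stages would normally be run one at a time rather than in a single loop, and such a script would typically be executed through the relax program rather than directly with Python.

    # Hypothetical driver script - all values are illustrative only.
    from auto_analyses.stereochem_analysis import Stereochem_analysis

    # Run the six stages in order (sampling, NOE violations, superimposition,
    # RDC Q-factors, Grace plots, combined Q ordering).
    for stage in range(1, 7):
        analysis = Stereochem_analysis(
            stage=stage,
            results_dir="results",                      # Output directory (assumed).
            num_ens=10000,                              # Number of ensembles.
            num_models=10,                              # Ensemble size.
            configs=["R", "S"],                         # The two configurations.
            snapshot_dir=["snapshots", "snapshots"],    # One snapshot directory per configuration.
            snapshot_min=[0, 0],                        # First snapshot numbers.
            snapshot_max=[1000, 1000],                  # Last snapshot numbers.
            pseudo=[["Q7", ["@H16", "@H17", "@H18"]]],  # Pseudo-atom definitions (assumed).
            noe_file="noe_restraints.txt",              # NOE restraint file (assumed).
            noe_norm=10000.0,                           # Sum of all squared NOEs (assumed).
            rdc_name="PAN",                             # RDC data set label (assumed).
            rdc_file="rdcs.txt",                        # RDC file (assumed).
            rdc_spin_id_col=1,                          # Column layout of the RDC file (assumed).
            rdc_data_col=2,
            rdc_error_col=3,
            bond_length=1.117e-10,                      # C-H bond length in metres (assumed).
            log=True,                                   # Write log files.
        )
        analysis.run()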