Package auto_analyses :: Module dauvergne_protocol
[hide private]
[frames] | no frames]

Source Code for Module auto_analyses.dauvergne_protocol

   1  ############################################################################### 
   2  #                                                                             # 
   3  # Copyright (C) 2004-2014 Edward d'Auvergne                                   # 
   4  #                                                                             # 
   5  # This file is part of the program relax (http://www.nmr-relax.com).          # 
   6  #                                                                             # 
   7  # This program is free software: you can redistribute it and/or modify        # 
   8  # it under the terms of the GNU General Public License as published by        # 
   9  # the Free Software Foundation, either version 3 of the License, or           # 
  10  # (at your option) any later version.                                         # 
  11  #                                                                             # 
  12  # This program is distributed in the hope that it will be useful,             # 
  13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
  14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
  15  # GNU General Public License for more details.                                # 
  16  #                                                                             # 
  17  # You should have received a copy of the GNU General Public License           # 
  18  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
  19  #                                                                             # 
  20  ############################################################################### 
  21   
  22  # Python module imports. 
  23  from math import pi 
  24  from os import F_OK, R_OK, W_OK, X_OK, access, getcwd, listdir, sep 
  25  from os.path import isdir 
  26  from re import search 
  27  import sys 
  28  from time import sleep 
  29   
  30  # relax module imports. 
  31  from info import Info_box; info = Info_box() 
  32  from lib.errors import RelaxError, RelaxNoSequenceError, RelaxNoValueError 
  33  from lib.float import floatAsByteArray 
  34  from lib.text.sectioning import title, subtitle 
  35  from lib.text.string import LIST, PARAGRAPH, SECTION, SUBSECTION, TITLE, to_docstring 
  36  from pipe_control.interatomic import interatomic_loop 
  37  from pipe_control.mol_res_spin import exists_mol_res_spin_data, return_spin, spin_loop 
  38  from pipe_control.pipes import cdp_name, get_pipe, has_pipe, pipe_names, switch 
  39  from pipe_control.spectrometer import get_frequencies 
  40  from prompt.interpreter import Interpreter 
  41  from status import Status; status = Status() 
  42   
  43   
  44  doc = [ 
  45          [TITLE, "Automatic analysis for black-box model-free results."], 
  46          [PARAGRAPH, "The dauvergne_protocol auto-analysis is designed for those who appreciate black-boxes or those who appreciate complex code.  Importantly, data at multiple magnetic field strengths is essential for this analysis.  If you would like to change how model-free analysis is performed, the code in the file auto_analyses/dauvergne_protocol.py in the base relax directory can be copied and modified as needed and used with the relax script interface.  This file is simply a complex relax script.  For a description of object-oriented coding in python using classes, functions/methods, self, etc., please see the python tutorial."], 
  47   
  48          [SECTION, "References"], 
  49   
  50          [SUBSECTION, "Auto-analysis primary reference"], 
  51          [PARAGRAPH, "The model-free optimisation methodology herein is that of:"], 
  52          [LIST, info.bib['dAuvergneGooley08b'].cite_short()], 
  53   
  54          [SUBSECTION, "Techniques used in the auto-analysis"], 
  55          [PARAGRAPH, "Other references for features of this dauvergne_protocol auto-analysis include model-free model selection using Akaike's Information Criterion:"], 
  56          [LIST, info.bib['dAuvergneGooley03'].cite_short()], 
  57          [PARAGRAPH, "The elimination of failed model-free models and Monte Carlo simulations:"], 
  58          [LIST, info.bib['dAuvergneGooley06'].cite_short()], 
  59          [PARAGRAPH, "Significant model-free optimisation improvements:"], 
  60          [LIST, info.bib['dAuvergneGooley08a'].cite_short()], 
  61          [PARAGRAPH, "Rather than searching for the lowest chi-squared value, this auto-analysis searches for the model with the lowest AIC criterion.  This complex multi-universe, multi-dimensional problem is formulated, using set theory, as the universal solution:"], 
  62          [LIST, info.bib['dAuvergneGooley07'].cite_short()], 
  63          [PARAGRAPH, "The basic three references for the original and extended model-free theories are:"], 
  64          [LIST, info.bib['LipariSzabo82a'].cite_short()], 
  65          [LIST, info.bib['LipariSzabo82b'].cite_short()], 
  66          [LIST, info.bib['Clore90'].cite_short()], 
  67   
  68          [SECTION, "How to use this auto-analysis"], 
  69          [PARAGRAPH, "The five diffusion models used in this auto-analysis are:"], 
  70          [LIST, "Model I   (MI)   - Local tm."], 
  71          [LIST, "Model II  (MII)  - Sphere."], 
  72          [LIST, "Model III (MIII) - Prolate spheroid."], 
  73          [LIST, "Model IV  (MIV)  - Oblate spheroid."], 
  74          [LIST, "Model V   (MV)   - Ellipsoid."], 
  75          [PARAGRAPH, "If using the script-based user interface (UI), changing the value of the variable diff_model will determine the behaviour of this auto-analysis.  Model I must be optimised prior to any of the other diffusion models, while the Models II to V can be optimised in any order.  To select the various models, set the variable diff_model to the following strings:"], 
  76          [LIST, "MI   - 'local_tm'"], 
  77          [LIST, "MII  - 'sphere'"], 
  78          [LIST, "MIII - 'prolate'"], 
  79          [LIST, "MIV  - 'oblate'"], 
  80          [LIST, "MV   - 'ellipsoid'"], 
  81          [PARAGRAPH, "This approach has the advantage of eliminating the need for an initial estimate of a global diffusion tensor and removing all the problems associated with the initial estimate."], 
  82          [PARAGRAPH, "It is important that the number of parameters in a model does not exceed the number of relaxation data sets for that spin.  If this is the case, the list of models in the mf_models and local_tm_models variables will need to be trimmed."], 
  83   
  84          [SUBSECTION, "Model I - Local tm"], 
  85          [PARAGRAPH, "This will optimise the diffusion model whereby all spin of the molecule have a local tm value, i.e. there is no global diffusion tensor.  This model needs to be optimised prior to optimising any of the other diffusion models.  Each spin is fitted to the multiple model-free models separately, where the parameter tm is included in each model."], 
  86          [PARAGRAPH, "AIC model selection is used to select the models for each spin."], 
  87   
  88          [SUBSECTION, "Model II - Sphere"], 
  89          [PARAGRAPH, "This will optimise the isotropic diffusion model.  Multiple steps are required, an initial optimisation of the diffusion tensor, followed by a repetitive optimisation until convergence of the diffusion tensor.  In the relax script UI each of these steps requires this script to be rerun, unless the conv_loop flag is True.  In the GUI (graphical user interface), the procedure is repeated automatically until convergence.  For the initial optimisation, which will be placed in the directory './sphere/init/', the following steps are used:"], 
  90          [PARAGRAPH, "The model-free models and parameter values for each spin are set to those of diffusion model MI."], 
  91          [PARAGRAPH, "The local tm parameter is removed from the models."], 
  92          [PARAGRAPH, "The model-free parameters are fixed and a global spherical diffusion tensor is minimised."], 
  93          [PARAGRAPH, "For the repetitive optimisation, each minimisation is named from 'round_1' onwards.  The initial 'round_1' optimisation will extract the diffusion tensor from the results file in './sphere/init/', and the results will be placed in the directory './sphere/round_1/'.  Each successive round will take the diffusion tensor from the previous round.  The following steps are used:"], 
  94          [PARAGRAPH, "The global diffusion tensor is fixed and the multiple model-free models are fitted to each spin."], 
  95          [PARAGRAPH, "AIC model selection is used to select the models for each spin."], 
  96          [PARAGRAPH, "All model-free and diffusion parameters are allowed to vary and a global optimisation of all parameters is carried out."], 
  97   
  98          [SUBSECTION, "Model III - Prolate spheroid"], 
  99          [PARAGRAPH, "The methods used are identical to those of diffusion model MII, except that an axially symmetric diffusion tensor with Da >= 0 is used.  The base directory containing all the results is './prolate/'."], 
 100   
 101          [SUBSECTION, "Model IV - Oblate spheroid"], 
 102          [PARAGRAPH, "The methods used are identical to those of diffusion model MII, except that an axially symmetric diffusion tensor with Da <= 0 is used.  The base directory containing all the results is './oblate/'."], 
 103   
 104          [SUBSECTION, "Model V - Ellipsoid"], 
 105          [PARAGRAPH, "The methods used are identical to those of diffusion model MII, except that a fully anisotropic diffusion tensor is used (also known as rhombic or asymmetric diffusion).  The base directory is './ellipsoid/'."], 
 106   
 107          [SUBSECTION, "Final run"], 
 108          [PARAGRAPH, "Once all the diffusion models have converged, the final run can be executed.  This is done by setting the variable diff_model to 'final'.  This consists of two steps, diffusion tensor model selection, and Monte Carlo simulations.  Firstly AIC model selection is used to select between the diffusion tensor models.  Monte Carlo simulations are then run solely on this selected diffusion model.  Minimisation of the model is bypassed as it is assumed that the model is already fully optimised (if this is not the case the final run is not yet appropriate)."], 
 109          [PARAGRAPH, "The final black-box model-free results will be placed in the file 'final/results'."] 
 110  ] 
 111   
 112   
 113  # Build the module docstring. 
 114  __doc__ = to_docstring(doc) 
 115   
 116   
 117   
118 -class dAuvergne_protocol:
119 """The model-free auto-analysis.""" 120 121 # Some class variables. 122 opt_func_tol = 1e-25 123 opt_max_iterations = int(1e7) 124
125 - def __init__(self, pipe_name=None, pipe_bundle=None, results_dir=None, write_results_dir=None, diff_model=None, mf_models=['m0', 'm1', 'm2', 'm3', 'm4', 'm5', 'm6', 'm7', 'm8', 'm9'], local_tm_models=['tm0', 'tm1', 'tm2', 'tm3', 'tm4', 'tm5', 'tm6', 'tm7', 'tm8', 'tm9'], grid_inc=11, diff_tensor_grid_inc={'sphere': 11, 'prolate': 11, 'oblate': 11, 'ellipsoid': 6}, min_algor='newton', mc_sim_num=500, max_iter=None, user_fns=None, conv_loop=True):
126 """Perform the full model-free analysis protocol of d'Auvergne and Gooley, 2008b. 127 128 @keyword pipe_name: The name of the data pipe containing the sequence info. This data pipe should have all values set including the CSA value, the bond length, the heteronucleus name and proton name. It should also have all relaxation data loaded. 129 @type pipe_name: str 130 @keyword pipe_bundle: The data pipe bundle to associate all spawned data pipes with. 131 @type pipe_bundle: str 132 @keyword results_dir: The directory where optimisation results will read from. Results will also be saved to this directory if the write_results_dir argument is not given. 133 @type results_dir: str 134 @keyword write_results_dir: The directory where optimisation results will be saved in. If None, it will default to the value of the results_dir argument. This is mainly used for debugging. 135 @type write_results_dir: str or None 136 @keyword diff_model: The global diffusion model to optimise. This can be one of 'local_tm', 'sphere', 'oblate', 'prolate', 'ellipsoid', or 'final'. If all or a subset of these are supplied as a list, then these will be automatically looped over and calculated. 137 @type diff_model: str or list of str 138 @keyword mf_models: The model-free models. 139 @type mf_models: list of str 140 @keyword local_tm_models: The model-free models. 141 @type local_tm_models: list of str 142 @keyword grid_inc: The grid search size (the number of increments per dimension). 143 @type grid_inc: int 144 @keyword diff_tensor_grid_inc: A list of grid search sizes for the optimisation of the sphere, prolate spheroid, oblate spheroid, and ellipsoid. 145 @type diff_tensor_grid_inc: list of int 146 @keyword min_algor: The minimisation algorithm (in most cases this should not be changed). 147 @type min_algor: str 148 @keyword mc_sim_num: The number of Monte Carlo simulations to be used for error analysis at the end of the analysis. 149 @type mc_sim_num: int 150 @keyword max_iter: The maximum number of iterations for the global iteration. Set to None, then the algorithm iterates until convergence. 151 @type max_iter: int or None. 152 @keyword user_fns: A dictionary of replacement user functions. These will overwrite the standard user functions. The key should be the name of the user function or user function class and the value should be the function or class instance. 153 @type user_fns: dict 154 @keyword conv_loop: Automatic looping over all rounds until convergence. 155 @type conv_loop: bool 156 """ 157 158 # Printout. 159 title(file=sys.stdout, text="The dauvergne_protocol model-free auto-analysis") 160 161 # Execution lock. 162 status.exec_lock.acquire(pipe_bundle, mode='auto-analysis') 163 164 # Store the args. 165 self.pipe_name = pipe_name 166 self.pipe_bundle = pipe_bundle 167 self.mf_models = mf_models 168 self.local_tm_models = local_tm_models 169 self.grid_inc = grid_inc 170 self.diff_tensor_grid_inc = diff_tensor_grid_inc 171 self.min_algor = min_algor 172 self.mc_sim_num = mc_sim_num 173 self.max_iter = max_iter 174 self.conv_loop = conv_loop 175 176 # The model-free data pipe names. 177 self.mf_model_pipes = [] 178 for i in range(len(self.mf_models)): 179 self.mf_model_pipes.append(self.name_pipe(self.mf_models[i])) 180 self.local_tm_model_pipes = [] 181 for i in range(len(self.local_tm_models)): 182 self.local_tm_model_pipes.append(self.name_pipe(self.local_tm_models[i])) 183 184 # The diffusion models. 185 if isinstance(diff_model, list): 186 self.diff_model_list = diff_model 187 else: 188 self.diff_model_list = [diff_model] 189 190 # Project directory (i.e. directory containing the model-free model results and the newly generated files) 191 if results_dir: 192 self.results_dir = results_dir + sep 193 else: 194 self.results_dir = getcwd() + sep 195 if write_results_dir: 196 self.write_results_dir = write_results_dir + sep 197 else: 198 self.write_results_dir = self.results_dir 199 200 # Data checks. 201 self.check_vars() 202 203 # Set the data pipe to the current data pipe. 204 if self.pipe_name != cdp_name(): 205 switch(self.pipe_name) 206 207 # Some info for the status. 208 self.status_setup() 209 210 # Load the interpreter. 211 self.interpreter = Interpreter(show_script=False, raise_relax_error=True) 212 self.interpreter.populate_self() 213 self.interpreter.on(verbose=False) 214 215 # Replacement user functions. 216 if user_fns: 217 for name in user_fns: 218 setattr(self.interpreter, name, user_fns[name]) 219 220 # Execute the protocol. 221 try: 222 # Loop over the models. 223 for self.diff_model in self.diff_model_list: 224 # Wait a little while between diffusion models. 225 sleep(1) 226 227 # Set the global model name. 228 status.auto_analysis[self.pipe_bundle].diff_model = self.diff_model 229 230 # Initialise the convergence data structures. 231 self.conv_data = Container() 232 self.conv_data.chi2 = [] 233 self.conv_data.models = [] 234 self.conv_data.diff_vals = [] 235 if self.diff_model == 'sphere': 236 self.conv_data.diff_params = ['tm'] 237 elif self.diff_model == 'oblate' or self.diff_model == 'prolate': 238 self.conv_data.diff_params = ['tm', 'Da', 'theta', 'phi'] 239 elif self.diff_model == 'ellipsoid': 240 self.conv_data.diff_params = ['tm', 'Da', 'Dr', 'alpha', 'beta', 'gamma'] 241 self.conv_data.spin_ids = [] 242 self.conv_data.mf_params = [] 243 self.conv_data.mf_vals = [] 244 245 # Execute the analysis for each diffusion model. 246 self.execute() 247 248 # Clean up. 249 finally: 250 # Finish and unlock execution. 251 status.auto_analysis[self.pipe_bundle].fin = True 252 status.current_analysis = None 253 status.exec_lock.release()
254 255
256 - def check_vars(self):
257 """Check that the user has set the variables correctly.""" 258 259 # Printout. 260 subtitle(file=sys.stdout, text="Auto-analysis variable checking") 261 262 # The pipe bundle. 263 if not isinstance(self.pipe_bundle, str): 264 raise RelaxError("The pipe bundle name '%s' is invalid." % self.pipe_bundle) 265 266 # The diff model. 267 valid_models = ['local_tm', 'sphere', 'oblate', 'prolate', 'ellipsoid', 'final'] 268 for i in range(len(self.diff_model_list)): 269 if self.diff_model_list[i] not in valid_models: 270 raise RelaxError("The diff_model value '%s' is incorrectly set. It must be one of %s." % (self.diff_model_list[i], valid_models)) 271 272 # Model-free models. 273 mf_models = ['m0', 'm1', 'm2', 'm3', 'm4', 'm5', 'm6', 'm7', 'm8', 'm9'] 274 local_tm_models = ['tm0', 'tm1', 'tm2', 'tm3', 'tm4', 'tm5', 'tm6', 'tm7', 'tm8', 'tm9'] 275 if not isinstance(self.mf_models, list): 276 raise RelaxError("The self.mf_models user variable must be a list.") 277 if not isinstance(self.local_tm_models, list): 278 raise RelaxError("The self.local_tm_models user variable must be a list.") 279 for i in range(len(self.mf_models)): 280 if self.mf_models[i] not in mf_models: 281 raise RelaxError("The self.mf_models user variable '%s' is incorrectly set. It must be one of %s." % (self.mf_models, mf_models)) 282 for i in range(len(self.local_tm_models)): 283 if self.local_tm_models[i] not in local_tm_models: 284 raise RelaxError("The self.local_tm_models user variable '%s' is incorrectly set. It must be one of %s." % (self.local_tm_models, local_tm_models)) 285 286 # Sequence data. 287 if not exists_mol_res_spin_data(): 288 raise RelaxNoSequenceError(self.pipe_name) 289 290 # Relaxation data. 291 if not hasattr(cdp, 'ri_ids') or len(cdp.ri_ids) == 0: 292 raise RelaxNoRiError(ri_id) 293 294 # Insufficient data. 295 if len(cdp.ri_ids) <= 3: 296 raise RelaxError("Insufficient relaxation data, 4 or more data sets are essential for the execution of this script.") 297 298 # Spin vars. 299 for spin, spin_id in spin_loop(return_id=True): 300 # Skip deselected spins. 301 if not spin.select: 302 continue 303 304 # Test if the isotope type has been set. 305 if not hasattr(spin, 'isotope') or spin.isotope == None: 306 raise RelaxNoValueError("nuclear isotope type", spin_id=spin_id) 307 308 # Skip spins with no relaxation data. 309 if not hasattr(spin, 'ri_data') or spin.ri_data == None: 310 continue 311 312 # Test if the CSA value has been set. 313 if not hasattr(spin, 'csa') or spin.csa == None: 314 raise RelaxNoValueError("CSA", spin_id=spin_id) 315 316 # Interatomic vars. 317 for interatom in interatomic_loop(): 318 # Get the corresponding spins. 319 spin1 = return_spin(interatom.spin_id1) 320 spin2 = return_spin(interatom.spin_id2) 321 322 # Skip deselected spins. 323 if not spin1.select or not spin2.select: 324 continue 325 326 # Test if the interatomic distance has been set. 327 if not hasattr(interatom, 'r') or interatom.r == None: 328 raise RelaxNoValueError("interatomic distance", spin_id=interatom.spin_id1, spin_id2=interatom.spin_id2) 329 330 # Min vars. 331 if not isinstance(self.grid_inc, int): 332 raise RelaxError("The grid_inc user variable '%s' is incorrectly set. It should be an integer." % self.grid_inc) 333 if not isinstance(self.diff_tensor_grid_inc, dict): 334 raise RelaxError("The diff_tensor_grid_inc user variable '%s' is incorrectly set. It should be a dictionary." % self.diff_tensor_grid_inc) 335 for tensor in ['sphere', 'prolate', 'oblate', 'ellipsoid']: 336 if not tensor in self.diff_tensor_grid_inc: 337 raise RelaxError("The diff_tensor_grid_inc user variable '%s' is incorrectly set. It should contain the '%s' key." % (self.diff_tensor_grid_inc, tensor)) 338 if not isinstance(self.diff_tensor_grid_inc[tensor], int): 339 raise RelaxError("The diff_tensor_grid_inc user variable '%s' is incorrectly set. The value corresponding to the key '%s' should be an integer." % (self.diff_tensor_grid_inc, tensor)) 340 if not isinstance(self.min_algor, str): 341 raise RelaxError("The min_algor user variable '%s' is incorrectly set. It should be a string." % self.min_algor) 342 if not isinstance(self.mc_sim_num, int): 343 raise RelaxError("The mc_sim_num user variable '%s' is incorrectly set. It should be an integer." % self.mc_sim_num) 344 345 # Looping. 346 if not isinstance(self.conv_loop, bool): 347 raise RelaxError("The conv_loop user variable '%s' is incorrectly set. It should be one of the booleans True or False." % self.conv_loop)
348 349
350 - def convergence(self):
351 """Test for the convergence of the global model.""" 352 353 # Printout. 354 title(file=sys.stdout, text="Convergence testing") 355 356 # Maximum number of iterations reached. 357 if self.max_iter and self.round > self.max_iter: 358 print("Maximum number of global iterations reached. Terminating the protocol before convergence has been reached.") 359 return True 360 361 # Store the data of the current data pipe. 362 self.conv_data.chi2.append(cdp.chi2) 363 364 # Create a string representation of the model-free models of the current data pipe. 365 curr_models = '' 366 for spin in spin_loop(): 367 if hasattr(spin, 'model'): 368 if not spin.model == 'None': 369 curr_models = curr_models + spin.model 370 self.conv_data.models.append(curr_models) 371 372 # Store the diffusion tensor parameters. 373 self.conv_data.diff_vals.append([]) 374 for param in self.conv_data.diff_params: 375 # Get the parameter values. 376 self.conv_data.diff_vals[-1].append(getattr(cdp.diff_tensor, param)) 377 378 # Store the model-free parameters. 379 self.conv_data.mf_vals.append([]) 380 self.conv_data.mf_params.append([]) 381 self.conv_data.spin_ids.append([]) 382 for spin, spin_id in spin_loop(return_id=True): 383 # Skip spin systems with no 'params' object. 384 if not hasattr(spin, 'params'): 385 continue 386 387 # Add the spin ID, parameters, and empty value list. 388 self.conv_data.spin_ids[-1].append(spin_id) 389 self.conv_data.mf_params[-1].append([]) 390 self.conv_data.mf_vals[-1].append([]) 391 392 # Loop over the parameters. 393 for j in range(len(spin.params)): 394 # Get the parameters and values. 395 self.conv_data.mf_params[-1][-1].append(spin.params[j]) 396 self.conv_data.mf_vals[-1][-1].append(getattr(spin, spin.params[j].lower())) 397 398 # No need for tests. 399 if self.round == 1: 400 print("First round of optimisation, skipping the convergence tests.\n\n\n") 401 return False 402 403 # Loop over the iterations. 404 converged = False 405 for i in range(self.start_round, self.round - 1): 406 # Print out. 407 print("\n\n\n# Comparing the current iteration to iteration %i.\n" % (i+1)) 408 409 # Index. 410 index = i - self.start_round 411 412 # Chi-squared test. 413 print("Chi-squared test:") 414 print(" chi2 (iter %i): %s" % (i+1, self.conv_data.chi2[index])) 415 print(" (as an IEEE-754 byte array: %s)" % floatAsByteArray(self.conv_data.chi2[index])) 416 print(" chi2 (iter %i): %s" % (self.round, self.conv_data.chi2[-1])) 417 print(" (as an IEEE-754 byte array: %s)" % floatAsByteArray(self.conv_data.chi2[-1])) 418 print(" chi2 (difference): %s" % (self.conv_data.chi2[index] - self.conv_data.chi2[-1])) 419 if self.conv_data.chi2[index] == self.conv_data.chi2[-1]: 420 print(" The chi-squared value has converged.\n") 421 else: 422 print(" The chi-squared value has not converged.\n") 423 continue 424 425 # Identical model-free model test. 426 print("Identical model-free models test:") 427 if self.conv_data.models[index] == self.conv_data.models[-1]: 428 print(" The model-free models have converged.\n") 429 else: 430 print(" The model-free models have not converged.\n") 431 continue 432 433 # Identical diffusion tensor parameter value test. 434 print("Identical diffusion tensor parameter test:") 435 params_converged = True 436 for k in range(len(self.conv_data.diff_params)): 437 # Test if not identical. 438 if self.conv_data.diff_vals[index][k] != self.conv_data.diff_vals[-1][k]: 439 print(" Parameter: %s" % param) 440 print(" Value (iter %i): %s" % (i+1, self.conv_data.diff_vals[index][k])) 441 print(" (as an IEEE-754 byte array: %s)" % floatAsByteArray(self.conv_data.diff_vals[index][k])) 442 print(" Value (iter %i): %s" % (self.round, self.conv_data.diff_vals[-1][k])) 443 print(" (as an IEEE-754 byte array: %s)" % floatAsByteArray(self.conv_data.diff_vals[-1][k])) 444 print(" The diffusion parameters have not converged.\n") 445 params_converged = False 446 break 447 if not params_converged: 448 continue 449 print(" The diffusion tensor parameters have converged.\n") 450 451 # Identical model-free parameter value test. 452 print("\nIdentical model-free parameter test:") 453 if len(self.conv_data.spin_ids[index]) != len(self.conv_data.spin_ids[-1]): 454 print(" Different number of spins.") 455 continue 456 for j in range(len(self.conv_data.spin_ids[-1])): 457 # Loop over the parameters. 458 for k in range(len(self.conv_data.mf_params[-1][j])): 459 # Test if not identical. 460 if self.conv_data.mf_vals[index][j][k] != self.conv_data.mf_vals[-1][j][k]: 461 print(" Spin ID: %s" % self.conv_data.spin_ids[-1][j]) 462 print(" Parameter: %s" % self.conv_data.mf_params[-1][j][k]) 463 print(" Value (iter %i): %s" % (i+1, self.conv_data.mf_vals[index][j][k])) 464 print(" (as an IEEE-754 byte array: %s)" % floatAsByteArray(self.conv_data.mf_vals[index][j][k])) 465 print(" Value (iter %i): %s" % (self.round, self.conv_data.mf_vals[-1][j][k])) 466 print(" (as an IEEE-754 byte array: %s)" % floatAsByteArray(self.conv_data.mf_vals[index][j][k])) 467 print(" The model-free parameters have not converged.\n") 468 params_converged = False 469 break 470 if not params_converged: 471 continue 472 print(" The model-free parameters have converged.\n") 473 474 # Convergence. 475 converged = True 476 break 477 478 479 # Final printout. 480 ################## 481 482 print("\nConvergence:") 483 if converged: 484 # Update the status. 485 status.auto_analysis[self.pipe_bundle].convergence = True 486 487 # Print out. 488 print(" [ Yes ]") 489 490 # Return the termination condition. 491 return True 492 else: 493 # Print out. 494 print(" [ No ]") 495 496 # Return False to not terminate. 497 return False
498 499
500 - def determine_rnd(self, model=None):
501 """Function for returning the name of next round of optimisation.""" 502 503 # The base model directory. 504 base_dir = self.results_dir+sep+model 505 506 # Printout. 507 subtitle(file=sys.stdout, text="Determining the next round of optimisation") 508 print("%-30s %s" % ("Base model directory:", base_dir)) 509 510 # Catch if a file exists with the name of the directory. 511 if not isdir(base_dir) and access(base_dir, F_OK): 512 raise RelaxError("The base model directory '%s' is not usable as a file with the same name already exists." % base_dir) 513 514 # If no directory exists, set the round to 'init' or 0. 515 if not isdir(base_dir): 516 print("%-30s %i" % ("Round:", 0)) 517 print("The base directory does not exist.") 518 return 0 519 520 # Is the directory readable, writable, and executable. 521 if not access(base_dir, R_OK): 522 raise RelaxError("The base model directory '%s' is not readable." % base_dir) 523 if not access(base_dir, X_OK): 524 raise RelaxError("The base model directory '%s' is not executable." % base_dir) 525 526 # Get a list of all files in the directory model. 527 dir_list = listdir(base_dir) 528 529 # Set the round to 'init' or 0 if there is no directory called 'init'. 530 if 'init' not in dir_list: 531 print("%-30s %i" % ("Round:", 0)) 532 print("No 'init' directory present in the base directory.") 533 return 0 534 535 # Create a list of all files which begin with 'round_'. 536 rnd_dirs = [] 537 for file in dir_list: 538 if search('^round_', file): 539 rnd_dirs.append(file) 540 541 # Create a sorted list of integer round numbers. 542 numbers = [] 543 for dir in rnd_dirs: 544 try: 545 numbers.append(int(dir[6:])) 546 except: 547 pass 548 numbers.sort() 549 550 # No directories beginning with 'round_' exist, set the round to 1. 551 if not len(numbers): 552 print("%-30s %i" % ("Round:", 1)) 553 print("No directories beginning with 'round_' exist.") 554 return 1 555 556 # The highest number. 557 max_round = numbers[-1] 558 559 # Check that the opt/results file exists for the round (working backwards). 560 for i in range(max_round, -1, -1): 561 # Assume the round is complete. 562 complete_round = i 563 564 # The file root. 565 file_root = base_dir + sep + "round_%i" % i + sep + 'opt' + sep + 'results' 566 567 # Stop looping when the opt/results file is found. 568 if access(file_root + '.bz2', F_OK): 569 break 570 if access(file_root + '.gz', F_OK): 571 break 572 if access(file_root, F_OK): 573 break 574 575 # No round, so assume the initial state. 576 if complete_round == 0: 577 print("%-30s %i" % ("Round:", 0)) 578 print("No opt/results files can be found.") 579 return 0 580 581 # Determine the number for the next round (add 1 to the highest completed round). 582 print("%-30s %i" % ("Round:", complete_round + 1)) 583 return complete_round + 1
584 585
586 - def execute(self):
587 """Execute the protocol.""" 588 589 # MI - Local tm. 590 ################ 591 592 if self.diff_model == 'local_tm': 593 # Printout. 594 title(file=sys.stdout, text="Model MI - Local tm") 595 596 # Base directory to place files into. 597 self.base_dir = self.results_dir+'local_tm'+sep 598 599 # Sequential optimisation of all model-free models (function must be modified to suit). 600 self.multi_model(local_tm=True) 601 602 # Model selection. 603 self.model_selection(modsel_pipe=self.name_pipe('aic'), dir=self.base_dir + 'aic') 604 605 606 # Diffusion models MII to MV. 607 ############################# 608 609 elif self.diff_model == 'sphere' or self.diff_model == 'prolate' or self.diff_model == 'oblate' or self.diff_model == 'ellipsoid': 610 # Printout. 611 if self.diff_model == 'sphere': 612 title(file=sys.stdout, text="Model MII - Spherical diffusion") 613 elif self.diff_model == 'prolate': 614 title(file=sys.stdout, text="Model MIII - Prolate spheroidal diffusion") 615 elif self.diff_model == 'oblate': 616 title(file=sys.stdout, text="Model MIV - Oblate spheroidal diffusion") 617 elif self.diff_model == 'ellipsoid': 618 title(file=sys.stdout, text="Model MV - Ellipsoidal diffusion") 619 620 # No local_tm directory! 621 dir_list = listdir(self.results_dir) 622 if 'local_tm' not in dir_list: 623 raise RelaxError("The local_tm model must be optimised first.") 624 625 # The initial round of optimisation - not zero if calculations were interrupted. 626 self.start_round = self.determine_rnd(model=self.diff_model) 627 628 # Loop until convergence if conv_loop is set, otherwise just loop once. 629 # This looping could be made much cleaner by removing the dependence on the determine_rnd() function. 630 while True: 631 # Determine which round of optimisation to do (init, round_1, round_2, etc). 632 self.round = self.determine_rnd(model=self.diff_model) 633 status.auto_analysis[self.pipe_bundle].round = self.round 634 635 # Inital round of optimisation for diffusion models MII to MV. 636 if self.round == 0: 637 # Printout. 638 subtitle(file=sys.stdout, text="Initial round of optimisation") 639 640 # Base directory to place files into. 641 self.base_dir = self.results_dir+self.diff_model+sep+'init'+sep 642 643 # Run name. 644 name = self.name_pipe(self.diff_model) 645 646 # Create the data pipe (deleting the old one if it exists). 647 if has_pipe(name): 648 self.interpreter.pipe.delete(name) 649 self.interpreter.pipe.create(name, 'mf', bundle=self.pipe_bundle) 650 651 # Load the local tm diffusion model MI results. 652 self.interpreter.results.read(file='results', dir=self.results_dir+'local_tm'+sep+'aic') 653 654 # Remove the tm parameter. 655 self.interpreter.model_free.remove_tm() 656 657 # Initialise the diffusion tensor. 658 if self.diff_model == 'sphere': 659 self.interpreter.diffusion_tensor.init(None, fixed=False) 660 inc = self.diff_tensor_grid_inc['sphere'] 661 elif self.diff_model == 'prolate': 662 self.interpreter.diffusion_tensor.init((None, None, None, None), spheroid_type='prolate', fixed=False) 663 inc = self.diff_tensor_grid_inc['prolate'] 664 elif self.diff_model == 'oblate': 665 self.interpreter.diffusion_tensor.init((None, None, None, None), spheroid_type='oblate', fixed=False) 666 inc = self.diff_tensor_grid_inc['oblate'] 667 elif self.diff_model == 'ellipsoid': 668 self.interpreter.diffusion_tensor.init((None, None, None, None, None, None), fixed=False) 669 inc = self.diff_tensor_grid_inc['ellipsoid'] 670 671 # Minimise just the diffusion tensor. 672 self.interpreter.fix('all_spins') 673 self.interpreter.minimise.grid_search(inc=inc) 674 self.interpreter.minimise.execute(self.min_algor, func_tol=self.opt_func_tol, max_iter=self.opt_max_iterations) 675 676 # Write the results. 677 self.interpreter.results.write(file='results', dir=self.base_dir, force=True) 678 679 680 # Normal round of optimisation for diffusion models MII to MV. 681 else: 682 # Printout. 683 subtitle(file=sys.stdout, text="Round %i of optimisation" % self.round) 684 685 # Base directory to place files into. 686 self.base_dir = self.results_dir+self.diff_model + sep+'round_'+repr(self.round)+sep 687 688 # Load the optimised diffusion tensor from either the previous round. 689 self.load_tensor() 690 691 # Sequential optimisation of all model-free models (function must be modified to suit). 692 self.multi_model() 693 694 # Model selection. 695 self.model_selection(modsel_pipe=self.name_pipe('aic'), dir=self.base_dir + 'aic') 696 697 # Final optimisation of all diffusion and model-free parameters. 698 self.interpreter.fix('all', fixed=False) 699 700 # Minimise all parameters. 701 self.interpreter.minimise.execute(self.min_algor, func_tol=self.opt_func_tol, max_iter=self.opt_max_iterations) 702 703 # Write the results. 704 dir = self.base_dir + 'opt' 705 self.interpreter.results.write(file='results', dir=dir, force=True) 706 707 # Test for convergence. 708 converged = self.convergence() 709 710 # Break out of the infinite while loop if automatic looping is not activated or if convergence has occurred. 711 if converged or not self.conv_loop: 712 break 713 714 # Unset the status. 715 status.auto_analysis[self.pipe_bundle].round = None 716 717 718 # Final run. 719 ############ 720 721 elif self.diff_model == 'final': 722 # Printout. 723 title(file=sys.stdout, text="Final run") 724 725 # Diffusion model selection. 726 ############################ 727 728 # Printout. 729 subtitle(file=sys.stdout, text="Diffusion model selection") 730 731 # The contents of the results directory. 732 dir_list = listdir(self.results_dir) 733 734 # Check that the minimal set of global diffusion models required for the protocol has been optimised. 735 min_models = ['local_tm', 'sphere'] 736 for model in min_models: 737 if model not in dir_list: 738 raise RelaxError("The minimum set of global diffusion models required for the protocol have not been optimised, the '%s' model results cannot be found." % model) 739 740 # Build a list of all global diffusion models optimised. 741 all_models = ['local_tm', 'sphere', 'prolate', 'oblate', 'ellipsoid'] 742 self.opt_models = [] 743 self.pipes = [] 744 for model in all_models: 745 if model in dir_list: 746 self.opt_models.append(model) 747 self.pipes.append(self.name_pipe(model)) 748 749 # Remove all temporary pipes used in this auto-analysis. 750 for name in pipe_names(bundle=self.pipe_bundle): 751 if name in self.pipes + self.mf_model_pipes + self.local_tm_model_pipes + [self.name_pipe('aic'), self.name_pipe('previous')]: 752 self.interpreter.pipe.delete(name) 753 754 # Create the local_tm data pipe. 755 self.interpreter.pipe.create(self.name_pipe('local_tm'), 'mf', bundle=self.pipe_bundle) 756 757 # Load the local tm diffusion model MI results. 758 self.interpreter.results.read(file='results', dir=self.results_dir+'local_tm'+sep+'aic') 759 760 # Loop over models MII to MV. 761 for model in ['sphere', 'prolate', 'oblate', 'ellipsoid']: 762 # Skip missing models. 763 if model not in self.opt_models: 764 continue 765 766 # Determine which was the last round of optimisation for each of the models. 767 self.round = self.determine_rnd(model=model) - 1 768 769 # If no directories begining with 'round_' exist, the script has not been properly utilised! 770 if self.round < 1: 771 # Construct the name of the diffusion tensor. 772 name = model 773 if model == 'prolate' or model == 'oblate': 774 name = name + ' spheroid' 775 776 # Throw an error to prevent misuse of the script. 777 raise RelaxError("Multiple rounds of optimisation of the " + name + " (between 8 to 15) are required for the proper execution of this script.") 778 779 # Create the data pipe. 780 self.interpreter.pipe.create(self.name_pipe(model), 'mf', bundle=self.pipe_bundle) 781 782 # Load the diffusion model results. 783 self.interpreter.results.read(file='results', dir=self.results_dir+model + sep+'round_'+repr(self.round)+sep+'opt') 784 785 # Model selection between MI to MV. 786 self.model_selection(modsel_pipe=self.name_pipe('final'), write_flag=False) 787 788 789 # Monte Carlo simulations. 790 ########################## 791 792 # Printout. 793 subtitle(file=sys.stdout, text="Monte Carlo simulations") 794 795 # Fix the diffusion tensor, if it exists. 796 if hasattr(get_pipe(self.name_pipe('final')), 'diff_tensor'): 797 self.interpreter.fix('diff') 798 799 # Simulations. 800 self.interpreter.monte_carlo.setup(number=self.mc_sim_num) 801 self.interpreter.monte_carlo.create_data() 802 self.interpreter.monte_carlo.initial_values() 803 self.interpreter.minimise.execute(self.min_algor, func_tol=self.opt_func_tol, max_iter=self.opt_max_iterations) 804 self.interpreter.eliminate() 805 self.interpreter.monte_carlo.error_analysis() 806 807 808 # Write the final results. 809 ########################## 810 811 # Printout. 812 subtitle(file=sys.stdout, text="Writing the final results") 813 814 # Create results files and plots of the data. 815 self.write_results() 816 817 818 # Unknown script behaviour. 819 ########################### 820 821 else: 822 raise RelaxError("Unknown diffusion model, change the value of 'self.diff_model'")
823 824
825 - def load_tensor(self):
826 """Function for loading the optimised diffusion tensor.""" 827 828 # Create the data pipe for the previous data (deleting the old data pipe first if necessary). 829 if has_pipe(self.name_pipe('previous')): 830 self.interpreter.pipe.delete(self.name_pipe('previous')) 831 self.interpreter.pipe.create(self.name_pipe('previous'), 'mf', bundle=self.pipe_bundle) 832 833 # Load the optimised diffusion tensor from the initial round. 834 if self.round == 1: 835 self.interpreter.results.read('results', self.results_dir+self.diff_model + sep+'init') 836 837 # Load the optimised diffusion tensor from the previous round. 838 else: 839 self.interpreter.results.read('results', self.results_dir+self.diff_model + sep+'round_'+repr(self.round-1)+sep+'opt')
840 841
842 - def model_selection(self, modsel_pipe=None, dir=None, write_flag=True):
843 """Model selection function.""" 844 845 # Model selection (delete the model selection pipe if it already exists). 846 if has_pipe(modsel_pipe): 847 self.interpreter.pipe.delete(modsel_pipe) 848 self.interpreter.model_selection(method='AIC', modsel_pipe=modsel_pipe, bundle=self.pipe_bundle, pipes=self.pipes) 849 850 # Write the results. 851 if write_flag: 852 self.interpreter.results.write(file='results', dir=dir, force=True)
853 854
855 - def multi_model(self, local_tm=False):
856 """Function for optimisation of all model-free models.""" 857 858 # Set the data pipe names (also the names of preset model-free models). 859 if local_tm: 860 models = self.local_tm_models 861 self.pipes = self.local_tm_models 862 else: 863 models = self.mf_models 864 self.pipes = [] 865 for i in range(len(models)): 866 self.pipes.append(self.name_pipe(models[i])) 867 868 # Loop over the data pipes. 869 for i in range(len(models)): 870 # Place the model name into the status container. 871 status.auto_analysis[self.pipe_bundle].current_model = models[i] 872 873 # Create the data pipe (by copying). 874 if has_pipe(self.pipes[i]): 875 self.interpreter.pipe.delete(self.pipes[i]) 876 self.interpreter.pipe.copy(self.pipe_name, self.pipes[i], bundle_to=self.pipe_bundle) 877 self.interpreter.pipe.switch(self.pipes[i]) 878 879 # Copy the diffusion tensor from the 'opt' data pipe and prevent it from being minimised. 880 if not local_tm: 881 self.interpreter.diffusion_tensor.copy(self.name_pipe('previous')) 882 self.interpreter.fix('diff') 883 884 # Select the model-free model. 885 self.interpreter.model_free.select_model(model=models[i]) 886 887 # Minimise. 888 self.interpreter.minimise.grid_search(inc=self.grid_inc) 889 self.interpreter.minimise.execute(self.min_algor, func_tol=self.opt_func_tol, max_iter=self.opt_max_iterations) 890 891 # Model elimination. 892 self.interpreter.eliminate() 893 894 # Write the results. 895 dir = self.base_dir + models[i] 896 self.interpreter.results.write(file='results', dir=dir, force=True) 897 898 # Unset the status. 899 status.auto_analysis[self.pipe_bundle].current_model = None
900 901
902 - def name_pipe(self, prefix):
903 """Generate a unique name for the data pipe. 904 905 @param prefix: The prefix of the data pipe name. 906 @type prefix: str 907 """ 908 909 # The unique pipe name. 910 name = "%s - %s" % (prefix, self.pipe_bundle) 911 912 # Return the name. 913 return name
914 915
916 - def status_setup(self):
917 """Initialise the status object.""" 918 919 # Initialise the status object for this auto-analysis. 920 status.init_auto_analysis(self.pipe_bundle, type='dauvergne_protocol') 921 status.current_analysis = self.pipe_bundle 922 923 # The global diffusion model. 924 status.auto_analysis[self.pipe_bundle].diff_model = None 925 926 # The round of optimisation, i.e. the global iteration. 927 status.auto_analysis[self.pipe_bundle].round = None 928 929 # The list of model-free local tm models for optimisation, i.e. the global iteration. 930 status.auto_analysis[self.pipe_bundle].local_tm_models = self.local_tm_models 931 932 # The list of model-free models for optimisation, i.e. the global iteration. 933 status.auto_analysis[self.pipe_bundle].mf_models = self.mf_models 934 935 # The current model-free model. 936 status.auto_analysis[self.pipe_bundle].current_model = None 937 938 # The maximum number of iterations of the global model. 939 status.auto_analysis[self.pipe_bundle].max_iter = self.max_iter 940 941 # The convergence of the global model. 942 status.auto_analysis[self.pipe_bundle].convergence = False
943 944
945 - def write_results(self):
946 """Create Grace plots of the final model-free results.""" 947 948 # Save the results file. 949 dir = self.write_results_dir + 'final' 950 self.interpreter.results.write(file='results', dir=dir, force=True) 951 952 # The Grace plots. 953 dir = self.write_results_dir + 'final' + sep + 'grace' 954 self.interpreter.grace.write(x_data_type='res_num', y_data_type='s2', file='s2.agr', dir=dir, force=True) 955 self.interpreter.grace.write(x_data_type='res_num', y_data_type='s2f', file='s2f.agr', dir=dir, force=True) 956 self.interpreter.grace.write(x_data_type='res_num', y_data_type='s2s', file='s2s.agr', dir=dir, force=True) 957 self.interpreter.grace.write(x_data_type='res_num', y_data_type='te', file='te.agr', dir=dir, force=True) 958 self.interpreter.grace.write(x_data_type='res_num', y_data_type='tf', file='tf.agr', dir=dir, force=True) 959 self.interpreter.grace.write(x_data_type='res_num', y_data_type='ts', file='ts.agr', dir=dir, force=True) 960 self.interpreter.grace.write(x_data_type='res_num', y_data_type='rex', file='rex.agr', dir=dir, force=True) 961 self.interpreter.grace.write(x_data_type='s2', y_data_type='te', file='s2_vs_te.agr', dir=dir, force=True) 962 self.interpreter.grace.write(x_data_type='s2', y_data_type='rex', file='s2_vs_rex.agr', dir=dir, force=True) 963 self.interpreter.grace.write(x_data_type='te', y_data_type='rex', file='te_vs_rex.agr', dir=dir, force=True) 964 965 # Write the values to text files. 966 dir = self.write_results_dir + 'final' 967 self.interpreter.value.write(param='s2', file='s2.txt', dir=dir, force=True) 968 self.interpreter.value.write(param='s2f', file='s2f.txt', dir=dir, force=True) 969 self.interpreter.value.write(param='s2s', file='s2s.txt', dir=dir, force=True) 970 self.interpreter.value.write(param='te', file='te.txt', dir=dir, force=True) 971 self.interpreter.value.write(param='tf', file='tf.txt', dir=dir, force=True) 972 self.interpreter.value.write(param='ts', file='ts.txt', dir=dir, force=True) 973 self.interpreter.value.write(param='rex', file='rex.txt', dir=dir, force=True) 974 self.interpreter.value.write(param='local_tm', file='local_tm.txt', dir=dir, force=True) 975 frqs = get_frequencies() 976 for i in range(len(frqs)): 977 comment = "This is the Rex value with units rad.s^-1 scaled to a magnetic field strength of %s MHz." % (frqs[i]/1e6) 978 self.interpreter.value.write(param='rex', file='rex_%s.txt'%int(frqs[i]/1e6), dir=dir, scaling=(2.0*pi*frqs[i])**2, comment=comment, force=True) 979 980 # Create the PyMOL macros. 981 dir = self.write_results_dir + 'final' + sep + 'pymol' 982 self.interpreter.pymol.macro_write(data_type='s2', dir=dir, force=True) 983 self.interpreter.pymol.macro_write(data_type='s2f', dir=dir, force=True) 984 self.interpreter.pymol.macro_write(data_type='s2s', dir=dir, force=True) 985 self.interpreter.pymol.macro_write(data_type='amp_fast', dir=dir, force=True) 986 self.interpreter.pymol.macro_write(data_type='amp_slow', dir=dir, force=True) 987 self.interpreter.pymol.macro_write(data_type='te', dir=dir, force=True) 988 self.interpreter.pymol.macro_write(data_type='tf', dir=dir, force=True) 989 self.interpreter.pymol.macro_write(data_type='ts', dir=dir, force=True) 990 self.interpreter.pymol.macro_write(data_type='time_fast', dir=dir, force=True) 991 self.interpreter.pymol.macro_write(data_type='time_slow', dir=dir, force=True) 992 self.interpreter.pymol.macro_write(data_type='rex', dir=dir, force=True) 993 994 # Create the Molmol macros. 995 dir = self.write_results_dir + 'final' + sep + 'molmol' 996 self.interpreter.molmol.macro_write(data_type='s2', dir=dir, force=True) 997 self.interpreter.molmol.macro_write(data_type='s2f', dir=dir, force=True) 998 self.interpreter.molmol.macro_write(data_type='s2s', dir=dir, force=True) 999 self.interpreter.molmol.macro_write(data_type='amp_fast', dir=dir, force=True) 1000 self.interpreter.molmol.macro_write(data_type='amp_slow', dir=dir, force=True) 1001 self.interpreter.molmol.macro_write(data_type='te', dir=dir, force=True) 1002 self.interpreter.molmol.macro_write(data_type='tf', dir=dir, force=True) 1003 self.interpreter.molmol.macro_write(data_type='ts', dir=dir, force=True) 1004 self.interpreter.molmol.macro_write(data_type='time_fast', dir=dir, force=True) 1005 self.interpreter.molmol.macro_write(data_type='time_slow', dir=dir, force=True) 1006 self.interpreter.molmol.macro_write(data_type='rex', dir=dir, force=True) 1007 1008 # Create a diffusion tensor representation of the tensor, if a PDB file is present and the local tm global model has not been selected. 1009 if hasattr(cdp, 'structure') and hasattr(cdp, 'diff_tensor'): 1010 dir = self.write_results_dir + 'final' 1011 self.interpreter.structure.create_diff_tensor_pdb(file="tensor.pdb", dir=dir, force=True)
1012 1013 1014
1015 -class Container:
1016 """Empty container for data storage."""
1017