Package auto_analyses :: Module dauvergne_protocol

Source Code for Module auto_analyses.dauvergne_protocol

  1  ############################################################################### 
  2  #                                                                             # 
  3  # Copyright (C) 2004-2013 Edward d'Auvergne                                   # 
  4  #                                                                             # 
  5  # This file is part of the program relax (http://www.nmr-relax.com).          # 
  6  #                                                                             # 
  7  # This program is free software: you can redistribute it and/or modify        # 
  8  # it under the terms of the GNU General Public License as published by        # 
  9  # the Free Software Foundation, either version 3 of the License, or           # 
 10  # (at your option) any later version.                                         # 
 11  #                                                                             # 
 12  # This program is distributed in the hope that it will be useful,             # 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 15  # GNU General Public License for more details.                                # 
 16  #                                                                             # 
 17  # You should have received a copy of the GNU General Public License           # 
 18  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
 19  #                                                                             # 
 20  ############################################################################### 
 21   
 22  # Python module imports. 
 23  from math import pi 
 24  from os import F_OK, access, getcwd, listdir, sep 
 25  from re import search 
 26  from time import sleep 
 27   
 28  # relax module imports. 
 29  from float import floatAsByteArray 
 30  from info import Info_box; info = Info_box() 
 31  from generic_fns.interatomic import interatomic_loop 
 32  from generic_fns.mol_res_spin import exists_mol_res_spin_data, generate_spin_id, return_spin, spin_index_loop, spin_loop 
 33  from generic_fns.pipes import cdp_name, get_pipe, has_pipe, pipe_names, switch 
 34  from generic_fns import frq, selection 
 35  from prompt.interpreter import Interpreter 
 36  from relax_errors import RelaxError, RelaxNoSequenceError, RelaxNoValueError 
 37  from relax_io import DummyFileObject 
 38  from relax_string import LIST, PARAGRAPH, SECTION, SUBSECTION, TITLE, to_docstring 
 39  from status import Status; status = Status() 
 40   
 41   
 42  doc = [ 
 43          [TITLE, "Automatic analysis for black-box model-free results."], 
  44          [PARAGRAPH, "The dauvergne_protocol auto-analysis is designed for those who appreciate black-boxes or those who appreciate complex code.  Importantly, data at multiple magnetic field strengths is essential for this analysis.  If you would like to change how model-free analysis is performed, the code in the file auto_analyses/dauvergne_protocol.py in the base relax directory can be copied and modified as needed and used with the relax script interface.  This file is simply a complex relax script.  For a description of object-oriented coding in Python using classes, functions/methods, self, etc., please see the Python tutorial."], 
 45   
 46          [SECTION, "References"], 
 47   
 48          [SUBSECTION, "Auto-analysis primary reference"], 
 49          [PARAGRAPH, "The model-free optimisation methodology herein is that of:"], 
 50          [LIST, info.bib['dAuvergneGooley08b'].cite_short()], 
 51   
 52          [SUBSECTION, "Techniques used in the auto-analysis"], 
 53          [PARAGRAPH, "Other references for features of this dauvergne_protocol auto-analysis include model-free model selection using Akaike's Information Criterion:"], 
 54          [LIST, info.bib['dAuvergneGooley03'].cite_short()], 
 55          [PARAGRAPH, "The elimination of failed model-free models and Monte Carlo simulations:"], 
 56          [LIST, info.bib['dAuvergneGooley06'].cite_short()], 
 57          [PARAGRAPH, "Significant model-free optimisation improvements:"], 
 58          [LIST, info.bib['dAuvergneGooley08a'].cite_short()], 
 59          [PARAGRAPH, "Rather than searching for the lowest chi-squared value, this auto-analysis searches for the model with the lowest AIC criterion.  This complex multi-universe, multi-dimensional problem is formulated, using set theory, as the universal solution:"], 
 60          [LIST, info.bib['dAuvergneGooley07'].cite_short()], 
  61          [PARAGRAPH, "The three basic references for the original and extended model-free theories are:"], 
 62          [LIST, info.bib['LipariSzabo82a'].cite_short()], 
 63          [LIST, info.bib['LipariSzabo82b'].cite_short()], 
 64          [LIST, info.bib['Clore90'].cite_short()], 
 65   
 66          [SECTION, "How to use this auto-analysis"], 
 67          [PARAGRAPH, "The five diffusion models used in this auto-analysis are:"], 
 68          [LIST, "Model I   (MI)   - Local tm."], 
 69          [LIST, "Model II  (MII)  - Sphere."], 
 70          [LIST, "Model III (MIII) - Prolate spheroid."], 
 71          [LIST, "Model IV  (MIV)  - Oblate spheroid."], 
 72          [LIST, "Model V   (MV)   - Ellipsoid."], 
  73          [PARAGRAPH, "If using the script-based user interface (UI), the value of the variable diff_model determines the behaviour of this auto-analysis.  Model I must be optimised prior to any of the other diffusion models, while Models II to V can be optimised in any order.  To select the various models, set the variable diff_model to the following strings:"], 
 74          [LIST, "MI   - 'local_tm'"], 
 75          [LIST, "MII  - 'sphere'"], 
 76          [LIST, "MIII - 'prolate'"], 
 77          [LIST, "MIV  - 'oblate'"], 
 78          [LIST, "MV   - 'ellipsoid'"], 
 79          [PARAGRAPH, "This approach has the advantage of eliminating the need for an initial estimate of a global diffusion tensor and removing all the problems associated with the initial estimate."], 
 80          [PARAGRAPH, "It is important that the number of parameters in a model does not exceed the number of relaxation data sets for that spin.  If this is the case, the list of models in the mf_models and local_tm_models variables will need to be trimmed."], 
 81   
 82          [SUBSECTION, "Model I - Local tm"], 
  83          [PARAGRAPH, "This will optimise the diffusion model whereby all spins of the molecule have a local tm value, i.e. there is no global diffusion tensor.  This model needs to be optimised prior to optimising any of the other diffusion models.  Each spin is fitted to the multiple model-free models separately, where the parameter tm is included in each model."], 
 84          [PARAGRAPH, "AIC model selection is used to select the models for each spin."], 
 85   
 86          [SUBSECTION, "Model II - Sphere"], 
  87          [PARAGRAPH, "This will optimise the isotropic diffusion model.  Multiple steps are required: an initial optimisation of the diffusion tensor, followed by a repetitive optimisation until convergence of the diffusion tensor.  In the relax script UI, each of these steps requires this script to be rerun, unless the conv_loop flag is True.  In the GUI (graphical user interface), the procedure is repeated automatically until convergence.  For the initial optimisation, which will be placed in the directory './sphere/init/', the following steps are used:"], 
 88          [PARAGRAPH, "The model-free models and parameter values for each spin are set to those of diffusion model MI."], 
 89          [PARAGRAPH, "The local tm parameter is removed from the models."], 
 90          [PARAGRAPH, "The model-free parameters are fixed and a global spherical diffusion tensor is minimised."], 
 91          [PARAGRAPH, "For the repetitive optimisation, each minimisation is named from 'round_1' onwards.  The initial 'round_1' optimisation will extract the diffusion tensor from the results file in './sphere/init/', and the results will be placed in the directory './sphere/round_1/'.  Each successive round will take the diffusion tensor from the previous round.  The following steps are used:"], 
 92          [PARAGRAPH, "The global diffusion tensor is fixed and the multiple model-free models are fitted to each spin."], 
 93          [PARAGRAPH, "AIC model selection is used to select the models for each spin."], 
 94          [PARAGRAPH, "All model-free and diffusion parameters are allowed to vary and a global optimisation of all parameters is carried out."], 
 95   
 96          [SUBSECTION, "Model III - Prolate spheroid"], 
 97          [PARAGRAPH, "The methods used are identical to those of diffusion model MII, except that an axially symmetric diffusion tensor with Da >= 0 is used.  The base directory containing all the results is './prolate/'."], 
 98   
 99          [SUBSECTION, "Model IV - Oblate spheroid"], 
100          [PARAGRAPH, "The methods used are identical to those of diffusion model MII, except that an axially symmetric diffusion tensor with Da <= 0 is used.  The base directory containing all the results is './oblate/'."], 
101   
102          [SUBSECTION, "Model V - Ellipsoid"], 
103          [PARAGRAPH, "The methods used are identical to those of diffusion model MII, except that a fully anisotropic diffusion tensor is used (also known as rhombic or asymmetric diffusion).  The base directory is './ellipsoid/'."], 
104   
105          [SUBSECTION, "Final run"], 
 106  [PARAGRAPH, "Once all the diffusion models have converged, the final run can be executed.  This is done by setting the variable diff_model to 'final'.  This consists of two steps: diffusion tensor model selection and Monte Carlo simulations.  First, AIC model selection is used to select between the diffusion tensor models.  Monte Carlo simulations are then run solely on the selected diffusion model.  Minimisation of the model is bypassed as it is assumed that the model is already fully optimised (if this is not the case, the final run is not yet appropriate)."], 
107          [PARAGRAPH, "The final black-box model-free results will be placed in the file 'final/results'."] 
108  ] 
109   
110   
111  # Build the module docstring. 
112  __doc__ = to_docstring(doc) 
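To complement the docstring above, the following is a minimal usage sketch of a relax script driving this auto-analysis.  It assumes a data pipe called 'mf', bundled into 'mf bundle', has already been created and fully set up as required by the pipe_name keyword of the dAuvergne_protocol class below (sequence, relaxation data recorded at two or more magnetic field strengths, CSA values, interatomic distances, and isotope types).  The pipe, bundle, and directory names are arbitrary editorial examples, not part of this module.

# Minimal usage sketch (editorial example, not part of the module).
from auto_analyses.dauvergne_protocol import dAuvergne_protocol

# Run the full protocol:  MI (local_tm) first, then the global diffusion models, then the final run.
dAuvergne_protocol(pipe_name='mf', pipe_bundle='mf bundle', results_dir='analysis', diff_model=['local_tm', 'sphere', 'prolate', 'oblate', 'ellipsoid', 'final'], grid_inc=11, mc_sim_num=500, conv_loop=True)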
113   
114   
115   
 116  class dAuvergne_protocol:
 117      """The model-free auto-analysis."""
 118  
 119      # Some class variables.
 120      opt_func_tol = 1e-25
 121      opt_max_iterations = int(1e7)
 122  
 123      def __init__(self, pipe_name=None, pipe_bundle=None, results_dir=None, write_results_dir=None, diff_model=None, mf_models=['m0', 'm1', 'm2', 'm3', 'm4', 'm5', 'm6', 'm7', 'm8', 'm9'], local_tm_models=['tm0', 'tm1', 'tm2', 'tm3', 'tm4', 'tm5', 'tm6', 'tm7', 'tm8', 'tm9'], grid_inc=11, diff_tensor_grid_inc={'sphere': 11, 'prolate': 11, 'oblate': 11, 'ellipsoid': 6}, min_algor='newton', mc_sim_num=500, max_iter=None, user_fns=None, conv_loop=True):
 124          """Perform the full model-free analysis protocol of d'Auvergne and Gooley, 2008b.
 125  
 126          @keyword pipe_name: The name of the data pipe containing the sequence info.  This data pipe should have all values set including the CSA value, the bond length, the heteronucleus name and proton name.  It should also have all relaxation data loaded.
 127          @type pipe_name: str
 128          @keyword pipe_bundle: The data pipe bundle to associate all spawned data pipes with.
 129          @type pipe_bundle: str
 130          @keyword results_dir: The directory where the optimisation results will be read from.  Results will also be saved to this directory if the write_results_dir argument is not given.
 131          @type results_dir: str
 132          @keyword write_results_dir: The directory where the optimisation results will be saved to.  If None, it will default to the value of the results_dir argument.  This is mainly used for debugging.
 133          @type write_results_dir: str or None
 134          @keyword diff_model: The global diffusion model to optimise.  This can be one of 'local_tm', 'sphere', 'oblate', 'prolate', 'ellipsoid', or 'final'.  If all or a subset of these are supplied as a list, then these will be automatically looped over and calculated.
 135          @type diff_model: str or list of str
 136          @keyword mf_models: The model-free models.
 137          @type mf_models: list of str
 138          @keyword local_tm_models: The model-free models with the local tm parameter included.
 139          @type local_tm_models: list of str
 140          @keyword grid_inc: The grid search size (the number of increments per dimension).
 141          @type grid_inc: int
 142          @keyword diff_tensor_grid_inc: A dictionary of grid search sizes for the optimisation of the sphere, prolate spheroid, oblate spheroid, and ellipsoid diffusion tensors.
 143          @type diff_tensor_grid_inc: dict of int
 144          @keyword min_algor: The minimisation algorithm (in most cases this should not be changed).
 145          @type min_algor: str
 146          @keyword mc_sim_num: The number of Monte Carlo simulations to be used for error analysis at the end of the analysis.
 147          @type mc_sim_num: int
 148          @keyword max_iter: The maximum number of iterations for the global iteration.  If set to None, the algorithm iterates until convergence.
 149          @type max_iter: int or None.
 150          @keyword user_fns: A dictionary of replacement user functions.  These will overwrite the standard user functions.  The key should be the name of the user function or user function class and the value should be the function or class instance.
 151          @type user_fns: dict
 152          @keyword conv_loop: Automatic looping over all rounds until convergence.
 153          @type conv_loop: bool
 154          """
 155  
 156          # Execution lock.
 157          status.exec_lock.acquire(pipe_bundle, mode='auto-analysis')
 158  
 159          # Store the args.
 160          self.pipe_name = pipe_name
 161          self.pipe_bundle = pipe_bundle
 162          self.mf_models = mf_models
 163          self.local_tm_models = local_tm_models
 164          self.grid_inc = grid_inc
 165          self.diff_tensor_grid_inc = diff_tensor_grid_inc
 166          self.min_algor = min_algor
 167          self.mc_sim_num = mc_sim_num
 168          self.max_iter = max_iter
 169          self.conv_loop = conv_loop
 170  
 171          # The model-free data pipe names.
 172          self.mf_model_pipes = []
 173          for i in range(len(self.mf_models)):
 174              self.mf_model_pipes.append(self.name_pipe(self.mf_models[i]))
 175          self.local_tm_model_pipes = []
 176          for i in range(len(self.local_tm_models)):
 177              self.local_tm_model_pipes.append(self.name_pipe(self.local_tm_models[i]))
 178  
 179          # The diffusion models.
 180          if isinstance(diff_model, list):
 181              self.diff_model_list = diff_model
 182          else:
 183              self.diff_model_list = [diff_model]
 184  
 185          # Project directory (i.e. directory containing the model-free model results and the newly generated files)
 186          if results_dir:
 187              self.results_dir = results_dir + sep
 188          else:
 189              self.results_dir = getcwd() + sep
 190          if write_results_dir:
 191              self.write_results_dir = write_results_dir + sep
 192          else:
 193              self.write_results_dir = self.results_dir
 194  
 195          # Data checks.
 196          self.check_vars()
 197  
 198          # Set the data pipe to the current data pipe.
 199          if self.pipe_name != cdp_name():
 200              switch(self.pipe_name)
 201  
 202          # Some info for the status.
 203          self.status_setup()
 204  
 205          # Load the interpreter.
 206          self.interpreter = Interpreter(show_script=False, quit=False, raise_relax_error=True)
 207          self.interpreter.populate_self()
 208          self.interpreter.on(verbose=False)
 209  
 210          # Replacement user functions.
 211          if user_fns:
 212              for name in user_fns:
 213                  setattr(self.interpreter, name, user_fns[name])
 214  
 215          # Execute the protocol.
 216          try:
 217              # Loop over the models.
 218              for self.diff_model in self.diff_model_list:
 219                  # Wait a little while between diffusion models.
 220                  sleep(1)
 221  
 222                  # Set the global model name.
 223                  status.auto_analysis[self.pipe_bundle].diff_model = self.diff_model
 224  
 225                  # Initialise the convergence data structures.
 226                  self.conv_data = Container()
 227                  self.conv_data.chi2 = []
 228                  self.conv_data.models = []
 229                  self.conv_data.diff_vals = []
 230                  if self.diff_model == 'sphere':
 231                      self.conv_data.diff_params = ['tm']
 232                  elif self.diff_model == 'oblate' or self.diff_model == 'prolate':
 233                      self.conv_data.diff_params = ['tm', 'Da', 'theta', 'phi']
 234                  elif self.diff_model == 'ellipsoid':
 235                      self.conv_data.diff_params = ['tm', 'Da', 'Dr', 'alpha', 'beta', 'gamma']
 236                  self.conv_data.spin_ids = []
 237                  self.conv_data.mf_params = []
 238                  self.conv_data.mf_vals = []
 239  
 240                  # Execute the analysis for each diffusion model.
 241                  self.execute()
 242  
 243          # Clean up.
 244          finally:
 245              # Finish and unlock execution.
 246              status.auto_analysis[self.pipe_bundle].fin = True
 247              status.current_analysis = None
 248              status.exec_lock.release()
 249  
 250  
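The user_fns keyword documented above is applied with a plain setattr() on the interpreter, so any replacement must accept the same arguments that this protocol passes to the original user function.  The following hypothetical sketch (the function quiet_minimise and its behaviour are illustrative only, not part of relax) shows the mechanism by intercepting the minimise user function; a real replacement would normally wrap or re-implement the original rather than skip the optimisation.

# Editorial, hypothetical illustration of the user_fns mechanism (not part of the module).
from auto_analyses.dauvergne_protocol import dAuvergne_protocol

def quiet_minimise(min_algor, func_tol=1e-25, max_iter=10000000):
    # Record the settings instead of minimising - this mirrors the call signature used by the protocol.
    print("minimise() intercepted: %s (func_tol=%s, max_iter=%s)" % (min_algor, func_tol, max_iter))

dAuvergne_protocol(pipe_name='mf', pipe_bundle='mf bundle', diff_model='local_tm', user_fns={'minimise': quiet_minimise})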
 251      def check_vars(self):
 252          """Check that the user has set the variables correctly."""
 253  
 254          # The pipe bundle.
 255          if not isinstance(self.pipe_bundle, str):
 256              raise RelaxError("The pipe bundle name '%s' is invalid." % self.pipe_bundle)
 257  
 258          # The diff model.
 259          valid_models = ['local_tm', 'sphere', 'oblate', 'prolate', 'ellipsoid', 'final']
 260          for i in range(len(self.diff_model_list)):
 261              if self.diff_model_list[i] not in valid_models:
 262                  raise RelaxError("The diff_model value '%s' is incorrectly set.  It must be one of %s." % (self.diff_model_list[i], valid_models))
 263  
 264          # Model-free models.
 265          mf_models = ['m0', 'm1', 'm2', 'm3', 'm4', 'm5', 'm6', 'm7', 'm8', 'm9']
 266          local_tm_models = ['tm0', 'tm1', 'tm2', 'tm3', 'tm4', 'tm5', 'tm6', 'tm7', 'tm8', 'tm9']
 267          if not isinstance(self.mf_models, list):
 268              raise RelaxError("The self.mf_models user variable must be a list.")
 269          if not isinstance(self.local_tm_models, list):
 270              raise RelaxError("The self.local_tm_models user variable must be a list.")
 271          for i in range(len(self.mf_models)):
 272              if self.mf_models[i] not in mf_models:
 273                  raise RelaxError("The self.mf_models user variable '%s' is incorrectly set.  It must be one of %s." % (self.mf_models, mf_models))
 274          for i in range(len(self.local_tm_models)):
 275              if self.local_tm_models[i] not in local_tm_models:
 276                  raise RelaxError("The self.local_tm_models user variable '%s' is incorrectly set.  It must be one of %s." % (self.local_tm_models, local_tm_models))
 277  
 278          # Sequence data.
 279          if not exists_mol_res_spin_data():
 280              raise RelaxNoSequenceError(self.pipe_name)
 281  
 282          # Relaxation data.
 283          if not hasattr(cdp, 'ri_ids') or len(cdp.ri_ids) == 0:
 284              raise RelaxError("No relaxation data has been loaded.")
 285  
 286          # Insufficient data.
 287          if len(cdp.ri_ids) <= 3:
 288              raise RelaxError("Insufficient relaxation data, 4 or more data sets are essential for the execution of this script.")
 289  
 290          # Spin vars.
 291          for spin, spin_id in spin_loop(return_id=True):
 292              # Skip deselected spins.
 293              if not spin.select:
 294                  continue
 295  
 296              # Test if the isotope type has been set.
 297              if not hasattr(spin, 'isotope') or spin.isotope == None:
 298                  raise RelaxNoValueError("nuclear isotope type", spin_id=spin_id)
 299  
 300              # Skip spins with no relaxation data.
 301              if not hasattr(spin, 'ri_data') or spin.ri_data == None:
 302                  continue
 303  
 304              # Test if the CSA value has been set.
 305              if not hasattr(spin, 'csa') or spin.csa == None:
 306                  raise RelaxNoValueError("CSA", spin_id=spin_id)
 307  
 308          # Interatomic vars.
 309          for interatom in interatomic_loop():
 310              # Get the corresponding spins.
 311              spin1 = return_spin(interatom.spin_id1)
 312              spin2 = return_spin(interatom.spin_id2)
 313  
 314              # Skip deselected spins.
 315              if not spin1.select or not spin2.select:
 316                  continue
 317  
 318              # Test if the interatomic distance has been set.
 319              if not hasattr(interatom, 'r') or interatom.r == None:
 320                  raise RelaxNoValueError("interatomic distance", spin_id=interatom.spin_id1, spin_id2=interatom.spin_id2)
 321  
 322          # Min vars.
 323          if not isinstance(self.grid_inc, int):
 324              raise RelaxError("The grid_inc user variable '%s' is incorrectly set.  It should be an integer." % self.grid_inc)
 325          if not isinstance(self.diff_tensor_grid_inc, dict):
 326              raise RelaxError("The diff_tensor_grid_inc user variable '%s' is incorrectly set.  It should be a dictionary." % self.diff_tensor_grid_inc)
 327          for tensor in ['sphere', 'prolate', 'oblate', 'ellipsoid']:
 328              if not tensor in self.diff_tensor_grid_inc:
 329                  raise RelaxError("The diff_tensor_grid_inc user variable '%s' is incorrectly set.  It should contain the '%s' key." % (self.diff_tensor_grid_inc, tensor))
 330              if not isinstance(self.diff_tensor_grid_inc[tensor], int):
 331                  raise RelaxError("The diff_tensor_grid_inc user variable '%s' is incorrectly set.  The value corresponding to the key '%s' should be an integer." % (self.diff_tensor_grid_inc, tensor))
 332          if not isinstance(self.min_algor, str):
 333              raise RelaxError("The min_algor user variable '%s' is incorrectly set.  It should be a string." % self.min_algor)
 334          if not isinstance(self.mc_sim_num, int):
 335              raise RelaxError("The mc_sim_num user variable '%s' is incorrectly set.  It should be an integer." % self.mc_sim_num)
 336  
 337          # Looping.
 338          if not isinstance(self.conv_loop, bool):
 339              raise RelaxError("The conv_loop user variable '%s' is incorrectly set.  It should be one of the booleans True or False." % self.conv_loop)
 340  
 341  
 342      def convergence(self):
 343          """Test for the convergence of the global model."""
 344  
 345          # Print out.
 346          print("\n\n\n")
 347          print("#####################")
 348          print("# Convergence tests #")
 349          print("#####################\n")
 350  
 351          # Maximum number of iterations reached.
 352          if self.max_iter and self.round > self.max_iter:
 353              print("Maximum number of global iterations reached.  Terminating the protocol before convergence has been reached.")
 354              return True
 355  
 356          # Store the data of the current data pipe.
 357          self.conv_data.chi2.append(cdp.chi2)
 358  
 359          # Create a string representation of the model-free models of the current data pipe.
 360          curr_models = ''
 361          for spin in spin_loop():
 362              if hasattr(spin, 'model'):
 363                  if not spin.model == 'None':
 364                      curr_models = curr_models + spin.model
 365          self.conv_data.models.append(curr_models)
 366  
 367          # Store the diffusion tensor parameters.
 368          self.conv_data.diff_vals.append([])
 369          for param in self.conv_data.diff_params:
 370              # Get the parameter values.
 371              self.conv_data.diff_vals[-1].append(getattr(cdp.diff_tensor, param))
 372  
 373          # Store the model-free parameters.
 374          self.conv_data.mf_vals.append([])
 375          self.conv_data.mf_params.append([])
 376          self.conv_data.spin_ids.append([])
 377          for spin, spin_id in spin_loop(return_id=True):
 378              # Skip spin systems with no 'params' object.
 379              if not hasattr(spin, 'params'):
 380                  continue
 381  
 382              # Add the spin ID, parameters, and empty value list.
 383              self.conv_data.spin_ids[-1].append(spin_id)
 384              self.conv_data.mf_params[-1].append([])
 385              self.conv_data.mf_vals[-1].append([])
 386  
 387              # Loop over the parameters.
 388              for j in range(len(spin.params)):
 389                  # Get the parameters and values.
 390                  self.conv_data.mf_params[-1][-1].append(spin.params[j])
 391                  self.conv_data.mf_vals[-1][-1].append(getattr(spin, spin.params[j].lower()))
 392  
 393          # No need for tests.
 394          if self.round == 1:
 395              print("First round of optimisation, skipping the convergence tests.\n\n\n")
 396              return False
 397  
 398          # Loop over the iterations.
 399          converged = False
 400          for i in range(self.start_round, self.round - 1):
 401              # Print out.
 402              print("\n\n\n# Comparing the current iteration to iteration %i.\n" % (i+1))
 403  
 404              # Index.
 405              index = i - self.start_round
 406  
 407              # Chi-squared test.
 408              print("Chi-squared test:")
 409              print("    chi2 (iter %i):  %s" % (i+1, self.conv_data.chi2[index]))
 410              print("        (as an IEEE-754 byte array:  %s)" % floatAsByteArray(self.conv_data.chi2[index]))
 411              print("    chi2 (iter %i):  %s" % (self.round, self.conv_data.chi2[-1]))
 412              print("        (as an IEEE-754 byte array:  %s)" % floatAsByteArray(self.conv_data.chi2[-1]))
 413              print("    chi2 (difference):  %s" % (self.conv_data.chi2[index] - self.conv_data.chi2[-1]))
 414              if self.conv_data.chi2[index] == self.conv_data.chi2[-1]:
 415                  print("    The chi-squared value has converged.\n")
 416              else:
 417                  print("    The chi-squared value has not converged.\n")
 418                  continue
 419  
 420              # Identical model-free model test.
 421              print("Identical model-free models test:")
 422              if self.conv_data.models[index] == self.conv_data.models[-1]:
 423                  print("    The model-free models have converged.\n")
 424              else:
 425                  print("    The model-free models have not converged.\n")
 426                  continue
 427  
 428              # Identical diffusion tensor parameter value test.
 429              print("Identical diffusion tensor parameter test:")
 430              params_converged = True
 431              for k in range(len(self.conv_data.diff_params)):
 432                  # Test if not identical.
 433                  if self.conv_data.diff_vals[index][k] != self.conv_data.diff_vals[-1][k]:
 434                      print("    Parameter:  %s" % self.conv_data.diff_params[k])
 435                      print("    Value (iter %i):  %s" % (i+1, self.conv_data.diff_vals[index][k]))
 436                      print("        (as an IEEE-754 byte array:  %s)" % floatAsByteArray(self.conv_data.diff_vals[index][k]))
 437                      print("    Value (iter %i):  %s" % (self.round, self.conv_data.diff_vals[-1][k]))
 438                      print("        (as an IEEE-754 byte array:  %s)" % floatAsByteArray(self.conv_data.diff_vals[-1][k]))
 439                      print("    The diffusion parameters have not converged.\n")
 440                      params_converged = False
 441                      break
 442              if not params_converged:
 443                  continue
 444              print("    The diffusion tensor parameters have converged.\n")
 445  
 446              # Identical model-free parameter value test.
 447              print("\nIdentical model-free parameter test:")
 448              if len(self.conv_data.spin_ids[index]) != len(self.conv_data.spin_ids[-1]):
 449                  print("    Different number of spins.")
 450                  continue
 451              for j in range(len(self.conv_data.spin_ids[-1])):
 452                  # Loop over the parameters.
 453                  for k in range(len(self.conv_data.mf_params[-1][j])):
 454                      # Test if not identical.
 455                      if self.conv_data.mf_vals[index][j][k] != self.conv_data.mf_vals[-1][j][k]:
 456                          print("    Spin ID:  %s" % self.conv_data.spin_ids[-1][j])
 457                          print("    Parameter:  %s" % self.conv_data.mf_params[-1][j][k])
 458                          print("    Value (iter %i):  %s" % (i+1, self.conv_data.mf_vals[index][j][k]))
 459                          print("        (as an IEEE-754 byte array:  %s)" % floatAsByteArray(self.conv_data.mf_vals[index][j][k]))
 460                          print("    Value (iter %i):  %s" % (self.round, self.conv_data.mf_vals[-1][j][k]))
 461                          print("        (as an IEEE-754 byte array:  %s)" % floatAsByteArray(self.conv_data.mf_vals[-1][j][k]))
 462                          print("    The model-free parameters have not converged.\n")
 463                          params_converged = False
 464                          break
 465              if not params_converged:
 466                  continue
 467              print("    The model-free parameters have converged.\n")
 468  
 469              # Convergence.
 470              converged = True
 471              break
 472  
 473  
 474          # Final printout.
 475          ##################
 476  
 477          print("\nConvergence:")
 478          if converged:
 479              # Update the status.
 480              status.auto_analysis[self.pipe_bundle].convergence = True
 481  
 482              # Print out.
 483              print("    [ Yes ]")
 484  
 485              # Return the termination condition.
 486              return True
 487          else:
 488              # Print out.
 489              print("    [ No ]")
 490  
 491              # Return False to not terminate.
 492              return False
 493  
 494  
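A note on the tests above: convergence is declared only when the chi-squared value, the model-free model strings, and every diffusion tensor and model-free parameter are identical between iterations down to the last bit; no numerical tolerance is applied.  The floatAsByteArray() calls exist purely so that near-identical values can be told apart in the printout.  A small editorial illustration, runnable with the relax source on the Python path:

# Editorial illustration of the exact-equality convergence criterion used above.
from float import floatAsByteArray

a = 0.1 + 0.2    # 0.30000000000000004
b = 0.3
print(a == b)                  # False, so the protocol would report "not converged".
print(floatAsByteArray(a))     # The IEEE-754 byte arrays make the tiny difference visible,
print(floatAsByteArray(b))     # which is why they are printed next to each value above.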
 495      def determine_rnd(self, model=None):
 496          """Function for returning the number of the next round of optimisation."""
 497  
 498          # Get a list of all files in the directory model.  If no directory exists, set the round to 'init' or 0.
 499          try:
 500              dir_list = listdir(self.results_dir+sep+model)
 501          except:
 502              return 0
 503  
 504          # Set the round to 'init' or 0 if there is no directory called 'init'.
 505          if 'init' not in dir_list:
 506              return 0
 507  
 508          # Create a list of all files which begin with 'round_'.
 509          rnd_dirs = []
 510          for file in dir_list:
 511              if search('^round_', file):
 512                  rnd_dirs.append(file)
 513  
 514          # Create a sorted list of integer round numbers.
 515          numbers = []
 516          for dir in rnd_dirs:
 517              try:
 518                  numbers.append(int(dir[6:]))
 519              except:
 520                  pass
 521          numbers.sort()
 522  
 523          # No directories beginning with 'round_' exist, set the round to 1.
 524          if not len(numbers):
 525              return 1
 526  
 527          # The highest number.
 528          max_round = numbers[-1]
 529  
 530          # Check that the opt/results file exists for the round (working backwards).
 531          for i in range(max_round, -1, -1):
 532              # Assume the round is complete.
 533              complete_round = i
 534  
 535              # The file root.
 536              file_root = self.results_dir + sep + model + sep + "round_%i" % i + sep + 'opt' + sep + 'results'
 537  
 538              # Stop looping when the opt/results file is found.
 539              if access(file_root + '.bz2', F_OK):
 540                  break
 541              if access(file_root + '.gz', F_OK):
 542                  break
 543              if access(file_root, F_OK):
 544                  break
 545  
 546          # No round, so assume the initial state.
 547          if complete_round == 0:
 548              return 0
 549  
 550          # Determine the number for the next round (add 1 to the highest completed round).
 551          return complete_round + 1
 552  
 553  
 554      def execute(self):
 555          """Execute the protocol."""
 556  
 557          # MI - Local tm.
 558          ################
 559  
 560          if self.diff_model == 'local_tm':
 561              # Base directory to place files into.
 562              self.base_dir = self.results_dir+'local_tm'+sep
 563  
 564              # Sequential optimisation of all model-free models (function must be modified to suit).
 565              self.multi_model(local_tm=True)
 566  
 567              # Model selection.
 568              self.model_selection(modsel_pipe=self.name_pipe('aic'), dir=self.base_dir + 'aic')
 569  
 570  
 571          # Diffusion models MII to MV.
 572          #############################
 573  
 574          elif self.diff_model == 'sphere' or self.diff_model == 'prolate' or self.diff_model == 'oblate' or self.diff_model == 'ellipsoid':
 575              # No local_tm directory!
 576              dir_list = listdir(self.results_dir)
 577              if 'local_tm' not in dir_list:
 578                  raise RelaxError("The local_tm model must be optimised first.")
 579  
 580              # The initial round of optimisation - not zero if calculations were interrupted.
 581              self.start_round = self.determine_rnd(model=self.diff_model)
 582  
 583              # Loop until convergence if conv_loop is set, otherwise just loop once.
 584              # This looping could be made much cleaner by removing the dependence on the determine_rnd() function.
 585              while True:
 586                  # Determine which round of optimisation to do (init, round_1, round_2, etc).
 587                  self.round = self.determine_rnd(model=self.diff_model)
 588                  status.auto_analysis[self.pipe_bundle].round = self.round
 589  
 590                  # Initial round of optimisation for diffusion models MII to MV.
 591                  if self.round == 0:
 592                      # Base directory to place files into.
 593                      self.base_dir = self.results_dir+self.diff_model+sep+'init'+sep
 594  
 595                      # Run name.
 596                      name = self.name_pipe(self.diff_model)
 597  
 598                      # Create the data pipe (deleting the old one if it exists).
 599                      if has_pipe(name):
 600                          self.interpreter.pipe.delete(name)
 601                      self.interpreter.pipe.create(name, 'mf', bundle=self.pipe_bundle)
 602  
 603                      # Load the local tm diffusion model MI results.
 604                      self.interpreter.results.read(file='results', dir=self.results_dir+'local_tm'+sep+'aic')
 605  
 606                      # Remove the tm parameter.
 607                      self.interpreter.model_free.remove_tm()
 608  
 609                      # Add an arbitrary diffusion tensor which will be optimised.
 610                      if self.diff_model == 'sphere':
 611                          self.interpreter.diffusion_tensor.init(10e-9, fixed=False)
 612                          inc = self.diff_tensor_grid_inc['sphere']
 613                      elif self.diff_model == 'prolate':
 614                          self.interpreter.diffusion_tensor.init((10e-9, 0, 0, 0), spheroid_type='prolate', fixed=False)
 615                          inc = self.diff_tensor_grid_inc['prolate']
 616                      elif self.diff_model == 'oblate':
 617                          self.interpreter.diffusion_tensor.init((10e-9, 0, 0, 0), spheroid_type='oblate', fixed=False)
 618                          inc = self.diff_tensor_grid_inc['oblate']
 619                      elif self.diff_model == 'ellipsoid':
 620                          self.interpreter.diffusion_tensor.init((10e-09, 0, 0, 0, 0, 0), fixed=False)
 621                          inc = self.diff_tensor_grid_inc['ellipsoid']
 622  
 623                      # Minimise just the diffusion tensor.
 624                      self.interpreter.fix('all_spins')
 625                      self.interpreter.grid_search(inc=inc)
 626                      self.interpreter.minimise(self.min_algor, func_tol=self.opt_func_tol, max_iter=self.opt_max_iterations)
 627  
 628                      # Write the results.
 629                      self.interpreter.results.write(file='results', dir=self.base_dir, force=True)
 630  
 631  
 632                  # Normal round of optimisation for diffusion models MII to MV.
 633                  else:
 634                      # Base directory to place files into.
 635                      self.base_dir = self.results_dir+self.diff_model + sep+'round_'+repr(self.round)+sep
 636  
 637                      # Load the optimised diffusion tensor from the previous round (or from the initial optimisation for round 1).
 638                      self.load_tensor()
 639  
 640                      # Sequential optimisation of all model-free models (function must be modified to suit).
 641                      self.multi_model()
 642  
 643                      # Model selection.
 644                      self.model_selection(modsel_pipe=self.name_pipe('aic'), dir=self.base_dir + 'aic')
 645  
 646                      # Final optimisation of all diffusion and model-free parameters.
 647                      self.interpreter.fix('all', fixed=False)
 648  
 649                      # Minimise all parameters.
 650                      self.interpreter.minimise(self.min_algor, func_tol=self.opt_func_tol, max_iter=self.opt_max_iterations)
 651  
 652                      # Write the results.
 653                      dir = self.base_dir + 'opt'
 654                      self.interpreter.results.write(file='results', dir=dir, force=True)
 655  
 656                      # Test for convergence.
 657                      converged = self.convergence()
 658  
 659                      # Break out of the infinite while loop if automatic looping is not activated or if convergence has occurred.
 660                      if converged or not self.conv_loop:
 661                          break
 662  
 663              # Unset the status.
 664              status.auto_analysis[self.pipe_bundle].round = None
 665  
 666  
 667          # Final run.
 668          ############
 669  
 670          elif self.diff_model == 'final':
 671              # Diffusion model selection.
 672              ############################
 673  
 674              # The contents of the results directory.
 675              dir_list = listdir(self.results_dir)
 676  
 677              # Check that the minimal set of global diffusion models required for the protocol has been optimised.
 678              min_models = ['local_tm', 'sphere']
 679              for model in min_models:
 680                  if model not in dir_list:
 681                      raise RelaxError("The minimum set of global diffusion models required for the protocol have not been optimised, the '%s' model results cannot be found." % model)
 682  
 683              # Build a list of all global diffusion models optimised.
 684              all_models = ['local_tm', 'sphere', 'prolate', 'oblate', 'ellipsoid']
 685              self.opt_models = []
 686              self.pipes = []
 687              for model in all_models:
 688                  if model in dir_list:
 689                      self.opt_models.append(model)
 690                      self.pipes.append(self.name_pipe(model))
 691  
 692              # Remove all temporary pipes used in this auto-analysis.
 693              for name in pipe_names(bundle=self.pipe_bundle):
 694                  if name in self.pipes + self.mf_model_pipes + self.local_tm_model_pipes + [self.name_pipe('aic'), self.name_pipe('previous')]:
 695                      self.interpreter.pipe.delete(name)
 696  
 697              # Create the local_tm data pipe.
 698              self.interpreter.pipe.create(self.name_pipe('local_tm'), 'mf', bundle=self.pipe_bundle)
 699  
 700              # Load the local tm diffusion model MI results.
 701              self.interpreter.results.read(file='results', dir=self.results_dir+'local_tm'+sep+'aic')
 702  
 703              # Loop over models MII to MV.
 704              for model in ['sphere', 'prolate', 'oblate', 'ellipsoid']:
 705                  # Skip missing models.
 706                  if model not in self.opt_models:
 707                      continue
 708  
 709                  # Determine which was the last round of optimisation for each of the models.
 710                  self.round = self.determine_rnd(model=model) - 1
 711  
 712                  # If no directories beginning with 'round_' exist, the script has not been properly utilised!
 713                  if self.round < 1:
 714                      # Construct the name of the diffusion tensor.
 715                      name = model
 716                      if model == 'prolate' or model == 'oblate':
 717                          name = name + ' spheroid'
 718  
 719                      # Throw an error to prevent misuse of the script.
 720                      raise RelaxError("Multiple rounds of optimisation of the " + name + " (between 8 to 15) are required for the proper execution of this script.")
 721  
 722                  # Create the data pipe.
 723                  self.interpreter.pipe.create(self.name_pipe(model), 'mf', bundle=self.pipe_bundle)
 724  
 725                  # Load the diffusion model results.
 726                  self.interpreter.results.read(file='results', dir=self.results_dir+model + sep+'round_'+repr(self.round)+sep+'opt')
 727  
 728              # Model selection between MI to MV.
 729              self.model_selection(modsel_pipe=self.name_pipe('final'), write_flag=False)
 730  
 731  
 732              # Monte Carlo simulations.
 733              ##########################
 734  
 735              # Fix the diffusion tensor, if it exists.
 736              if hasattr(get_pipe(self.name_pipe('final')), 'diff_tensor'):
 737                  self.interpreter.fix('diff')
 738  
 739              # Simulations.
 740              self.interpreter.monte_carlo.setup(number=self.mc_sim_num)
 741              self.interpreter.monte_carlo.create_data()
 742              self.interpreter.monte_carlo.initial_values()
 743              self.interpreter.minimise(self.min_algor, func_tol=self.opt_func_tol, max_iter=self.opt_max_iterations)
 744              self.interpreter.eliminate()
 745              self.interpreter.monte_carlo.error_analysis()
 746  
 747  
 748              # Write the final results.
 749              ##########################
 750  
 751              # Create results files and plots of the data.
 752              self.write_results()
 753  
 754  
 755          # Unknown script behaviour.
 756          ###########################
 757  
 758          else:
 759              raise RelaxError("Unknown diffusion model, change the value of 'self.diff_model'")
 760  
 761  
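For orientation, the directory tree that execute() builds up under results_dir over a complete analysis is sketched below as a comment block (directory names are taken from the code above; the number of round_* directories depends on when convergence is reached):

# results_dir/
#     local_tm/                        MI:  tm0/ ... tm9/ plus aic/results from model selection.
#     sphere/                          MII:  init/results, then round_1/ ... round_n/, each
#                                      containing m0/ ... m9/, aic/ and opt/results.
#     prolate/  oblate/  ellipsoid/    MIII to MV:  same layout as sphere/.
#     final/                           Final run:  results, the *.txt value files, grace/,
#                                      pymol/, molmol/, and tensor.pdb when a structure is loaded.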
 762      def load_tensor(self):
 763          """Function for loading the optimised diffusion tensor."""
 764  
 765          # Create the data pipe for the previous data (deleting the old data pipe first if necessary).
 766          if has_pipe(self.name_pipe('previous')):
 767              self.interpreter.pipe.delete(self.name_pipe('previous'))
 768          self.interpreter.pipe.create(self.name_pipe('previous'), 'mf', bundle=self.pipe_bundle)
 769  
 770          # Load the optimised diffusion tensor from the initial round.
 771          if self.round == 1:
 772              self.interpreter.results.read('results', self.results_dir+self.diff_model + sep+'init')
 773  
 774          # Load the optimised diffusion tensor from the previous round.
 775          else:
 776              self.interpreter.results.read('results', self.results_dir+self.diff_model + sep+'round_'+repr(self.round-1)+sep+'opt')
 777  
 778  
 779      def model_selection(self, modsel_pipe=None, dir=None, write_flag=True):
 780          """Model selection function."""
 781  
 782          # Model selection (delete the model selection pipe if it already exists).
 783          if has_pipe(modsel_pipe):
 784              self.interpreter.pipe.delete(modsel_pipe)
 785          self.interpreter.model_selection(method='AIC', modsel_pipe=modsel_pipe, bundle=self.pipe_bundle, pipes=self.pipes)
 786  
 787          # Write the results.
 788          if write_flag:
 789              self.interpreter.results.write(file='results', dir=dir, force=True)
 790  
 791  
 792      def multi_model(self, local_tm=False):
 793          """Function for optimisation of all model-free models."""
 794  
 795          # Set the data pipe names (also the names of preset model-free models).
 796          if local_tm:
 797              models = self.local_tm_models
 798              self.pipes = self.local_tm_models
 799          else:
 800              models = self.mf_models
 801          self.pipes = []
 802          for i in range(len(models)):
 803              self.pipes.append(self.name_pipe(models[i]))
 804  
 805          # Loop over the data pipes.
 806          for i in range(len(models)):
 807              # Place the model name into the status container.
 808              status.auto_analysis[self.pipe_bundle].current_model = models[i]
 809  
 810              # Create the data pipe (by copying).
 811              if has_pipe(self.pipes[i]):
 812                  self.interpreter.pipe.delete(self.pipes[i])
 813              self.interpreter.pipe.copy(self.pipe_name, self.pipes[i], bundle_to=self.pipe_bundle)
 814              self.interpreter.pipe.switch(self.pipes[i])
 815  
 816              # Copy the diffusion tensor from the 'opt' data pipe and prevent it from being minimised.
 817              if not local_tm:
 818                  self.interpreter.diffusion_tensor.copy(self.name_pipe('previous'))
 819                  self.interpreter.fix('diff')
 820  
 821              # Select the model-free model.
 822              self.interpreter.model_free.select_model(model=models[i])
 823  
 824              # Minimise.
 825              self.interpreter.grid_search(inc=self.grid_inc)
 826              self.interpreter.minimise(self.min_algor, func_tol=self.opt_func_tol, max_iter=self.opt_max_iterations)
 827  
 828              # Model elimination.
 829              self.interpreter.eliminate()
 830  
 831              # Write the results.
 832              dir = self.base_dir + models[i]
 833              self.interpreter.results.write(file='results', dir=dir, force=True)
 834  
 835          # Unset the status.
 836          status.auto_analysis[self.pipe_bundle].current_model = None
 837  
 838  
 839      def name_pipe(self, prefix):
 840          """Generate a unique name for the data pipe.
 841  
 842          @param prefix:  The prefix of the data pipe name.
 843          @type prefix:   str
 844          """
 845  
 846          # The unique pipe name.
 847          name = "%s - %s" % (prefix, self.pipe_bundle)
 848  
 849          # Return the name.
 850          return name
 851  
 852  
 853      def status_setup(self):
 854          """Initialise the status object."""
 855  
 856          # Initialise the status object for this auto-analysis.
 857          status.init_auto_analysis(self.pipe_bundle, type='dauvergne_protocol')
 858          status.current_analysis = self.pipe_bundle
 859  
 860          # The global diffusion model.
 861          status.auto_analysis[self.pipe_bundle].diff_model = None
 862  
 863          # The round of optimisation, i.e. the global iteration.
 864          status.auto_analysis[self.pipe_bundle].round = None
 865  
 866          # The list of model-free local tm models for optimisation.
 867          status.auto_analysis[self.pipe_bundle].local_tm_models = self.local_tm_models
 868  
 869          # The list of model-free models for optimisation.
 870          status.auto_analysis[self.pipe_bundle].mf_models = self.mf_models
 871  
 872          # The current model-free model.
 873          status.auto_analysis[self.pipe_bundle].current_model = None
 874  
 875          # The maximum number of iterations of the global model.
 876          status.auto_analysis[self.pipe_bundle].max_iter = self.max_iter
 877  
 878          # The convergence of the global model.
 879          status.auto_analysis[self.pipe_bundle].convergence = False
 880  
 881  
 882      def write_results(self):
 883          """Write the final model-free results to file and create Grace plots, PyMOL macros, and Molmol macros."""
 884  
 885          # Save the results file.
 886          dir = self.write_results_dir + 'final'
 887          self.interpreter.results.write(file='results', dir=dir, force=True)
 888  
 889          # The Grace plots.
 890          dir = self.write_results_dir + 'final' + sep + 'grace'
 891          self.interpreter.grace.write(x_data_type='spin', y_data_type='s2', file='s2.agr', dir=dir, force=True)
 892          self.interpreter.grace.write(x_data_type='spin', y_data_type='s2f', file='s2f.agr', dir=dir, force=True)
 893          self.interpreter.grace.write(x_data_type='spin', y_data_type='s2s', file='s2s.agr', dir=dir, force=True)
 894          self.interpreter.grace.write(x_data_type='spin', y_data_type='te', file='te.agr', dir=dir, force=True)
 895          self.interpreter.grace.write(x_data_type='spin', y_data_type='tf', file='tf.agr', dir=dir, force=True)
 896          self.interpreter.grace.write(x_data_type='spin', y_data_type='ts', file='ts.agr', dir=dir, force=True)
 897          self.interpreter.grace.write(x_data_type='spin', y_data_type='rex', file='rex.agr', dir=dir, force=True)
 898          self.interpreter.grace.write(x_data_type='s2', y_data_type='te', file='s2_vs_te.agr', dir=dir, force=True)
 899          self.interpreter.grace.write(x_data_type='s2', y_data_type='rex', file='s2_vs_rex.agr', dir=dir, force=True)
 900          self.interpreter.grace.write(x_data_type='te', y_data_type='rex', file='te_vs_rex.agr', dir=dir, force=True)
 901  
 902          # Write the values to text files.
 903          dir = self.write_results_dir + 'final'
 904          self.interpreter.value.write(param='s2', file='s2.txt', dir=dir, force=True)
 905          self.interpreter.value.write(param='s2f', file='s2f.txt', dir=dir, force=True)
 906          self.interpreter.value.write(param='s2s', file='s2s.txt', dir=dir, force=True)
 907          self.interpreter.value.write(param='te', file='te.txt', dir=dir, force=True)
 908          self.interpreter.value.write(param='tf', file='tf.txt', dir=dir, force=True)
 909          self.interpreter.value.write(param='ts', file='ts.txt', dir=dir, force=True)
 910          self.interpreter.value.write(param='rex', file='rex.txt', dir=dir, force=True)
 911          self.interpreter.value.write(param='local_tm', file='local_tm.txt', dir=dir, force=True)
 912          frqs = frq.get_values()
 913          for i in range(len(frqs)):
 914              comment = "This is the Rex value with units rad.s^-1 scaled to a magnetic field strength of %s MHz." % (frqs[i]/1e6)
 915              self.interpreter.value.write(param='rex', file='rex_%s.txt'%int(frqs[i]/1e6), dir=dir, scaling=(2.0*pi*frqs[i])**2, comment=comment, force=True)
 916  
 917          # Create the PyMOL macros.
 918          dir = self.write_results_dir + 'final' + sep + 'pymol'
 919          self.interpreter.pymol.macro_write(data_type='s2', dir=dir, force=True)
 920          self.interpreter.pymol.macro_write(data_type='s2f', dir=dir, force=True)
 921          self.interpreter.pymol.macro_write(data_type='s2s', dir=dir, force=True)
 922          self.interpreter.pymol.macro_write(data_type='amp_fast', dir=dir, force=True)
 923          self.interpreter.pymol.macro_write(data_type='amp_slow', dir=dir, force=True)
 924          self.interpreter.pymol.macro_write(data_type='te', dir=dir, force=True)
 925          self.interpreter.pymol.macro_write(data_type='tf', dir=dir, force=True)
 926          self.interpreter.pymol.macro_write(data_type='ts', dir=dir, force=True)
 927          self.interpreter.pymol.macro_write(data_type='time_fast', dir=dir, force=True)
 928          self.interpreter.pymol.macro_write(data_type='time_slow', dir=dir, force=True)
 929          self.interpreter.pymol.macro_write(data_type='rex', dir=dir, force=True)
 930  
 931          # Create the Molmol macros.
 932          dir = self.write_results_dir + 'final' + sep + 'molmol'
 933          self.interpreter.molmol.macro_write(data_type='s2', dir=dir, force=True)
 934          self.interpreter.molmol.macro_write(data_type='s2f', dir=dir, force=True)
 935          self.interpreter.molmol.macro_write(data_type='s2s', dir=dir, force=True)
 936          self.interpreter.molmol.macro_write(data_type='amp_fast', dir=dir, force=True)
 937          self.interpreter.molmol.macro_write(data_type='amp_slow', dir=dir, force=True)
 938          self.interpreter.molmol.macro_write(data_type='te', dir=dir, force=True)
 939          self.interpreter.molmol.macro_write(data_type='tf', dir=dir, force=True)
 940          self.interpreter.molmol.macro_write(data_type='ts', dir=dir, force=True)
 941          self.interpreter.molmol.macro_write(data_type='time_fast', dir=dir, force=True)
 942          self.interpreter.molmol.macro_write(data_type='time_slow', dir=dir, force=True)
 943          self.interpreter.molmol.macro_write(data_type='rex', dir=dir, force=True)
 944  
 945          # Create a PDB representation of the diffusion tensor, if a PDB structure is present and the local tm global model has not been selected.
 946          if hasattr(cdp, 'structure') and hasattr(cdp, 'diff_tensor'):
 947              dir = self.write_results_dir + 'final'
 948              self.interpreter.structure.create_diff_tensor_pdb(file="tensor.pdb", dir=dir, force=True)
 949  
 950  
 951  
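The final results written above can be reloaded into a fresh data pipe in a later relax script; a minimal editorial sketch, assuming the protocol was run with results_dir='analysis' (an arbitrary example path):

# Editorial example:  reload the final model-free results in a later relax script.
pipe.create('final results', 'mf')
results.read(file='results', dir='analysis/final')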
 952  class Container:
 953      """Empty container for data storage."""
 954  