generic_fns.monte

1 ############################################################################### 2 # # 3 # Copyright (C) 2004-2005, 2007-2009 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax (http://www.nmr-relax.com). # 6 # # 7 # This program is free software: you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation, either version 3 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # This program is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 19 # # 20 ############################################################################### 21 22 # Module docstring. 23 """Module for performing Monte Carlo simulations for error analysis.""" 24 25 # Python module imports. 26 from copy import deepcopy 27 from math import sqrt 28 from numpy import ndarray 29 from random import gauss 30 31 # relax module imports. 32 from generic_fns.mol_res_spin import exists_mol_res_spin_data 33 from generic_fns import pipes 34 from relax_errors import RelaxError, RelaxNoSequenceError 35 from specific_fns.setup import get_specific_fn 36 37

38 -def create_data(method=None):

39 """Function for creating simulation data. 40 41 @keyword method: The type of Monte Carlo simulation to perform. 42 @type method: str 43 """ 44 45 # Test if the current data pipe exists. 46 pipes.test() 47 48 # Test if simulations have been set up. 49 if not hasattr(cdp, 'sim_state'): 50 raise RelaxError("Monte Carlo simulations have not been set up.") 51 52 # Test the method argument. 53 valid_methods = ['back_calc', 'direct'] 54 if method not in valid_methods: 55 raise RelaxError("The simulation creation method " + repr(method) + " is not valid.") 56 57 # Specific Monte Carlo data creation, data return, and error return function setup. 58 base_data_loop = get_specific_fn('base_data_loop', cdp.pipe_type) 59 if method == 'back_calc': 60 create_mc_data = get_specific_fn('create_mc_data', cdp.pipe_type) 61 return_data = get_specific_fn('return_data', cdp.pipe_type) 62 return_error = get_specific_fn('return_error', cdp.pipe_type) 63 pack_sim_data = get_specific_fn('pack_sim_data', cdp.pipe_type) 64 65 # Loop over the models. 66 for data_index in base_data_loop(): 67 # Create the Monte Carlo data. 68 if method == 'back_calc': 69 data = create_mc_data(data_index) 70 71 # Get the original data. 72 else: 73 data = return_data(data_index) 74 75 # Get the errors. 76 error = return_error(data_index) 77 78 # List type data. 79 if isinstance(data, list) or isinstance(data, ndarray): 80 # Loop over the Monte Carlo simulations. 81 random = [] 82 for j in range(cdp.sim_number): 83 # Randomise the data. 84 random.append([]) 85 for k in range(len(data)): 86 # No data or errors. 87 if data[k] == None or error[k] == None: 88 random[j].append(None) 89 continue 90 91 # Gaussian randomisation. 92 random[j].append(gauss(data[k], error[k])) 93 94 # Dictionary type data. 95 if isinstance(data, dict): 96 # Loop over the Monte Carlo simulations. 97 random = [] 98 for j in range(cdp.sim_number): 99 # Randomise the data. 100 random.append({}) 101 for id in data.keys(): 102 # No data or errors. 103 if data[id] == None or error[id] == None: 104 random[j][id] = None 105 continue 106 107 # Gaussian randomisation. 108 random[j][id] = gauss(data[id], error[id]) 109 110 # Pack the simulation data. 111 pack_sim_data(data_index, random)

112 113

114 -def error_analysis(prune=0.0):

115 """Function for calculating errors from the Monte Carlo simulations. 116 117 The standard deviation formula used to calculate the errors is the square root of the 118 bias-corrected variance, given by the formula:: 119 120 __________________________ 121 / 1 122 sd = / ----- * sum({Xi - Xav}^2) 123 \/ n - 1 124 125 where 126 - n is the total number of simulations. 127 - Xi is the parameter value for simulation i. 128 - Xav is the mean parameter value for all simulations. 129 130 131 @keyword prune: The amount to prune the upper and lower tails the distribution. If set to 132 0.0, no simulations will be pruned. If set to 1.0, all simulations will be 133 pruned. This argument should be set 0.0 to avoid meaningless statistics. 134 @type prune: float 135 """ 136 137 # Test if the current data pipe exists. 138 pipes.test() 139 140 # Test if simulations have been set up. 141 if not hasattr(cdp, 'sim_state'): 142 raise RelaxError("Monte Carlo simulations have not been set up.") 143 144 # Model loop, return simulation chi2 array, return selected simulation array, return simulation parameter array, and set error functions. 145 model_loop = get_specific_fn('model_loop', cdp.pipe_type) 146 if prune > 0.0: 147 return_sim_chi2 = get_specific_fn('return_sim_chi2', cdp.pipe_type) 148 return_selected_sim = get_specific_fn('return_selected_sim', cdp.pipe_type) 149 return_sim_param = get_specific_fn('return_sim_param', cdp.pipe_type) 150 set_error = get_specific_fn('set_error', cdp.pipe_type) 151 152 # Loop over the models. 153 for model_info in model_loop(): 154 # Get the selected simulation array. 155 select_sim = return_selected_sim(model_info) 156 157 # Initialise an array of indices to prune (an empty array means no pruning). 158 indices_to_skip = [] 159 160 # Pruning. 161 if prune > 0.0: 162 # Get the array of simulation chi-squared values. 163 chi2_array = return_sim_chi2(model_info) 164 165 # The total number of simulations. 166 n = len(chi2_array) 167 168 # Create a sorted array of chi-squared values. 169 chi2_sorted = sorted(deepcopy(chi2_array)) 170 171 # Number of indices to remove from one side of the chi2 distribution. 172 num = int(float(n) * 0.5 * prune) 173 174 # Remove the lower tail. 175 for i in range(num): 176 indices_to_skip.append(chi2_array.index(chi2_sorted[i])) 177 178 # Remove the upper tail. 179 for i in range(n-num, n): 180 indices_to_skip.append(chi2_array.index(chi2_sorted[i])) 181 182 # Loop over the parameters. 183 index = 0 184 while True: 185 # Get the array of simulation parameters for the index. 186 param_array = return_sim_param(model_info, index) 187 188 # Break (no more parameters). 189 if param_array == None: 190 break 191 192 # Simulation parameters with values (ie not None). 193 if param_array[0] != None: 194 # The total number of simulations. 195 n = 0 196 for i in range(len(param_array)): 197 # Skip deselected simulations. 198 if not select_sim[i]: 199 continue 200 201 # Prune. 202 if i in indices_to_skip: 203 continue 204 205 # Increment n. 206 n = n + 1 207 208 # Calculate the sum of the parameter value for all simulations. 209 Xsum = 0.0 210 for i in range(len(param_array)): 211 # Skip deselected simulations. 212 if not select_sim[i]: 213 continue 214 215 # Prune. 216 if i in indices_to_skip: 217 continue 218 219 # Sum. 220 Xsum = Xsum + param_array[i] 221 222 # Calculate the mean parameter value for all simulations. 223 if n == 0: 224 Xav = 0.0 225 else: 226 Xav = Xsum / float(n) 227 228 # Calculate the sum part of the standard deviation. 229 sd = 0.0 230 for i in range(len(param_array)): 231 # Skip deselected simulations. 232 if not select_sim[i]: 233 continue 234 235 # Prune. 236 if i in indices_to_skip: 237 continue 238 239 # Sum. 240 sd = sd + (param_array[i] - Xav)**2 241 242 # Calculate the standard deviation. 243 if n <= 1: 244 sd = 0.0 245 else: 246 sd = sqrt(sd / (float(n) - 1.0)) 247 248 # Simulation parameters with the value None. 249 else: 250 sd = None 251 252 # Set the parameter error. 253 set_error(model_info, index, sd) 254 255 # Increment the parameter index. 256 index = index + 1 257 258 # Turn off the Monte Carlo simulation state, as the MC analysis is now finished. 259 cdp.sim_state = False

260 261

262 -def initial_values():

263 """Set the initial simulation parameter values.""" 264 265 # Test if the current data pipe exists. 266 pipes.test() 267 268 # Test if simulations have been set up. 269 if not hasattr(cdp, 'sim_state'): 270 raise RelaxError("Monte Carlo simulations have not been set up.") 271 272 # Specific initial Monte Carlo parameter value function setup. 273 init_sim_values = get_specific_fn('init_sim_values', cdp.pipe_type) 274 275 # Set the initial parameter values. 276 init_sim_values()

277 278

279 -def off():

280 """Turn simulations off.""" 281 282 # Test if the current data pipe exists. 283 pipes.test() 284 285 # Test if simulations have been set up. 286 if not hasattr(cdp, 'sim_state'): 287 raise RelaxError("Monte Carlo simulations have not been set up.") 288 289 # Turn simulations off. 290 cdp.sim_state = False

291 292

293 -def on():

294 """Turn simulations on.""" 295 296 # Test if the current data pipe exists. 297 pipes.test() 298 299 # Test if simulations have been set up. 300 if not hasattr(cdp, 'sim_state'): 301 raise RelaxError("Monte Carlo simulations have not been set up.") 302 303 # Turn simulations on. 304 cdp.sim_state = True

305 306

307 -def select_all_sims(number=None, all_select_sim=None):

308 """Set the select flag of all simulations of all models to one. 309 310 @keyword number: The number of Monte Carlo simulations to set up. 311 @type number: int 312 @keyword all_select_sim: The selection status of the Monte Carlo simulations. The first 313 dimension of this matrix corresponds to the simulation and the 314 second corresponds to the models. 315 @type all_select_sim: list of lists of bool 316 """ 317 318 # Model loop and set the selected simulation array functions. 319 model_loop = get_specific_fn('model_loop', cdp.pipe_type) 320 set_selected_sim = get_specific_fn('set_selected_sim', cdp.pipe_type) 321 skip_function = get_specific_fn('skip_function', cdp.pipe_type) 322 323 # Create the selected simulation array with all simulations selected. 324 if all_select_sim == None: 325 select_sim = [True]*number 326 327 # Loop over the models. 328 i = 0 329 for model_info in model_loop(): 330 # Skip function. 331 if skip_function(model_info): 332 continue 333 334 # Set up the selected simulation array. 335 if all_select_sim != None: 336 select_sim = all_select_sim[i] 337 338 # Set the selected simulation array. 339 set_selected_sim(model_info, select_sim) 340 341 # Model index. 342 i += 1

343 344

345 -def setup(number=None, all_select_sim=None):

346 """Function for setting up Monte Carlo simulations. 347 348 @keyword number: The number of Monte Carlo simulations to set up. 349 @type number: int 350 @keyword all_select_sim: The selection status of the Monte Carlo simulations. The first 351 dimension of this matrix corresponds to the simulation and the 352 second corresponds to the instance. 353 @type all_select_sim: list of lists of bool 354 """ 355 356 # Test if the current data pipe exists. 357 pipes.test() 358 359 # Create a number of MC sim data structures. 360 cdp.sim_number = number 361 cdp.sim_state = True 362 363 # Select all simulations. 364 select_all_sims(number=number, all_select_sim=all_select_sim)

365

Source Code for Module generic_fns.monte_carlo