pipe_control.error

1 ############################################################################### 2 # # 3 # Copyright (C) 2004-2008,2010,2013-2014 Edward d'Auvergne # 4 # Copyright (C) 2015 Troels E. Linnet # 5 # # 6 # This file is part of the program relax (http://www.nmr-relax.com). # 7 # # 8 # This program is free software: you can redistribute it and/or modify # 9 # it under the terms of the GNU General Public License as published by # 10 # the Free Software Foundation, either version 3 of the License, or # 11 # (at your option) any later version. # 12 # # 13 # This program is distributed in the hope that it will be useful, # 14 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 # GNU General Public License for more details. # 17 # # 18 # You should have received a copy of the GNU General Public License # 19 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 20 # # 21 ############################################################################### 22 23 # Module docstring. 24 """Module for performing Monte Carlo simulations for error analysis.""" 25 26 # Python module imports. 27 from numpy import diag, ndarray, sqrt 28 from random import gauss 29 30 # relax module imports. 31 from lib import statistics 32 from lib.errors import RelaxError 33 from pipe_control.pipes import check_pipe 34 from specific_analyses.api import return_api 35 36

37 -def covariance_matrix(epsrel=0.0, verbosity=2):

38 """Estimate model parameter errors via the covariance matrix technique. 39 40 Note that the covariance matrix error estimate is always of lower quality than Monte Carlo simulations. 41 42 43 @param epsrel: Any columns of R which satisfy |R_{kk}| <= epsrel |R_{11}| are considered linearly-dependent and are excluded from the covariance matrix, where the corresponding rows and columns of the covariance matrix are set to zero. 44 @type epsrel: float 45 @keyword verbosity: The amount of information to print. The higher the value, the greater the verbosity. 46 @type verbosity: int 47 """ 48 49 # Test if the current data pipe exists. 50 check_pipe() 51 52 # The specific analysis API object. 53 api = return_api() 54 55 # Loop over the models. 56 for model_info in api.model_loop(): 57 # Get the Jacobian and weighting matrix. 58 jacobian, weights = api.covariance_matrix(model_info=model_info, verbosity=verbosity) 59 60 # Calculate the covariance matrix. 61 pcov = statistics.multifit_covar(J=jacobian, weights=weights) 62 63 # To compute one standard deviation errors on the parameters, take the square root of the diagonal covariance. 64 sd = sqrt(diag(pcov)) 65 66 # Loop over the parameters. 67 index = 0 68 for name in api.get_param_names(model_info): 69 # Set the parameter error. 70 api.set_error(index, sd[index], model_info=model_info) 71 72 # Increment the parameter index. 73 index = index + 1

74 75

76 -def monte_carlo_create_data(method=None, distribution=None, fixed_error=None):

77 """Function for creating simulation data. 78 79 @keyword method: The type of Monte Carlo simulation to perform. 80 @type method: str 81 @keyword distribution: Which gauss distribution to draw errors from. Can be: 'measured', 'red_chi2', 'fixed'. 82 @type distribution: str 83 @keyword fixed_error: If distribution is set to 'fixed', use this value as the standard deviation for the gauss distribution. 84 @type fixed_error: float 85 """ 86 87 # Test if the current data pipe exists. 88 check_pipe() 89 90 # Test if simulations have been set up. 91 if not hasattr(cdp, 'sim_state'): 92 raise RelaxError("Monte Carlo simulations have not been set up.") 93 94 # Test the method argument. 95 valid_methods = ['back_calc', 'direct'] 96 if method not in valid_methods: 97 raise RelaxError("The simulation creation method " + repr(method) + " is not valid.") 98 99 # Test the distribution argument. 100 valid_distributions = ['measured', 'red_chi2', 'fixed'] 101 if distribution not in valid_distributions: 102 raise RelaxError("The simulation error distribution method " + repr(distribution) + " is not valid. Try one of the following: " + repr(valid_distributions)) 103 104 # Test the fixed_error argument. 105 if fixed_error != None and distribution != 'fixed': 106 raise RelaxError("The argument 'fixed_error' is set to a value, but the argument 'distribution' is not set to 'fixed'.") 107 108 # Test the distribution argument, equal to 'fixed', but no error is set. 109 if distribution == 'fixed' and fixed_error == None: 110 raise RelaxError("The argument 'distribution' is set to 'fixed', but you have not provided a value to the argument 'fixed_error'.") 111 112 # The specific analysis API object. 113 api = return_api() 114 115 # Loop over the models. 116 for data_index in api.base_data_loop(): 117 # Create the Monte Carlo data. 118 if method == 'back_calc': 119 data = api.create_mc_data(data_index) 120 121 # Get the original data. 122 else: 123 data = api.return_data(data_index) 124 125 # No data, so skip. 126 if data == None: 127 continue 128 129 # Possible get the errors from reduced chi2 distribution. 130 if distribution == 'red_chi2': 131 error_red_chi2 = api.return_error_red_chi2(data_index) 132 133 # Get the errors. 134 error = api.return_error(data_index) 135 136 # List type data. 137 if isinstance(data, list) or isinstance(data, ndarray): 138 # Loop over the Monte Carlo simulations. 139 random = [] 140 for j in range(cdp.sim_number): 141 # Randomise the data. 142 random.append([]) 143 for k in range(len(data)): 144 # No data or errors. 145 if data[k] == None or error[k] == None: 146 random[j].append(None) 147 continue 148 149 # Gaussian randomisation. 150 if distribution == 'fixed': 151 random[j].append(gauss(data[k], float(fixed_error))) 152 153 else: 154 random[j].append(gauss(data[k], error[k])) 155 156 # Dictionary type data. 157 if isinstance(data, dict): 158 # Loop over the Monte Carlo simulations. 159 random = [] 160 for j in range(cdp.sim_number): 161 # Randomise the data. 162 random.append({}) 163 for id in data: 164 # No data or errors. 165 if data[id] == None or error[id] == None: 166 random[j][id] = None 167 continue 168 169 # If errors are drawn from the reduced chi2 distribution. 170 if distribution == 'red_chi2': 171 # Gaussian randomisation, centered at 0, with width of reduced chi2 distribution. 172 g_error = gauss(0.0, error_red_chi2[id]) 173 174 # We need to scale the gauss error, before adding to datapoint. 175 new_point = data[id] + g_error * error[id] 176 177 # If errors are drawn from fixed distribution. 178 elif distribution == 'fixed': 179 # Gaussian randomisation, centered at data point, with width of fixed error. 180 new_point = gauss(data[id], float(fixed_error)) 181 182 # If errors are drawn from measured values. 183 else: 184 # Gaussian randomisation, centered at data point, with width of measured error. 185 new_point = gauss(data[id], error[id]) 186 187 # Assign datapoint the new value. 188 random[j][id] = new_point 189 190 # Pack the simulation data. 191 api.sim_pack_data(data_index, random)

192 193

194 -def monte_carlo_error_analysis():

195 """Function for calculating errors from the Monte Carlo simulations. 196 197 The standard deviation formula used to calculate the errors is the square root of the 198 bias-corrected variance, given by the formula:: 199 200 __________________________ 201 / 1 202 sd = / ----- * sum({Xi - Xav}^2) 203 \/ n - 1 204 205 where 206 - n is the total number of simulations. 207 - Xi is the parameter value for simulation i. 208 - Xav is the mean parameter value for all simulations. 209 """ 210 211 # Test if the current data pipe exists. 212 check_pipe() 213 214 # Test if simulations have been set up. 215 if not hasattr(cdp, 'sim_state'): 216 raise RelaxError("Monte Carlo simulations have not been set up.") 217 218 # The specific analysis API object. 219 api = return_api() 220 221 # Loop over the models. 222 for model_info in api.model_loop(): 223 # Get the selected simulation array. 224 select_sim = api.sim_return_selected(model_info=model_info) 225 226 # Loop over the parameters. 227 index = 0 228 while True: 229 # Get the array of simulation parameters for the index. 230 param_array = api.sim_return_param(index, model_info=model_info) 231 232 # Break (no more parameters). 233 if param_array == None: 234 break 235 236 # Handle dictionary type parameters. 237 if isinstance(param_array[0], dict): 238 # Initialise the standard deviation structure as a dictionary. 239 sd = {} 240 241 # Loop over each key. 242 for key in param_array[0]: 243 # Create a list of the values for the current key. 244 data = [] 245 for i in range(len(param_array)): 246 data.append(param_array[i][key]) 247 248 # Calculate and store the SD. 249 sd[key] = statistics.std(values=data, skip=select_sim) 250 251 # Handle list type parameters. 252 elif isinstance(param_array[0], list): 253 # Initialise the standard deviation structure as a list. 254 sd = [] 255 256 # Loop over each element. 257 for j in range(len(param_array[0])): 258 # Create a list of the values for the current key. 259 data = [] 260 for i in range(len(param_array)): 261 data.append(param_array[i][j]) 262 263 # Calculate and store the SD. 264 sd.append(statistics.std(values=data, skip=select_sim)) 265 266 # SD of simulation parameters with values (ie not None). 267 elif param_array[0] != None: 268 sd = statistics.std(values=param_array, skip=select_sim) 269 270 # Simulation parameters with the value None. 271 else: 272 sd = None 273 274 # Set the parameter error. 275 api.set_error(index, sd, model_info=model_info) 276 277 # Increment the parameter index. 278 index = index + 1 279 280 # Turn off the Monte Carlo simulation state, as the MC analysis is now finished. 281 cdp.sim_state = False

282 283

284 -def monte_carlo_initial_values():

285 """Set the initial simulation parameter values.""" 286 287 # Test if the current data pipe exists. 288 check_pipe() 289 290 # Test if simulations have been set up. 291 if not hasattr(cdp, 'sim_state'): 292 raise RelaxError("Monte Carlo simulations have not been set up.") 293 294 # The specific analysis API object. 295 api = return_api() 296 297 # Set the initial parameter values. 298 api.sim_init_values()

299 300

301 -def monte_carlo_off():

302 """Turn simulations off.""" 303 304 # Test if the current data pipe exists. 305 check_pipe() 306 307 # Test if simulations have been set up. 308 if not hasattr(cdp, 'sim_state'): 309 raise RelaxError("Monte Carlo simulations have not been set up.") 310 311 # Turn simulations off. 312 cdp.sim_state = False

313 314

315 -def monte_carlo_on():

316 """Turn simulations on.""" 317 318 # Test if the current data pipe exists. 319 check_pipe() 320 321 # Test if simulations have been set up. 322 if not hasattr(cdp, 'sim_state'): 323 raise RelaxError("Monte Carlo simulations have not been set up.") 324 325 # Turn simulations on. 326 cdp.sim_state = True

327 328

329 -def monte_carlo_select_all_sims(number=None, all_select_sim=None):

330 """Set the select flag of all simulations of all models to one. 331 332 @keyword number: The number of Monte Carlo simulations to set up. 333 @type number: int 334 @keyword all_select_sim: The selection status of the Monte Carlo simulations. The first 335 dimension of this matrix corresponds to the simulation and the 336 second corresponds to the models. 337 @type all_select_sim: list of lists of bool 338 """ 339 340 # The specific analysis API object. 341 api = return_api() 342 343 # Create the selected simulation array with all simulations selected. 344 if all_select_sim == None: 345 select_sim = [True]*number 346 347 # Loop over the models. 348 i = 0 349 for model_info in api.model_loop(): 350 # Skip function. 351 if api.skip_function(model_info=model_info): 352 continue 353 354 # Set up the selected simulation array. 355 if all_select_sim != None: 356 select_sim = all_select_sim[i] 357 358 # Set the selected simulation array. 359 api.set_selected_sim(select_sim, model_info=model_info) 360 361 # Model index. 362 i += 1

363 364

365 -def monte_carlo_setup(number=None, all_select_sim=None):

366 """Store the Monte Carlo simulation number. 367 368 @keyword number: The number of Monte Carlo simulations to set up. 369 @type number: int 370 @keyword all_select_sim: The selection status of the Monte Carlo simulations. The first dimension of this matrix corresponds to the simulation and the second corresponds to the instance. 371 @type all_select_sim: list of lists of bool 372 """ 373 374 # Test if the current data pipe exists. 375 check_pipe() 376 377 # Check the value. 378 if number < 3: 379 raise RelaxError("A minimum of 3 Monte Carlo simulations is required.") 380 381 # Create a number of MC sim data structures. 382 cdp.sim_number = number 383 cdp.sim_state = True 384 385 # Select all simulations. 386 monte_carlo_select_all_sims(number=number, all_select_sim=all_select_sim)

387

Source Code for Module pipe_control.error_analysis