pipe_control.error

1 ############################################################################### 2 # # 3 # Copyright (C) 2004-2015 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax (http://www.nmr-relax.com). # 6 # # 7 # This program is free software: you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation, either version 3 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # This program is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 19 # # 20 ############################################################################### 21 22 # Module docstring. 23 """Module for performing Monte Carlo simulations for error analysis.""" 24 25 # Python module imports. 26 from numpy import diag, ndarray, sqrt 27 from random import gauss 28 29 # relax module imports. 30 from lib import statistics 31 from lib.errors import RelaxError 32 from pipe_control.pipes import check_pipe 33 from specific_analyses.api import return_api 34 35

36 -def covariance_matrix(epsrel=0.0, verbosity=2):

37 """Estimate model parameter errors via the covariance matrix technique. 38 39 Note that the covariance matrix error estimate is always of lower quality than Monte Carlo simulations. 40 41 42 @param epsrel: Any columns of R which satisfy |R_{kk}| <= epsrel |R_{11}| are considered linearly-dependent and are excluded from the covariance matrix, where the corresponding rows and columns of the covariance matrix are set to zero. 43 @type epsrel: float 44 @keyword verbosity: The amount of information to print. The higher the value, the greater the verbosity. 45 @type verbosity: int 46 """ 47 48 # Test if the current data pipe exists. 49 check_pipe() 50 51 # The specific analysis API object. 52 api = return_api() 53 54 # Loop over the models. 55 for model_info in api.model_loop(): 56 # Get the Jacobian and weighting matrix. 57 jacobian, weights = api.covariance_matrix(model_info=model_info, verbosity=verbosity) 58 59 # Calculate the covariance matrix. 60 pcov = statistics.multifit_covar(J=jacobian, weights=weights) 61 62 # To compute one standard deviation errors on the parameters, take the square root of the diagonal covariance. 63 sd = sqrt(diag(pcov)) 64 65 # Loop over the parameters. 66 index = 0 67 for name in api.get_param_names(model_info): 68 # Set the parameter error. 69 api.set_error(index, sd[index], model_info=model_info) 70 71 # Increment the parameter index. 72 index = index + 1

73 74

75 -def monte_carlo_create_data(method=None, distribution=None, fixed_error=None):

76 """Function for creating simulation data. 77 78 @keyword method: The type of Monte Carlo simulation to perform. 79 @type method: str 80 @keyword distribution: Which gauss distribution to draw errors from. Can be: 'measured', 'red_chi2', 'fixed'. 81 @type distribution: str 82 @keyword fixed_error: If distribution is set to 'fixed', use this value as the standard deviation for the gauss distribution. 83 @type fixed_error: float 84 """ 85 86 # Test if the current data pipe exists. 87 check_pipe() 88 89 # Test if simulations have been set up. 90 if not hasattr(cdp, 'sim_state'): 91 raise RelaxError("Monte Carlo simulations have not been set up.") 92 93 # Test the method argument. 94 valid_methods = ['back_calc', 'direct'] 95 if method not in valid_methods: 96 raise RelaxError("The simulation creation method " + repr(method) + " is not valid.") 97 98 # Test the distribution argument. 99 valid_distributions = ['measured', 'red_chi2', 'fixed'] 100 if distribution not in valid_distributions: 101 raise RelaxError("The simulation error distribution method " + repr(distribution) + " is not valid. Try one of the following: " + repr(valid_distributions)) 102 103 # Test the fixed_error argument. 104 if fixed_error != None and distribution != 'fixed': 105 raise RelaxError("The argument 'fixed_error' is set to a value, but the argument 'distribution' is not set to 'fixed'.") 106 107 # Test the distribution argument, equal to 'fixed', but no error is set. 108 if distribution == 'fixed' and fixed_error == None: 109 raise RelaxError("The argument 'distribution' is set to 'fixed', but you have not provided a value to the argument 'fixed_error'.") 110 111 # The specific analysis API object. 112 api = return_api() 113 114 # Loop over the models. 115 for data_index in api.base_data_loop(): 116 # Create the Monte Carlo data. 117 if method == 'back_calc': 118 data = api.create_mc_data(data_index) 119 120 # Get the original data. 121 else: 122 data = api.return_data(data_index) 123 124 # No data, so skip. 125 if data == None: 126 continue 127 128 # Possible get the errors from reduced chi2 distribution. 129 if distribution == 'red_chi2': 130 error_red_chi2 = api.return_error_red_chi2(data_index) 131 132 # Get the errors. 133 error = api.return_error(data_index) 134 135 # List type data. 136 if isinstance(data, list) or isinstance(data, ndarray): 137 # Loop over the Monte Carlo simulations. 138 random = [] 139 for j in range(cdp.sim_number): 140 # Randomise the data. 141 random.append([]) 142 for k in range(len(data)): 143 # No data or errors. 144 if data[k] == None or error[k] == None: 145 random[j].append(None) 146 continue 147 148 # Gaussian randomisation. 149 if distribution == 'fixed': 150 random[j].append(gauss(data[k], float(fixed_error))) 151 152 else: 153 random[j].append(gauss(data[k], error[k])) 154 155 # Dictionary type data. 156 if isinstance(data, dict): 157 # Loop over the Monte Carlo simulations. 158 random = [] 159 for j in range(cdp.sim_number): 160 # Randomise the data. 161 random.append({}) 162 for id in data: 163 # No data or errors. 164 if data[id] == None or error[id] == None: 165 random[j][id] = None 166 continue 167 168 # If errors are drawn from the reduced chi2 distribution. 169 if distribution == 'red_chi2': 170 # Gaussian randomisation, centered at 0, with width of reduced chi2 distribution. 171 g_error = gauss(0.0, error_red_chi2[id]) 172 173 # We need to scale the gauss error, before adding to datapoint. 174 new_point = data[id] + g_error * error[id] 175 176 # If errors are drawn from fixed distribution. 177 elif distribution == 'fixed': 178 # Gaussian randomisation, centered at data point, with width of fixed error. 179 new_point = gauss(data[id], float(fixed_error)) 180 181 # If errors are drawn from measured values. 182 else: 183 # Gaussian randomisation, centered at data point, with width of measured error. 184 new_point = gauss(data[id], error[id]) 185 186 # Assign datapoint the new value. 187 random[j][id] = new_point 188 189 # Pack the simulation data. 190 api.sim_pack_data(data_index, random)

191 192

193 -def monte_carlo_error_analysis():

194 """Function for calculating errors from the Monte Carlo simulations. 195 196 The standard deviation formula used to calculate the errors is the square root of the 197 bias-corrected variance, given by the formula:: 198 199 __________________________ 200 / 1 201 sd = / ----- * sum({Xi - Xav}^2) 202 \/ n - 1 203 204 where 205 - n is the total number of simulations. 206 - Xi is the parameter value for simulation i. 207 - Xav is the mean parameter value for all simulations. 208 """ 209 210 # Test if the current data pipe exists. 211 check_pipe() 212 213 # Test if simulations have been set up. 214 if not hasattr(cdp, 'sim_state'): 215 raise RelaxError("Monte Carlo simulations have not been set up.") 216 217 # The specific analysis API object. 218 api = return_api() 219 220 # Loop over the models. 221 for model_info in api.model_loop(): 222 # Get the selected simulation array. 223 select_sim = api.sim_return_selected(model_info=model_info) 224 225 # Loop over the parameters. 226 index = 0 227 while True: 228 # Get the array of simulation parameters for the index. 229 param_array = api.sim_return_param(index, model_info=model_info) 230 231 # Break (no more parameters). 232 if param_array == None: 233 break 234 235 # Handle dictionary type parameters. 236 if isinstance(param_array[0], dict): 237 # Initialise the standard deviation structure as a dictionary. 238 sd = {} 239 240 # Loop over each key. 241 for key in param_array[0]: 242 # Create a list of the values for the current key. 243 data = [] 244 for i in range(len(param_array)): 245 data.append(param_array[i][key]) 246 247 # Calculate and store the SD. 248 sd[key] = statistics.std(values=data, skip=select_sim) 249 250 # Handle list type parameters. 251 elif isinstance(param_array[0], list): 252 # Initialise the standard deviation structure as a list. 253 sd = [] 254 255 # Loop over each element. 256 for j in range(len(param_array[0])): 257 # Create a list of the values for the current key. 258 data = [] 259 for i in range(len(param_array)): 260 data.append(param_array[i][j]) 261 262 # Calculate and store the SD. 263 sd.append(statistics.std(values=data, skip=select_sim)) 264 265 # SD of simulation parameters with values (ie not None). 266 elif param_array[0] != None: 267 sd = statistics.std(values=param_array, skip=select_sim) 268 269 # Simulation parameters with the value None. 270 else: 271 sd = None 272 273 # Set the parameter error. 274 api.set_error(index, sd, model_info=model_info) 275 276 # Increment the parameter index. 277 index = index + 1 278 279 # Turn off the Monte Carlo simulation state, as the MC analysis is now finished. 280 cdp.sim_state = False

281 282

283 -def monte_carlo_initial_values():

284 """Set the initial simulation parameter values.""" 285 286 # Test if the current data pipe exists. 287 check_pipe() 288 289 # Test if simulations have been set up. 290 if not hasattr(cdp, 'sim_state'): 291 raise RelaxError("Monte Carlo simulations have not been set up.") 292 293 # The specific analysis API object. 294 api = return_api() 295 296 # Set the initial parameter values. 297 api.sim_init_values()

298 299

300 -def monte_carlo_off():

301 """Turn simulations off.""" 302 303 # Test if the current data pipe exists. 304 check_pipe() 305 306 # Test if simulations have been set up. 307 if not hasattr(cdp, 'sim_state'): 308 raise RelaxError("Monte Carlo simulations have not been set up.") 309 310 # Turn simulations off. 311 cdp.sim_state = False

312 313

314 -def monte_carlo_on():

315 """Turn simulations on.""" 316 317 # Test if the current data pipe exists. 318 check_pipe() 319 320 # Test if simulations have been set up. 321 if not hasattr(cdp, 'sim_state'): 322 raise RelaxError("Monte Carlo simulations have not been set up.") 323 324 # Turn simulations on. 325 cdp.sim_state = True

326 327

328 -def monte_carlo_select_all_sims(number=None, all_select_sim=None):

329 """Set the select flag of all simulations of all models to one. 330 331 @keyword number: The number of Monte Carlo simulations to set up. 332 @type number: int 333 @keyword all_select_sim: The selection status of the Monte Carlo simulations. The first 334 dimension of this matrix corresponds to the simulation and the 335 second corresponds to the models. 336 @type all_select_sim: list of lists of bool 337 """ 338 339 # The specific analysis API object. 340 api = return_api() 341 342 # Create the selected simulation array with all simulations selected. 343 if all_select_sim == None: 344 select_sim = [True]*number 345 346 # Loop over the models. 347 i = 0 348 for model_info in api.model_loop(): 349 # Skip function. 350 if api.skip_function(model_info=model_info): 351 continue 352 353 # Set up the selected simulation array. 354 if all_select_sim != None: 355 select_sim = all_select_sim[i] 356 357 # Set the selected simulation array. 358 api.set_selected_sim(select_sim, model_info=model_info) 359 360 # Model index. 361 i += 1

362 363

364 -def monte_carlo_setup(number=None, all_select_sim=None):

365 """Store the Monte Carlo simulation number. 366 367 @keyword number: The number of Monte Carlo simulations to set up. 368 @type number: int 369 @keyword all_select_sim: The selection status of the Monte Carlo simulations. The first dimension of this matrix corresponds to the simulation and the second corresponds to the instance. 370 @type all_select_sim: list of lists of bool 371 """ 372 373 # Test if the current data pipe exists. 374 check_pipe() 375 376 # Check the value. 377 if number < 3: 378 raise RelaxError("A minimum of 3 Monte Carlo simulations is required.") 379 380 # Create a number of MC sim data structures. 381 cdp.sim_number = number 382 cdp.sim_state = True 383 384 # Select all simulations. 385 monte_carlo_select_all_sims(number=number, all_select_sim=all_select_sim)

386

Source Code for Module pipe_control.error_analysis