Package generic_fns :: Module model_selection
[hide private]
[frames] | no frames]

Source Code for Module generic_fns.model_selection

  1  ############################################################################### 
  2  #                                                                             # 
  3  # Copyright (C) 2003-2012 Edward d'Auvergne                                   # 
  4  #                                                                             # 
  5  # This file is part of the program relax.                                     # 
  6  #                                                                             # 
  7  # relax is free software; you can redistribute it and/or modify               # 
  8  # it under the terms of the GNU General Public License as published by        # 
  9  # the Free Software Foundation; either version 2 of the License, or           # 
 10  # (at your option) any later version.                                         # 
 11  #                                                                             # 
 12  # relax is distributed in the hope that it will be useful,                    # 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 15  # GNU General Public License for more details.                                # 
 16  #                                                                             # 
 17  # You should have received a copy of the GNU General Public License           # 
 18  # along with relax; if not, write to the Free Software                        # 
 19  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA   # 
 20  #                                                                             # 
 21  ############################################################################### 
 22   
 23  # Module docstring. 
 24  """Module for selecting the best model.""" 
 25   
 26  # Python module imports. 
 27  from math import log 
 28   
 29  # relax module imports. 
 30  import generic_fns.pipes 
 31  from generic_fns.pipes import get_type, has_pipe, pipe_names, switch 
 32  from relax_errors import RelaxError, RelaxPipeError 
 33  from specific_fns.setup import get_specific_fn 
 34   
 35   
36 -def aic(chi2, k, n):
37 """Akaike's Information Criteria (AIC). 38 39 The formula is:: 40 41 AIC = chi2 + 2k 42 43 44 @param chi2: The minimised chi-squared value. 45 @type chi2: float 46 @param k: The number of parameters in the model. 47 @type k: int 48 @param n: The dimension of the relaxation data set. 49 @type n: int 50 @return: The AIC value. 51 @rtype: float 52 """ 53 54 return chi2 + 2.0*k
55 56
57 -def aicc(chi2, k, n):
58 """Small sample size corrected AIC. 59 60 The formula is:: 61 62 2k(k + 1) 63 AICc = chi2 + 2k + --------- 64 n - k - 1 65 66 67 @param chi2: The minimised chi-squared value. 68 @type chi2: float 69 @param k: The number of parameters in the model. 70 @type k: int 71 @param n: The dimension of the relaxation data set. 72 @type n: int 73 @return: The AIC value. 74 @rtype: float 75 """ 76 77 if n > (k+1): 78 return chi2 + 2.0*k + 2.0*k*(k + 1.0) / (n - k - 1.0) 79 elif n == (k+1): 80 raise RelaxError("The size of the dataset, n=%s, is too small for this model of size k=%s. This situation causes a fatal division by zero as:\n AICc = chi2 + 2k + 2k*(k + 1) / (n - k - 1).\n\nPlease use AIC model selection instead." % (n, k)) 81 elif n < (k+1): 82 raise RelaxError("The size of the dataset, n=%s, is too small for this model of size k=%s. This situation produces a negative, and hence nonsense, AICc score as:\n AICc = chi2 + 2k + 2k*(k + 1) / (n - k - 1).\n\nPlease use AIC model selection instead." % (n, k))
83 84
85 -def bic(chi2, k, n):
86 """Bayesian or Schwarz Information Criteria. 87 88 The formula is:: 89 90 BIC = chi2 + k ln n 91 92 93 @param chi2: The minimised chi-squared value. 94 @type chi2: float 95 @param k: The number of parameters in the model. 96 @type k: int 97 @param n: The dimension of the relaxation data set. 98 @type n: int 99 @return: The AIC value. 100 @rtype: float 101 """ 102 103 return chi2 + k * log(n)
104 105
106 -def select(method=None, modsel_pipe=None, bundle=None, pipes=None):
107 """Model selection function. 108 109 @keyword method: The model selection method. This can currently be one of: 110 - 'AIC', Akaike's Information Criteria. 111 - 'AICc', Small sample size corrected AIC. 112 - 'BIC', Bayesian or Schwarz Information Criteria. 113 - 'CV', Single-item-out cross-validation. 114 None of the other model selection techniques are currently supported. 115 @type method: str 116 @keyword modsel_pipe: The name of the new data pipe to be created by copying of the selected data pipe. 117 @type modsel_pipe: str 118 @keyword bundle: The optional data pipe bundle to associate the newly created pipe with. 119 @type bundle: str or None 120 @keyword pipes: A list of the data pipes to use in the model selection. 121 @type pipes: list of str 122 """ 123 124 # Test if the pipe already exists. 125 if has_pipe(modsel_pipe): 126 raise RelaxPipeError(modsel_pipe) 127 128 # Use all pipes. 129 if pipes == None: 130 # Get all data pipe names from the relax data store. 131 pipes = pipe_names() 132 133 # Select the model selection technique. 134 if method == 'AIC': 135 print("AIC model selection.") 136 formula = aic 137 elif method == 'AICc': 138 print("AICc model selection.") 139 formula = aicc 140 elif method == 'BIC': 141 print("BIC model selection.") 142 formula = bic 143 elif method == 'CV': 144 print("CV model selection.") 145 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.") 146 else: 147 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.") 148 149 # No pipes. 150 if len(pipes) == 0: 151 raise RelaxError("No data pipes are available for use in model selection.") 152 153 # Initialise. 154 function_type = {} 155 model_loop = {} 156 model_type = {} 157 duplicate_data = {} 158 model_statistics = {} 159 skip_function = {} 160 modsel_pipe_exists = False 161 162 # Cross validation setup. 163 if isinstance(pipes[0], list): 164 # No pipes. 165 if len(pipes[0]) == 0: 166 raise RelaxError("No pipes are available for use in model selection in the array " + repr(pipes[0]) + ".") 167 168 # Loop over the data pipes. 169 for i in xrange(len(pipes)): 170 for j in xrange(len(pipes[i])): 171 # Specific functions. 172 model_loop[pipes[i][j]] = get_specific_fn('model_loop', get_type(pipes[i][j])) 173 model_type[pipes[i][j]] = get_specific_fn('model_type', get_type(pipes[i][j])) 174 duplicate_data[pipes[i][j]] = get_specific_fn('duplicate_data', get_type(pipes[i][j])) 175 model_statistics[pipes[i][j]] = get_specific_fn('model_stats', get_type(pipes[i][j])) 176 skip_function[pipes[i][j]] = get_specific_fn('skip_function', get_type(pipes[i][j])) 177 178 # The model loop should be the same for all data pipes! 179 for i in xrange(len(pipes)): 180 for j in xrange(len(pipes[i])): 181 if model_loop[pipes[0][j]] != model_loop[pipes[i][j]]: 182 raise RelaxError("The models for each data pipes should be the same.") 183 model_loop = model_loop[pipes[0][0]] 184 185 # The model description. 186 model_desc = get_specific_fn('model_desc', get_type(pipes[0])) 187 188 # Global vs. local models. 189 global_flag = False 190 for i in xrange(len(pipes)): 191 for j in xrange(len(pipes[i])): 192 if model_type[pipes[i][j]]() == 'global': 193 global_flag = True 194 195 # All other model selection setup. 196 else: 197 # Loop over the data pipes. 198 for i in xrange(len(pipes)): 199 # Specific functions. 200 model_loop[pipes[i]] = get_specific_fn('model_loop', get_type(pipes[i])) 201 model_type[pipes[i]] = get_specific_fn('model_type', get_type(pipes[i])) 202 duplicate_data[pipes[i]] = get_specific_fn('duplicate_data', get_type(pipes[i])) 203 model_statistics[pipes[i]] = get_specific_fn('model_stats', get_type(pipes[i])) 204 skip_function[pipes[i]] = get_specific_fn('skip_function', get_type(pipes[i])) 205 206 model_loop = model_loop[pipes[0]] 207 208 # The model description. 209 model_desc = get_specific_fn('model_desc', get_type(pipes[0])) 210 211 # Global vs. local models. 212 global_flag = False 213 for j in xrange(len(pipes)): 214 if model_type[pipes[j]]() == 'global': 215 global_flag = True 216 217 218 # Loop over the base models. 219 for model_info in model_loop(): 220 # Print out. 221 print(("\n" + model_desc(model_info))) 222 print(("%-20s %-20s %-20s %-20s %-20s" % ("Data pipe", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion"))) 223 224 # Initial model. 225 best_model = None 226 best_crit = 1e300 227 228 # Loop over the pipes. 229 for j in xrange(len(pipes)): 230 # Single-item-out cross validation. 231 if method == 'CV': 232 # Sum of chi-squared values. 233 sum_crit = 0.0 234 235 # Loop over the validation samples and sum the chi-squared values. 236 for k in xrange(len(pipes[j])): 237 # Alias the data pipe name. 238 pipe = pipes[j][k] 239 240 # Switch to this pipe. 241 switch(pipe) 242 243 # Skip function. 244 if skip_function[pipe](model_info): 245 continue 246 247 # Get the model statistics. 248 k, n, chi2 = model_statistics[pipe](model_info) 249 250 # Missing data sets. 251 if k == None or n == None or chi2 == None: 252 continue 253 254 # Chi2 sum. 255 sum_crit = sum_crit + chi2 256 257 # Cross-validation criterion (average chi-squared value). 258 crit = sum_crit / float(len(pipes[j])) 259 260 # Other model selection methods. 261 else: 262 # Reassign the pipe. 263 pipe = pipes[j] 264 265 # Switch to this pipe. 266 switch(pipe) 267 268 # Skip function. 269 if skip_function[pipe](model_info): 270 continue 271 272 # Get the model statistics. 273 k, n, chi2 = model_statistics[pipe](model_info, global_stats=global_flag) 274 275 # Missing data sets. 276 if k == None or n == None or chi2 == None: 277 continue 278 279 # Calculate the criterion value. 280 crit = formula(chi2, float(k), float(n)) 281 282 # Print out. 283 print(("%-20s %-20i %-20i %-20.5f %-20.5f" % (pipe, k, n, chi2, crit))) 284 285 # Select model. 286 if crit < best_crit: 287 best_model = pipe 288 best_crit = crit 289 290 # Duplicate the data from the 'best_model' to the model selection data pipe. 291 if best_model != None: 292 # Print out of selected model. 293 print(("The model from the data pipe " + repr(best_model) + " has been selected.")) 294 295 # Switch to the selected data pipe. 296 switch(best_model) 297 298 # Duplicate. 299 duplicate_data[best_model](best_model, modsel_pipe, model_info, global_stats=global_flag, verbose=False) 300 301 # Model selection pipe now exists. 302 modsel_pipe_exists = True 303 304 # No model selected. 305 else: 306 # Print out of selected model. 307 print("No model has been selected.") 308 309 # Switch to the model selection pipe. 310 if modsel_pipe_exists: 311 switch(modsel_pipe) 312 313 # Bundle the data pipe. 314 if bundle: 315 generic_fns.pipes.bundle(bundle=bundle, pipe=modsel_pipe)
316