Package generic_fns :: Module model_selection
[hide private]
[frames] | no frames]

Source Code for Module generic_fns.model_selection

  1  ############################################################################### 
  2  #                                                                             # 
  3  # Copyright (C) 2003-2004, 2007-2008 Edward d'Auvergne                        # 
  4  #                                                                             # 
  5  # This file is part of the program relax.                                     # 
  6  #                                                                             # 
  7  # relax is free software; you can redistribute it and/or modify               # 
  8  # it under the terms of the GNU General Public License as published by        # 
  9  # the Free Software Foundation; either version 2 of the License, or           # 
 10  # (at your option) any later version.                                         # 
 11  #                                                                             # 
 12  # relax is distributed in the hope that it will be useful,                    # 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 15  # GNU General Public License for more details.                                # 
 16  #                                                                             # 
 17  # You should have received a copy of the GNU General Public License           # 
 18  # along with relax; if not, write to the Free Software                        # 
 19  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA   # 
 20  #                                                                             # 
 21  ############################################################################### 
 22   
 23  # Module docstring. 
 24  """Module for selecting the best model.""" 
 25   
 26  # Python module imports. 
 27  from math import log 
 28   
 29  # relax module imports. 
 30  from pipes import get_type, has_pipe, pipe_names, switch 
 31  from relax_errors import RelaxError, RelaxPipeError 
 32  from specific_fns.setup import get_specific_fn 
 33   
 34   
35 -def aic(chi2, k, n):
36 """Akaike's Information Criteria (AIC). 37 38 The formula is:: 39 40 AIC = chi2 + 2k 41 42 43 @param chi2: The minimised chi-squared value. 44 @type chi2: float 45 @param k: The number of parameters in the model. 46 @type k: int 47 @param n: The dimension of the relaxation data set. 48 @type n: int 49 @return: The AIC value. 50 @rtype: float 51 """ 52 53 return chi2 + 2.0*k
54 55
56 -def aicc(chi2, k, n):
57 """Small sample size corrected AIC. 58 59 The formula is:: 60 61 2k(k + 1) 62 AICc = chi2 + 2k + --------- 63 n - k - 1 64 65 66 @param chi2: The minimised chi-squared value. 67 @type chi2: float 68 @param k: The number of parameters in the model. 69 @type k: int 70 @param n: The dimension of the relaxation data set. 71 @type n: int 72 @return: The AIC value. 73 @rtype: float 74 """ 75 76 if n > (k+1): 77 return chi2 + 2.0*k + 2.0*k*(k + 1.0) / (n - k - 1.0) 78 elif n == (k+1): 79 raise RelaxError("The size of the dataset, n=%s, is too small for this model of size k=%s. This situation causes a fatal division by zero as:\n AICc = chi2 + 2k + 2k*(k + 1) / (n - k - 1).\n\nPlease use AIC model selection instead." % (n, k)) 80 elif n < (k+1): 81 raise RelaxError("The size of the dataset, n=%s, is too small for this model of size k=%s. This situation produces a negative, and hence nonsense, AICc score as:\n AICc = chi2 + 2k + 2k*(k + 1) / (n - k - 1).\n\nPlease use AIC model selection instead." % (n, k))
82 83
84 -def bic(chi2, k, n):
85 """Bayesian or Schwarz Information Criteria. 86 87 The formula is:: 88 89 BIC = chi2 + k ln n 90 91 92 @param chi2: The minimised chi-squared value. 93 @type chi2: float 94 @param k: The number of parameters in the model. 95 @type k: int 96 @param n: The dimension of the relaxation data set. 97 @type n: int 98 @return: The AIC value. 99 @rtype: float 100 """ 101 102 return chi2 + k * log(n)
103 104
105 -def select(method=None, modsel_pipe=None, pipes=None):
106 """Model selection function. 107 108 @keyword method: The model selection method. This can currently be one of: 109 - 'AIC', Akaike's Information Criteria. 110 - 'AICc', Small sample size corrected AIC. 111 - 'BIC', Bayesian or Schwarz Information Criteria. 112 - 'CV', Single-item-out cross-validation. 113 None of the other model selection techniques are currently supported. 114 @type method: str 115 @keyword modsel_pipe: The name of the new data pipe to be created by copying of the selected 116 data pipe. 117 @type modsel_pipe: str 118 @keyword pipes: A list of the data pipes to use in the model selection. 119 @type pipes: list of str 120 """ 121 122 # Test if the pipe already exists. 123 if has_pipe(modsel_pipe): 124 raise RelaxPipeError(modsel_pipe) 125 126 # Use all pipes. 127 if pipes == None: 128 # Get all data pipe names from the relax data store. 129 pipes = pipe_names() 130 131 # Select the model selection technique. 132 if method == 'AIC': 133 print("AIC model selection.") 134 formula = aic 135 elif method == 'AICc': 136 print("AICc model selection.") 137 formula = aicc 138 elif method == 'BIC': 139 print("BIC model selection.") 140 formula = bic 141 elif method == 'CV': 142 print("CV model selection.") 143 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.") 144 else: 145 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.") 146 147 # No pipes. 148 if len(pipes) == 0: 149 raise RelaxError("No data pipes are available for use in model selection.") 150 151 # Initialise. 152 function_type = {} 153 model_loop = {} 154 model_type = {} 155 duplicate_data = {} 156 model_statistics = {} 157 skip_function = {} 158 modsel_pipe_exists = False 159 160 # Cross validation setup. 161 if isinstance(pipes[0], list): 162 # No pipes. 163 if len(pipes[0]) == 0: 164 raise RelaxError("No pipes are available for use in model selection in the array " + repr(pipes[0]) + ".") 165 166 # Loop over the data pipes. 167 for i in xrange(len(pipes)): 168 for j in xrange(len(pipes[i])): 169 # Specific functions. 170 model_loop[pipes[i][j]] = get_specific_fn('model_loop', get_type(pipes[i][j])) 171 model_type[pipes[i][j]] = get_specific_fn('model_type', get_type(pipes[i][j])) 172 duplicate_data[pipes[i][j]] = get_specific_fn('duplicate_data', get_type(pipes[i][j])) 173 model_statistics[pipes[i][j]] = get_specific_fn('model_stats', get_type(pipes[i][j])) 174 skip_function[pipes[i][j]] = get_specific_fn('skip_function', get_type(pipes[i][j])) 175 176 # The model loop should be the same for all data pipes! 177 for i in xrange(len(pipes)): 178 for j in xrange(len(pipes[i])): 179 if model_loop[pipes[0][j]] != model_loop[pipes[i][j]]: 180 raise RelaxError("The models for each data pipes should be the same.") 181 model_loop = model_loop[pipes[0][0]] 182 183 # The model description. 184 model_desc = get_specific_fn('model_desc', get_type(pipes[0])) 185 186 # Global vs. local models. 187 global_flag = False 188 for i in xrange(len(pipes)): 189 for j in xrange(len(pipes[i])): 190 if model_type[pipes[i][j]]() == 'global': 191 global_flag = True 192 193 # All other model selection setup. 194 else: 195 # Loop over the data pipes. 196 for i in xrange(len(pipes)): 197 # Specific functions. 198 model_loop[pipes[i]] = get_specific_fn('model_loop', get_type(pipes[i])) 199 model_type[pipes[i]] = get_specific_fn('model_type', get_type(pipes[i])) 200 duplicate_data[pipes[i]] = get_specific_fn('duplicate_data', get_type(pipes[i])) 201 model_statistics[pipes[i]] = get_specific_fn('model_stats', get_type(pipes[i])) 202 skip_function[pipes[i]] = get_specific_fn('skip_function', get_type(pipes[i])) 203 204 model_loop = model_loop[pipes[0]] 205 206 # The model description. 207 model_desc = get_specific_fn('model_desc', get_type(pipes[0])) 208 209 # Global vs. local models. 210 global_flag = False 211 for j in xrange(len(pipes)): 212 if model_type[pipes[j]]() == 'global': 213 global_flag = True 214 215 216 # Loop over the base models. 217 for model_info in model_loop(): 218 # Print out. 219 print(("\n" + model_desc(model_info))) 220 print(("%-20s %-20s %-20s %-20s %-20s" % ("Data pipe", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion"))) 221 222 # Initial model. 223 best_model = None 224 best_crit = 1e300 225 226 # Loop over the pipes. 227 for j in xrange(len(pipes)): 228 # Single-item-out cross validation. 229 if method == 'CV': 230 # Sum of chi-squared values. 231 sum_crit = 0.0 232 233 # Loop over the validation samples and sum the chi-squared values. 234 for k in xrange(len(pipes[j])): 235 # Alias the data pipe name. 236 pipe = pipes[j][k] 237 238 # Switch to this pipe. 239 switch(pipe) 240 241 # Skip function. 242 if skip_function[pipe](model_info): 243 continue 244 245 # Get the model statistics. 246 k, n, chi2 = model_statistics[pipe](model_info) 247 248 # Missing data sets. 249 if k == None or n == None or chi2 == None: 250 continue 251 252 # Chi2 sum. 253 sum_crit = sum_crit + chi2 254 255 # Cross-validation criterion (average chi-squared value). 256 crit = sum_crit / float(len(pipes[j])) 257 258 # Other model selection methods. 259 else: 260 # Reassign the pipe. 261 pipe = pipes[j] 262 263 # Switch to this pipe. 264 switch(pipe) 265 266 # Skip function. 267 if skip_function[pipe](model_info): 268 continue 269 270 # Get the model statistics. 271 k, n, chi2 = model_statistics[pipe](model_info, global_stats=global_flag) 272 273 # Missing data sets. 274 if k == None or n == None or chi2 == None: 275 continue 276 277 # Calculate the criterion value. 278 crit = formula(chi2, float(k), float(n)) 279 280 # Print out. 281 print(("%-20s %-20i %-20i %-20.5f %-20.5f" % (pipe, k, n, chi2, crit))) 282 283 # Select model. 284 if crit < best_crit: 285 best_model = pipe 286 best_crit = crit 287 288 # Duplicate the data from the 'best_model' to the model selection data pipe. 289 if best_model != None: 290 # Print out of selected model. 291 print(("The model from the data pipe " + repr(best_model) + " has been selected.")) 292 293 # Switch to the selected data pipe. 294 switch(best_model) 295 296 # Duplicate. 297 duplicate_data[best_model](best_model, modsel_pipe, model_info, global_stats=global_flag, verbose=False) 298 299 # Model selection pipe now exists. 300 modsel_pipe_exists = True 301 302 # No model selected. 303 else: 304 # Print out of selected model. 305 print("No model has been selected.") 306 307 # Switch to the model selection pipe. 308 if modsel_pipe_exists: 309 switch(modsel_pipe)
310