Package generic_fns :: Module model_selection
[hide private]
[frames] | no frames]

Source Code for Module generic_fns.model_selection

  1  ############################################################################### 
  2  #                                                                             # 
  3  # Copyright (C) 2003-2012 Edward d'Auvergne                                   # 
  4  #                                                                             # 
  5  # This file is part of the program relax (http://www.nmr-relax.com).          # 
  6  #                                                                             # 
  7  # This program is free software: you can redistribute it and/or modify        # 
  8  # it under the terms of the GNU General Public License as published by        # 
  9  # the Free Software Foundation, either version 3 of the License, or           # 
 10  # (at your option) any later version.                                         # 
 11  #                                                                             # 
 12  # This program is distributed in the hope that it will be useful,             # 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 15  # GNU General Public License for more details.                                # 
 16  #                                                                             # 
 17  # You should have received a copy of the GNU General Public License           # 
 18  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
 19  #                                                                             # 
 20  ############################################################################### 
 21   
 22  # Module docstring. 
 23  """Module for selecting the best model.""" 
 24   
 25  # Python module imports. 
 26  from math import log 
 27   
 28  # relax module imports. 
 29  import generic_fns.pipes 
 30  from generic_fns.pipes import get_type, has_pipe, pipe_names, switch 
 31  from relax_errors import RelaxError, RelaxPipeError 
 32  from specific_fns.setup import get_specific_fn 
 33   
 34   
35 -def aic(chi2, k, n):
36 """Akaike's Information Criteria (AIC). 37 38 The formula is:: 39 40 AIC = chi2 + 2k 41 42 43 @param chi2: The minimised chi-squared value. 44 @type chi2: float 45 @param k: The number of parameters in the model. 46 @type k: int 47 @param n: The dimension of the relaxation data set. 48 @type n: int 49 @return: The AIC value. 50 @rtype: float 51 """ 52 53 return chi2 + 2.0*k
54 55
56 -def aicc(chi2, k, n):
57 """Small sample size corrected AIC. 58 59 The formula is:: 60 61 2k(k + 1) 62 AICc = chi2 + 2k + --------- 63 n - k - 1 64 65 66 @param chi2: The minimised chi-squared value. 67 @type chi2: float 68 @param k: The number of parameters in the model. 69 @type k: int 70 @param n: The dimension of the relaxation data set. 71 @type n: int 72 @return: The AIC value. 73 @rtype: float 74 """ 75 76 if n > (k+1): 77 return chi2 + 2.0*k + 2.0*k*(k + 1.0) / (n - k - 1.0) 78 elif n == (k+1): 79 raise RelaxError("The size of the dataset, n=%s, is too small for this model of size k=%s. This situation causes a fatal division by zero as:\n AICc = chi2 + 2k + 2k*(k + 1) / (n - k - 1).\n\nPlease use AIC model selection instead." % (n, k)) 80 elif n < (k+1): 81 raise RelaxError("The size of the dataset, n=%s, is too small for this model of size k=%s. This situation produces a negative, and hence nonsense, AICc score as:\n AICc = chi2 + 2k + 2k*(k + 1) / (n - k - 1).\n\nPlease use AIC model selection instead." % (n, k))
82 83
84 -def bic(chi2, k, n):
85 """Bayesian or Schwarz Information Criteria. 86 87 The formula is:: 88 89 BIC = chi2 + k ln n 90 91 92 @param chi2: The minimised chi-squared value. 93 @type chi2: float 94 @param k: The number of parameters in the model. 95 @type k: int 96 @param n: The dimension of the relaxation data set. 97 @type n: int 98 @return: The AIC value. 99 @rtype: float 100 """ 101 102 return chi2 + k * log(n)
103 104
105 -def select(method=None, modsel_pipe=None, bundle=None, pipes=None):
106 """Model selection function. 107 108 @keyword method: The model selection method. This can currently be one of: 109 - 'AIC', Akaike's Information Criteria. 110 - 'AICc', Small sample size corrected AIC. 111 - 'BIC', Bayesian or Schwarz Information Criteria. 112 - 'CV', Single-item-out cross-validation. 113 None of the other model selection techniques are currently supported. 114 @type method: str 115 @keyword modsel_pipe: The name of the new data pipe to be created by copying of the selected data pipe. 116 @type modsel_pipe: str 117 @keyword bundle: The optional data pipe bundle to associate the newly created pipe with. 118 @type bundle: str or None 119 @keyword pipes: A list of the data pipes to use in the model selection. 120 @type pipes: list of str 121 """ 122 123 # Test if the pipe already exists. 124 if has_pipe(modsel_pipe): 125 raise RelaxPipeError(modsel_pipe) 126 127 # Use all pipes. 128 if pipes == None: 129 # Get all data pipe names from the relax data store. 130 pipes = pipe_names() 131 132 # Select the model selection technique. 133 if method == 'AIC': 134 print("AIC model selection.") 135 formula = aic 136 elif method == 'AICc': 137 print("AICc model selection.") 138 formula = aicc 139 elif method == 'BIC': 140 print("BIC model selection.") 141 formula = bic 142 elif method == 'CV': 143 print("CV model selection.") 144 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.") 145 else: 146 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.") 147 148 # No pipes. 149 if len(pipes) == 0: 150 raise RelaxError("No data pipes are available for use in model selection.") 151 152 # Initialise. 153 function_type = {} 154 model_loop = {} 155 model_type = {} 156 duplicate_data = {} 157 model_statistics = {} 158 skip_function = {} 159 modsel_pipe_exists = False 160 161 # Cross validation setup. 162 if isinstance(pipes[0], list): 163 # No pipes. 164 if len(pipes[0]) == 0: 165 raise RelaxError("No pipes are available for use in model selection in the array " + repr(pipes[0]) + ".") 166 167 # Loop over the data pipes. 168 for i in range(len(pipes)): 169 for j in range(len(pipes[i])): 170 # Specific functions. 171 model_loop[pipes[i][j]] = get_specific_fn('model_loop', get_type(pipes[i][j])) 172 model_type[pipes[i][j]] = get_specific_fn('model_type', get_type(pipes[i][j])) 173 duplicate_data[pipes[i][j]] = get_specific_fn('duplicate_data', get_type(pipes[i][j])) 174 model_statistics[pipes[i][j]] = get_specific_fn('model_stats', get_type(pipes[i][j])) 175 skip_function[pipes[i][j]] = get_specific_fn('skip_function', get_type(pipes[i][j])) 176 177 # The model loop should be the same for all data pipes! 178 for i in range(len(pipes)): 179 for j in range(len(pipes[i])): 180 if model_loop[pipes[0][j]] != model_loop[pipes[i][j]]: 181 raise RelaxError("The models for each data pipes should be the same.") 182 model_loop = model_loop[pipes[0][0]] 183 184 # The model description. 185 model_desc = get_specific_fn('model_desc', get_type(pipes[0])) 186 187 # Global vs. local models. 188 global_flag = False 189 for i in range(len(pipes)): 190 for j in range(len(pipes[i])): 191 if model_type[pipes[i][j]]() == 'global': 192 global_flag = True 193 194 # All other model selection setup. 195 else: 196 # Loop over the data pipes. 197 for i in range(len(pipes)): 198 # Specific functions. 199 model_loop[pipes[i]] = get_specific_fn('model_loop', get_type(pipes[i])) 200 model_type[pipes[i]] = get_specific_fn('model_type', get_type(pipes[i])) 201 duplicate_data[pipes[i]] = get_specific_fn('duplicate_data', get_type(pipes[i])) 202 model_statistics[pipes[i]] = get_specific_fn('model_stats', get_type(pipes[i])) 203 skip_function[pipes[i]] = get_specific_fn('skip_function', get_type(pipes[i])) 204 205 model_loop = model_loop[pipes[0]] 206 207 # The model description. 208 model_desc = get_specific_fn('model_desc', get_type(pipes[0])) 209 210 # Global vs. local models. 211 global_flag = False 212 for j in range(len(pipes)): 213 if model_type[pipes[j]]() == 'global': 214 global_flag = True 215 216 217 # Loop over the base models. 218 for model_info in model_loop(): 219 # Print out. 220 print("\n" + model_desc(model_info)) 221 print("%-20s %-20s %-20s %-20s %-20s" % ("Data pipe", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion")) 222 223 # Initial model. 224 best_model = None 225 best_crit = 1e300 226 227 # Loop over the pipes. 228 for j in range(len(pipes)): 229 # Single-item-out cross validation. 230 if method == 'CV': 231 # Sum of chi-squared values. 232 sum_crit = 0.0 233 234 # Loop over the validation samples and sum the chi-squared values. 235 for k in range(len(pipes[j])): 236 # Alias the data pipe name. 237 pipe = pipes[j][k] 238 239 # Switch to this pipe. 240 switch(pipe) 241 242 # Skip function. 243 if skip_function[pipe](model_info): 244 continue 245 246 # Get the model statistics. 247 k, n, chi2 = model_statistics[pipe](model_info) 248 249 # Missing data sets. 250 if k == None or n == None or chi2 == None: 251 continue 252 253 # Chi2 sum. 254 sum_crit = sum_crit + chi2 255 256 # Cross-validation criterion (average chi-squared value). 257 crit = sum_crit / float(len(pipes[j])) 258 259 # Other model selection methods. 260 else: 261 # Reassign the pipe. 262 pipe = pipes[j] 263 264 # Switch to this pipe. 265 switch(pipe) 266 267 # Skip function. 268 if skip_function[pipe](model_info): 269 continue 270 271 # Get the model statistics. 272 k, n, chi2 = model_statistics[pipe](model_info, global_stats=global_flag) 273 274 # Missing data sets. 275 if k == None or n == None or chi2 == None: 276 continue 277 278 # Calculate the criterion value. 279 crit = formula(chi2, float(k), float(n)) 280 281 # Print out. 282 print("%-20s %-20i %-20i %-20.5f %-20.5f" % (pipe, k, n, chi2, crit)) 283 284 # Select model. 285 if crit < best_crit: 286 best_model = pipe 287 best_crit = crit 288 289 # Duplicate the data from the 'best_model' to the model selection data pipe. 290 if best_model != None: 291 # Print out of selected model. 292 print("The model from the data pipe " + repr(best_model) + " has been selected.") 293 294 # Switch to the selected data pipe. 295 switch(best_model) 296 297 # Duplicate. 298 duplicate_data[best_model](best_model, modsel_pipe, model_info, global_stats=global_flag, verbose=False) 299 300 # Model selection pipe now exists. 301 modsel_pipe_exists = True 302 303 # No model selected. 304 else: 305 # Print out of selected model. 306 print("No model has been selected.") 307 308 # Switch to the model selection pipe. 309 if modsel_pipe_exists: 310 switch(modsel_pipe) 311 312 # Bundle the data pipe. 313 if bundle: 314 generic_fns.pipes.bundle(bundle=bundle, pipe=modsel_pipe)
315