1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """Module for selecting the best model."""
25
26
27 from math import log
28
29
30 import generic_fns.pipes
31 from generic_fns.pipes import get_type, has_pipe, pipe_names, switch
32 from relax_errors import RelaxError, RelaxPipeError
33 from specific_fns.setup import get_specific_fn
34
35
def aic(chi2, k, n):
    """Akaike's Information Criteria (AIC).

    The formula is::

        AIC = chi2 + 2k


    @param chi2:    The minimised chi-squared value.
    @type chi2:     float
    @param k:       The number of parameters in the model.
    @type k:        int
    @param n:       The dimension of the relaxation data set.
    @type n:        int
    @return:        The AIC value.
    @rtype:         float
    """

    # The n argument is unused by AIC; it is accepted only so that all
    # criteria functions (aic, aicc, bic) share an identical signature.
    return chi2 + 2.0*k
55
56
def aicc(chi2, k, n):
    """Small sample size corrected AIC.

    The formula is::

                           2k(k + 1)
        AICc = chi2 + 2k + ---------
                           n - k - 1


    @param chi2:    The minimised chi-squared value.
    @type chi2:     float
    @param k:       The number of parameters in the model.
    @type k:        int
    @param n:       The dimension of the relaxation data set.
    @type n:        int
    @return:        The AICc value.
    @rtype:         float
    @raises RelaxError: If n <= k + 1, where the correction term either
                        divides by zero or becomes negative.
    """

    # The correction term is only valid when the denominator n - k - 1 is
    # positive.
    if n > (k+1):
        return chi2 + 2.0*k + 2.0*k*(k + 1.0) / (n - k - 1.0)

    # Fatal division by zero.
    elif n == (k+1):
        raise RelaxError("The size of the dataset, n=%s, is too small for this model of size k=%s.  This situation causes a fatal division by zero as:\n    AICc = chi2 + 2k + 2k*(k + 1) / (n - k - 1).\n\nPlease use AIC model selection instead." % (n, k))

    # A negative, hence meaningless, score.
    elif n < (k+1):
        raise RelaxError("The size of the dataset, n=%s, is too small for this model of size k=%s.  This situation produces a negative, and hence nonsense, AICc score as:\n    AICc = chi2 + 2k + 2k*(k + 1) / (n - k - 1).\n\nPlease use AIC model selection instead." % (n, k))
83
84
def bic(chi2, k, n):
    """Bayesian or Schwarz Information Criteria.

    The formula is::

        BIC = chi2 + k ln n


    @param chi2:    The minimised chi-squared value.
    @type chi2:     float
    @param k:       The number of parameters in the model.
    @type k:        int
    @param n:       The dimension of the relaxation data set.
    @type n:        int
    @return:        The BIC value.
    @rtype:         float
    """

    return chi2 + k * log(n)
104
105
def select(method=None, modsel_pipe=None, bundle=None, pipes=None):
    """Model selection function.

    @keyword method:        The model selection method.  This can currently be one of:
                                - 'AIC', Akaike's Information Criteria.
                                - 'AICc', Small sample size corrected AIC.
                                - 'BIC', Bayesian or Schwarz Information Criteria.
                                - 'CV', Single-item-out cross-validation.
                            None of the other model selection techniques are currently supported.
    @type method:           str
    @keyword modsel_pipe:   The name of the new data pipe to be created by copying of the selected data pipe.
    @type modsel_pipe:      str
    @keyword bundle:        The optional data pipe bundle to associate the newly created pipe with.
    @type bundle:           str or None
    @keyword pipes:         A list of the data pipes to use in the model selection.
    @type pipes:            list of str
    """

    # The new data pipe must not already exist.
    if has_pipe(modsel_pipe):
        raise RelaxPipeError(modsel_pipe)

    # Use all data pipes by default.
    if pipes is None:
        pipes = pipe_names()

    # Select the model selection criterion function.
    if method == 'AIC':
        print("AIC model selection.")
        formula = aic
    elif method == 'AICc':
        print("AICc model selection.")
        formula = aicc
    elif method == 'BIC':
        print("BIC model selection.")
        formula = bic
    elif method == 'CV':
        print("CV model selection.")
        # NOTE(review): CV is advertised in the docstring but has never been
        # wired up - this branch raises just like the catch-all below, which
        # makes the CV handling further down dead code.
        raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.")
    else:
        raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.")

    # No data pipes to select from.
    if len(pipes) == 0:
        raise RelaxError("No data pipes are available for use in model selection.")

    # Initialise the specific-function maps (keyed by pipe name) and state.
    model_loop = {}
    model_type = {}
    duplicate_data = {}
    model_statistics = {}
    skip_function = {}
    modsel_pipe_exists = False

    # Cross validation setup: 'pipes' is a list of lists of pipe names.
    if isinstance(pipes[0], list):
        # An empty array of pipes.
        if len(pipes[0]) == 0:
            raise RelaxError("No pipes are available for use in model selection in the array " + repr(pipes[0]) + ".")

        # Fetch the analysis-specific functions for every pipe.
        for i in range(len(pipes)):
            for j in range(len(pipes[i])):
                model_loop[pipes[i][j]] = get_specific_fn('model_loop', get_type(pipes[i][j]))
                model_type[pipes[i][j]] = get_specific_fn('model_type', get_type(pipes[i][j]))
                duplicate_data[pipes[i][j]] = get_specific_fn('duplicate_data', get_type(pipes[i][j]))
                model_statistics[pipes[i][j]] = get_specific_fn('model_stats', get_type(pipes[i][j]))
                skip_function[pipes[i][j]] = get_specific_fn('skip_function', get_type(pipes[i][j]))

        # All data pipes must share the same model loop.
        for i in range(len(pipes)):
            for j in range(len(pipes[i])):
                if model_loop[pipes[0][j]] != model_loop[pipes[i][j]]:
                    raise RelaxError("The models for each data pipes should be the same.")

        # Collapse the map down to the single shared model loop function.
        model_loop = model_loop[pipes[0][0]]

        # The model description function.
        # NOTE(review): pipes[0] is a list here, so get_type() receives a list
        # rather than a pipe name.  Unreachable in practice because the CV
        # branch raises above - verify if CV support is ever completed.
        model_desc = get_specific_fn('model_desc', get_type(pipes[0]))

        # Global flag: set if any pipe's model type is 'global'.
        global_flag = False
        for i in range(len(pipes)):
            for j in range(len(pipes[i])):
                if model_type[pipes[i][j]]() == 'global':
                    global_flag = True

    # Standard setup: 'pipes' is a flat list of pipe names.
    else:
        # Fetch the analysis-specific functions for every pipe.
        for i in range(len(pipes)):
            model_loop[pipes[i]] = get_specific_fn('model_loop', get_type(pipes[i]))
            model_type[pipes[i]] = get_specific_fn('model_type', get_type(pipes[i]))
            duplicate_data[pipes[i]] = get_specific_fn('duplicate_data', get_type(pipes[i]))
            model_statistics[pipes[i]] = get_specific_fn('model_stats', get_type(pipes[i]))
            skip_function[pipes[i]] = get_specific_fn('skip_function', get_type(pipes[i]))

        # Collapse the map down to the first pipe's model loop function.
        model_loop = model_loop[pipes[0]]

        # The model description function.
        model_desc = get_specific_fn('model_desc', get_type(pipes[0]))

        # Global flag: set if any pipe's model type is 'global'.
        global_flag = False
        for j in range(len(pipes)):
            if model_type[pipes[j]]() == 'global':
                global_flag = True

    # Loop over the models, selecting the best scoring pipe for each.
    for model_info in model_loop():
        # Header print out.
        print(("\n" + model_desc(model_info)))
        print(("%-20s %-20s %-20s %-20s %-20s" % ("Data pipe", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion")))

        # Start with no model selected and an effectively infinite criterion.
        best_model = None
        best_crit = 1e300

        # Loop over the pipes.
        for j in range(len(pipes)):
            # Single-item-out cross validation (dead code - CV raises above).
            if method == 'CV':
                # Sum of chi-squared values over the CV sub-pipes.
                sum_crit = 0.0

                # Loop over the sub-pipes (index renamed from 'k' to avoid
                # being shadowed by the statistics unpack below).
                for m in range(len(pipes[j])):
                    # The current data pipe.
                    pipe = pipes[j][m]

                    # Switch to this pipe.
                    switch(pipe)

                    # Skip pipes the specific analysis excludes.
                    if skip_function[pipe](model_info):
                        continue

                    # Get the model statistics.
                    k, n, chi2 = model_statistics[pipe](model_info)

                    # Missing statistics.
                    if k is None or n is None or chi2 is None:
                        continue

                    # Accumulate the chi-squared value.
                    sum_crit = sum_crit + chi2

                # The average chi-squared value is the criterion.
                crit = sum_crit / float(len(pipes[j]))

            # All other model selection techniques.
            else:
                # The current data pipe.
                pipe = pipes[j]

                # Switch to this pipe.
                switch(pipe)

                # Skip pipes the specific analysis excludes.
                if skip_function[pipe](model_info):
                    continue

                # Get the model statistics.
                k, n, chi2 = model_statistics[pipe](model_info, global_stats=global_flag)

                # Missing statistics.
                if k is None or n is None or chi2 is None:
                    continue

                # Calculate the criterion value.
                crit = formula(chi2, float(k), float(n))

            # Print out the statistics for this pipe.
            print(("%-20s %-20i %-20i %-20.5f %-20.5f" % (pipe, k, n, chi2, crit)))

            # Keep the best (lowest criterion) model.
            if crit < best_crit:
                best_model = pipe
                best_crit = crit

        # Duplicate the data from the selected pipe into the new pipe.
        if best_model is not None:
            print(("The model from the data pipe " + repr(best_model) + " has been selected."))

            # Switch to the selected data pipe.
            switch(best_model)

            # Copy the data into the model selection pipe.
            duplicate_data[best_model](best_model, modsel_pipe, model_info, global_stats=global_flag, verbose=False)

            # The new pipe now exists.
            modsel_pipe_exists = True

        else:
            print("No model has been selected.")

    # Switch to the newly created model selection pipe.
    if modsel_pipe_exists:
        switch(modsel_pipe)

    # Bundle the new pipe.
    if bundle:
        generic_fns.pipes.bundle(bundle=bundle, pipe=modsel_pipe)
316