generic_fns.model

108 """Model selection function. 109 110 @keyword method: The model selection method. This can currently be one of: 111 - 'AIC', Akaike's Information Criteria. 112 - 'AICc', Small sample size corrected AIC. 113 - 'BIC', Bayesian or Schwarz Information Criteria. 114 - 'CV', Single-item-out cross-validation. 115 None of the other model selection techniques are currently supported. 116 @type method: str 117 @keyword modsel_pipe: The name of the new data pipe to be created by copying of the selected data pipe. 118 @type modsel_pipe: str 119 @keyword bundle: The optional data pipe bundle to associate the newly created pipe with. 120 @type bundle: str or None 121 @keyword pipes: A list of the data pipes to use in the model selection. 122 @type pipes: list of str 123 """ 124 125 # Test if the pipe already exists. 126 if has_pipe(modsel_pipe): 127 raise RelaxPipeError(modsel_pipe) 128 129 # Use all pipes. 130 if pipes == None: 131 # Get all data pipe names from the relax data store. 132 pipes = pipe_names() 133 134 # Select the model selection technique. 135 if method == 'AIC': 136 print("AIC model selection.") 137 formula = aic 138 elif method == 'AICc': 139 print("AICc model selection.") 140 formula = aicc 141 elif method == 'BIC': 142 print("BIC model selection.") 143 formula = bic 144 elif method == 'CV': 145 print("CV model selection.") 146 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.") 147 else: 148 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.") 149 150 # No pipes. 151 if len(pipes) == 0: 152 raise RelaxError("No data pipes are available for use in model selection.") 153 154 # Initialise. 155 function_type = {} 156 model_loop = {} 157 model_type = {} 158 duplicate_data = {} 159 model_statistics = {} 160 skip_function = {} 161 modsel_pipe_exists = False 162 163 # Cross validation setup. 164 if isinstance(pipes[0], list): 165 # No pipes. 166 if len(pipes[0]) == 0: 167 raise RelaxError("No pipes are available for use in model selection in the array " + repr(pipes[0]) + ".") 168 169 # Loop over the data pipes. 170 for i in range(len(pipes)): 171 for j in range(len(pipes[i])): 172 # Specific functions. 173 model_loop[pipes[i][j]] = get_specific_fn('model_loop', get_type(pipes[i][j])) 174 model_type[pipes[i][j]] = get_specific_fn('model_type', get_type(pipes[i][j])) 175 duplicate_data[pipes[i][j]] = get_specific_fn('duplicate_data', get_type(pipes[i][j])) 176 model_statistics[pipes[i][j]] = get_specific_fn('model_stats', get_type(pipes[i][j])) 177 skip_function[pipes[i][j]] = get_specific_fn('skip_function', get_type(pipes[i][j])) 178 179 # The model loop should be the same for all data pipes! 180 for i in range(len(pipes)): 181 for j in range(len(pipes[i])): 182 if model_loop[pipes[0][j]] != model_loop[pipes[i][j]]: 183 raise RelaxError("The models for each data pipes should be the same.") 184 model_loop = model_loop[pipes[0][0]] 185 186 # The model description. 187 model_desc = get_specific_fn('model_desc', get_type(pipes[0])) 188 189 # Global vs. local models. 190 global_flag = False 191 for i in range(len(pipes)): 192 for j in range(len(pipes[i])): 193 if model_type[pipes[i][j]]() == 'global': 194 global_flag = True 195 196 # All other model selection setup. 197 else: 198 # Loop over the data pipes. 199 for i in range(len(pipes)): 200 # Specific functions. 201 model_loop[pipes[i]] = get_specific_fn('model_loop', get_type(pipes[i])) 202 model_type[pipes[i]] = get_specific_fn('model_type', get_type(pipes[i])) 203 duplicate_data[pipes[i]] = get_specific_fn('duplicate_data', get_type(pipes[i])) 204 model_statistics[pipes[i]] = get_specific_fn('model_stats', get_type(pipes[i])) 205 skip_function[pipes[i]] = get_specific_fn('skip_function', get_type(pipes[i])) 206 207 model_loop = model_loop[pipes[0]] 208 209 # The model description. 210 model_desc = get_specific_fn('model_desc', get_type(pipes[0])) 211 212 # Global vs. local models. 213 global_flag = False 214 for j in range(len(pipes)): 215 if model_type[pipes[j]]() == 'global': 216 global_flag = True 217 218 219 # Loop over the base models. 220 for model_info in model_loop(): 221 # Print out. 222 print("\n") 223 desc = model_desc(model_info) 224 if desc: 225 print(desc) 226 227 # Initial model. 228 best_model = None 229 best_crit = 1e300 230 data = [] 231 232 # Loop over the pipes. 233 for j in range(len(pipes)): 234 # Single-item-out cross validation. 235 if method == 'CV': 236 # Sum of chi-squared values. 237 sum_crit = 0.0 238 239 # Loop over the validation samples and sum the chi-squared values. 240 for k in range(len(pipes[j])): 241 # Alias the data pipe name. 242 pipe = pipes[j][k] 243 244 # Switch to this pipe. 245 switch(pipe) 246 247 # Skip function. 248 if skip_function[pipe](model_info): 249 continue 250 251 # Get the model statistics. 252 k, n, chi2 = model_statistics[pipe](model_info) 253 254 # Missing data sets. 255 if k == None or n == None or chi2 == None: 256 continue 257 258 # Chi2 sum. 259 sum_crit = sum_crit + chi2 260 261 # Cross-validation criterion (average chi-squared value). 262 crit = sum_crit / float(len(pipes[j])) 263 264 # Other model selection methods. 265 else: 266 # Reassign the pipe. 267 pipe = pipes[j] 268 269 # Switch to this pipe. 270 switch(pipe) 271 272 # Skip function. 273 if skip_function[pipe](model_info): 274 continue 275 276 # Get the model statistics. 277 k, n, chi2 = model_statistics[pipe](model_info, global_stats=global_flag) 278 279 # Missing data sets. 280 if k == None or n == None or chi2 == None: 281 continue 282 283 # Calculate the criterion value. 284 crit = formula(chi2, float(k), float(n)) 285 286 # Store the values for a later printout. 287 data.append([pipe, repr(k), repr(n), "%.5f" % chi2, "%.5f" % crit]) 288 289 # Select model. 290 if crit < best_crit: 291 best_model = pipe 292 best_crit = crit 293 294 # Write out the table. 295 write_data(out=sys.stdout, headings=["Data pipe", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion"], data=data) 296 297 # Duplicate the data from the 'best_model' to the model selection data pipe. 298 if best_model != None: 299 # Print out of selected model. 300 print("The model from the data pipe " + repr(best_model) + " has been selected.") 301 302 # Switch to the selected data pipe. 303 switch(best_model) 304 305 # Duplicate. 306 duplicate_data[best_model](best_model, modsel_pipe, model_info, global_stats=global_flag, verbose=False) 307 308 # Model selection pipe now exists. 309 modsel_pipe_exists = True 310 311 # No model selected. 312 else: 313 # Print out of selected model. 314 print("No model has been selected.") 315 316 # Switch to the model selection pipe. 317 if modsel_pipe_exists: 318 switch(modsel_pipe) 319 320 # Bundle the data pipe. 321 if bundle: 322 generic_fns.pipes.bundle(bundle=bundle, pipe=modsel_pipe)

Source Code for Module generic_fns.model_selection