generic_fns.model

106 """Model selection function. 107 108 @keyword method: The model selection method. This can currently be one of: 109 - 'AIC', Akaike's Information Criteria. 110 - 'AICc', Small sample size corrected AIC. 111 - 'BIC', Bayesian or Schwarz Information Criteria. 112 - 'CV', Single-item-out cross-validation. 113 None of the other model selection techniques are currently supported. 114 @type method: str 115 @keyword modsel_pipe: The name of the new data pipe to be created by copying of the selected data pipe. 116 @type modsel_pipe: str 117 @keyword bundle: The optional data pipe bundle to associate the newly created pipe with. 118 @type bundle: str or None 119 @keyword pipes: A list of the data pipes to use in the model selection. 120 @type pipes: list of str 121 """ 122 123 # Test if the pipe already exists. 124 if has_pipe(modsel_pipe): 125 raise RelaxPipeError(modsel_pipe) 126 127 # Use all pipes. 128 if pipes == None: 129 # Get all data pipe names from the relax data store. 130 pipes = pipe_names() 131 132 # Select the model selection technique. 133 if method == 'AIC': 134 print("AIC model selection.") 135 formula = aic 136 elif method == 'AICc': 137 print("AICc model selection.") 138 formula = aicc 139 elif method == 'BIC': 140 print("BIC model selection.") 141 formula = bic 142 elif method == 'CV': 143 print("CV model selection.") 144 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.") 145 else: 146 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.") 147 148 # No pipes. 149 if len(pipes) == 0: 150 raise RelaxError("No data pipes are available for use in model selection.") 151 152 # Initialise. 153 function_type = {} 154 model_loop = {} 155 model_type = {} 156 duplicate_data = {} 157 model_statistics = {} 158 skip_function = {} 159 modsel_pipe_exists = False 160 161 # Cross validation setup. 162 if isinstance(pipes[0], list): 163 # No pipes. 164 if len(pipes[0]) == 0: 165 raise RelaxError("No pipes are available for use in model selection in the array " + repr(pipes[0]) + ".") 166 167 # Loop over the data pipes. 168 for i in range(len(pipes)): 169 for j in range(len(pipes[i])): 170 # Specific functions. 171 model_loop[pipes[i][j]] = get_specific_fn('model_loop', get_type(pipes[i][j])) 172 model_type[pipes[i][j]] = get_specific_fn('model_type', get_type(pipes[i][j])) 173 duplicate_data[pipes[i][j]] = get_specific_fn('duplicate_data', get_type(pipes[i][j])) 174 model_statistics[pipes[i][j]] = get_specific_fn('model_stats', get_type(pipes[i][j])) 175 skip_function[pipes[i][j]] = get_specific_fn('skip_function', get_type(pipes[i][j])) 176 177 # The model loop should be the same for all data pipes! 178 for i in range(len(pipes)): 179 for j in range(len(pipes[i])): 180 if model_loop[pipes[0][j]] != model_loop[pipes[i][j]]: 181 raise RelaxError("The models for each data pipes should be the same.") 182 model_loop = model_loop[pipes[0][0]] 183 184 # The model description. 185 model_desc = get_specific_fn('model_desc', get_type(pipes[0])) 186 187 # Global vs. local models. 188 global_flag = False 189 for i in range(len(pipes)): 190 for j in range(len(pipes[i])): 191 if model_type[pipes[i][j]]() == 'global': 192 global_flag = True 193 194 # All other model selection setup. 195 else: 196 # Loop over the data pipes. 197 for i in range(len(pipes)): 198 # Specific functions. 199 model_loop[pipes[i]] = get_specific_fn('model_loop', get_type(pipes[i])) 200 model_type[pipes[i]] = get_specific_fn('model_type', get_type(pipes[i])) 201 duplicate_data[pipes[i]] = get_specific_fn('duplicate_data', get_type(pipes[i])) 202 model_statistics[pipes[i]] = get_specific_fn('model_stats', get_type(pipes[i])) 203 skip_function[pipes[i]] = get_specific_fn('skip_function', get_type(pipes[i])) 204 205 model_loop = model_loop[pipes[0]] 206 207 # The model description. 208 model_desc = get_specific_fn('model_desc', get_type(pipes[0])) 209 210 # Global vs. local models. 211 global_flag = False 212 for j in range(len(pipes)): 213 if model_type[pipes[j]]() == 'global': 214 global_flag = True 215 216 217 # Loop over the base models. 218 for model_info in model_loop(): 219 # Print out. 220 print("\n" + model_desc(model_info)) 221 print("%-20s %-20s %-20s %-20s %-20s" % ("Data pipe", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion")) 222 223 # Initial model. 224 best_model = None 225 best_crit = 1e300 226 227 # Loop over the pipes. 228 for j in range(len(pipes)): 229 # Single-item-out cross validation. 230 if method == 'CV': 231 # Sum of chi-squared values. 232 sum_crit = 0.0 233 234 # Loop over the validation samples and sum the chi-squared values. 235 for k in range(len(pipes[j])): 236 # Alias the data pipe name. 237 pipe = pipes[j][k] 238 239 # Switch to this pipe. 240 switch(pipe) 241 242 # Skip function. 243 if skip_function[pipe](model_info): 244 continue 245 246 # Get the model statistics. 247 k, n, chi2 = model_statistics[pipe](model_info) 248 249 # Missing data sets. 250 if k == None or n == None or chi2 == None: 251 continue 252 253 # Chi2 sum. 254 sum_crit = sum_crit + chi2 255 256 # Cross-validation criterion (average chi-squared value). 257 crit = sum_crit / float(len(pipes[j])) 258 259 # Other model selection methods. 260 else: 261 # Reassign the pipe. 262 pipe = pipes[j] 263 264 # Switch to this pipe. 265 switch(pipe) 266 267 # Skip function. 268 if skip_function[pipe](model_info): 269 continue 270 271 # Get the model statistics. 272 k, n, chi2 = model_statistics[pipe](model_info, global_stats=global_flag) 273 274 # Missing data sets. 275 if k == None or n == None or chi2 == None: 276 continue 277 278 # Calculate the criterion value. 279 crit = formula(chi2, float(k), float(n)) 280 281 # Print out. 282 print("%-20s %-20i %-20i %-20.5f %-20.5f" % (pipe, k, n, chi2, crit)) 283 284 # Select model. 285 if crit < best_crit: 286 best_model = pipe 287 best_crit = crit 288 289 # Duplicate the data from the 'best_model' to the model selection data pipe. 290 if best_model != None: 291 # Print out of selected model. 292 print("The model from the data pipe " + repr(best_model) + " has been selected.") 293 294 # Switch to the selected data pipe. 295 switch(best_model) 296 297 # Duplicate. 298 duplicate_data[best_model](best_model, modsel_pipe, model_info, global_stats=global_flag, verbose=False) 299 300 # Model selection pipe now exists. 301 modsel_pipe_exists = True 302 303 # No model selected. 304 else: 305 # Print out of selected model. 306 print("No model has been selected.") 307 308 # Switch to the model selection pipe. 309 if modsel_pipe_exists: 310 switch(modsel_pipe) 311 312 # Bundle the data pipe. 313 if bundle: 314 generic_fns.pipes.bundle(bundle=bundle, pipe=modsel_pipe)

Source Code for Module generic_fns.model_selection