1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """Module for selecting the best model."""
25
26
27 from math import log
28
29
30 from pipes import get_type, has_pipe, pipe_names, switch
31 from relax_errors import RelaxError, RelaxPipeError
32 from specific_fns.setup import get_specific_fn
33
34
def aic(chi2, k, n):
    """Akaike's Information Criteria (AIC).

    The formula is::

        AIC = chi2 + 2k


    @param chi2:    The minimised chi-squared value.
    @type chi2:     float
    @param k:       The number of parameters in the model.
    @type k:        int
    @param n:       The dimension of the relaxation data set.
    @type n:        int
    @return:        The AIC value.
    @rtype:         float
    """

    # The n argument is unused by plain AIC; it is accepted only so that aic(),
    # aicc(), and bic() share a single call signature (see select()).
    return chi2 + 2.0*k
54
55
def aicc(chi2, k, n):
    """Small sample size corrected AIC.

    The formula is::

                           2k(k + 1)
        AICc = chi2 + 2k + ---------
                           n - k - 1


    @param chi2:    The minimised chi-squared value.
    @type chi2:     float
    @param k:       The number of parameters in the model.
    @type k:        int
    @param n:       The dimension of the relaxation data set.
    @type n:        int
    @return:        The AIC value.
    @rtype:         float
    @raise RelaxError:  If n <= k + 1, where the correction term is a division
                        by zero (n == k+1) or goes negative (n < k+1).
    """

    # The correction term is only valid when n > k + 1.
    if n > (k+1):
        return chi2 + 2.0*k + 2.0*k*(k + 1.0) / (n - k - 1.0)
    # n == k + 1 gives a division by zero in the correction term.
    elif n == (k+1):
        raise RelaxError("The size of the dataset, n=%s, is too small for this model of size k=%s.  This situation causes a fatal division by zero as:\n    AICc = chi2 + 2k + 2k*(k + 1) / (n - k - 1).\n\nPlease use AIC model selection instead." % (n, k))
    # n < k + 1 gives a negative, hence meaningless, correction term.
    elif n < (k+1):
        raise RelaxError("The size of the dataset, n=%s, is too small for this model of size k=%s.  This situation produces a negative, and hence nonsense, AICc score as:\n    AICc = chi2 + 2k + 2k*(k + 1) / (n - k - 1).\n\nPlease use AIC model selection instead." % (n, k))
82
83
def bic(chi2, k, n):
    """Bayesian or Schwarz Information Criteria.

    The formula is::

        BIC = chi2 + k ln n


    @param chi2:    The minimised chi-squared value.
    @type chi2:     float
    @param k:       The number of parameters in the model.
    @type k:        int
    @param n:       The dimension of the relaxation data set.
    @type n:        int
    @return:        The BIC value.
    @rtype:         float
    """

    return chi2 + k * log(n)
103
104
def select(method=None, modsel_pipe=None, pipes=None):
    """Model selection function.

    @keyword method:        The model selection method.  This can currently be one of:
                                - 'AIC', Akaike's Information Criteria.
                                - 'AICc', Small sample size corrected AIC.
                                - 'BIC', Bayesian or Schwarz Information Criteria.
                                - 'CV', Single-item-out cross-validation.
                            None of the other model selection techniques are currently supported.
    @type method:           str
    @keyword modsel_pipe:   The name of the new data pipe to be created by copying of the selected
                            data pipe.
    @type modsel_pipe:      str
    @keyword pipes:         A list of the data pipes to use in the model selection.
    @type pipes:            list of str
    """

    # The destination pipe must not already exist.
    if has_pipe(modsel_pipe):
        raise RelaxPipeError(modsel_pipe)

    # Default to every data pipe in the data store.
    if pipes == None:
        pipes = pipe_names()

    # Map the method string onto the information criterion function.
    if method == 'AIC':
        print("AIC model selection.")
        formula = aic
    elif method == 'AICc':
        print("AICc model selection.")
        formula = aicc
    elif method == 'BIC':
        print("BIC model selection.")
        formula = bic
    elif method == 'CV':
        print("CV model selection.")
        # NOTE(review): CV is announced but then rejected - it is recognised yet
        # not currently implemented, so it falls through to the same error.
        raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.")
    else:
        raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.")

    # Model selection is impossible with nothing to select from.
    if len(pipes) == 0:
        raise RelaxError("No data pipes are available for use in model selection.")

    # Per-pipe specific function tables, keyed by pipe name.
    model_loop = {}
    model_type = {}
    duplicate_data = {}
    model_statistics = {}
    skip_function = {}
    modsel_pipe_exists = False

    # Cross-validation layout: pipes is a list of lists of pipe names.
    if isinstance(pipes[0], list):

        # An empty inner array cannot be processed.
        if len(pipes[0]) == 0:
            raise RelaxError("No pipes are available for use in model selection in the array " + repr(pipes[0]) + ".")

        # Fetch the specific functions for every pipe in every array.
        for i in xrange(len(pipes)):
            for j in xrange(len(pipes[i])):
                model_loop[pipes[i][j]] = get_specific_fn('model_loop', get_type(pipes[i][j]))
                model_type[pipes[i][j]] = get_specific_fn('model_type', get_type(pipes[i][j]))
                duplicate_data[pipes[i][j]] = get_specific_fn('duplicate_data', get_type(pipes[i][j]))
                model_statistics[pipes[i][j]] = get_specific_fn('model_stats', get_type(pipes[i][j]))
                skip_function[pipes[i][j]] = get_specific_fn('skip_function', get_type(pipes[i][j]))

        # All pipes must share the same model looping function, which then
        # replaces the dict so it can be called directly below.
        for i in xrange(len(pipes)):
            for j in xrange(len(pipes[i])):
                if model_loop[pipes[0][j]] != model_loop[pipes[i][j]]:
                    raise RelaxError("The models for each data pipes should be the same.")
        model_loop = model_loop[pipes[0][0]]

        # NOTE(review): pipes[0] is a list in this branch - confirm that
        # get_type() accepts it (the flat branch below passes a pipe name).
        model_desc = get_specific_fn('model_desc', get_type(pipes[0]))

        # Global statistics are needed if any pipe holds a global model.
        global_flag = False
        for i in xrange(len(pipes)):
            for j in xrange(len(pipes[i])):
                if model_type[pipes[i][j]]() == 'global':
                    global_flag = True

    # Standard layout: pipes is a flat list of pipe names.
    else:

        # Fetch the specific functions for every pipe.
        for i in xrange(len(pipes)):
            model_loop[pipes[i]] = get_specific_fn('model_loop', get_type(pipes[i]))
            model_type[pipes[i]] = get_specific_fn('model_type', get_type(pipes[i]))
            duplicate_data[pipes[i]] = get_specific_fn('duplicate_data', get_type(pipes[i]))
            model_statistics[pipes[i]] = get_specific_fn('model_stats', get_type(pipes[i]))
            skip_function[pipes[i]] = get_specific_fn('skip_function', get_type(pipes[i]))

        # Replace the dict with the first pipe's looping function.
        model_loop = model_loop[pipes[0]]

        # The model description function.
        model_desc = get_specific_fn('model_desc', get_type(pipes[0]))

        # Global statistics are needed if any pipe holds a global model.
        global_flag = False
        for j in xrange(len(pipes)):
            if model_type[pipes[j]]() == 'global':
                global_flag = True

    # Loop over every model and pick the pipe with the lowest criterion.
    for model_info in model_loop():

        # Print a header for this model.
        print(("\n" + model_desc(model_info)))
        print(("%-20s %-20s %-20s %-20s %-20s" % ("Data pipe", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion")))

        # Best pipe found so far (1e300 acts as +infinity).
        best_model = None
        best_crit = 1e300

        # Score every candidate pipe (or pipe array for CV).
        for j in xrange(len(pipes)):

            # Cross-validation: the criterion is the chi2 averaged over the
            # inner array of pipes.
            if method == 'CV':
                sum_crit = 0.0

                for m in xrange(len(pipes[j])):
                    pipe = pipes[j][m]

                    # Make this pipe current for the specific functions.
                    switch(pipe)

                    # Skip pipes without relevant data for this model.
                    if skip_function[pipe](model_info):
                        continue

                    # k, n, and chi2 deliberately survive the loop - the
                    # print below reports the last pipe's statistics.
                    k, n, chi2 = model_statistics[pipe](model_info)

                    # Missing statistics are silently skipped.
                    if k == None or n == None or chi2 == None:
                        continue

                    sum_crit = sum_crit + chi2

                # Average chi-squared value.
                crit = sum_crit / float(len(pipes[j]))

            # Information criteria: apply the selected formula to this pipe.
            else:
                pipe = pipes[j]

                # Make this pipe current for the specific functions.
                switch(pipe)

                # Skip pipes without relevant data for this model.
                if skip_function[pipe](model_info):
                    continue

                # Model statistics.
                k, n, chi2 = model_statistics[pipe](model_info, global_stats=global_flag)

                # Missing statistics are silently skipped.
                if k == None or n == None or chi2 == None:
                    continue

                # The criterion value (k and n are cast as the formulae are
                # written for floats).
                crit = formula(chi2, float(k), float(n))

            # Report this candidate.
            print(("%-20s %-20i %-20i %-20.5f %-20.5f" % (pipe, k, n, chi2, crit)))

            # Keep the lowest-criterion pipe.
            if crit < best_crit:
                best_model = pipe
                best_crit = crit

        # Duplicate the winning pipe's data into the model selection pipe.
        if best_model != None:
            print(("The model from the data pipe " + repr(best_model) + " has been selected."))

            # Switch to the selected pipe before duplicating.
            switch(best_model)

            duplicate_data[best_model](best_model, modsel_pipe, model_info, global_stats=global_flag, verbose=False)

            # The destination pipe now exists.
            modsel_pipe_exists = True

        else:
            print("No model has been selected.")

    # Leave the user on the newly created pipe, if any model was selected.
    if modsel_pipe_exists:
        switch(modsel_pipe)
310