Package pipe_control :: Module model_selection
[hide private]
[frames] | no frames]

Source Code for Module pipe_control.model_selection

  1  ############################################################################### 
  2  #                                                                             # 
  3  # Copyright (C) 2003-2014 Edward d'Auvergne                                   # 
  4  #                                                                             # 
  5  # This file is part of the program relax (http://www.nmr-relax.com).          # 
  6  #                                                                             # 
  7  # This program is free software: you can redistribute it and/or modify        # 
  8  # it under the terms of the GNU General Public License as published by        # 
  9  # the Free Software Foundation, either version 3 of the License, or           # 
 10  # (at your option) any later version.                                         # 
 11  #                                                                             # 
 12  # This program is distributed in the hope that it will be useful,             # 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 15  # GNU General Public License for more details.                                # 
 16  #                                                                             # 
 17  # You should have received a copy of the GNU General Public License           # 
 18  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
 19  #                                                                             # 
 20  ############################################################################### 
 21   
 22  # Module docstring. 
 23  """Module for selecting the best model.""" 
 24   
 25  # Python module imports. 
 26  import sys 
 27   
 28  # relax module imports. 
 29  from lib.errors import RelaxError, RelaxPipeError 
 30  from lib.io import write_data 
 31  from lib.model_selection import aic, aicc, bic 
 32  import pipe_control.pipes 
 33  from pipe_control.pipes import has_pipe, pipe_names, switch 
 34  from specific_analyses.api import return_api 
 35   
 36   
37 -def select(method=None, modsel_pipe=None, bundle=None, pipes=None):
38 """Model selection function. 39 40 @keyword method: The model selection method. This can currently be one of: 41 - 'AIC', Akaike's Information Criteria. 42 - 'AICc', Small sample size corrected AIC. 43 - 'BIC', Bayesian or Schwarz Information Criteria. 44 - 'CV', Single-item-out cross-validation. 45 None of the other model selection techniques are currently supported. 46 @type method: str 47 @keyword modsel_pipe: The name of the new data pipe to be created by copying of the selected data pipe. 48 @type modsel_pipe: str 49 @keyword bundle: The optional data pipe bundle to associate the newly created pipe with. 50 @type bundle: str or None 51 @keyword pipes: A list of the data pipes to use in the model selection. 52 @type pipes: list of str 53 """ 54 55 # Test if the pipe already exists. 56 if has_pipe(modsel_pipe): 57 raise RelaxPipeError(modsel_pipe) 58 59 # Use all pipes. 60 if pipes == None: 61 # Get all data pipe names from the relax data store. 62 pipes = pipe_names() 63 64 # Select the model selection technique. 65 if method == 'AIC': 66 print("AIC model selection.") 67 formula = aic 68 elif method == 'AICc': 69 print("AICc model selection.") 70 formula = aicc 71 elif method == 'BIC': 72 print("BIC model selection.") 73 formula = bic 74 elif method == 'CV': 75 print("CV model selection.") 76 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.") 77 else: 78 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.") 79 80 # No pipes. 81 if len(pipes) == 0: 82 raise RelaxError("No data pipes are available for use in model selection.") 83 84 # Initialise. 85 function_type = {} 86 model_loop = {} 87 model_type = {} 88 duplicate_data = {} 89 model_statistics = {} 90 skip_function = {} 91 modsel_pipe_exists = False 92 93 # Cross validation setup. 94 if isinstance(pipes[0], list): 95 # No pipes. 96 if len(pipes[0]) == 0: 97 raise RelaxError("No pipes are available for use in model selection in the array " + repr(pipes[0]) + ".") 98 99 # Loop over the data pipes. 100 for i in range(len(pipes)): 101 for j in range(len(pipes[i])): 102 # The specific analysis API object. 103 api = return_api(pipe_name=pipes[i][j]) 104 105 # Store the specific functions. 106 model_loop[pipes[i][j]] = api.model_loop 107 model_type[pipes[i][j]] = api.model_type 108 duplicate_data[pipes[i][j]] = api.duplicate_data 109 model_statistics[pipes[i][j]] = api.model_statistics 110 skip_function[pipes[i][j]] = api.skip_function 111 112 # The model loop should be the same for all data pipes! 113 for i in range(len(pipes)): 114 for j in range(len(pipes[i])): 115 if model_loop[pipes[0][j]] != model_loop[pipes[i][j]]: 116 raise RelaxError("The models for each data pipes should be the same.") 117 118 # Alias some function from the specific API of the first data pipe. 119 api = return_api(pipe_name=pipes[0][0]) 120 model_loop = api.model_loop 121 model_desc = api.model_desc 122 123 # Global vs. local models. 124 global_flag = False 125 for i in range(len(pipes)): 126 for j in range(len(pipes[i])): 127 if model_type[pipes[i][j]]() == 'global': 128 global_flag = True 129 130 # All other model selection setup. 131 else: 132 # Loop over the data pipes. 133 for i in range(len(pipes)): 134 # The specific analysis API object. 135 api = return_api() 136 137 # Store the specific functions. 138 model_loop[pipes[i]] = api.model_loop 139 model_type[pipes[i]] = api.model_type 140 duplicate_data[pipes[i]] = api.duplicate_data 141 model_statistics[pipes[i]] = api.model_statistics 142 skip_function[pipes[i]] = api.skip_function 143 144 # Alias some function from the specific API of the first data pipe. 145 api = return_api(pipe_name=pipes[0]) 146 model_loop = api.model_loop 147 model_desc = api.model_desc 148 149 # Global vs. local models. 150 global_flag = False 151 for j in range(len(pipes)): 152 if model_type[pipes[j]]() == 'global': 153 global_flag = True 154 155 156 # Loop over the base models. 157 for model_info in model_loop(): 158 # Print out. 159 print("\n") 160 desc = model_desc(model_info) 161 if desc: 162 print(desc) 163 164 # Initial model. 165 best_model = None 166 best_crit = 1e300 167 data = [] 168 169 # Loop over the pipes. 170 for j in range(len(pipes)): 171 # Single-item-out cross validation. 172 if method == 'CV': 173 # Sum of chi-squared values. 174 sum_crit = 0.0 175 176 # Loop over the validation samples and sum the chi-squared values. 177 for k in range(len(pipes[j])): 178 # Alias the data pipe name. 179 pipe = pipes[j][k] 180 181 # Switch to this pipe. 182 switch(pipe) 183 184 # Skip function. 185 if skip_function[pipe](model_info): 186 continue 187 188 # Get the model statistics. 189 k, n, chi2 = model_statistics[pipe](model_info) 190 191 # Missing data sets. 192 if k == None or n == None or chi2 == None: 193 continue 194 195 # Chi2 sum. 196 sum_crit = sum_crit + chi2 197 198 # Cross-validation criterion (average chi-squared value). 199 crit = sum_crit / float(len(pipes[j])) 200 201 # Other model selection methods. 202 else: 203 # Reassign the pipe. 204 pipe = pipes[j] 205 206 # Switch to this pipe. 207 switch(pipe) 208 209 # Skip function. 210 if skip_function[pipe](model_info): 211 continue 212 213 # Get the model statistics. 214 k, n, chi2 = model_statistics[pipe](model_info, global_stats=global_flag) 215 216 # Missing data sets. 217 if k == None or n == None or chi2 == None: 218 continue 219 220 # Calculate the criterion value. 221 crit = formula(chi2, float(k), float(n)) 222 223 # Store the values for a later printout. 224 data.append([pipe, repr(k), repr(n), "%.5f" % chi2, "%.5f" % crit]) 225 226 # Select model. 227 if crit < best_crit: 228 best_model = pipe 229 best_crit = crit 230 231 # Write out the table. 232 write_data(out=sys.stdout, headings=["Data pipe", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion"], data=data) 233 234 # Duplicate the data from the 'best_model' to the model selection data pipe. 235 if best_model != None: 236 # Print out of selected model. 237 print("The model from the data pipe " + repr(best_model) + " has been selected.") 238 239 # Switch to the selected data pipe. 240 switch(best_model) 241 242 # Duplicate. 243 duplicate_data[best_model](best_model, modsel_pipe, model_info, global_stats=global_flag, verbose=False) 244 245 # Model selection pipe now exists. 246 modsel_pipe_exists = True 247 248 # No model selected. 249 else: 250 # Print out of selected model. 251 print("No model has been selected.") 252 253 # Switch to the model selection pipe. 254 if modsel_pipe_exists: 255 switch(modsel_pipe) 256 257 # Bundle the data pipe. 258 if bundle: 259 pipe_control.pipes.bundle(bundle=bundle, pipe=modsel_pipe)
260