Package pipe_control :: Module model_selection
[hide private]
[frames] | no frames]

Source Code for Module pipe_control.model_selection

  1  ############################################################################### 
  2  #                                                                             # 
  3  # Copyright (C) 2003-2013 Edward d'Auvergne                                   # 
  4  #                                                                             # 
  5  # This file is part of the program relax (http://www.nmr-relax.com).          # 
  6  #                                                                             # 
  7  # This program is free software: you can redistribute it and/or modify        # 
  8  # it under the terms of the GNU General Public License as published by        # 
  9  # the Free Software Foundation, either version 3 of the License, or           # 
 10  # (at your option) any later version.                                         # 
 11  #                                                                             # 
 12  # This program is distributed in the hope that it will be useful,             # 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 15  # GNU General Public License for more details.                                # 
 16  #                                                                             # 
 17  # You should have received a copy of the GNU General Public License           # 
 18  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
 19  #                                                                             # 
 20  ############################################################################### 
 21   
 22  # Module docstring. 
 23  """Module for selecting the best model.""" 
 24   
 25  # Python module imports. 
 26  import sys 
 27   
 28  # relax module imports. 
 29  from lib.errors import RelaxError, RelaxPipeError 
 30  from lib.io import write_data 
 31  from lib.model_selection import aic, aicc, bic 
 32  import pipe_control.pipes 
 33  from pipe_control.pipes import get_type, has_pipe, pipe_names, switch 
 34  from specific_analyses.setup import get_specific_fn 
 35   
 36   
37 -def select(method=None, modsel_pipe=None, bundle=None, pipes=None):
38 """Model selection function. 39 40 @keyword method: The model selection method. This can currently be one of: 41 - 'AIC', Akaike's Information Criteria. 42 - 'AICc', Small sample size corrected AIC. 43 - 'BIC', Bayesian or Schwarz Information Criteria. 44 - 'CV', Single-item-out cross-validation. 45 None of the other model selection techniques are currently supported. 46 @type method: str 47 @keyword modsel_pipe: The name of the new data pipe to be created by copying of the selected data pipe. 48 @type modsel_pipe: str 49 @keyword bundle: The optional data pipe bundle to associate the newly created pipe with. 50 @type bundle: str or None 51 @keyword pipes: A list of the data pipes to use in the model selection. 52 @type pipes: list of str 53 """ 54 55 # Test if the pipe already exists. 56 if has_pipe(modsel_pipe): 57 raise RelaxPipeError(modsel_pipe) 58 59 # Use all pipes. 60 if pipes == None: 61 # Get all data pipe names from the relax data store. 62 pipes = pipe_names() 63 64 # Select the model selection technique. 65 if method == 'AIC': 66 print("AIC model selection.") 67 formula = aic 68 elif method == 'AICc': 69 print("AICc model selection.") 70 formula = aicc 71 elif method == 'BIC': 72 print("BIC model selection.") 73 formula = bic 74 elif method == 'CV': 75 print("CV model selection.") 76 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.") 77 else: 78 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.") 79 80 # No pipes. 81 if len(pipes) == 0: 82 raise RelaxError("No data pipes are available for use in model selection.") 83 84 # Initialise. 85 function_type = {} 86 model_loop = {} 87 model_type = {} 88 duplicate_data = {} 89 model_statistics = {} 90 skip_function = {} 91 modsel_pipe_exists = False 92 93 # Cross validation setup. 94 if isinstance(pipes[0], list): 95 # No pipes. 96 if len(pipes[0]) == 0: 97 raise RelaxError("No pipes are available for use in model selection in the array " + repr(pipes[0]) + ".") 98 99 # Loop over the data pipes. 100 for i in range(len(pipes)): 101 for j in range(len(pipes[i])): 102 # Specific functions. 103 model_loop[pipes[i][j]] = get_specific_fn('model_loop', get_type(pipes[i][j])) 104 model_type[pipes[i][j]] = get_specific_fn('model_type', get_type(pipes[i][j])) 105 duplicate_data[pipes[i][j]] = get_specific_fn('duplicate_data', get_type(pipes[i][j])) 106 model_statistics[pipes[i][j]] = get_specific_fn('model_stats', get_type(pipes[i][j])) 107 skip_function[pipes[i][j]] = get_specific_fn('skip_function', get_type(pipes[i][j])) 108 109 # The model loop should be the same for all data pipes! 110 for i in range(len(pipes)): 111 for j in range(len(pipes[i])): 112 if model_loop[pipes[0][j]] != model_loop[pipes[i][j]]: 113 raise RelaxError("The models for each data pipes should be the same.") 114 model_loop = model_loop[pipes[0][0]] 115 116 # The model description. 117 model_desc = get_specific_fn('model_desc', get_type(pipes[0])) 118 119 # Global vs. local models. 120 global_flag = False 121 for i in range(len(pipes)): 122 for j in range(len(pipes[i])): 123 if model_type[pipes[i][j]]() == 'global': 124 global_flag = True 125 126 # All other model selection setup. 127 else: 128 # Loop over the data pipes. 129 for i in range(len(pipes)): 130 # Specific functions. 131 model_loop[pipes[i]] = get_specific_fn('model_loop', get_type(pipes[i])) 132 model_type[pipes[i]] = get_specific_fn('model_type', get_type(pipes[i])) 133 duplicate_data[pipes[i]] = get_specific_fn('duplicate_data', get_type(pipes[i])) 134 model_statistics[pipes[i]] = get_specific_fn('model_stats', get_type(pipes[i])) 135 skip_function[pipes[i]] = get_specific_fn('skip_function', get_type(pipes[i])) 136 137 model_loop = model_loop[pipes[0]] 138 139 # The model description. 140 model_desc = get_specific_fn('model_desc', get_type(pipes[0])) 141 142 # Global vs. local models. 143 global_flag = False 144 for j in range(len(pipes)): 145 if model_type[pipes[j]]() == 'global': 146 global_flag = True 147 148 149 # Loop over the base models. 150 for model_info in model_loop(): 151 # Print out. 152 print("\n") 153 desc = model_desc(model_info) 154 if desc: 155 print(desc) 156 157 # Initial model. 158 best_model = None 159 best_crit = 1e300 160 data = [] 161 162 # Loop over the pipes. 163 for j in range(len(pipes)): 164 # Single-item-out cross validation. 165 if method == 'CV': 166 # Sum of chi-squared values. 167 sum_crit = 0.0 168 169 # Loop over the validation samples and sum the chi-squared values. 170 for k in range(len(pipes[j])): 171 # Alias the data pipe name. 172 pipe = pipes[j][k] 173 174 # Switch to this pipe. 175 switch(pipe) 176 177 # Skip function. 178 if skip_function[pipe](model_info): 179 continue 180 181 # Get the model statistics. 182 k, n, chi2 = model_statistics[pipe](model_info) 183 184 # Missing data sets. 185 if k == None or n == None or chi2 == None: 186 continue 187 188 # Chi2 sum. 189 sum_crit = sum_crit + chi2 190 191 # Cross-validation criterion (average chi-squared value). 192 crit = sum_crit / float(len(pipes[j])) 193 194 # Other model selection methods. 195 else: 196 # Reassign the pipe. 197 pipe = pipes[j] 198 199 # Switch to this pipe. 200 switch(pipe) 201 202 # Skip function. 203 if skip_function[pipe](model_info): 204 continue 205 206 # Get the model statistics. 207 k, n, chi2 = model_statistics[pipe](model_info, global_stats=global_flag) 208 209 # Missing data sets. 210 if k == None or n == None or chi2 == None: 211 continue 212 213 # Calculate the criterion value. 214 crit = formula(chi2, float(k), float(n)) 215 216 # Store the values for a later printout. 217 data.append([pipe, repr(k), repr(n), "%.5f" % chi2, "%.5f" % crit]) 218 219 # Select model. 220 if crit < best_crit: 221 best_model = pipe 222 best_crit = crit 223 224 # Write out the table. 225 write_data(out=sys.stdout, headings=["Data pipe", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion"], data=data) 226 227 # Duplicate the data from the 'best_model' to the model selection data pipe. 228 if best_model != None: 229 # Print out of selected model. 230 print("The model from the data pipe " + repr(best_model) + " has been selected.") 231 232 # Switch to the selected data pipe. 233 switch(best_model) 234 235 # Duplicate. 236 duplicate_data[best_model](best_model, modsel_pipe, model_info, global_stats=global_flag, verbose=False) 237 238 # Model selection pipe now exists. 239 modsel_pipe_exists = True 240 241 # No model selected. 242 else: 243 # Print out of selected model. 244 print("No model has been selected.") 245 246 # Switch to the model selection pipe. 247 if modsel_pipe_exists: 248 switch(modsel_pipe) 249 250 # Bundle the data pipe. 251 if bundle: 252 pipe_control.pipes.bundle(bundle=bundle, pipe=modsel_pipe)
253