1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
 22   
 23  """Module for selecting the best model.""" 
 24   
 25   
 26  import sys 
 27   
 28   
 29  from lib.errors import RelaxError, RelaxPipeError 
 30  from lib.io import write_data 
 31  from lib.model_selection import aic, aicc, bic 
 32  import pipe_control.pipes 
 33  from pipe_control.pipes import get_type, has_pipe, pipe_names, switch 
 34  from specific_analyses.setup import get_specific_fn 
 35   
 36   
 37 -def select(method=None, modsel_pipe=None, bundle=None, pipes=None): 
  38      """Model selection function. 
 39   
 40      @keyword method:        The model selection method.  This can currently be one of: 
 41                                  - 'AIC', Akaike's Information Criteria. 
 42                                  - 'AICc', Small sample size corrected AIC. 
 43                                  - 'BIC', Bayesian or Schwarz Information Criteria. 
 44                                  - 'CV', Single-item-out cross-validation. 
 45                              None of the other model selection techniques are currently supported. 
 46      @type method:           str 
 47      @keyword modsel_pipe:   The name of the new data pipe to be created by copying of the selected data pipe. 
 48      @type modsel_pipe:      str 
 49      @keyword bundle:        The optional data pipe bundle to associate the newly created pipe with. 
 50      @type bundle:           str or None 
 51      @keyword pipes:         A list of the data pipes to use in the model selection. 
 52      @type pipes:            list of str 
 53      """ 
 54   
 55       
 56      if has_pipe(modsel_pipe): 
 57          raise RelaxPipeError(modsel_pipe) 
 58   
 59       
 60      if pipes == None: 
 61           
 62          pipes = pipe_names() 
 63   
 64       
 65      if method == 'AIC': 
 66          print("AIC model selection.") 
 67          formula = aic 
 68      elif method == 'AICc': 
 69          print("AICc model selection.") 
 70          formula = aicc 
 71      elif method == 'BIC': 
 72          print("BIC model selection.") 
 73          formula = bic 
 74      elif method == 'CV': 
 75          print("CV model selection.") 
 76          raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.") 
 77      else: 
 78          raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.") 
 79   
 80       
 81      if len(pipes) == 0: 
 82          raise RelaxError("No data pipes are available for use in model selection.") 
 83   
 84       
 85      function_type = {} 
 86      model_loop = {} 
 87      model_type = {} 
 88      duplicate_data = {} 
 89      model_statistics = {} 
 90      skip_function = {} 
 91      modsel_pipe_exists = False 
 92   
 93       
 94      if isinstance(pipes[0], list): 
 95           
 96          if len(pipes[0]) == 0: 
 97              raise RelaxError("No pipes are available for use in model selection in the array " + repr(pipes[0]) + ".") 
 98   
 99           
100          for i in range(len(pipes)): 
101              for j in range(len(pipes[i])): 
102                   
103                  model_loop[pipes[i][j]] = get_specific_fn('model_loop', get_type(pipes[i][j])) 
104                  model_type[pipes[i][j]] = get_specific_fn('model_type', get_type(pipes[i][j])) 
105                  duplicate_data[pipes[i][j]] = get_specific_fn('duplicate_data', get_type(pipes[i][j])) 
106                  model_statistics[pipes[i][j]] = get_specific_fn('model_stats', get_type(pipes[i][j])) 
107                  skip_function[pipes[i][j]] = get_specific_fn('skip_function', get_type(pipes[i][j])) 
108   
109           
110          for i in range(len(pipes)): 
111              for j in range(len(pipes[i])): 
112                  if model_loop[pipes[0][j]] != model_loop[pipes[i][j]]: 
113                      raise RelaxError("The models for each data pipes should be the same.") 
114          model_loop = model_loop[pipes[0][0]] 
115   
116           
117          model_desc = get_specific_fn('model_desc', get_type(pipes[0])) 
118   
119           
120          global_flag = False 
121          for i in range(len(pipes)): 
122              for j in range(len(pipes[i])): 
123                  if model_type[pipes[i][j]]() == 'global': 
124                      global_flag = True 
125   
126       
127      else: 
128           
129          for i in range(len(pipes)): 
130               
131              model_loop[pipes[i]] = get_specific_fn('model_loop', get_type(pipes[i])) 
132              model_type[pipes[i]] = get_specific_fn('model_type', get_type(pipes[i])) 
133              duplicate_data[pipes[i]] = get_specific_fn('duplicate_data', get_type(pipes[i])) 
134              model_statistics[pipes[i]] = get_specific_fn('model_stats', get_type(pipes[i])) 
135              skip_function[pipes[i]] = get_specific_fn('skip_function', get_type(pipes[i])) 
136   
137          model_loop = model_loop[pipes[0]] 
138   
139           
140          model_desc = get_specific_fn('model_desc', get_type(pipes[0])) 
141   
142           
143          global_flag = False 
144          for j in range(len(pipes)): 
145              if model_type[pipes[j]]() == 'global': 
146                  global_flag = True 
147   
148   
149       
150      for model_info in model_loop(): 
151           
152          print("\n") 
153          desc = model_desc(model_info) 
154          if desc: 
155              print(desc) 
156   
157           
158          best_model = None 
159          best_crit = 1e300 
160          data = [] 
161   
162           
163          for j in range(len(pipes)): 
164               
165              if method == 'CV': 
166                   
167                  sum_crit = 0.0 
168   
169                   
170                  for k in range(len(pipes[j])): 
171                       
172                      pipe = pipes[j][k] 
173   
174                       
175                      switch(pipe) 
176   
177                       
178                      if skip_function[pipe](model_info): 
179                          continue 
180   
181                       
182                      k, n, chi2 = model_statistics[pipe](model_info) 
183   
184                       
185                      if k == None or n == None or chi2 == None: 
186                          continue 
187   
188                       
189                      sum_crit = sum_crit + chi2 
190   
191                   
192                  crit = sum_crit / float(len(pipes[j])) 
193   
194               
195              else: 
196                   
197                  pipe = pipes[j] 
198   
199                   
200                  switch(pipe) 
201   
202                   
203                  if skip_function[pipe](model_info): 
204                      continue 
205   
206                   
207                  k, n, chi2 = model_statistics[pipe](model_info, global_stats=global_flag) 
208   
209                   
210                  if k == None or n == None or chi2 == None: 
211                      continue 
212   
213                   
214                  crit = formula(chi2, float(k), float(n)) 
215   
216                   
217                  data.append([pipe, repr(k), repr(n), "%.5f" % chi2, "%.5f" % crit]) 
218   
219               
220              if crit < best_crit: 
221                  best_model = pipe 
222                  best_crit = crit 
223   
224           
225          write_data(out=sys.stdout, headings=["Data pipe", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion"], data=data) 
226   
227           
228          if best_model != None: 
229               
230              print("The model from the data pipe " + repr(best_model) + " has been selected.") 
231   
232               
233              switch(best_model) 
234   
235               
236              duplicate_data[best_model](best_model, modsel_pipe, model_info, global_stats=global_flag, verbose=False) 
237   
238               
239              modsel_pipe_exists = True 
240   
241           
242          else: 
243               
244              print("No model has been selected.") 
245   
246       
247      if modsel_pipe_exists: 
248          switch(modsel_pipe) 
249   
250       
251      if bundle: 
252          pipe_control.pipes.bundle(bundle=bundle, pipe=modsel_pipe) 
 253