1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """Module for selecting the best model."""
24
25
26 import sys
27
28
29 from lib.errors import RelaxError, RelaxPipeError
30 from lib.io import write_data
31 from lib.model_selection import aic, aicc, bic
32 import pipe_control.pipes
33 from pipe_control.pipes import has_pipe, pipe_names, switch
34 from specific_analyses.api import return_api
35
36
37 -def select(method=None, modsel_pipe=None, bundle=None, pipes=None):
38 """Model selection function.
39
40 @keyword method: The model selection method. This can currently be one of:
41 - 'AIC', Akaike's Information Criteria.
42 - 'AICc', Small sample size corrected AIC.
43 - 'BIC', Bayesian or Schwarz Information Criteria.
44 - 'CV', Single-item-out cross-validation.
45 None of the other model selection techniques are currently supported.
46 @type method: str
47 @keyword modsel_pipe: The name of the new data pipe to be created by copying of the selected data pipe.
48 @type modsel_pipe: str
49 @keyword bundle: The optional data pipe bundle to associate the newly created pipe with.
50 @type bundle: str or None
51 @keyword pipes: A list of the data pipes to use in the model selection.
52 @type pipes: list of str
53 """
54
55
56 if has_pipe(modsel_pipe):
57 raise RelaxPipeError(modsel_pipe)
58
59
60 if pipes == None:
61
62 pipes = pipe_names()
63
64
65 if method == 'AIC':
66 print("AIC model selection.")
67 formula = aic
68 elif method == 'AICc':
69 print("AICc model selection.")
70 formula = aicc
71 elif method == 'BIC':
72 print("BIC model selection.")
73 formula = bic
74 elif method == 'CV':
75 print("CV model selection.")
76 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.")
77 else:
78 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.")
79
80
81 if len(pipes) == 0:
82 raise RelaxError("No data pipes are available for use in model selection.")
83
84
85 function_type = {}
86 model_loop = {}
87 model_type = {}
88 duplicate_data = {}
89 model_statistics = {}
90 skip_function = {}
91 modsel_pipe_exists = False
92
93
94 if isinstance(pipes[0], list):
95
96 if len(pipes[0]) == 0:
97 raise RelaxError("No pipes are available for use in model selection in the array " + repr(pipes[0]) + ".")
98
99
100 for i in range(len(pipes)):
101 for j in range(len(pipes[i])):
102
103 api = return_api(pipe_name=pipes[i][j])
104
105
106 model_loop[pipes[i][j]] = api.model_loop
107 model_type[pipes[i][j]] = api.model_type
108 duplicate_data[pipes[i][j]] = api.duplicate_data
109 model_statistics[pipes[i][j]] = api.model_statistics
110 skip_function[pipes[i][j]] = api.skip_function
111
112
113 for i in range(len(pipes)):
114 for j in range(len(pipes[i])):
115 if model_loop[pipes[0][j]] != model_loop[pipes[i][j]]:
116 raise RelaxError("The models for each data pipes should be the same.")
117
118
119 api = return_api(pipe_name=pipes[0][0])
120 model_loop = api.model_loop
121 model_desc = api.model_desc
122
123
124 global_flag = False
125 for i in range(len(pipes)):
126 for j in range(len(pipes[i])):
127 if model_type[pipes[i][j]]() == 'global':
128 global_flag = True
129
130
131 else:
132
133 for i in range(len(pipes)):
134
135 api = return_api()
136
137
138 model_loop[pipes[i]] = api.model_loop
139 model_type[pipes[i]] = api.model_type
140 duplicate_data[pipes[i]] = api.duplicate_data
141 model_statistics[pipes[i]] = api.model_statistics
142 skip_function[pipes[i]] = api.skip_function
143
144
145 api = return_api(pipe_name=pipes[0])
146 model_loop = api.model_loop
147 model_desc = api.model_desc
148
149
150 global_flag = False
151 for j in range(len(pipes)):
152 if model_type[pipes[j]]() == 'global':
153 global_flag = True
154
155
156
157 for model_info in model_loop():
158
159 print("\n")
160 desc = model_desc(model_info)
161 if desc:
162 print(desc)
163
164
165 best_model = None
166 best_crit = 1e300
167 data = []
168
169
170 for j in range(len(pipes)):
171
172 if method == 'CV':
173
174 sum_crit = 0.0
175
176
177 for k in range(len(pipes[j])):
178
179 pipe = pipes[j][k]
180
181
182 switch(pipe)
183
184
185 if skip_function[pipe](model_info):
186 continue
187
188
189 k, n, chi2 = model_statistics[pipe](model_info)
190
191
192 if k == None or n == None or chi2 == None:
193 continue
194
195
196 sum_crit = sum_crit + chi2
197
198
199 crit = sum_crit / float(len(pipes[j]))
200
201
202 else:
203
204 pipe = pipes[j]
205
206
207 switch(pipe)
208
209
210 if skip_function[pipe](model_info):
211 continue
212
213
214 k, n, chi2 = model_statistics[pipe](model_info, global_stats=global_flag)
215
216
217 if k == None or n == None or chi2 == None:
218 continue
219
220
221 crit = formula(chi2, float(k), float(n))
222
223
224 data.append([pipe, repr(k), repr(n), "%.5f" % chi2, "%.5f" % crit])
225
226
227 if crit < best_crit:
228 best_model = pipe
229 best_crit = crit
230
231
232 write_data(out=sys.stdout, headings=["Data pipe", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion"], data=data)
233
234
235 if best_model != None:
236
237 print("The model from the data pipe " + repr(best_model) + " has been selected.")
238
239
240 switch(best_model)
241
242
243 duplicate_data[best_model](best_model, modsel_pipe, model_info, global_stats=global_flag, verbose=False)
244
245
246 modsel_pipe_exists = True
247
248
249 else:
250
251 print("No model has been selected.")
252
253
254 if modsel_pipe_exists:
255 switch(modsel_pipe)
256
257
258 if bundle:
259 pipe_control.pipes.bundle(bundle=bundle, pipe=modsel_pipe)
260