1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """Module for selecting the best model."""
24
25
26 import sys
27
28
29 from lib.errors import RelaxError, RelaxPipeError
30 from lib.io import write_data
31 from lib.model_selection import aic, aicc, bic
32 import pipe_control.pipes
33 from pipe_control.pipes import get_type, has_pipe, pipe_names, switch
34 from specific_analyses.setup import get_specific_fn
35
36
37 -def select(method=None, modsel_pipe=None, bundle=None, pipes=None):
38 """Model selection function.
39
40 @keyword method: The model selection method. This can currently be one of:
41 - 'AIC', Akaike's Information Criteria.
42 - 'AICc', Small sample size corrected AIC.
43 - 'BIC', Bayesian or Schwarz Information Criteria.
44 - 'CV', Single-item-out cross-validation.
45 None of the other model selection techniques are currently supported.
46 @type method: str
47 @keyword modsel_pipe: The name of the new data pipe to be created by copying of the selected data pipe.
48 @type modsel_pipe: str
49 @keyword bundle: The optional data pipe bundle to associate the newly created pipe with.
50 @type bundle: str or None
51 @keyword pipes: A list of the data pipes to use in the model selection.
52 @type pipes: list of str
53 """
54
55
56 if has_pipe(modsel_pipe):
57 raise RelaxPipeError(modsel_pipe)
58
59
60 if pipes == None:
61
62 pipes = pipe_names()
63
64
65 if method == 'AIC':
66 print("AIC model selection.")
67 formula = aic
68 elif method == 'AICc':
69 print("AICc model selection.")
70 formula = aicc
71 elif method == 'BIC':
72 print("BIC model selection.")
73 formula = bic
74 elif method == 'CV':
75 print("CV model selection.")
76 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.")
77 else:
78 raise RelaxError("The model selection technique " + repr(method) + " is not currently supported.")
79
80
81 if len(pipes) == 0:
82 raise RelaxError("No data pipes are available for use in model selection.")
83
84
85 function_type = {}
86 model_loop = {}
87 model_type = {}
88 duplicate_data = {}
89 model_statistics = {}
90 skip_function = {}
91 modsel_pipe_exists = False
92
93
94 if isinstance(pipes[0], list):
95
96 if len(pipes[0]) == 0:
97 raise RelaxError("No pipes are available for use in model selection in the array " + repr(pipes[0]) + ".")
98
99
100 for i in range(len(pipes)):
101 for j in range(len(pipes[i])):
102
103 model_loop[pipes[i][j]] = get_specific_fn('model_loop', get_type(pipes[i][j]))
104 model_type[pipes[i][j]] = get_specific_fn('model_type', get_type(pipes[i][j]))
105 duplicate_data[pipes[i][j]] = get_specific_fn('duplicate_data', get_type(pipes[i][j]))
106 model_statistics[pipes[i][j]] = get_specific_fn('model_stats', get_type(pipes[i][j]))
107 skip_function[pipes[i][j]] = get_specific_fn('skip_function', get_type(pipes[i][j]))
108
109
110 for i in range(len(pipes)):
111 for j in range(len(pipes[i])):
112 if model_loop[pipes[0][j]] != model_loop[pipes[i][j]]:
113 raise RelaxError("The models for each data pipes should be the same.")
114 model_loop = model_loop[pipes[0][0]]
115
116
117 model_desc = get_specific_fn('model_desc', get_type(pipes[0]))
118
119
120 global_flag = False
121 for i in range(len(pipes)):
122 for j in range(len(pipes[i])):
123 if model_type[pipes[i][j]]() == 'global':
124 global_flag = True
125
126
127 else:
128
129 for i in range(len(pipes)):
130
131 model_loop[pipes[i]] = get_specific_fn('model_loop', get_type(pipes[i]))
132 model_type[pipes[i]] = get_specific_fn('model_type', get_type(pipes[i]))
133 duplicate_data[pipes[i]] = get_specific_fn('duplicate_data', get_type(pipes[i]))
134 model_statistics[pipes[i]] = get_specific_fn('model_stats', get_type(pipes[i]))
135 skip_function[pipes[i]] = get_specific_fn('skip_function', get_type(pipes[i]))
136
137 model_loop = model_loop[pipes[0]]
138
139
140 model_desc = get_specific_fn('model_desc', get_type(pipes[0]))
141
142
143 global_flag = False
144 for j in range(len(pipes)):
145 if model_type[pipes[j]]() == 'global':
146 global_flag = True
147
148
149
150 for model_info in model_loop():
151
152 print("\n")
153 desc = model_desc(model_info)
154 if desc:
155 print(desc)
156
157
158 best_model = None
159 best_crit = 1e300
160 data = []
161
162
163 for j in range(len(pipes)):
164
165 if method == 'CV':
166
167 sum_crit = 0.0
168
169
170 for k in range(len(pipes[j])):
171
172 pipe = pipes[j][k]
173
174
175 switch(pipe)
176
177
178 if skip_function[pipe](model_info):
179 continue
180
181
182 k, n, chi2 = model_statistics[pipe](model_info)
183
184
185 if k == None or n == None or chi2 == None:
186 continue
187
188
189 sum_crit = sum_crit + chi2
190
191
192 crit = sum_crit / float(len(pipes[j]))
193
194
195 else:
196
197 pipe = pipes[j]
198
199
200 switch(pipe)
201
202
203 if skip_function[pipe](model_info):
204 continue
205
206
207 k, n, chi2 = model_statistics[pipe](model_info, global_stats=global_flag)
208
209
210 if k == None or n == None or chi2 == None:
211 continue
212
213
214 crit = formula(chi2, float(k), float(n))
215
216
217 data.append([pipe, repr(k), repr(n), "%.5f" % chi2, "%.5f" % crit])
218
219
220 if crit < best_crit:
221 best_model = pipe
222 best_crit = crit
223
224
225 write_data(out=sys.stdout, headings=["Data pipe", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion"], data=data)
226
227
228 if best_model != None:
229
230 print("The model from the data pipe " + repr(best_model) + " has been selected.")
231
232
233 switch(best_model)
234
235
236 duplicate_data[best_model](best_model, modsel_pipe, model_info, global_stats=global_flag, verbose=False)
237
238
239 modsel_pipe_exists = True
240
241
242 else:
243
244 print("No model has been selected.")
245
246
247 if modsel_pipe_exists:
248 switch(modsel_pipe)
249
250
251 if bundle:
252 pipe_control.pipes.bundle(bundle=bundle, pipe=modsel_pipe)
253