1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 from copy import deepcopy
24 from math import log
25
26
29 """Class containing functions specific to model selection."""
30
31 self.relax = relax
32
33
34 - def select(self, method=None, modsel_run=None, runs=None):
35 """Model selection function."""
36
37
38 if not modsel_run in self.relax.data.run_names:
39 raise RelaxNoRunError, modsel_run
40
41
42 if runs == None:
43
44 self.runs = deepcopy(self.relax.data.run_names)
45
46
47 if modsel_run in self.runs:
48 self.runs.remove(modsel_run)
49 else:
50 self.runs = runs
51
52
53 if method == 'AIC':
54 print "AIC model selection."
55 self.formula = self.aic
56 elif method == 'AICc':
57 print "AICc model selection."
58 self.formula = self.aicc
59 elif method == 'BIC':
60 print "BIC model selection."
61 self.formula = self.bic
62 elif method == 'CV':
63 print "CV model selection."
64 raise RelaxError, "The model selection technique " + `method` + " is not currently supported."
65 else:
66 raise RelaxError, "The model selection technique " + `method` + " is not currently supported."
67
68
69 if len(self.runs) == 0:
70 raise RelaxError, "No runs are availible for use in model selection."
71
72
73 self.first_run = None
74 self.function_type = {}
75 self.duplicate_data = {}
76 self.count_num_instances = {}
77 self.model_statistics = {}
78 self.skip_function = {}
79
80
81 if type(self.runs[0]) == list:
82
83 if len(run) == 0:
84 raise RelaxError, "No runs are availible for use in model selection in the array " + `run` + "."
85
86
87 for i in xrange(len(self.runs)):
88 for j in xrange(len(self.runs[i])):
89
90 run = self.runs[i][j]
91
92
93 self.function_type[run] = self.relax.data.run_types[self.relax.data.run_names.index(run)]
94
95
96 if not self.first_run and self.function_type[run] != 'hybrid':
97 self.first_run = run
98
99
100 self.duplicate_data[run] = self.relax.specific_setup.setup('duplicate_data', self.function_type[run])
101 self.count_num_instances[run] = self.relax.specific_setup.setup('num_instances', self.function_type[run])
102 self.model_statistics[run] = self.relax.specific_setup.setup('model_stats', self.function_type[run])
103 self.skip_function[run] = self.relax.specific_setup.setup('skip_function', self.function_type[run])
104
105
106 self.tests(run)
107
108
109 else:
110
111 for i in xrange(len(self.runs)):
112
113 run = self.runs[i]
114
115
116 self.function_type[run] = self.relax.data.run_types[self.relax.data.run_names.index(run)]
117
118
119 if not self.first_run and self.function_type[run] != 'hybrid':
120 self.first_run = run
121
122
123 self.duplicate_data[run] = self.relax.specific_setup.setup('duplicate_data', self.function_type[run])
124 self.count_num_instances[run] = self.relax.specific_setup.setup('num_instances', self.function_type[run])
125 self.model_statistics[run] = self.relax.specific_setup.setup('model_stats', self.function_type[run])
126 self.skip_function[run] = self.relax.specific_setup.setup('skip_function', self.function_type[run])
127
128
129 self.tests(run)
130
131
132
133
134 self.min_instances = 1e99
135 self.num_instances = []
136 for i in xrange(len(self.runs)):
137
138 if type(self.runs[i]) == list:
139 self.num_instances.append([])
140
141
142 for j in xrange(len(self.runs[i])):
143
144 num = self.count_num_instances[self.runs[i][j]](self.runs[i][j])
145 self.num_instances[i].append(num)
146
147
148 if num < self.min_instances:
149 self.min_instances = num
150
151
152 else:
153
154 num = self.count_num_instances[self.runs[i]](self.runs[i])
155 self.num_instances.append(num)
156
157
158 if num < self.min_instances:
159 self.min_instances = num
160
161
162
163 for i in xrange(self.min_instances):
164
165 print "\nInstance " + `i` + ".\n"
166 print "%-20s %-20s %-20s %-20s %-20s" % ("Run", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion")
167
168
169 best_model = None
170 best_crit = 1e300
171
172
173 for j in xrange(len(self.runs)):
174
175 if method == 'CV':
176
177 sum_crit = 0.0
178
179
180 for k in xrange(len(self.runs[j])):
181
182 run = self.runs[j][k]
183
184
185 if self.skip_function[run](run=run, instance=i):
186 continue
187
188
189 k, n, chi2 = self.model_statistics[run](run=run, instance=i, min_instances=self.min_instances)
190
191
192 if k == None or n == None or chi2 == None:
193 continue
194
195
196 sum_crit = sum_crit + chi2
197
198
199 crit = sum_crit / float(len(self.runs[j]))
200
201
202 else:
203
204 run = self.runs[j]
205
206
207 if self.skip_function[run](run=run, instance=i, min_instances=self.min_instances, num_instances=self.num_instances[j]):
208 continue
209
210
211 if self.num_instances[j] > self.min_instances or self.num_instances[j] == 1:
212 global_stats = 1
213 else:
214 global_stats = 0
215
216
217 k, n, chi2 = self.model_statistics[run](run=run, instance=i, global_stats=global_stats)
218
219
220 if k == None or n == None or chi2 == None:
221 continue
222
223
224 crit = self.formula(chi2, float(k), float(n))
225
226
227 print "%-20s %-20i %-20i %-20.5f %-20.5f" % (run, k, n, chi2, crit)
228
229
230 if crit < best_crit:
231 best_model = run
232 best_crit = crit
233
234
235 print "\nThe model from the run " + `best_model` + " has been selected."
236
237
238 if best_model != None:
239 self.duplicate_data[best_model](new_run=modsel_run, old_run=best_model, instance=i, global_stats=global_stats)
240
241
242 - def aic(self, chi2, k, n):
243 """Akaike's Information Criteria (AIC).
244
245 The formula is:
246
247 AIC = chi2 + 2k
248
249 where:
250 chi2 is the minimised chi-squared value.
251 k is the number of parameters in the model.
252 """
253
254 return chi2 + 2.0*k
255
256
257 - def aicc(self, chi2, k, n):
258 """Small sample size corrected AIC.
259
260 The formula is:
261
262 2k(k + 1)
263 AICc = chi2 + 2k + ---------
264 n - k - 1
265
266 where:
267 chi2 is the minimised chi-squared value.
268 k is the number of parameters in the model.
269 n is the dimension of the relaxation data set.
270 """
271
272 return chi2 + 2.0*k + 2.0*k*(k + 1.0) / (n - k - 1.0)
273
274
275 - def bic(self, chi2, k, n):
276 """Bayesian or Schwarz Information Criteria.
277
278 The formula is:
279
280 BIC = chi2 + k ln n
281
282 where:
283 chi2 - is the minimised chi-squared value.
284 k - is the number of parameters in the model.
285 n is the dimension of the relaxation data set.
286 """
287
288 return chi2 + k * log(n)
289
290
327