1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 from copy import deepcopy
24 from math import log
25
26
29 """Class containing functions specific to model selection."""
30
31 self.relax = relax
32
33
34 - def select(self, method=None, modsel_run=None, runs=None):
35 """Model selection function."""
36
37
38 if not modsel_run in self.relax.data.run_names:
39 raise RelaxNoRunError, modsel_run
40
41
42 if runs == None:
43
44 self.runs = deepcopy(self.relax.data.run_names)
45
46
47 if modsel_run in self.runs:
48 self.runs.remove(modsel_run)
49 else:
50 self.runs = runs
51
52
53 if method == 'AIC':
54 print "AIC model selection."
55 self.formula = self.aic
56 elif method == 'AICc':
57 print "AICc model selection."
58 self.formula = self.aicc
59 elif method == 'BIC':
60 print "BIC model selection."
61 self.formula = self.bic
62 elif method == 'CV':
63 print "CV model selection."
64 raise RelaxError, "The model selection technique " + `method` + " is not currently supported."
65 else:
66 raise RelaxError, "The model selection technique " + `method` + " is not currently supported."
67
68
69 if len(self.runs) == 0:
70 raise RelaxError, "No runs are availible for use in model selection."
71
72
73 if type(self.runs[0]) == list:
74 self.first_run = self.runs[0][0]
75 else:
76 self.first_run = self.runs[0]
77
78
79 self.function_type = self.relax.data.run_types[self.relax.data.run_names.index(self.first_run)]
80
81
82 self.duplicate_data = self.relax.specific_setup.setup('duplicate_data', self.function_type)
83 self.count_num_instances = self.relax.specific_setup.setup('num_instances', self.function_type)
84 self.model_statistics = self.relax.specific_setup.setup('model_stats', self.function_type)
85 self.skip_function = self.relax.specific_setup.setup('skip_function', self.function_type)
86
87
88
89 self.min_instances = 1e99
90 self.num_instances = []
91 for i in xrange(len(self.runs)):
92
93 if type(self.runs[i]) == list:
94 self.num_instances.append([])
95
96
97 for j in xrange(len(self.runs[i])):
98
99 num = self.count_num_instances(self.runs[i][j])
100 self.num_instances[i].append(num)
101
102
103 if num < self.min_instances:
104 self.min_instances = num
105
106 else:
107
108 num = self.count_num_instances(self.runs[i])
109 self.num_instances.append(num)
110
111
112 if num < self.min_instances:
113 self.min_instances = num
114
115
116 for run in self.runs:
117
118 if type(run) == list:
119
120 if len(run) == 0:
121 raise RelaxError, "No runs are availible for use in model selection in the array " + `run` + "."
122
123
124 for run2 in run:
125
126 self.tests(run2)
127
128
129 else:
130
131 self.tests(run)
132
133
134
135 for i in xrange(self.min_instances):
136
137 print "\nInstance " + `i` + ".\n"
138 print "%-20s %-20s %-20s %-20s %-20s" % ("Run", "Num_params_(k)", "Num_data_sets_(n)", "Chi2", "Criterion")
139
140
141 best_model = None
142 best_crit = float('inf')
143
144
145 for j in xrange(len(self.runs)):
146
147 if method == 'CV':
148
149 run = self.runs[j][0]
150
151
152 sum_crit = 0.0
153
154
155 for k in xrange(len(self.runs[j])):
156
157 if self.skip_function(run=self.runs[j][k], instance=i):
158 continue
159
160
161 k, n, chi2 = self.model_statistics(run=self.runs[j][k], instance=i, min_instances=self.min_instances, num_instances=self.num_instances[j][k])
162
163
164 if k == None or n == None or chi2 == None:
165 continue
166
167
168 sum_crit = sum_crit + chi2
169
170
171 crit = sum_crit / float(len(self.runs[j]))
172
173
174 else:
175
176 run = self.runs[j]
177
178
179 if self.skip_function(run=run, instance=i, min_instances=self.min_instances, num_instances=self.num_instances[j]):
180 continue
181
182
183 k, n, chi2 = self.model_statistics(run=run, instance=i, min_instances=self.min_instances, num_instances=self.num_instances[j])
184
185
186 if k == None or n == None or chi2 == None:
187 continue
188
189
190 crit = self.formula(chi2, float(k), float(n))
191
192
193 print "%-20s %-20i %-20i %-20.5f %-20.5f" % (run, k, n, chi2, crit)
194
195
196 if crit < best_crit:
197 best_model = run
198 best_crit = crit
199
200
201 print "\nThe model from the run " + `best_model` + " has been selected."
202
203
204 if best_model != None:
205 self.duplicate_data(new_run=modsel_run, old_run=best_model, instance=i)
206
207
208 - def aic(self, chi2, k, n):
209 """Akaike's Information Criteria (AIC).
210
211 The formula is:
212
213 AIC = chi2 + 2k
214
215 where:
216 chi2 is the minimised chi-squared value.
217 k is the number of parameters in the model.
218 """
219
220 return chi2 + 2.0*k
221
222
223 - def aicc(self, chi2, k, n):
224 """Small sample size corrected AIC.
225
226 The formula is:
227
228 2k(k + 1)
229 AICc = chi2 + 2k + ---------
230 n - k - 1
231
232 where:
233 chi2 is the minimised chi-squared value.
234 k is the number of parameters in the model.
235 n is the dimension of the relaxation data set.
236 """
237
238 return chi2 + 2.0*k + 2.0*k*(k + 1.0) / (n - k - 1.0)
239
240
241 - def bic(self, chi2, k, n):
242 """Bayesian or Schwarz Information Criteria.
243
244 The formula is:
245
246 BIC = chi2 + k ln n
247
248 where:
249 chi2 - is the minimised chi-squared value.
250 k - is the number of parameters in the model.
251 n is the dimension of the relaxation data set.
252 """
253
254 return chi2 + k * log(n)
255
256
288