1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """Module for calculating simple statistics."""
25
26
27 from math import exp
28 from numpy import absolute, array, diag, dot, eye, float64, log, multiply, transpose
29 from numpy.linalg import inv, qr
30
31
32 from math import exp, pi, sqrt
33
34
35 -def bucket(values=None, lower=0.0, upper=200.0, inc=100, verbose=False):
36 """Generate a discrete probability distribution for the given values.
37
38 @keyword values: The list of values to convert.
39 @type values: list of float
40 @keyword lower: The lower bound of the distribution.
41 @type lower: float
42 @keyword upper: The upper bound of the distribution.
43 @type upper: float
44 @keyword inc: The number of discrete increments for the distribution between the lower and upper bounds.
45 @type inc: int
46 @keyword verbose: A flag which if True will enable printouts.
47 @type verbose: bool
48 @return: The discrete probability distribution.
49 @rtype: list of lists of float
50 """
51
52
53 bin_width = (upper - lower)/float(inc)
54
55
56 dist = []
57 for i in range(inc):
58 dist.append([bin_width*i+lower, 0])
59
60
61 for val in values:
62
63 bin = int((val - lower)/bin_width)
64
65
66 if bin < 0 or bin >= inc:
67 if verbose:
68 print("Outside of the limits: '%s'" % val)
69 continue
70
71
72 dist[bin][1] = dist[bin][1] + 1
73
74
75 total_pr = 0.0
76 for i in range(inc):
77 dist[i][1] = dist[i][1] / float(len(values))
78 total_pr = total_pr + dist[i][1]
79
80
81 if verbose:
82 print("Total Pr: %s" % total_pr)
83
84
85 return dist
86
87
89 """Calculate the probability for a Gaussian probability distribution for a given x value.
90
91 @keyword x: The x value to calculate the probability for.
92 @type x: float
93 @keyword mu: The mean of the distribution.
94 @type mu: float
95 @keyword sigma: The standard deviation of the distribution.
96 @type sigma: float
97 @return: The probability corresponding to x.
98 @rtype: float
99 """
100
101
102 return exp(-(x-mu)**2 / (2.0*sigma**2)) / (sigma * sqrt(2.0 * pi))
103
104
106 """Calculate the geometric mean for the given values.
107
108 @keyword values: The list of values to calculate the geometric mean of.
109 @type values: list of float
110 @return: The geometric mean.
111 @rtype: float
112 """
113
114
115 n = len(values)
116 values = array(values, float64)
117
118
119 log_vals = log(values)
120
121
122 Xsum = log_vals.sum()
123
124
125 Xav = Xsum / float(n)
126
127
128 return exp(Xav)
129
130
132 """Calculate the geometric standard deviation for the given values.
133
134 @keyword values: The list of values to calculate the geometric mean of.
135 @type values: list of float
136 @keyword mean: The pre-calculated geometric mean. If not supplied, the value will be calculated.
137 @type mean: float
138 @return: The geometric mean.
139 @rtype: float
140 """
141
142
143 n = len(values)
144 values = array(values, float64)
145
146
147 if mean == None:
148 mean = geometric_mean(values=values)
149
150
151 factor = (log(values / mean))**2
152
153
154 factor2 = sqrt(factor.sum() / n)
155
156
157 return exp(factor2)
158
159
160 -def std(values=None, skip=None, dof=1):
161 """Calculate the standard deviation of the given values, skipping values if asked.
162
163 @keyword values: The list of values to calculate the standard deviation of.
164 @type values: list of float
165 @keyword skip: An optional list of booleans specifying if a value should be skipped. The length of this list must match the values. An element of True will cause the corresponding value to not be included in the calculation.
166 @type skip: list of bool or None.
167 @keyword dof: The degrees of freedom, whereby the standard deviation is multipled by 1/(N - dof).
168 @type dof: int
169 @return: The standard deviation.
170 @rtype: float
171 """
172
173
174 n = 0
175 for i in range(len(values)):
176
177 if skip != None and not skip[i]:
178 continue
179
180
181 n = n + 1
182
183
184 Xsum = 0.0
185 for i in range(len(values)):
186
187 if skip != None and not skip[i]:
188 continue
189
190
191 Xsum = Xsum + values[i]
192
193
194 if n == 0:
195 Xav = 0.0
196 else:
197 Xav = Xsum / float(n)
198
199
200 sd = 0.0
201 for i in range(len(values)):
202
203 if skip != None and not skip[i]:
204 continue
205
206
207 sd = sd + (values[i] - Xav)**2
208
209
210 if n <= 1:
211 sd = 0.0
212 else:
213 sd = sqrt(sd / (float(n) - float(dof)))
214
215
216 return sd
217
218
220 """This is the implementation of the multifit covariance.
221
222 This is inspired from GNU Scientific Library (GSL).
223
224 This function uses the Jacobian matrix J to compute the covariance matrix of the best-fit parameters, covar.
225
226 The parameter 'epsrel' is used to remove linear-dependent columns when J is rank deficient.
227
228 The weighting matrix 'W', is a square symmetric matrix. For independent measurements, this is a diagonal matrix. Larger values indicate greater significance. It is formed by multiplying and Identity matrix with the supplied weights vector::
229
230 W = I. w
231
232 The weights should normally be supplied as a vector: 1 / errors^2.
233
234 The covariance matrix is given by::
235
236 covar = (J^T.W.J)^{-1} ,
237
238 and is computed by QR decomposition of J with column-pivoting. Any columns of R which satisfy::
239
240 |R_{kk}| <= epsrel |R_{11}| ,
241
242 are considered linearly-dependent and are excluded from the covariance matrix (the corresponding rows and columns of the covariance matrix are set to zero). If the minimisation uses the weighted least-squares function::
243
244 f_i = (Y(x, t_i) - y_i) / sigma_i ,
245
246 then the covariance matrix above gives the statistical error on the best-fit parameters resulting from the Gaussian errors 'sigma_i' on the underlying data 'y_i'.
247
248 This can be verified from the relation 'd_f = J d_c' and the fact that the fluctuations in 'f' from the data 'y_i' are normalised by 'sigma_i' and so satisfy::
249
250 <d_f d_f^T> = I. ,
251
252 For an unweighted least-squares function f_i = (Y(x, t_i) - y_i) the covariance matrix above should be multiplied by the variance of the residuals about the best-fit::
253
254 sigma^2 = sum ( (y_i - Y(x, t_i))^2 / (n-p) ) ,
255
256 to give the variance-covariance matrix sigma^2 C. This estimates the statistical error on the best-fit parameters from the scatter of the underlying data.
257
258 Links
259 =====
260
261 More information ca be found here:
262
263 - U{GSL - GNU Scientific Library<http://www.gnu.org/software/gsl/>}
264 - U{Manual: Overview<http://www.gnu.org/software/gsl/manual/gsl-ref_37.html#SEC510>}
265 - U{Manual: Computing the covariance matrix of best fit parameters<http://www.gnu.org/software/gsl/manual/gsl-ref_38.html#SEC528>}
266 - U{Other reference<http://www.orbitals.com/self/least/least.htm>}
267
268 @param J: The Jacobian matrix.
269 @type J: numpy array
270 @param epsrel: Any columns of R which satisfy |R_{kk}| <= epsrel |R_{11}| are considered linearly-dependent and are excluded from the covariance matrix, where the corresponding rows and columns of the covariance matrix are set to zero.
271 @type epsrel: float
272 @keyword weigths: The weigths which to scale with. Normally submitted as the 1 over standard deviation of the measured intensity values per time point in power 2. weigths = 1 / sd_i^2.
273 @type weigths: numpy array
274 @return: The co-variance matrix
275 @rtype: square numpy array
276 """
277
278
279
280
281
282 eye_mat = eye(weights.shape[0])
283
284
285 W = multiply(eye_mat, weights)
286
287
288
289
290
291 Jt = transpose(J)
292 Jt_W = dot(Jt, W)
293 Jt_W_J = dot(Jt_W, J)
294
295
296 Q, R = qr(Jt_W_J)
297
298
299 abs_epsrel_R11 = absolute( multiply(epsrel, R[0, 0]) )
300
301
302
303
304 epsrel_check = absolute(R) <= abs_epsrel_R11
305
306
307 Qxx = dot(inv(R), transpose(Q) )
308
309
310
311
312
313
314
315 Qxx[epsrel_check] = 0.0
316
317
318
319 diag_epsrel_check = diag(epsrel_check)
320
321
322 if any(diag_epsrel_check):
323 for i in range(diag_epsrel_check.shape[0]):
324 abs_Rkk = absolute(R[i, i])
325 if abs_Rkk <= abs_epsrel_R11:
326 warn(RelaxWarning("Co-Variance element k,k=%i was found to meet |R_{kk}| <= epsrel |R_{11}|, meaning %1.1f <= %1.3f * %1.1f , and is therefore determined to be linearly-dependent and are excluded from the covariance matrix by setting the value to 0.0." % (i+1, abs_Rkk, epsrel, abs_epsrel_R11/epsrel) ))
327
328
329 return Qxx
330