lib.statistics

1 ############################################################################### 2 # # 3 # Copyright (C) 2013 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax (http://www.nmr-relax.com). # 6 # # 7 # This program is free software: you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation, either version 3 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # This program is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 19 # # 20 ############################################################################### 21 22 # Module docstring. 23 """Module for calculating simple statistics.""" 24 25 # Python module imports. 26 from math import exp, pi, sqrt 27 28

def bucket(values=None, lower=0.0, upper=200.0, inc=100, verbose=False):
    """Generate a discrete probability distribution for the given values.

    The range between the lower and upper bounds is split into inc equally sized bins.  Each value is counted in the bin it falls into, and the counts are then divided by the total number of supplied values.  Values outside of the bounds are not counted in any bin but still contribute to the total, so the frequencies sum to less than one when such values are present.

    @keyword values:    The list of values to convert.  An empty list results in a distribution of all zero frequencies.
    @type values:       list of float
    @keyword lower:     The lower bound of the distribution.
    @type lower:        float
    @keyword upper:     The upper bound of the distribution.
    @type upper:        float
    @keyword inc:       The number of discrete increments for the distribution between the lower and upper bounds.
    @type inc:          int
    @keyword verbose:   A flag which if True will enable printouts.
    @type verbose:      bool
    @return:            The discrete probability distribution, as a list of [bin lower edge, frequency] pairs.
    @rtype:             list of lists of float
    """

    # The bin width.
    bin_width = (upper - lower) / float(inc)

    # Init the dist object as [bin lower edge, count] pairs.
    dist = [[bin_width*i + lower, 0] for i in range(inc)]

    # Loop over the values.
    for val in values:
        # The position of the value in units of the bin width.
        position = (val - lower) / bin_width

        # Outside of the limits.  The negative check must come before int(), as int() truncates
        # towards zero and would otherwise place values just below the lower bound into bin 0.
        if position < 0 or int(position) >= inc:
            if verbose:
                print("Outside of the limits: '%s'" % val)
            continue

        # Increment the count.
        dist[int(position)][1] += 1

    # Convert the counts to frequencies (all zero if no values were supplied, avoiding a division by zero).
    num = len(values)
    total_pr = 0.0
    for entry in dist:
        entry[1] = entry[1] / float(num) if num else 0.0
        total_pr += entry[1]

    # Printout.
    if verbose:
        print("Total Pr: %s" % total_pr)

    # Return the dist.
    return dist

80 81

def gaussian(x=None, mu=0.0, sigma=1.0):
    """Evaluate the Gaussian (normal) probability density at the point x.

    The value returned is exp(-(x - mu)^2 / (2 sigma^2)) / (sigma sqrt(2 pi)).

    @keyword x:     The position at which the distribution is evaluated.
    @type x:        float
    @keyword mu:    The centre (mean) of the distribution.
    @type mu:       float
    @keyword sigma: The width (standard deviation) of the distribution.
    @type sigma:    float
    @return:        The value of the distribution at x.
    @rtype:         float
    """

    # The argument of the exponential.
    exponent = -(x - mu)**2 / (2.0 * sigma**2)

    # The normalisation factor.
    norm = sigma * sqrt(2.0 * pi)

    # The normalised Gaussian value.
    return exp(exponent) / norm

97 98

def std(values=None, skip=None, dof=1):
    """Calculate the standard deviation of the given values, skipping values if asked.

    The result is sqrt(sum((x - mean)^2) / (N - dof)), where N is the number of values included in the calculation.  Zero is returned when fewer than two values are included or when N - dof is not positive.

    @keyword values:    The list of values to calculate the standard deviation of.
    @type values:       list of float
    @keyword skip:      An optional list of flags, one per value.  The length of this list must match the values.  As implemented, a value is included only when its flag is True and is left out when its flag is False.  NOTE(review): the argument name and the earlier documentation suggest the opposite meaning (True = skip) - confirm against the callers before changing the behaviour.
    @type skip:         list of bool or None
    @keyword dof:       The degrees of freedom, whereby the sum of the squared deviations is divided by (N - dof).
    @type dof:          int
    @return:            The standard deviation.
    @rtype:             float
    """

    # Gather the included values once.  Indexing (rather than zip) keeps the IndexError for a skip list which is too short.
    selected = [value for i, value in enumerate(values) if skip is None or skip[i]]

    # The total number of points.
    n = len(selected)

    # Too few points, or a non-positive divisor (which would otherwise cause a division by zero or the square root of a negative number).
    if n <= 1 or n - dof <= 0:
        return 0.0

    # Calculate the mean value for all points.
    Xav = sum(selected) / float(n)

    # Calculate the sum part of the standard deviation.
    sq_sum = sum((value - Xav)**2 for value in selected)

    # Calculate and return the standard deviation.
    return sqrt(sq_sum / (float(n) - float(dof)))

156

Source Code for Module lib.statistics