Author: bugman Date: Thu Oct 16 21:49:57 2008 New Revision: 7786 URL: http://svn.gna.org/viewcvs/relax?rev=7786&view=rev Log: Ported r7784 from the 1.3 line. This is the fix for bug #12456 (https://gna.org/bugs/?12456). The command used was: svn merge -r7783:7784 svn+ssh://bugman@xxxxxxxxxxx/svn/relax/1.3 ..... r7784 | bugman | 2008-10-16 21:22:02 +0200 (Thu, 16 Oct 2008) | 7 lines Changed paths: M /1.3/prompt/relax_fit.py M /1.3/specific_fns/relax_fit.py Fix for bug #12456 (https://gna.org/bugs/?12456). When calculating the errors from replicate spectra, the standard deviations of all spins were being averaged rather than the variance. The same error occurred when averaging across spectra - when not all are duplicated, triplicated, etc. ..... Modified: 1.2/prompt/relax_fit.py 1.2/specific_fns/relax_fit.py Modified: 1.2/prompt/relax_fit.py URL: http://svn.gna.org/viewcvs/relax/1.2/prompt/relax_fit.py?rev=7786&r1=7785&r2=7786&view=diff ============================================================================== --- 1.2/prompt/relax_fit.py (original) +++ 1.2/prompt/relax_fit.py Thu Oct 16 21:49:57 2008 @@ -54,23 +54,24 @@ formula ----- - ____________________________ - sd = \/ sum({Ii - Iav}^2) / (n - 1) , + + sigma = sum({Ii - Iav}^2) / (n - 1) , ----- - where n is the total number of collected spectra for the time point and i is the - corresponding index, Ii is the peak intensity for spectrum i, Iav is the mean over all - spectra, ie the sum of all peak intensities divided by n. + where sigma is the variance or square of the standard deviation, n is the total number of + collected spectra for the time point and i is the corresponding index, Ii is the peak + intensity for spectrum i, Iav is the mean over all spectra, ie the sum of all peak + intensities divided by n. Averaging of the errors ~~~~~~~~~~~~~~~~~~~~~~~ As the value of n in the above equation is always very low, normally only a couple of - spectra are collected per time point, the standard deviation of all residues is averaged for - a single time point. Although this results in all residues having the same error, the - accuracy of the error estimate is significantly improved. + spectra are collected per time point, the variance of all spins is averaged for a single + time point. Although this results in all spins having the same error, the accuracy of the + error estimate is significantly improved. Errors across multiple time points @@ -78,9 +79,9 @@ If all spectra are collected in duplicate (triplicate or higher number of spectra are supported), the each time point will have its own error estimate. However, if there are - time points in the series which only consist of a single spectrum, then the standard - deviations of replicated time points will be averaged. Hence, for the entire experiment - there will be a single error value for all residues and for all time points. + time points in the series which only consist of a single spectrum, then the variances of + replicated time points will be averaged. Hence, for the entire experiment there will be a + single error value for all spins and for all time points. A better approach rather than averaging across all time points would be to use a form of interpolation as the errors across time points generally decreases for longer time periods. Modified: 1.2/specific_fns/relax_fit.py URL: http://svn.gna.org/viewcvs/relax/1.2/specific_fns/relax_fit.py?rev=7786&r1=7785&r2=7786&view=diff ============================================================================== --- 1.2/specific_fns/relax_fit.py (original) +++ 1.2/specific_fns/relax_fit.py Thu Oct 16 21:49:57 2008 @@ -530,15 +530,15 @@ self.run = run # Test if the standard deviation is already calculated. - if hasattr(self.relax.data, 'sd'): + if hasattr(self.relax.data, 'sigma'): raise RelaxError, "The average intensity and standard deviation of all spectra has already been calculated." # Print out. print "\nCalculating the average intensity and standard deviation of all spectra." # Initialise. - self.relax.data.sd = {} - self.relax.data.sd[self.run] = [] + self.relax.data.sigma = {} + self.relax.data.sigma[self.run] = [] # Loop over the time points. for time_index in xrange(len(self.relax.data.relax_times[self.run])): @@ -549,7 +549,7 @@ print "%-5s%-6s%-20s%-20s" % ("Num", "Name", "Average", "SD") # Append zero to the global standard deviation structure. - self.relax.data.sd[self.run].append(0.0) + self.relax.data.sigma[self.run].append(0.0) # Initialise the time point and residue specific sd. total_res = 0 @@ -577,8 +577,8 @@ # Initialise the average intensity and standard deviation data structures. if not hasattr(data, 'ave_intensities'): data.ave_intensities = [] - if not hasattr(data, 'sd'): - data.sd = [] + if not hasattr(data, 'sigma'): + data.sigma = [] # Average intensity. data.ave_intensities.append(average(data.intensities[time_index])) @@ -588,60 +588,65 @@ for j in xrange(self.relax.data.num_spectra[self.run][time_index]): SSE = SSE + (data.intensities[time_index][j] - data.ave_intensities[time_index]) ** 2 - # Standard deviation. - # ____________________________ - # / 1 - # sd = / ----- * sum({Xi - Xav}^2)] - # \/ n - 1 + # Variance. + # + # 1 + # sigma = ----- * sum({Xi - Xav}^2)] + # n - 1 # if self.relax.data.num_spectra[self.run][time_index] == 1: - sd = 0.0 + sigma = 0.0 else: - sd = sqrt(1.0 / (self.relax.data.num_spectra[self.run][time_index] - 1.0) * SSE) - data.sd.append(sd) + sigma = 1.0 / (self.relax.data.num_spectra[self.run][time_index] - 1.0) * SSE + data.sigma.append(sigma) # Print out. if print_flag: - print "%-5i%-6s%-20s%-20s" % (data.num, data.name, `data.ave_intensities[time_index]`, `data.sd[time_index]`) - - # Sum of standard deviations (for average). - self.relax.data.sd[self.run][time_index] = self.relax.data.sd[self.run][time_index] + data.sd[time_index] + print "%-5i%-6s%-20s%-20s" % (data.num, data.name, `data.ave_intensities[time_index]`, `data.sigma[time_index]`) + + # Sum of variances (for average). + self.relax.data.sigma[self.run][time_index] = self.relax.data.sigma[self.run][time_index] + data.sigma[time_index] # Increment the number of residues counter. total_res = total_res + 1 - # Average sd. - self.relax.data.sd[self.run][time_index] = self.relax.data.sd[self.run][time_index] / float(total_res) + # Average variance. + self.relax.data.sigma[self.run][time_index] = self.relax.data.sigma[self.run][time_index] / float(total_res) # Print out. - print "Standard deviation for time point %s: %s" % (`time_index`, `self.relax.data.sd[self.run][time_index]`) + print "Standard deviation for time point %s: %s" % (`time_index`, `self.relax.data.sigma[self.run][time_index]`) # Average across all spectra if there are time points with a single spectrum. - if 0.0 in self.relax.data.sd[self.run]: + if 0.0 in self.relax.data.sigma[self.run]: # Initialise. - sd = 0.0 + sigma = 0.0 num_dups = 0 # Loop over all time points. for i in xrange(len(self.relax.data.relax_times[self.run])): - # Single spectrum (or extrodinarily accurate NMR spectra!). - if self.relax.data.sd[self.run][i] == 0.0: + # Single spectrum (or extraordinarily accurate NMR spectra!). + if self.relax.data.sigma[self.run][i] == 0.0: continue # Sum and count. - sd = sd + self.relax.data.sd[self.run][i] + sigma = sigma + self.relax.data.sigma[self.run][i] num_dups = num_dups + 1 # Average value. - sd = sd / float(num_dups) + sigma = sigma / float(num_dups) # Assign the average value to all time points. for i in xrange(len(self.relax.data.relax_times[self.run])): - self.relax.data.sd[self.run][i] = sd + self.relax.data.sigma[self.run][i] = sigma # Print out. - print "\nStandard deviation (averaged over all spectra): " + `sd` + print "\nStandard deviation (averaged over all spectra): " + `sigma` + + # Create the standard deviation data structure. + cdp.sd = [] + for sigma in cdp.sigma: + cdp.sd.append(sqrt(sigma)) def minimise(self, run=None, min_algor=None, min_options=None, func_tol=None, grad_tol=None, max_iterations=None, constraints=0, scaling=1, print_flag=0, sim_index=None):