Author: bugman Date: Thu Oct 16 21:22:02 2008 New Revision: 7784 URL: http://svn.gna.org/viewcvs/relax?rev=7784&view=rev Log: Fix for bug #12456 (https://gna.org/bugs/?12456). When calculating the errors from replicate spectra, the standard deviations of all spins were being averaged rather than the variance. The same error occurred when averaging across spectra - when not all are duplicated, triplicated, etc. Modified: 1.3/prompt/relax_fit.py 1.3/specific_fns/relax_fit.py Modified: 1.3/prompt/relax_fit.py URL: http://svn.gna.org/viewcvs/relax/1.3/prompt/relax_fit.py?rev=7784&r1=7783&r2=7784&view=diff ============================================================================== --- 1.3/prompt/relax_fit.py (original) +++ 1.3/prompt/relax_fit.py Thu Oct 16 21:22:02 2008 @@ -52,23 +52,24 @@ The standard deviation for a single spin at a single time point is calculated by the formula ----- - ____________________________ - sd = \/ sum({Ii - Iav}^2) / (n - 1) , + + sigma = sum({Ii - Iav}^2) / (n - 1) , ----- - where n is the total number of collected spectra for the time point and i is the - corresponding index, Ii is the peak intensity for spectrum i, Iav is the mean over all - spectra, ie the sum of all peak intensities divided by n. + where sigma is the variance or square of the standard deviation, n is the total number of + collected spectra for the time point and i is the corresponding index, Ii is the peak + intensity for spectrum i, Iav is the mean over all spectra, ie the sum of all peak + intensities divided by n. Averaging of the errors ~~~~~~~~~~~~~~~~~~~~~~~ As the value of n in the above equation is always very low, normally only a couple of - spectra are collected per time point, the standard deviation of all spins is averaged for - a single time point. Although this results in all spins having the same error, the - accuracy of the error estimate is significantly improved. + spectra are collected per time point, the variance of all spins is averaged for a single + time point. Although this results in all spins having the same error, the accuracy of the + error estimate is significantly improved. Errors across multiple time points @@ -76,9 +77,9 @@ If all spectra are collected in duplicate (triplicate or higher number of spectra are supported), the each time point will have its own error estimate. However, if there are - time points in the series which only consist of a single spectrum, then the standard - deviations of replicated time points will be averaged. Hence, for the entire experiment - there will be a single error value for all spins and for all time points. + time points in the series which only consist of a single spectrum, then the variances of + replicated time points will be averaged. Hence, for the entire experiment there will be a + single error value for all spins and for all time points. A better approach rather than averaging across all time points would be to use a form of interpolation as the errors across time points generally decreases for longer time periods. Modified: 1.3/specific_fns/relax_fit.py URL: http://svn.gna.org/viewcvs/relax/1.3/specific_fns/relax_fit.py?rev=7784&r1=7783&r2=7784&view=diff ============================================================================== --- 1.3/specific_fns/relax_fit.py (original) +++ 1.3/specific_fns/relax_fit.py Thu Oct 16 21:22:02 2008 @@ -633,14 +633,14 @@ cdp = pipes.get_pipe() # Test if the standard deviation has already been calculated. - if hasattr(cdp, 'sd'): + if hasattr(cdp, 'sigma'): raise RelaxError, "The average intensity and standard deviation of all spectra has already been calculated." # Print out. print "\nCalculating the average intensity and standard deviation of all spectra." # Initialise. - cdp.sd = [] + cdp.sigma = [] # Loop over the time points. for time_index in xrange(len(cdp.relax_times)): @@ -650,8 +650,8 @@ if verbosity: print "%-5s%-6s%-20s%-20s" % ("Num", "Name", "Average", "SD") - # Append zero to the global standard deviation structure. - cdp.sd.append(0.0) + # Append zero to the global variance structure. + cdp.sigma.append(0.0) # Test for multiple spectra. if cdp.num_spectra[time_index] == 1: @@ -673,8 +673,8 @@ # Initialise the average intensity and standard deviation data structures. if not hasattr(spin, 'ave_intensities'): spin.ave_intensities = [] - if not hasattr(spin, 'sd'): - spin.sd = [] + if not hasattr(spin, 'sigma'): + spin.sigma = [] # Average intensity. spin.ave_intensities.append(average(spin.intensities[time_index])) @@ -684,57 +684,62 @@ for j in xrange(cdp.num_spectra[time_index]): SSE = SSE + (spin.intensities[time_index][j] - spin.ave_intensities[time_index]) ** 2 - # Standard deviation. - # ____________________________ - # / 1 - # sd = / ----- * sum({Xi - Xav}^2)] - # \/ n - 1 + # Variance. + # + # 1 + # sigma = ----- * sum({Xi - Xav}^2)] + # n - 1 # if cdp.num_spectra[time_index] == 1: - sd = 0.0 + sigma = 0.0 else: - sd = sqrt(1.0 / (cdp.num_spectra[time_index] - 1.0) * SSE) - spin.sd.append(sd) + sigma = 1.0 / (cdp.num_spectra[time_index] - 1.0) * SSE + spin.sigma.append(sigma) # Print out. if verbosity: - print "%-5i%-6s%-20s%-20s" % (spin.num, spin.name, `spin.ave_intensities[time_index]`, `spin.sd[time_index]`) - - # Sum of standard deviations (for average). - cdp.sd[time_index] = cdp.sd[time_index] + spin.sd[time_index] - - # Average sd. - cdp.sd[time_index] = cdp.sd[time_index] / float(count_spins()) + print "%-5i%-6s%-20s%-20s" % (spin.num, spin.name, `spin.ave_intensities[time_index]`, `spin.sigma[time_index]`) + + # Sum of variances (for average). + cdp.sigma[time_index] = cdp.sigma[time_index] + spin.sigma[time_index] + + # Average variance. + cdp.sigma[time_index] = cdp.sigma[time_index] / float(count_spins()) # Print out. - print "Standard deviation for time point %s: %s" % (`time_index`, `cdp.sd[time_index]`) + print "Standard deviation for time point %s: %s" % (`time_index`, `cdp.sigma[time_index]`) # Average across all spectra if there are time points with a single spectrum. - if 0.0 in cdp.sd: + if 0.0 in cdp.sigma: # Initialise. - sd = 0.0 + sigma = 0.0 num_dups = 0 # Loop over all time points. for i in xrange(len(cdp.relax_times)): - # Single spectrum (or extrodinarily accurate NMR spectra!). - if cdp.sd[i] == 0.0: + # Single spectrum (or extraordinarily accurate NMR spectra!). + if cdp.sigma[i] == 0.0: continue # Sum and count. - sd = sd + cdp.sd[i] + sigma = sigma + cdp.sigma[i] num_dups = num_dups + 1 # Average value. - sd = sd / float(num_dups) + sigma = sigma / float(num_dups) # Assign the average value to all time points. for i in xrange(len(cdp.relax_times)): - cdp.sd[i] = sd + cdp.sigma[i] = sigma # Print out. - print "\nStandard deviation (averaged over all spectra): " + `sd` + print "\nStandard deviation (averaged over all spectra): " + `sigma` + + # Create the standard deviation data structure. + cdp.sd = [] + for sigma in cdp.sigma: + cdp.sd.append(sqrt(sigma)) def minimise(self, min_algor=None, min_options=None, func_tol=None, grad_tol=None, max_iterations=None, constraints=False, scaling=True, verbosity=0, sim_index=None, lower=None, upper=None, inc=None):