r25234 - /trunk/specific_analyses/relax_disp/optimisation.py -- August 25, 2014

Author: tlinnet
Date: Mon Aug 25 01:08:57 2014
New Revision: 25234

URL: http://svn.gna.org/viewcvs/relax?rev=25234&view=rev
Log:
Implemented back end for estimating r2eff and errors by exponential curve 
fitting with scipy.optimize.leastsq.

task #7822(https://gna.org/task/index.php?7822): Implement user function to 
estimate R2eff and associated errors for exponential curve fitting.

Modified:
    trunk/specific_analyses/relax_disp/optimisation.py

Modified: trunk/specific_analyses/relax_disp/optimisation.py
URL: 
http://svn.gna.org/viewcvs/relax/trunk/specific_analyses/relax_disp/optimisation.py?rev=25234&r1=25233&r2=25234&view=diff
==============================================================================
--- trunk/specific_analyses/relax_disp/optimisation.py  (original)
+++ trunk/specific_analyses/relax_disp/optimisation.py  Mon Aug 25 01:08:57 
2014
@@ -23,26 +23,33 @@
 """Module for the optimisation of the relaxation dispersion models."""
 
 # Python module imports.
+from copy import deepcopy
 from minfx.generic import generic_minimise
 from minfx.grid import grid
-from numpy import dot, float64, int32, ones, zeros
+from numpy import asarray, diag, dot, float64, inf, int32, ones, sqrt, zeros
 from numpy.linalg import inv
 from operator import mul
 from re import match, search
 import sys
+from warnings import warn
 
 # relax module imports.
 from dep_check import C_module_exp_fn
 from lib.dispersion.two_point import calc_two_point_r2eff, 
calc_two_point_r2eff_err
 from lib.errors import RelaxError
+from lib.text.sectioning import section
 from lib.text.sectioning import subsection
+from lib.warnings import RelaxWarning
 from multi import Memo, Result_command, Slave_command
-from pipe_control.mol_res_spin import spin_loop
-from specific_analyses.relax_disp.checks import check_disp_points, 
check_exp_type, check_exp_type_fixed_time
+from pipe_control.mol_res_spin import generate_spin_string, spin_loop
+from pipe_control.spectrum import error_analysis
+from specific_analyses.relax_disp.checks import check_model_type, 
check_disp_points, check_exp_type, check_exp_type_fixed_time
 from specific_analyses.relax_disp.data import average_intensity, 
count_spins, find_intensity_keys, has_exponential_exp_type, 
has_proton_mmq_cpmg, is_r1_optimised, loop_exp, loop_exp_frq_offset_point, 
loop_exp_frq_offset_point_time, loop_frq, loop_offset, loop_time, 
pack_back_calc_r2eff, return_cpmg_frqs, return_offset_data, 
return_param_key_from_data, return_r1_data, return_r2eff_arrays, 
return_spin_lock_nu1
 from specific_analyses.relax_disp.parameters import assemble_param_vector, 
disassemble_param_vector, linear_constraints, param_conversion, param_num, 
r1_setup
-from specific_analyses.relax_disp.variables import EXP_TYPE_LIST_CPMG, 
MODEL_CR72, MODEL_CR72_FULL, MODEL_LIST_MMQ, MODEL_LM63, MODEL_M61, 
MODEL_MP05, MODEL_TAP03, MODEL_TP02
+from specific_analyses.relax_disp.variables import EXP_TYPE_LIST_CPMG, 
MODEL_CR72, MODEL_CR72_FULL, MODEL_LIST_MMQ, MODEL_LM63, MODEL_M61, 
MODEL_MP05, MODEL_R2EFF, MODEL_TAP03, MODEL_TP02
+from target_functions.chi2 import chi2_rankN
 from target_functions.relax_disp import Dispersion
+from target_functions.relax_disp_curve_fit import Exponential
 
 # C modules.
 if C_module_exp_fn:
@@ -284,6 +291,229 @@
 
             # Calculate the R2eff error.
             spin.r2eff_err[param_key] = 
calc_two_point_r2eff_err(relax_time=time, I_ref=ref_intensity, I=intensity, 
I_ref_err=ref_intensity_err, I_err=intensity_err)
+
+
+def estimate_r2eff(spin_id=None, ftol=1e-15, xtol=1e-15, maxfev=10000000, 
factor=100.0, verbosity=1):
+    """Estimate r2eff and errors by exponential curve fitting with 
scipy.optimize.leastsq.
+
+    scipy.optimize.leastsq is a wrapper around MINPACK's lmdif and lmder 
algorithms.
+
+    MINPACK is a FORTRAN90 library which solves systems of nonlinear 
equations, or carries out the least squares minimization of the residual of a 
set of linear or nonlinear equations.
+
+    Errors are calculated by taking the square root of the reported 
co-variance.
+
+    This can be an huge time saving step, when performing model fitting in 
R1rho.
+    Errors of R2eff values, are normally estimated by time-consuming 
Monte-Carlo simulations.
+
+    Initial guess for the starting parameter x0 = [r2eff_est, i0_est], is by 
converting the exponential curve to a linear problem.
+    Then solving initial guess by linear least squares of: ln(Intensity[j]) 
= ln(i0) - time[j]* r2eff.
+
+
+    @keyword spin_id:           The spin identification string.
+    @type spin_id:              str
+    @keyword ftol:              The function tolerance for the relative 
error desired in the sum of squares, parsed to leastsq.
+    @type ftol:                 float
+    @keyword xtol:              The error tolerance for the relative error 
desired in the approximate solution, parsed to leastsq.
+    @type xtol:                 float
+    @keyword maxfev:            The maximum number of function evaluations, 
parsed to leastsq.  If zero, then 100*(N+1) is the maximum function calls.  N 
is the number of elements in x0=[r2eff, i0].
+    @type maxfev:               int
+    @keyword factor:            The initial step bound, parsed to leastsq.  
It determines the initial step bound (''factor * || diag * x||'').  Should be 
in the interval (0.1, 100).
+    @type factor:               float
+    @keyword verbosity:         The amount of information to print.  The 
higher the value, the greater the verbosity.
+    @type verbosity:            int
+    """
+
+    # Perform checks.
+    check_model_type(model=MODEL_R2EFF)
+
+    # Import leastsq.
+    from scipy.optimize import leastsq
+
+    # Initialise target function class, to access functions.
+    exp_class = Exponential()
+
+    # Check if intensity errors have already been calculated by the user.
+    precalc = True
+    for cur_spin, mol_name, resi, resn, cur_spin_id in 
spin_loop(selection=spin_id, full_info=True, return_id=True, skip_desel=True):
+        # No structure.
+        if not hasattr(cur_spin, 'peak_intensity_err'):
+            precalc = False
+            break
+
+        # Determine if a spectrum ID is missing from the list.
+        for id in cdp.spectrum_ids:
+            if id not in cur_spin.peak_intensity_err:
+                precalc = False
+                break
+
+    # If no error analysis of peak heights exists.
+    if not precalc:
+        # Printout.
+        section(file=sys.stdout, text="Error analysis", prespace=2)
+
+        # Loop over the spectrometer frequencies.
+        for frq in loop_frq():
+            # Generate a list of spectrum IDs matching the frequency.
+            ids = []
+            for id in cdp.spectrum_ids:
+                # Check that the spectrometer frequency matches.
+                match_frq = True
+                if frq != None and cdp.spectrometer_frq[id] != frq:
+                    match_frq = False
+
+                # Add the ID.
+                if match_frq:
+                    ids.append(id)
+
+            # Run the error analysis on the subset.
+            error_analysis(subset=ids)
+
+    # Loop over the spins.
+    for cur_spin, mol_name, resi, resn, cur_spin_id in 
spin_loop(selection=spin_id, full_info=True, return_id=True, skip_desel=True):
+        # Generate spin string.
+        spin_string = generate_spin_string(spin=cur_spin, mol_name=mol_name, 
res_num=resi, res_name=resn)
+
+        # Print information.
+        if verbosity >= 1:
+            # Individual spin block section.
+            top = 2
+            if verbosity >= 2:
+                top += 2
+            subsection(file=sys.stdout, text="Fitting with 
scipy.optimize.leastsq to: %s"%spin_string, prespace=top)
+
+        # Loop over each spectrometer frequency and dispersion point.
+        for exp_type, frq, offset, point, ei, mi, oi, di in 
loop_exp_frq_offset_point(return_indices=True):
+            # The parameter key.
+            param_key = return_param_key_from_data(exp_type=exp_type, 
frq=frq, offset=offset, point=point)
+
+            # The peak intensities, errors and times.
+            values = []
+            errors = []
+            times = []
+            for time in loop_time(exp_type=exp_type, frq=frq, offset=offset, 
point=point):
+                values.append(average_intensity(spin=cur_spin, 
exp_type=exp_type, frq=frq, offset=offset, point=point, time=time))
+                errors.append(average_intensity(spin=cur_spin, 
exp_type=exp_type, frq=frq, offset=offset, point=point, time=time, 
error=True))
+                times.append(time)
+
+            # Convert to numpy array.
+            values = asarray(values)
+            errors = asarray(errors)
+            times = asarray(times)
+
+            # Initial guess for minimisation. Solved by linear least squares.
+            x0 = exp_class.estimate_x0_exp(intensities=values, times=times)
+
+            # Define function to minimise. Use errors as weights in the 
minimisation.
+            use_weights = True
+
+            # If 'sigma'/erros describes one standard deviation errors of 
the input data points. The estimated covariance in 'pcov' is based on these 
values.
+            absolute_sigma = True
+
+            if use_weights:
+                func = exp_class.func_exp_weighted_general
+                weights = 1.0 / asarray(errors)
+                args=(times, values, weights )
+            else:
+                func = exp_class.func_exp_general
+                args=(times, values)
+
+            # Call scipy.optimize.leastsq.
+            popt, pcov, infodict, errmsg, ier = leastsq(func=func, x0=x0, 
args=args, full_output=True, ftol=ftol, xtol=xtol, maxfev=maxfev, 
factor=factor)
+
+            # Catch errors:
+            if ier in [1, 2, 3]:
+                warning = None
+            elif ier in [4]:
+                warn(RelaxWarning("%s." % errmsg))
+                warning = errmsg
+            elif ier in [0, 5, 6, 7, 8]:
+                raise RelaxError("scipy.optimize.leastsq raises following 
error: '%s'." % errmsg)
+
+            # 'nfev' number of function calls.
+            f_count = infodict['nfev']
+
+            # 'fvec': function evaluated at the output.
+            fvec = infodict['fvec']
+            #fvec_test = func(popt, times, values)
+
+            # 'fjac': A permutation of the R matrix of a QR factorization of 
the final approximate Jacobian matrix, stored column wise. Together with 
ipvt, the covariance of the estimate can be approximated.
+            # fjac = infodict['fjac']
+
+            # 'qtf': The vector (transpose(q) * fvec).
+            # qtf = infodict['qtf']
+
+            # 'ipvt'  An integer array of length N which defines a 
permutation matrix, p, such that fjac*p = q*r, where r is upper triangular
+            # with diagonal elements of nonincreasing magnitude. Column j of 
p is column ipvt(j) of the identity matrix.
+
+            # Back calc fitted curve.
+            back_calc = exp_class.calc_exp(times=times, r2eff=popt[0], 
i0=popt[1])
+
+            # Calculate chi2.
+            chi2 = chi2_rankN(data=values, back_calc_vals=back_calc, 
errors=errors)
+
+            # 'pcov': The estimated covariance of popt.
+            # The diagonals provide the variance of the parameter estimate.
+
+            if pcov is None:
+                # indeterminate covariance
+                pcov = zeros((len(popt), len(popt)), dtype=float)
+                pcov.fill(inf)
+            elif not absolute_sigma:
+                if len(values) > len(x0):
+                    s_sq = sum( fvec**2 ) / (len(values) - len(x0))
+                    pcov = pcov * s_sq
+                else:
+                    pcov.fill(inf)
+
+            # To compute one standard deviation errors on the parameters, 
take the square root of the diagonal covariance.
+            perr = sqrt(diag(pcov))
+
+            # Extract values.
+            r2eff = popt[0]
+            i0 = popt[1]
+            r2eff_err = perr[0]
+            i0_err = perr[1]
+
+            # Disassemble the parameter vector.
+            disassemble_param_vector(param_vector=popt, spins=[cur_spin], 
key=param_key)
+
+            # Errors.
+            if not hasattr(cur_spin, 'r2eff_err'):
+                setattr(cur_spin, 'r2eff_err', deepcopy(getattr(cur_spin, 
'r2eff')))
+            if not hasattr(cur_spin, 'i0_err'):
+                setattr(cur_spin, 'i0_err', deepcopy(getattr(cur_spin, 
'i0')))
+
+            # Set error.
+            cur_spin.r2eff_err[param_key] = r2eff_err
+            cur_spin.i0_err[param_key] = i0_err
+
+            # Chi-squared statistic.
+            cur_spin.chi2 = chi2
+
+            # Iterations.
+            cur_spin.f_count = f_count
+
+            # Warning.
+            cur_spin.warning = warning
+
+            # Print information.
+            print_strings = []
+            if verbosity >= 1:
+                # Add print strings.
+                point_info = "%s at %3.1f MHz, for offset=%3.3f ppm and 
dispersion point %-5.1f, with %i time points." % (exp_type, frq/1E6, offset, 
point, len(times))
+                print_strings.append(point_info)
+
+                par_info = "r2eff=%3.3f r2eff_err=%3.3f, i0=%6.1f, 
i0_err=%3.3f, chi2=%3.3f.\n" % ( r2eff, r2eff_err, i0, i0_err, chi2)
+                print_strings.append(par_info)
+
+                if verbosity >= 2:
+                    time_info = ', '.join(map(str, times))
+                    print_strings.append('For time array: 
'+time_info+'.\n\n')
+
+            # Print info
+            if len(print_strings) > 0:
+                for print_string in print_strings:
+                    print(print_string),
 
 
 def minimise_r2eff(spins=None, spin_ids=None, min_algor=None, 
min_options=None, func_tol=None, grad_tol=None, max_iterations=None, 
constraints=False, scaling_matrix=None, verbosity=0, sim_index=None, 
lower=None, upper=None, inc=None):
r25234 - /trunk/specific_analyses/relax_disp/optimisation.py

Header

Content

Related Messages