influence of pdb orientation on model-free optimization? -- January 10, 2008

Hi All,

I am working with five relaxation data sets (r1, r2 and noe at 400MHz; r1 and r2 and 600 MHz), and therefore cannot use thefull_analysis.py protocol. I have obtained estimates for tm,Dratio, theta and phi using Art Palmer's quadric_diffusion program.I modified the full_analysis.py protocol to optimize a prolate tensorusing these estimates (attached file: mod.py). I have performed theoptimization of the prolate tensor using either (1) my originalstructure or (2) the same structure rotated and translated by thequadric_diffusion program. It seems that relax does not converge toa single global optimum, as different values of tm, Da, theta and phiare reported.


Using my original structure:
#tm = 6.00721299718e-09
#Da = 14256303.3975
#theta = 11.127323614211441
#phi = 62.250251959733312

Using the rotated/translated structure by the quadric_diffusion program:
#tm = 5.84350638161e-09
#Da = 11626835.475
#theta = 8.4006873071400197
#phi = 113.6068898953142

The only difference between the two calculations is the orientationof the input PDB structure file. For another set of five rates(different protein), there is a >0.3 ns difference in the convergedtm values.

Is my modified protocol (in mod.py) setup properly? Or is this amore complex issue in the global optimization? In previous attempts,I've also noticed that separate runs with differences in theestimates for Dratio, theta and phi also converge to differentoptimized diffusion tensor variables.


Doug

# Python module imports.
from os import getcwd, listdir
import os
from re import search
from string import lower


class Main:
    def __init__(self, relax):
        """Execute the model-free analysis."""

        # Setup.
        self.relax = relax


        # Diffusion models MII to MV.
        #############################

        if DIFF_MODEL == 'sphere' or DIFF_MODEL == 'prolate' or DIFF_MODEL == 
'oblate' or DIFF_MODEL == 'ellipsoid':
            # Loop until convergence if CONV_LOOP is set, otherwise just loop 
once.
            # This looping could be made much cleaner by removing the 
dependence on the determine_rnd() function.
            while 1:
                # Determine which round of optimisation to do (init, round_1, 
round_2, etc).
                self.round = self.determine_rnd(model=DIFF_MODEL)
                print self.round

                # Inital round of optimisation for diffusion models MII to MV.
                if self.round == 0:
                    # Base directory to place files into.
                    self.base_dir = DIFF_MODEL + '/init/'

                    # Run name.
                    name = DIFF_MODEL

                    # Create the run.
                    run.create(name, 'mf')

                    ## Load the local tm diffusion model MI results.
                    #results.read(run=name, file='results', 
dir='local_tm/aic')

                    ## Remove the tm parameter.
                    #model_free.remove_tm(run=name)

                    # Load the PDB file.
                    pdb(name, PDB_FILE)

                    # Add an arbitrary diffusion tensor which will be 
optimised.
                    if DIFF_MODEL == 'sphere':
                        diffusion_tensor.init(name,  6.8750165484745208e-09, 
fixed=0)
                        inc = 11
                    elif DIFF_MODEL == 'prolate':
                        diffusion_tensor.init(name, (6.7191e-09, 1.2161, 
0.37311, 0.75243), spheroid_type='prolate', fixed=0, param_types=2, 
time_scale=1.0, d_scale=1.0, angle_units='rad')
                        inc = 11
                    elif DIFF_MODEL == 'oblate':
                        diffusion_tensor.init(name, (6.9010706313324413e-09, 
-3952358.7699030284, 46.595757830415415, 91.415695324202986), 
spheroid_type='oblate', fixed=0)
                        inc = 11
                    elif DIFF_MODEL == 'ellipsoid':
                        diffusion_tensor.init(name, (6.7959783590876922e-09, 
7508732.0897961529, 0.20834484160305933, 20.278149438105672, 
88.202122436857408, 173.66135622003813), fixed=0)
                        inc = 6

                    # Sequential optimisation of all model-free models 
(function must be modified to suit).
                    self.multi_model(local_tm=1)

                    # Create the final run (for model selection and final 
optimisation).
                    name = 'final'
                    if name in self.relax.data.run_names:
                        run.delete(name)
                    run.create(name, 'mf')

                    # Model selection.
                    self.model_selection(run=name, dir=self.base_dir + 'aic')

                    # Final optimisation of all diffusion and model-free 
parameters.
                    fix(name, 'all', fixed=0)

                    # Minimise all parameters.
                    minimise(MIN_ALGOR, run=name)

                    # Write the results.
                    dir = self.base_dir + 'opt'
                    results.write(run=name, file='results', dir=dir, force=1)

                    ## Minimise just the diffusion tensor.
                    ##fix(name, 'all_res')
                    ##grid_search(name, inc=inc)
                    ##minimise(MIN_ALGOR, run=name)

                    ## Write the results.
                    ##results.write(run=name, file='results', 
dir=self.base_dir, force=1)


                # Normal round of optimisation for diffusion models MII to MV.
                else:
                    # Base directory to place files into.
                    self.base_dir = DIFF_MODEL + '/round_' + `self.round` + 
'/'

                    # Load the optimised diffusion tensor from either the 
previous round.
                    self.load_tensor()

                    # Sequential optimisation of all model-free models 
(function must be modified to suit).
                    self.multi_model()

                    # Create the final run (for model selection and final 
optimisation).
                    name = 'final'
                    if name in self.relax.data.run_names:
                        run.delete(name)
                    run.create(name, 'mf')

                    # Model selection.
                    self.model_selection(run=name, dir=self.base_dir + 'aic')

                    # Final optimisation of all diffusion and model-free 
parameters.
                    fix(name, 'all', fixed=0)

                    # Minimise all parameters.
                    minimise(MIN_ALGOR, run=name)

                    # Write the results.
                    dir = self.base_dir + 'opt'
                    results.write(run=name, file='results', dir=dir, force=1)

                    # Test for convergence.
                    converged = self.convergence(run=name)

                    # Break out of the infinite while loop if automatic 
looping is not activated or if convergence has occurred.
                    if converged or not CONV_LOOP:
                        break


        # Final run.
        ############

        elif DIFF_MODEL == 'final':
            # Diffusion model selection.
            ############################

            # All the global diffusion models to be used in the model 
selection.
            #self.runs = ['local_tm', 'sphere', 'prolate', 'oblate', 
'ellipsoid']
            self.runs = ['sphere', 'prolate', 'oblate', 'ellipsoid']

            ## Create the local_tm run.
            #run.create('local_tm', 'mf')

            ## Load the local tm diffusion model MI results.
            #results.read(run='local_tm', file='results', dir='local_tm/aic')

            # Loop over models MII to MV.
            for model in ['sphere', 'prolate', 'oblate', 'ellipsoid']:
                # Determine which was the last round of optimisation for each 
of the models.
                self.round = self.determine_rnd(model=model) - 1

                # If no directories begining with 'round_' exist, the script 
has not been properly utilised! 
                if self.round < 1:
                    # Construct the name of the diffusion tensor.
                    name = model
                    if model == 'prolate' or model == 'oblate':
                        name = name + ' spheroid'

                    # Throw an error to prevent misuse of the script.
                    raise RelaxError, "Multiple rounds of optimisation of the 
" + name + " (between 8 to 15) are required for the proper execution of this 
script."

                # Create the run.
                run.create(model, 'mf')

                # Load the diffusion model results.
                results.read(run=model, file='results', dir=model + '/round_' 
+ `self.round` + '/opt')

            # Create the run for model selection (which will be a copy of the 
selected diffusion model or run).
            run.create('final', 'mf')

            # Model selection between MI to MV.
            self.model_selection(run='final', write_flag=0)


            # Monte Carlo simulations.
            ##########################

            # Fix the diffusion tensor (if it exists!).
            if self.relax.data.diff.has_key('final'):
                fix('final', 'diff')

            # Simulations.
            monte_carlo.setup('final', number=MC_NUM)
            monte_carlo.create_data('final')
            monte_carlo.initial_values('final')
            minimise(MIN_ALGOR, run='final')
            eliminate('final')
            monte_carlo.error_analysis('final')


            # Write the final results.
            ##########################

            results.write(run='final', file='results', dir='final', force=1)


        # Unknown script behaviour.
        ###########################

        else:
            raise RelaxError, "Unknown diffusion model, change the value of 
'DIFF_MODEL'"



    def convergence(self, run=None):
        """Test for the convergence of the global model."""

        # Print out.
        print "\n\n\n"
        print "#####################"
        print "# Convergence tests #"
        print "#####################\n\n"

        # Convergence flags.
        chi2_converged = 1
        models_converged = 1
        params_converged = 1
        model_test = 1


        # Chi-squared test.
        ###################

        print "Chi-squared test:"
        print "    chi2 (k-1): " + `self.relax.data.chi2['previous']`
        print "    chi2 (k):   " + `self.relax.data.chi2[run]`
        if self.relax.data.chi2['previous'] == self.relax.data.chi2[run]:
            print "    The chi-squared value has converged.\n"
        else:
            print "    The chi-squared value has not converged.\n"
            chi2_converged = 0


        # Identical model-free model test.
        ##################################

        print "Identical model-free models test:"

        # Create a string representation of the model-free models of the 
previous run.
        prev_models = ''
        for i in xrange(len(self.relax.data.res['previous'])):
            if hasattr(self.relax.data.res['previous'][i], 'model'):
                if not self.relax.data.res['previous'][i].model == 'None':
                    prev_models = prev_models + 
self.relax.data.res['previous'][i].model
        
        # Create a string representation of the model-free models of the 
current run.
        curr_models = ''
        for i in xrange(len(self.relax.data.res[run])):
            if hasattr(self.relax.data.res[run][i], 'model'):
                if not self.relax.data.res[run][i].model == 'None':
                    curr_models = curr_models + 
self.relax.data.res[run][i].model

        # The test.
        if prev_models == curr_models:
            print "    The model-free models have converged.\n"
        else:
            print "    The model-free models have not converged.\n"
            print prev_models
            print curr_models
            models_converged = 0


        # Identical parameter value test.
        #################################

        print "Identical parameter test:"

        # Only run the tests if the model-free models have converged.
        if models_converged:
            # Diffusion parameter array.
            if DIFF_MODEL == 'sphere':
                params = ['tm']
            elif DIFF_MODEL == 'oblate' or DIFF_MODEL == 'prolate':
                params = ['tm', 'Da', 'theta', 'phi']
            elif DIFF_MODEL == 'ellipsoid':
                params = ['tm', 'Da', 'Dr', 'alpha', 'beta', 'gamma']

            # Tests.
            for param in params:
                # Get the parameter values.
                prev_val = getattr(self.relax.data.diff['previous'], param)
                curr_val = getattr(self.relax.data.diff[run], param)

                # Test if not identical.
                if prev_val != curr_val:
                    print "    Parameter:   " + param
                    print "    Value (k-1): " + `prev_val`
                    print "    Value (k):   " + `curr_val`
                    print "    The diffusion parameters have not converged.\n"
                    params_converged = 0

            # Skip the rest if the diffusion tensor parameters have not 
converged.
            if params_converged:
                # Loop over the spin systems.
                for i in xrange(len(self.relax.data.res[run])):
                    # Skip if the parameters have not converged.
                    if not params_converged:
                        break

                    # Skip spin systems with no 'params' object.
                    if not hasattr(self.relax.data.res['previous'][i], 
'params') or not hasattr(self.relax.data.res[run][i], 'params'):
                        continue

                    # Loop over the parameters.
                    for j in xrange(len(self.relax.data.res[run][i].params)):
                        # Get the parameter values.
                        prev_val = 
getattr(self.relax.data.res['previous'][i], 
lower(self.relax.data.res['previous'][i].params[j]))
                        curr_val = getattr(self.relax.data.res[run][i], 
lower(self.relax.data.res[run][i].params[j]))

                        # Test if not identical.
                        if prev_val != curr_val:
                            print "    Spin system: " + 
`self.relax.data.res[run][i].num` + ' ' + self.relax.data.res[run][i].name
                            print "    Parameter:   " + 
self.relax.data.res[run][i].params[j]
                            print "    Value (k-1): " + `prev_val`
                            print "    Value (k):   " + `curr_val`
                            print "    The model-free parameters have not 
converged.\n"
                            params_converged = 0
                            break

        # The model-free models haven't converged hence the parameter values 
haven't converged.
        else:
            print "    The model-free models haven't converged hence the 
parameters haven't converged.\n"
            params_converged = 0

        # Print out.
        if params_converged:
            print "    The diffusion tensor and model-free parameters have 
converged.\n"


        # Final print out.
        ##################

        print "\nConvergence:"
        if chi2_converged and models_converged and params_converged:
            print "    [ Yes ]"
            return 1
        else:
            print "    [ No ]"
            return 0


    def determine_rnd(self, model=None):
        """Function for returning the name of next round of optimisation."""

        # Get a list of all files in the directory model.  If no directory 
exists, set the round to 'init' or 0.
        try:
            dir_list = listdir(getcwd() + '/' + model)
        except:
            return 0

        # Set the round to 'init' or 0 if there is no directory called 'init'.
        if 'init' not in dir_list:
            return 0

        # Create a list of all files which begin with 'round_'.
        rnd_dirs = []
        for file in dir_list:
            if search('^round_', file):
                rnd_dirs.append(file)

        # Create a sorted list of integer round numbers.
        numbers = []
        for dir in rnd_dirs:
            try:
                numbers.append(int(dir[6:]))
            except:
                pass
        numbers.sort()

        # No directories begining with 'round_' exist, set the round to 1.
        if not len(numbers):
            return 1

        # Determine the number for the next round (add 1 to the highest 
number).
        return numbers[-1] + 1


    def load_tensor(self):
        """Function for loading the optimised diffusion tensor."""

        # Create the run for the previous data (deleting the old run first if 
necessary).
        if 'previous' in self.relax.data.run_names:
            run.delete('previous')
        run.create('previous', 'mf')

        # Load the optimised diffusion tensor from the initial round.
        if self.round == 1:
            results.read('previous', 'results', DIFF_MODEL + '/init/opt')

        # Load the optimised diffusion tensor from the previous round.
        else:
            results.read('previous', 'results', DIFF_MODEL + '/round_' + 
`self.round - 1` + '/opt')


    def model_selection(self, run=None, dir=None, write_flag=1):
        """Model selection function."""

        # Model elimination.
        eliminate()

        # Model selection.
        model_selection('AIC', run, runs=self.runs)

        # Write the results.
        if write_flag:
            results.write(run=run, file='results', dir=dir, force=1)


    def multi_model(self, local_tm=0):
        """Function for optimisation of all model-free models."""

        # Set the run names (also the names of preset model-free models).
        if local_tm:
            self.runs = MF_MODELS
        else:
            self.runs = MF_MODELS

        # Nuclei type
        nuclei(HETNUC)

        for name in self.runs:
            # Create the run.
            if name in self.relax.data.run_names:
                run.delete(name)
            run.create(name, 'mf')

            # Load the sequence.
            sequence.read(name, SEQUENCE)

            # Load the PDB file.
            pdb(name, PDB_FILE)

            # Load the relaxation data.
            for data in RELAX_DATA:
                relax_data.read(name, data[0], data[1], data[2], data[3])

            # Unselect unresolved residues.
            unselect.read(name, file=UNRES)

            # Copy the diffusion tensor from the run 'opt' and prevent it 
from being minimised.
            if not local_tm:
                diffusion_tensor.copy('previous', name)
                fix(name, 'diff')
            else:
                diffusion_tensor.copy(DIFF_MODEL, name)
                fix(name, 'diff')

            # Set the bond length and CSA values.
            value.set(name, BOND_LENGTH, 'bond_length')
            value.set(name, CSA, 'csa')

            # Select the model-free model.
            model_free.select_model(run=name, model=name)

            # Minimise.
            grid_search(name, inc=GRID_INC)
            minimise(MIN_ALGOR, run=name)

            # Write the results.
            dir = self.base_dir + name
            results.write(run=name, file='results', dir=dir, force=1)


# User variables.
#################

# The diffusion model.
DIFF_MODEL = 'prolate'

# The model-free models (do not change these unless absolutely necessary).
MF_MODELS = ['m0', 'm1', 'm2', 'm3', 'm4', 'm5', 'm9']

# The type of heteronucleus.
HETNUC = 'N'

# The PDB file.
PDB_FILE = 'data/file.pdb'

# The file containing the sequence.
SEQUENCE = 'data/noe.400.dat'

# The relaxation data (Data type, frequency label, frequency, file name).
# These are the arguments 'ri_label', 'frq_label', 'frq', and 'file' to the 
relax_data.read() user function.  Please read the user function documentation 
for more information.
RELAX_DATA = [['R1', '400', 399.8636182 * 1e6, 'data/r1.400.dat'],
                          ['R2', '400', 399.8636182 * 1e6, 'data/r2.400.dat'],
                          ['NOE', '400', 399.8636182 * 1e6, 
'data/noe.400.dat'],
                          ['R1', '600', 600.7904943 * 1e6, 'data/r1.600.dat'],
                          ['R2', '600', 600.7904943 * 1e6, 'data/r2.600.dat']
]

# The file containing the list of unresolved residues to exclude from the 
analysis.
UNRES = 'unresolved'

# The bond length and CSA values.
BOND_LENGTH = 1.02 * 1e-10
CSA = -172 * 1e-6

# The grid search size (the number of increments per dimension).
GRID_INC = 11

# The optimisation technique.
MIN_ALGOR = 'newton'

# The number of Monte Carlo simulations to be used for error analysis at the 
end of the analysis.
MC_NUM = 500

# Automatic looping over all rounds until convergence.
CONV_LOOP = 1

# The main class.
Main(self.relax)

influence of pdb orientation on model-free optimization?

Header

Content

Related Messages