Author: bugman Date: Wed Apr 5 11:34:20 2006 New Revision: 2449 URL: http://svn.gna.org/viewcvs/relax?rev=2449&view=rev Log: Model selection is now enabled for hybrid models. This completes task #3122. The location of the task is https://gna.org/task/?func=detailitem&item_id=3122. Modified: 1.2/generic_fns/model_selection.py 1.2/specific_fns/hybrid.py 1.2/specific_fns/model_free.py 1.2/specific_fns/specific_setup.py Modified: 1.2/generic_fns/model_selection.py URL: http://svn.gna.org/viewcvs/relax/1.2/generic_fns/model_selection.py?rev=2449&r1=2448&r2=2449&view=diff ============================================================================== --- 1.2/generic_fns/model_selection.py (original) +++ 1.2/generic_fns/model_selection.py Wed Apr 5 11:34:20 2006 @@ -69,20 +69,65 @@ if len(self.runs) == 0: raise RelaxError, "No runs are availible for use in model selection." - # Store the first run. + # Initialise. + self.first_run = None + self.function_type = {} + self.duplicate_data = {} + self.count_num_instances = {} + self.model_statistics = {} + self.skip_function = {} + + # Cross validation setup. if type(self.runs[0]) == list: - self.first_run = self.runs[0][0] + # No runs. + if len(run) == 0: + raise RelaxError, "No runs are availible for use in model selection in the array " + `run` + "." + + # Loop over the runs. + for i in xrange(len(self.runs)): + for j in xrange(len(self.runs[i])): + # The run name. + run = self.runs[i][j] + + # Function type. + self.function_type[run] = self.relax.data.run_types[self.relax.data.run_names.index(run)] + + # Store the first non-hybrid run. + if not self.first_run and self.function_type[run] != 'hybrid': + self.first_run = run + + # Specific duplicate data, number of instances, and model statistics functions. + self.duplicate_data[run] = self.relax.specific_setup.setup('duplicate_data', self.function_type[run]) + self.count_num_instances[run] = self.relax.specific_setup.setup('num_instances', self.function_type[run]) + self.model_statistics[run] = self.relax.specific_setup.setup('model_stats', self.function_type[run]) + self.skip_function[run] = self.relax.specific_setup.setup('skip_function', self.function_type[run]) + + # Run various tests. + self.tests(run) + + # All other model selection setup. else: - self.first_run = self.runs[0] - - # Function type. - self.function_type = self.relax.data.run_types[self.relax.data.run_names.index(self.first_run)] - - # Specific duplicate data, number of instances, and model statistics functions. - self.duplicate_data = self.relax.specific_setup.setup('duplicate_data', self.function_type) - self.count_num_instances = self.relax.specific_setup.setup('num_instances', self.function_type) - self.model_statistics = self.relax.specific_setup.setup('model_stats', self.function_type) - self.skip_function = self.relax.specific_setup.setup('skip_function', self.function_type) + # Loop over the runs. + for i in xrange(len(self.runs)): + # The run name. + run = self.runs[i] + + # Function type. + self.function_type[run] = self.relax.data.run_types[self.relax.data.run_names.index(run)] + + # Store the first non-hybrid run. + if not self.first_run and self.function_type[run] != 'hybrid': + self.first_run = run + + # Specific duplicate data, number of instances, and model statistics functions. + self.duplicate_data[run] = self.relax.specific_setup.setup('duplicate_data', self.function_type[run]) + self.count_num_instances[run] = self.relax.specific_setup.setup('num_instances', self.function_type[run]) + self.model_statistics[run] = self.relax.specific_setup.setup('model_stats', self.function_type[run]) + self.skip_function[run] = self.relax.specific_setup.setup('skip_function', self.function_type[run]) + + # Run various tests. + self.tests(run) + # Number of instances. If the number is not the same for each run, then the minimum number will give the specific function self.model_statistics the # opportunity to consolidate the instances to the minimum number if possible. @@ -96,7 +141,7 @@ # Loop over the nested array. for j in xrange(len(self.runs[i])): # Number of instances. - num = self.count_num_instances(self.runs[i][j]) + num = self.count_num_instances[self.runs[i][j]](self.runs[i][j]) self.num_instances[i].append(num) # Minimum. @@ -105,30 +150,12 @@ else: # Number of instances. - num = self.count_num_instances(self.runs[i]) + num = self.count_num_instances[self.runs[i]](self.runs[i]) self.num_instances.append(num) # Minimum. if num < self.min_instances: self.min_instances = num - - # Tests for all runs. - for run in self.runs: - # An array of arrays - for cross validation model selection. - if type(run) == list: - # No runs. - if len(run) == 0: - raise RelaxError, "No runs are availible for use in model selection in the array " + `run` + "." - - # Loop over the nested array. - for run2 in run: - # Run various tests. - self.tests(run2) - - # runs is a normal array. - else: - # Run various tests. - self.tests(run) # Loop over the number of instances. @@ -145,20 +172,20 @@ for j in xrange(len(self.runs)): # Single-item-out cross validation. if method == 'CV': - # Reassign the run. - run = self.runs[j][0] - # Sum of chi-squared values. sum_crit = 0.0 # Loop over the validation samples and sum the chi-squared values. for k in xrange(len(self.runs[j])): + # Reassign the run. + run = self.runs[j][k] + # Skip function. - if self.skip_function(run=self.runs[j][k], instance=i): + if self.skip_function[run](run=run, instance=i): continue # Get the model statistics. - k, n, chi2 = self.model_statistics(run=self.runs[j][k], instance=i, min_instances=self.min_instances, num_instances=self.num_instances[j][k]) + k, n, chi2 = self.model_statistics[run](run=run, instance=i, min_instances=self.min_instances, num_instances=self.num_instances[j][k]) # Missing data sets. if k == None or n == None or chi2 == None: @@ -176,11 +203,11 @@ run = self.runs[j] # Skip function. - if self.skip_function(run=run, instance=i, min_instances=self.min_instances, num_instances=self.num_instances[j]): + if self.skip_function[run](run=run, instance=i, min_instances=self.min_instances, num_instances=self.num_instances[j]): continue # Get the model statistics. - k, n, chi2 = self.model_statistics(run=run, instance=i, min_instances=self.min_instances, num_instances=self.num_instances[j]) + k, n, chi2 = self.model_statistics[run](run=run, instance=i, min_instances=self.min_instances, num_instances=self.num_instances[j]) # Missing data sets. if k == None or n == None or chi2 == None: @@ -202,7 +229,7 @@ # Duplicate the data from the 'best_model' to the model selection run 'modsel_run'. if best_model != None: - self.duplicate_data(new_run=modsel_run, old_run=best_model, instance=i) + self.duplicate_data[best_model](new_run=modsel_run, old_run=best_model, instance=i) def aic(self, chi2, k, n): @@ -264,9 +291,14 @@ # Find the index of the run. index = self.relax.data.run_names.index(run) - # Test if the function type is the same as 'self.function_type'. - if self.relax.data.run_types[index] != self.function_type: + # Test if the function type is the same as 'self.function_type' (skip the test if self.function_type is a hybrid). + #if self.function_type != 'hybrid' and self.relax.data.run_types[index] != self.function_type: + if self.relax.data.run_types[index] != self.function_type[run]: raise RelaxError, "The runs supplied are not all of the same function type." + + # Skip all tests if the model is a hybrid. + if self.relax.data.run_types[index] == 'hybrid': + return # Test if sequence data is loaded. if not self.relax.data.res.has_key(run): Modified: 1.2/specific_fns/hybrid.py URL: http://svn.gna.org/viewcvs/relax/1.2/specific_fns/hybrid.py?rev=2449&r1=2448&r2=2449&view=diff ============================================================================== --- 1.2/specific_fns/hybrid.py (original) +++ 1.2/specific_fns/hybrid.py Wed Apr 5 11:34:20 2006 @@ -26,6 +26,28 @@ """Class containing function specific to hybrid models.""" self.relax = relax + + + def duplicate_data(self, new_run=None, old_run=None, instance=None): + """Function for duplicating data.""" + + # Test that the new run exists. + if not new_run in self.relax.data.run_names: + raise RelaxNoRunError, new_run + + # Test that the old run exists. + if not old_run in self.relax.data.run_names: + raise RelaxNoRunError, old_run + + # Test that the new run has no sequence loaded. + if self.relax.data.res.has_key(new_run): + raise RelaxSequenceError, new_run + + # Reset the new run type to hybrid! + self.relax.data.run_types[self.relax.data.run_names.index(new_run)] = 'hybrid' + + # Duplicate the hybrid run data structure. + self.relax.data.hybrid_runs[new_run] = self.relax.data.hybrid_runs[old_run] def hybridise(self, hybrid=None, runs=None): @@ -65,3 +87,52 @@ # Create the data structure of the runs which form the hybrid. self.relax.data.hybrid_runs[hybrid] = runs + + + def model_statistics(self, run=None, instance=None, min_instances=None, num_instances=None): + """Function for returning the values k, n, and chi2 of the hybrid. + + k - number of parameters. + n - number of data points. + chi2 - the chi-squared value. + """ + + # Arguments. + self.run = run + + # Initialise. + k_total = 0 + n_total = 0 + chi2_total = 0.0 + + # Specific setup. + for run in self.relax.data.hybrid_runs[self.run]: + # Function type. + function_type = self.relax.data.run_types[self.relax.data.run_names.index(run)] + + # Specific model statistics functions. + model_statistics = self.relax.specific_setup.setup('model_stats', function_type) + + # Get the statistics. + k, n, chi2 = model_statistics(run, instance=0, min_instances=1) + + # Sum the stats. + k_total = k_total + k + n_total = n_total + n + chi2_total = chi2_total + chi2 + + # Return the totals. + return k_total, n_total, chi2_total + + + def num_instances(self, run=None): + """Function for returning the number of instances, which for hybrids is always 1.""" + + return 1 + + + def skip_function(self, run=None, instance=None, min_instances=None, num_instances=None): + """Dummy function.""" + + return + Modified: 1.2/specific_fns/model_free.py URL: http://svn.gna.org/viewcvs/relax/1.2/specific_fns/model_free.py?rev=2449&r1=2448&r2=2449&view=diff ============================================================================== --- 1.2/specific_fns/model_free.py (original) +++ 1.2/specific_fns/model_free.py Wed Apr 5 11:34:20 2006 @@ -2507,6 +2507,10 @@ global_stats = 0 break + # Determine the parameter set type. + self.param_set = self.determine_param_set_type() + + # Sequence specific data. # Statistics for a single residue. if not global_stats and not combine: # Missing data sets. Modified: 1.2/specific_fns/specific_setup.py URL: http://svn.gna.org/viewcvs/relax/1.2/specific_fns/specific_setup.py?rev=2449&r1=2448&r2=2449&view=diff ============================================================================== --- 1.2/specific_fns/specific_setup.py (original) +++ 1.2/specific_fns/specific_setup.py Wed Apr 5 11:34:20 2006 @@ -84,6 +84,10 @@ # Model-free analysis. if function_type == 'mf': function = self.mf_funcs() + + # Hybrid models. + if function_type == 'hybrid': + function = self.hybrid_funcs() # Catch all errors. except: @@ -103,8 +107,28 @@ return function + def hybrid_funcs(self): + """Hybrid model specific functions.""" + + # Duplicate data function. + if self.eqi == 'duplicate_data': + return self.relax.specific.hybrid.duplicate_data + + # Model statistics. + if self.eqi == 'model_stats': + return self.relax.specific.hybrid.model_statistics + + # Number of instances. + if self.eqi == 'num_instances': + return self.relax.specific.hybrid.num_instances + + # Skip function. + if self.eqi == 'skip_function': + return self.relax.specific.hybrid.skip_function + + def jw_funcs(self): - """Model-free analysis specific functions.""" + """Reduced spectral density mapping specific functions.""" # Calculate function. if self.eqi == 'calculate':