Author: bugman Date: Mon Oct 5 10:25:33 2009 New Revision: 9564 URL: http://svn.gna.org/viewcvs/relax?rev=9564&view=rev Log: Bug fix: Added the function __deselect_spins() to remove spins with too little data. This fixes bug #14428 (http://gna.org/bugs/?14428) reported by Pierre-Yves Savard <pierre-yves dott savard at bcm dot ulaval dot ca>. The new function is almost identical to that used in the model-free analysis performed by relax in that spins with no or too little data, or those which would be over-fit are removed from the analysis. Modified: 1.3/generic_fns/dasha.py 1.3/generic_fns/palmer.py Modified: 1.3/generic_fns/dasha.py URL: http://svn.gna.org/viewcvs/relax/1.3/generic_fns/dasha.py?rev=9564&r1=9563&r2=9564&view=diff ============================================================================== --- 1.3/generic_fns/dasha.py (original) +++ 1.3/generic_fns/dasha.py Mon Oct 5 10:25:33 2009 @@ -37,9 +37,36 @@ from specific_fns.setup import model_free_obj +def __deselect_spins(): + """Deselect spins with no or too little data, that are overfitting, etc.""" + + # Test if sequence data exists. + if not exists_mol_res_spin_data(): + raise RelaxNoSequenceError + + # Is structural data required? + need_vect = False + if hasattr(cdp, 'diff_tensor') and (cdp.diff_tensor.type == 'spheroid' or cdp.diff_tensor.type == 'ellipsoid'): + need_vect = True + + # Loop over the sequence. + for spin in spin_loop(): + # Relaxation data must exist! + if not hasattr(spin, 'relax_data'): + spin.select = False + + # Require 3 or more relaxation data points. + elif len(spin.relax_data) < 3: + spin.select = False + + # Require at least as many data points as params to prevent over-fitting. + elif hasattr(spin, 'params') and spin.params and len(spin.params) > len(spin.relax_data): + spin.select = False + + def create(algor='LM', dir=None, force=False): """Create the Dasha script file 'dasha_script' for controlling the program. - + @keyword algor: The optimisation algorithm to use. This can be the Levenberg-Marquardt algorithm 'LM' or the Newton-Raphson algorithm 'NR'. @type algor: str @@ -76,6 +103,9 @@ # Test the number of spins. if len(residue.spin) > 1: raise RelaxError("More than one spin per residue is not supported.") + + # Deselect certain spins. + __deselect_spins() # Directory creation. if dir == None: Modified: 1.3/generic_fns/palmer.py URL: http://svn.gna.org/viewcvs/relax/1.3/generic_fns/palmer.py?rev=9564&r1=9563&r2=9564&view=diff ============================================================================== --- 1.3/generic_fns/palmer.py (original) +++ 1.3/generic_fns/palmer.py Mon Oct 5 10:25:33 2009 @@ -38,6 +38,33 @@ from relax_errors import RelaxDirError, RelaxFileError, RelaxNoModelError, RelaxNoPdbError, RelaxNoSequenceError from relax_io import mkdir_nofail, open_write_file, test_binary from specific_fns.setup import model_free_obj + + +def __deselect_spins(): + """Deselect spins with no or too little data, that are overfitting, etc.""" + + # Test if sequence data exists. + if not exists_mol_res_spin_data(): + raise RelaxNoSequenceError + + # Is structural data required? + need_vect = False + if hasattr(cdp, 'diff_tensor') and (cdp.diff_tensor.type == 'spheroid' or cdp.diff_tensor.type == 'ellipsoid'): + need_vect = True + + # Loop over the sequence. + for spin in spin_loop(): + # Relaxation data must exist! + if not hasattr(spin, 'relax_data'): + spin.select = False + + # Require 3 or more relaxation data points. + elif len(spin.relax_data) < 3: + spin.select = False + + # Require at least as many data points as params to prevent over-fitting. + elif hasattr(spin, 'params') and spin.params and len(spin.params) > len(spin.relax_data): + spin.select = False def create(dir=None, binary=None, diff_search=None, sims=None, sim_type=None, trim=None, steps=None, heteronuc_type=None, atom1=None, atom2=None, spin_id=None, force=False, constraints=True): @@ -97,6 +124,9 @@ if hasattr(cdp, 'diff_tensor') and not cdp.diff_tensor.type == 'sphere' and not hasattr(cdp, 'structure'): raise RelaxNoPdbError + # Deselect certain spins. + __deselect_spins() + # Directory creation. if dir == None: dir = pipes.cdp_name()