mailr9564 - in /1.3/generic_fns: dasha.py palmer.py


Other Months | Index by Date | Thread Index
>>   [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Header


Content

Posted by edward on October 05, 2009 - 10:25:
Author: bugman
Date: Mon Oct  5 10:25:33 2009
New Revision: 9564

URL: http://svn.gna.org/viewcvs/relax?rev=9564&view=rev
Log:
Bug fix:  Added the function __deselect_spins() to remove spins with too little data.

This fixes bug #14428 (http://gna.org/bugs/?14428) reported by Pierre-Yves 
Savard <pierre-yves dott
savard at bcm dot ulaval dot ca>.

The new function is almost identical to that used in the model-free analysis
performed by relax, in that spins with no or too little data, or those which
would be over-fit, are removed from the analysis.


Modified:
    1.3/generic_fns/dasha.py
    1.3/generic_fns/palmer.py

Modified: 1.3/generic_fns/dasha.py
URL: 
http://svn.gna.org/viewcvs/relax/1.3/generic_fns/dasha.py?rev=9564&r1=9563&r2=9564&view=diff
==============================================================================
--- 1.3/generic_fns/dasha.py (original)
+++ 1.3/generic_fns/dasha.py Mon Oct  5 10:25:33 2009
@@ -37,9 +37,36 @@
 from specific_fns.setup import model_free_obj
 
 
+def __deselect_spins():
+    """Deselect spins with no or too little data, that are overfitting, 
etc."""
+
+    # Test if sequence data exists.
+    if not exists_mol_res_spin_data():
+        raise RelaxNoSequenceError
+
+    # Is structural data required?
+    need_vect = False
+    if hasattr(cdp, 'diff_tensor') and (cdp.diff_tensor.type == 'spheroid' 
or cdp.diff_tensor.type == 'ellipsoid'):
+        need_vect = True
+
+    # Loop over the sequence.
+    for spin in spin_loop():
+        # Relaxation data must exist!
+        if not hasattr(spin, 'relax_data'):
+            spin.select = False
+
+        # Require 3 or more relaxation data points.
+        elif len(spin.relax_data) < 3:
+            spin.select = False
+
+        # Require at least as many data points as params to prevent 
over-fitting.
+        elif hasattr(spin, 'params') and spin.params and len(spin.params) > 
len(spin.relax_data):
+            spin.select = False
+
+
 def create(algor='LM', dir=None, force=False):
     """Create the Dasha script file 'dasha_script' for controlling the 
program.
-    
+
     @keyword algor: The optimisation algorithm to use.  This can be the 
Levenberg-Marquardt
                     algorithm 'LM' or the Newton-Raphson algorithm 'NR'.
     @type algor:    str
@@ -76,6 +103,9 @@
         # Test the number of spins.
         if len(residue.spin) > 1:
             raise RelaxError("More than one spin per residue is not 
supported.")
+
+    # Deselect certain spins.
+    __deselect_spins()
 
     # Directory creation.
     if dir == None:

Modified: 1.3/generic_fns/palmer.py
URL: 
http://svn.gna.org/viewcvs/relax/1.3/generic_fns/palmer.py?rev=9564&r1=9563&r2=9564&view=diff
==============================================================================
--- 1.3/generic_fns/palmer.py (original)
+++ 1.3/generic_fns/palmer.py Mon Oct  5 10:25:33 2009
@@ -38,6 +38,33 @@
 from relax_errors import RelaxDirError, RelaxFileError, RelaxNoModelError, 
RelaxNoPdbError, RelaxNoSequenceError
 from relax_io import mkdir_nofail, open_write_file, test_binary
 from specific_fns.setup import model_free_obj
+
+
+def __deselect_spins():
+    """Deselect spins with no or too little data, that are overfitting, 
etc."""
+
+    # Test if sequence data exists.
+    if not exists_mol_res_spin_data():
+        raise RelaxNoSequenceError
+
+    # Is structural data required?
+    need_vect = False
+    if hasattr(cdp, 'diff_tensor') and (cdp.diff_tensor.type == 'spheroid' 
or cdp.diff_tensor.type == 'ellipsoid'):
+        need_vect = True
+
+    # Loop over the sequence.
+    for spin in spin_loop():
+        # Relaxation data must exist!
+        if not hasattr(spin, 'relax_data'):
+            spin.select = False
+
+        # Require 3 or more relaxation data points.
+        elif len(spin.relax_data) < 3:
+            spin.select = False
+
+        # Require at least as many data points as params to prevent 
over-fitting.
+        elif hasattr(spin, 'params') and spin.params and len(spin.params) > 
len(spin.relax_data):
+            spin.select = False
 
 
 def create(dir=None, binary=None, diff_search=None, sims=None, 
sim_type=None, trim=None, steps=None, heteronuc_type=None, atom1=None, 
atom2=None, spin_id=None, force=False, constraints=True):
@@ -97,6 +124,9 @@
     if hasattr(cdp, 'diff_tensor') and not cdp.diff_tensor.type == 'sphere' 
and not hasattr(cdp, 'structure'):
         raise RelaxNoPdbError
 
+    # Deselect certain spins.
+    __deselect_spins()
+
     # Directory creation.
     if dir == None:
         dir = pipes.cdp_name()




Related Messages


Powered by MHonArc, Updated Mon Oct 05 10:40:02 2009