r4910 - /1.3/generic_fns/sequence.py -- February 04, 2008

Author: bugman
Date: Mon Feb  4 18:06:15 2008
New Revision: 4910

URL: http://svn.gna.org/viewcvs/relax?rev=4910&view=rev
Log:
Huge redesign of the load_PDB_sequence() function (and converted it to the 
new relax design).

This new function should give much more flexibility and abstraction in the 
mapping from the
molecular structure to the relax mol-res-spin data structure.


Modified:
    1.3/generic_fns/sequence.py

Modified: 1.3/generic_fns/sequence.py
URL: 
http://svn.gna.org/viewcvs/relax/1.3/generic_fns/sequence.py?rev=4910&r1=4909&r2=4910&view=diff
==============================================================================
--- 1.3/generic_fns/sequence.py (original)
+++ 1.3/generic_fns/sequence.py Mon Feb  4 18:06:15 2008
@@ -22,9 +22,9 @@
 
 # relax module imports.
 from data import Data as relax_data_store
+from generic_fns.selection import count_spins, parse_token, spin_loop, 
tokenise
 from relax_errors import RelaxError, RelaxFileEmptyError, 
RelaxNoPdbChainError, RelaxNoPipeError, RelaxNoSequenceError, 
RelaxSequenceError
 from relax_io import extract_data, open_write_file, strip
-from generic_fns.selection import count_spins, spin_loop
 import sys
 
 
@@ -57,44 +57,118 @@
     write_body(file=sys.stdout, mol_name_col=mol_name_col, 
res_num_col=res_num_col, res_name_col=res_name_col, 
spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep)
 
 
-def load_PDB_sequence():
+def load_PDB_sequence(spin_id=None):
     """Function for loading the sequence out of a PDB file.
 
-    This needs to be modified to handle multiple peptide chains.
+    @param spin_id: The molecule, residue, and spin identifier string.
+    @type spin_id:  str
     """
 
     # Print out.
     print "\nLoading the sequence from the PDB file.\n"
 
+    # Alias the current data pipe.
+    cdp = relax_data_store[relax_data_store.current_pipe]
+
     # Reassign the sequence of the first structure.
-    if relax_data_store.pdb[run].structures[0].peptide_chains:
-        res = 
relax_data_store.pdb[run].structures[0].peptide_chains[0].residues
+    if cdp.structure.structures[0].peptide_chains:
+        chains = cdp.structure.structures[0].peptide_chains
         molecule = 'protein'
-    elif relax_data_store.pdb[run].structures[0].nucleotide_chains:
-        res = 
relax_data_store.pdb[run].structures[0].nucleotide_chains[0].residues
+    elif cdp.structure.structures[0].nucleotide_chains:
+        chains = cdp.structure.structures[0].nucleotide_chains
         molecule = 'nucleic acid'
     else:
         raise RelaxNoPdbChainError
 
-    # Add the run to 'relax_data_store.res'.
-    relax_data_store.res.add_list(run)
-
-    # Loop over the sequence.
-    for i in xrange(len(res)):
-        # Append a data container.
-        relax_data_store.res[run].add_item()
-
-        # Residue number.
-        relax_data_store.res[run][i].num = res[i].number
-
-        # Residue name.
-        if molecule == 'nucleic acid':
-            relax_data_store.res[run][i].name = res[i].name[-1]
+    # Split up the selection string.
+    mol_token, res_token, spin_token = tokenise(spin_id)
+
+    # Parse the tokens.
+    molecules = parse_token(mol_token)
+    residues = parse_token(res_token)
+    spins = parse_token(spin_token)
+
+    # Init some indecies.
+    mol_index = 0
+    res_index = 0
+    spin_index = 0
+
+    # Loop over the molecules.
+    for chain in chains:
+        # The name of the molecule.
+        if chain.chain_id:
+            mol_name = chain.chain_id
+        elif chain.segment_id:
+            mol_name = chain.segment_id
         else:
-            relax_data_store.res[run][i].name = res[i].name
-
-        # Select the residue.
-        relax_data_store.res[run][i].select = 1
+            mol_name = None
+
+        # Skip non-matching molecules.
+        if mol_token and mol_name not in molecules:
+            continue
+
+        # Add the molecule if there is a molecule name (otherwise everything 
goes into the default first MolecularContainer).
+        if mol_name:
+            # Replace the first empty molecule.
+            if mol_index == 0 and cdp.mol[0].name == None:
+                cdp.mol[0].name = chain.name
+
+            # Create a new molecule.
+            else:
+                # Add the molecule.
+                cdp.mol.add_item(mol_name=chain.name)
+
+        # Loop over the residues.
+        for res in chain.residues:
+            # The residue name and number.
+            if molecule == 'nucleic acid':
+                res_name = res.name[-1]
+            else:
+                res_name = res.name
+            res_num = res.number
+
+            # Skip non-matching residues.
+            if res_token and not (res_name in residues or res_num in 
residues):
+                continue
+
+            # Replace the first empty residue.
+            if res_index == 0 and cdp.mol[mol_index].res[0].name == None:
+                cdp.mol[mol_index].res[0].name = res_name
+                cdp.mol[mol_index].res[0].num = res_num
+
+            # Create a new residue.
+            else:
+                # Add the residue.
+                cdp.mol[mol_index].res.add_item(res_name=res_name, 
res_num=res_num)
+
+            # Loop over the spins.
+            for atom in res.atom_list:
+                # The spin name and number.
+                spin_name = atom.name
+                spin_num = atom.properties['serial_number']
+
+                # Skip non-matching spins.
+                if spin_token and not (spin_name in spins or spin_num in 
spins):
+                    continue
+
+                # Replace the first empty residue.
+                if spin_index == 0 and 
cdp.mol[mol_index].res[res_index].spin[0].name == None:
+                    cdp.mol[mol_index].res[res_index].spin[0].name = 
spin_name
+                    cdp.mol[mol_index].res[res_index].spin[0].num = spin_num
+
+                # Create a new residue.
+                else:
+                    # Add the residue.
+                    
cdp.mol[mol_index].res[res_index].spin.add_item(spin_name=spin_name, 
spin_num=spin_num)
+
+                # Increment the residue index.
+                spin_index = spin_index + 1
+
+            # Increment the residue index.
+            res_index = res_index + 1
+
+        # Increment the molecule index.
+        mol_index = mol_index + 1
 
 
 def read(file=None, dir=None, mol_name_col=None, res_num_col=0, 
res_name_col=1, spin_num_col=None, spin_name_col=None, sep=None):
r4910 - /1.3/generic_fns/sequence.py

Header

Content

Related Messages