r18506 - /trunk/generic_fns/structure/internal.py -- February 19, 2013

Author: bugman
Date: Tue Feb 19 22:03:41 2013
New Revision: 18506

URL: http://svn.gna.org/viewcvs/relax?rev=18506&view=rev
Log:
Faster PDB parsing by removing the use of the re.search() function.

Now line slices are directly compared instead.


Modified:
    trunk/generic_fns/structure/internal.py

Modified: trunk/generic_fns/structure/internal.py
URL: http://svn.gna.org/viewcvs/relax/trunk/generic_fns/structure/internal.py?rev=18506&r1=18505&r2=18506&view=diff
==============================================================================
--- trunk/generic_fns/structure/internal.py (original)
+++ trunk/generic_fns/structure/internal.py Tue Feb 19 22:03:41 2013
@@ -26,7 +26,6 @@
 from numpy import array, dot, float64, linalg, zeros
 import os
 from os import F_OK, access
-from re import search
 from string import digits
 from warnings import warn
 
@@ -277,7 +276,7 @@
         # Loop over the lines.
         for i in range(len(lines)):
             # No match, therefore assume to be out of the connectivity annotation section.
-            if lines[i][0: 6] not in records:
+            if lines[i][:6] not in records:
                 break
         
         # Return the remaining lines.
@@ -303,18 +302,18 @@
         # Loop over the data.
         for i in range(len(lines)):
             # A new model record.
-            if search('^MODEL', lines[i]):
+            if lines[i][:5] == 'MODEL':
                 try:
                     model = int(lines[i].split()[1])
                 except:
                     raise RelaxError("The MODEL record " + repr(lines[i]) + " is corrupt, cannot read the PDB file.")
 
             # Skip all records prior to the first ATOM or HETATM record.
-            if not (search('^ATOM', lines[i]) or search('^HETATM', lines[i])) and not len(records):
+            if not (lines[i][:4] == 'ATOM' or lines[i][:6] == 'HETATM') and not len(records):
                 continue
 
             # End of the model.
-            if search('^ENDMDL', lines[i]):
+            if lines[i][:6] == 'ENDMDL':
                 # Yield the info.
                 yield model, records
 
@@ -355,7 +354,7 @@
         # Loop over the lines.
         for i in range(len(lines)):
             # No match, therefore assume to be out of the hetrogen section.
-            if lines[i][0: 6] not in records:
+            if lines[i][:6] not in records:
                 break
         
         # Return the remaining lines.
@@ -382,7 +381,7 @@
         # Loop over the lines.
         for i in range(len(lines)):
             # No match, therefore assume to be out of the miscellaneous section.
-            if lines[i][0: 6] not in records:
+            if lines[i][:6] not in records:
                 break
         
         # Return the remaining lines.
@@ -414,7 +413,7 @@
         # Loop over the lines.
         for i in range(len(lines)):
             # No match, therefore assume to be out of the primary structure section.
-            if lines[i][0: 6] not in records:
+            if lines[i][:6] not in records:
                 break
         
         # Return the remaining lines.
@@ -443,7 +442,7 @@
         # Loop over the lines.
         for i in range(len(lines)):
             # No match, therefore assume to be out of the secondary structure section.
-            if lines[i][0: 6] not in records:
+            if lines[i][:6] not in records:
                 break
         
         # Return the remaining lines.
@@ -485,7 +484,7 @@
         # Loop over the lines.
         for i in range(len(lines)):
             # No match, therefore assume to be out of the title section.
-            if lines[i][0: 6] not in records:
+            if lines[i][:6] not in records:
                 break
         
         # Return the remaining lines.
@@ -599,19 +598,19 @@
         # Loop over the data.
         for i in range(len(records)):
             # A PDB termination record.
-            if search('^END', records[i]):
+            if records[i][:3] == 'END':
                 break
 
             # A model termination record.
-            if search('^ENDMDL', records[i]):
+            if records[i][:6] == 'ENDMDL':
                 end = True
 
             # A molecule termination record with no trailing HETATM.
-            elif i < len(records)-1 and search('^TER', records[i]) and not search('^HETATM', records[i+1]):
+            elif i < len(records)-1 and records[i][:3] == 'TER' and not records[i+1][:6] == 'HETATM':
                 end = True
 
             # A HETATM followed by an ATOM record.
-            elif i < len(records)-1 and search('^HETATM', records[i]) and search('^ATOM', records[i+1]):
+            elif i < len(records)-1 and records[i][:6] == 'HETATM' and records[i+1][:4] == 'ATOM':
                 end = True
 
             # End.
@@ -2187,11 +2186,11 @@
                 continue
 
             # Add the atom.
-            if search('^ATOM', record) or search('^HETATM', record):
+            if record[:4] == 'ATOM' or record[:6] == 'HETATM':
                 # Parse the record.
-                if search('^ATOM', record):
+                if record[:4] == 'ATOM':
                     record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.atom(record)
-                if search('^HETATM', record):
+                if record[:6] == 'HETATM':
                     record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.hetatm(record)
 
                 # Handle the alternate locations.
@@ -2212,7 +2211,7 @@
                 self.atom_add(pdb_record=record_type, atom_num=serial, atom_name=name, res_name=res_name, chain_id=chain_id, res_num=res_seq, pos=[x, y, z], element=element)
 
             # Connect atoms.
-            if search('^CONECT', record):
+            if record[:6] == 'CONECT':
                 # Parse the record.
                 record_type, serial, bonded1, bonded2, bonded3, bonded4 = pdb_read.conect(record)
r18506 - /trunk/generic_fns/structure/internal.py

Header

Content

Related Messages