mailr27258 - in /trunk/lib/sequence_alignment: __init__.py align_protein.py


Others Months | Index by Date | Thread Index
>>   [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Header


Content

Posted by edward on January 21, 2015 - 15:40:
Author: bugman
Date: Wed Jan 21 15:40:56 2015
New Revision: 27258

URL: http://svn.gna.org/viewcvs/relax?rev=27258&view=rev
Log:
Created the lib.sequence_alignment.align_protein module for the sequence 
alignment of proteins.

This general module currently implements the align_pairwise() function for 
the pairwise alignment of
protein sequences.  It provides the infrastructure for specifying gap 
starting and extension
penalties, choosing the alignment algorithm (currently only the 
Needleman-Wunsch sequence alignment
algorithm as 'NW70'), and choosing the substitution matrix (currently only 
BLOSUM62).  The function
provides lots of printouts for user feedback.


Added:
    trunk/lib/sequence_alignment/align_protein.py
Modified:
    trunk/lib/sequence_alignment/__init__.py

Modified: trunk/lib/sequence_alignment/__init__.py
URL: 
http://svn.gna.org/viewcvs/relax/trunk/lib/sequence_alignment/__init__.py?rev=27258&r1=27257&r2=27258&view=diff
==============================================================================
--- trunk/lib/sequence_alignment/__init__.py    (original)
+++ trunk/lib/sequence_alignment/__init__.py    Wed Jan 21 15:40:56 2015
@@ -23,6 +23,7 @@
 """The relax-lib sequence alignment package - a library of functions for 
aligning proteins, DNA, RNA or other molecules."""
 
 __all__ = [
+    'align_protein',
     'needleman_wunsch',
     'substitution_matrices'
 ]

Added: trunk/lib/sequence_alignment/align_protein.py
URL: 
http://svn.gna.org/viewcvs/relax/trunk/lib/sequence_alignment/align_protein.py?rev=27258&view=auto
==============================================================================
--- trunk/lib/sequence_alignment/align_protein.py       (added)
+++ trunk/lib/sequence_alignment/align_protein.py       Wed Jan 21 15:40:56 
2015
@@ -0,0 +1,98 @@
+###############################################################################
+#                                                                            
 #
+# Copyright (C) 2015 Edward d'Auvergne                                       
 #
+#                                                                            
 #
+# This file is part of the program relax (http://www.nmr-relax.com).         
 #
+#                                                                            
 #
+# This program is free software: you can redistribute it and/or modify       
 #
+# it under the terms of the GNU General Public License as published by       
 #
+# the Free Software Foundation, either version 3 of the License, or          
 #
+# (at your option) any later version.                                        
 #
+#                                                                            
 #
+# This program is distributed in the hope that it will be useful,            
 #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of             
 #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              
 #
+# GNU General Public License for more details.                               
 #
+#                                                                            
 #
+# You should have received a copy of the GNU General Public License          
 #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.      
 #
+#                                                                            
 #
+###############################################################################
+
+# Module docstring.
+"""General sequence alignment functions."""
+
+# Python module imports.
+from string import upper
+import sys
+from warnings import warn
+
+# relax module imports.
+from lib.errors import RelaxError
+from lib.sequence_alignment.needleman_wunsch import needleman_wunsch_align
+from lib.sequence_alignment.substitution_matrices import BLOSUM62, 
BLOSUM62_SEQ
+from lib.warnings import RelaxWarning
+
+
+def align_pairwise(sequence1, sequence2, algorithm='NW70', 
matrix='BLOSUM62', gap_penalty=1.0, extend_penalty=1.0):
+    """Align two protein sequences.
+
+    @param sequence1:           The first protein sequence as one letter 
codes.
+    @type sequence1:            str
+    @param sequence2:           The second protein sequence as one letter 
codes.
+    @type sequence2:            str
+    @keyword algorithm:         The alignment algorithm to use.
+    @type algorithm:            str
+    @keyword matrix:            The substitution matrix to use.
+    @type matrix:               str
+    @keyword gap_penalty:       The penalty for introducing gaps, as a 
positive number.
+    @type gap_penalty:          float
+    @keyword extend_penalty:    The penalty for extending a gap, as a 
positive number.
+    @type extend_penalty:       float
+    """
+
+    # Checks.
+    allowed_algor = ['NW70']
+    if algorithm not in allowed_algor:
+        raise RelaxError("The sequence alignment algorithm '%s' is unknown, 
it must be one of %s." % (algorithm, allowed_algor))
+    allowed_matrices = ['BLOSUM62']
+    if matrix not in allowed_matrices:
+        raise RelaxError("The substitution matrix '%s' is unknown, it must 
be one of %s." % (matrix, allowed_matrices))
+
+    # Capitalise the sequences.
+    sequence1 = upper(sequence1)
+    sequence2 = upper(sequence2)
+
+    # Turn off the extension penalty for algorithms not supporting it.
+    if extend_penalty != None and algorithm in ['NW70']:
+        warn(RelaxWarning("The gap extension penalty is not supported by the 
Needleman-Wunsch sequence alignment algorithm."))
+        extend_penalty = None
+
+    # Initial printout.
+    sys.stdout.write("\nPairwise protein alignment.\n")
+    sys.stdout.write("%-30s %s\n" % ("Substitution matrix:", matrix))
+    sys.stdout.write("%-30s %s\n" % ("Gap penalty:", gap_penalty))
+    sys.stdout.write("%-30s %s\n" % ("Extend penalty:", extend_penalty))
+    sys.stdout.write("\n%-30s %s\n" % ("Input sequence 1:", sequence1))
+    sys.stdout.write("%-30s %s\n" % ("Input sequence 2:", sequence2))
+
+    # Select the substitution matrix.
+    if matrix == 'BLOSUM62':
+        sub_matrix = BLOSUM62
+        sub_seq = BLOSUM62_SEQ
+
+    # The alignment.
+    if algorithm == 'NW70':
+        align1, align2, gaps = needleman_wunsch_align(sequence1, sequence2, 
sub_matrix=sub_matrix, sub_seq=sub_seq, gap_penalty=gap_penalty)
+
+    # Final printout.
+    sys.stdout.write("\n%-30s %s\n" % ("Aligned sequence 1:", align1))
+    sys.stdout.write("%-30s %s\n" % ("Aligned sequence 2:", align2))
+    sys.stdout.write("%-30s " % "")
+    for i in range(len(align1)):
+        if align1[i] == align2[i]:
+            sys.stdout.write("*")
+        else:
+            sys.stdout.write(" ")
+    sys.stdout.write("\n\n")
+




Related Messages


Powered by MHonArc, Updated Wed Jan 21 16:00:02 2015