mailr27349 - /trunk/lib/sequence_alignment/align_protein.py


Others Months | Index by Date | Thread Index
>>   [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Header


Content

Posted by edward on January 29, 2015 - 13:34:
Author: bugman
Date: Thu Jan 29 13:34:38 2015
New Revision: 27349

URL: http://svn.gna.org/viewcvs/relax?rev=27349&view=rev
Log:
Added the lib.sequence_alignment.align_protein.align_multiple_from_pairwise() 
function.

This should have been committed earlier.  The function is only partly 
implemented.


Modified:
    trunk/lib/sequence_alignment/align_protein.py

Modified: trunk/lib/sequence_alignment/align_protein.py
URL: 
http://svn.gna.org/viewcvs/relax/trunk/lib/sequence_alignment/align_protein.py?rev=27349&r1=27348&r2=27349&view=diff
==============================================================================
--- trunk/lib/sequence_alignment/align_protein.py       (original)
+++ trunk/lib/sequence_alignment/align_protein.py       Thu Jan 29 13:34:38 
2015
@@ -23,12 +23,106 @@
 """General sequence alignment functions."""
 
 # Python module imports.
+from numpy import int16, zeros
 import sys
 
 # relax module imports.
 from lib.errors import RelaxError
 from lib.sequence_alignment.needleman_wunsch import needleman_wunsch_align
 from lib.sequence_alignment.substitution_matrices import BLOSUM62, 
BLOSUM62_SEQ, PAM250, PAM250_SEQ
+
+
+def align_multiple_from_pairwise(reference_sequence, sequences, 
algorithm='NW70', matrix='BLOSUM62', gap_open_penalty=1.0, 
gap_extend_penalty=1.0, end_gap_open_penalty=0.0, end_gap_extend_penalty=0.0):
+    """Align multiple protein sequences to one reference by fusing multiple 
pairwise alignments.
+
+    @param reference_sequence:          The first protein sequence as one 
letter codes to use as the reference.
+    @type reference_sequence:           str
+    @param sequences:                   The list of remaining protein 
sequences as one letter codes.
+    @type sequences:                    list of str
+    @keyword algorithm:                 The pairwise sequence alignment 
algorithm to use.
+    @type algorithm:                    str
+    @keyword matrix:                    The substitution matrix to use.
+    @type matrix:                       str
+    @keyword gap_open_penalty:          The penalty for introducing gaps, as 
a positive number.
+    @type gap_open_penalty:             float
+    @keyword gap_extend_penalty:        The penalty for extending a gap, as 
a positive number.
+    @type gap_extend_penalty:           float
+    @keyword end_gap_open_penalty:      The optional penalty for opening a 
gap at the end of a sequence.
+    @type end_gap_open_penalty:         float
+    @keyword end_gap_extend_penalty:    The optional penalty for extending a 
gap at the end of a sequence.
+    @type end_gap_extend_penalty:       float
+    @return:                            The two alignment strings and the 
gap matrix.
+    @rtype:                             str, str, numpy rank-2 int array
+    """
+
+    # The pairwise alignments.
+    N = len(sequences)
+    align1_list = []
+    align2_list = []
+    gap_list = []
+    for i in range(N):
+        # Pairwise alignment.
+        align1, align2, gaps = align_pairwise(reference_sequence, 
sequences[i], algorithm=algorithm, matrix=matrix, 
gap_open_penalty=gap_open_penalty, gap_extend_penalty=gap_extend_penalty, 
end_gap_open_penalty=end_gap_open_penalty, 
end_gap_extend_penalty=end_gap_extend_penalty)
+
+        # Store the results.
+        align1_list.append(align1)
+        align2_list.append(align2)
+        gap_list.append(gaps)
+
+    # Convert all sequence strings to lists.
+    ref_list = list(reference_sequence)
+    seq_list = []
+    for i in range(N):
+        seq_list.append(list(sequences[i]))
+
+    # Loop over the alignment elements.
+    strings = []
+    index = -1
+    offsets = zeros(N, int16)
+    while 1:
+        # Increment the index.
+        index += 1
+        print "\nIndex %i" % index
+
+        # Termination condition.
+        term = True
+        for i in range(N):
+            if index + offsets[i] < len(gap_list[i][0]):
+                term = False
+            else:
+                print "    At end in %i" % i
+        if term:
+            break
+
+        # A gap in one of the references.
+        gap = False
+        for i in range(N):
+            if index + offsets[i] >= len(gap_list[i][0]) or gap_list[i][0, 
index]:
+                print "    Gap in %i" % i
+                offsets[i] += 1
+                gap = True
+        print "    New offsets %s" % offsets
+
+        # No reference gap.
+        if not gap:
+            print "    No ref gap."
+            continue
+
+        # Insert the gap into the reference list.
+        print "    Inserting gap into ref list at %i" % index
+        ref_list.insert(index, '-')
+
+    for i in range(N):
+        seq = ''.join(seq_list[i])
+        strings.append(seq)
+
+    ref = ''.join(ref_list)
+
+    print ref_list
+    print seq_list
+
+    # Return the results.
+    return [ref] + strings, gap_list
 
 
 def align_pairwise(sequence1, sequence2, algorithm='NW70', 
matrix='BLOSUM62', gap_open_penalty=1.0, gap_extend_penalty=1.0, 
end_gap_open_penalty=0.0, end_gap_extend_penalty=0.0):




Related Messages


Powered by MHonArc, Updated Thu Jan 29 13:40:02 2015