r27350 - in /trunk/lib/sequence_alignment: __init__.py align_protein.py msa.py -- January 29, 2015

Author: bugman
Date: Thu Jan 29 13:37:46 2015
New Revision: 27350

URL: http://svn.gna.org/viewcvs/relax?rev=27350&view=rev
Log:
Initial lib.sequence_alignment.msa.central_star() function.

This was moved from 
lib.sequence_alignment.align_protein.align_multiple_from_pairwise().


Added:
    trunk/lib/sequence_alignment/msa.py
      - copied, changed from r27349, 
trunk/lib/sequence_alignment/align_protein.py
Modified:
    trunk/lib/sequence_alignment/__init__.py
    trunk/lib/sequence_alignment/align_protein.py

Modified: trunk/lib/sequence_alignment/__init__.py
URL: 
http://svn.gna.org/viewcvs/relax/trunk/lib/sequence_alignment/__init__.py?rev=27350&r1=27349&r2=27350&view=diff
==============================================================================
--- trunk/lib/sequence_alignment/__init__.py    (original)
+++ trunk/lib/sequence_alignment/__init__.py    Thu Jan 29 13:37:46 2015
@@ -24,6 +24,7 @@
 
 __all__ = [
     'align_protein',
+    'msa',
     'needleman_wunsch',
     'substitution_matrices'
 ]

Modified: trunk/lib/sequence_alignment/align_protein.py
URL: 
http://svn.gna.org/viewcvs/relax/trunk/lib/sequence_alignment/align_protein.py?rev=27350&r1=27349&r2=27350&view=diff
==============================================================================
--- trunk/lib/sequence_alignment/align_protein.py       (original)
+++ trunk/lib/sequence_alignment/align_protein.py       Thu Jan 29 13:37:46 
2015
@@ -23,106 +23,12 @@
 """General sequence alignment functions."""
 
 # Python module imports.
-from numpy import int16, zeros
 import sys
 
 # relax module imports.
 from lib.errors import RelaxError
 from lib.sequence_alignment.needleman_wunsch import needleman_wunsch_align
 from lib.sequence_alignment.substitution_matrices import BLOSUM62, 
BLOSUM62_SEQ, PAM250, PAM250_SEQ
-
-
-def align_multiple_from_pairwise(reference_sequence, sequences, 
algorithm='NW70', matrix='BLOSUM62', gap_open_penalty=1.0, 
gap_extend_penalty=1.0, end_gap_open_penalty=0.0, end_gap_extend_penalty=0.0):
-    """Align multiple protein sequences to one reference by fusing multiple 
pairwise alignments.
-
-    @param reference_sequence:          The first protein sequence as one 
letter codes to use as the reference.
-    @type reference_sequence:           str
-    @param sequences:                   The list of remaining protein 
sequences as one letter codes.
-    @type sequences:                    list of str
-    @keyword algorithm:                 The pairwise sequence alignment 
algorithm to use.
-    @type algorithm:                    str
-    @keyword matrix:                    The substitution matrix to use.
-    @type matrix:                       str
-    @keyword gap_open_penalty:          The penalty for introducing gaps, as 
a positive number.
-    @type gap_open_penalty:             float
-    @keyword gap_extend_penalty:        The penalty for extending a gap, as 
a positive number.
-    @type gap_extend_penalty:           float
-    @keyword end_gap_open_penalty:      The optional penalty for opening a 
gap at the end of a sequence.
-    @type end_gap_open_penalty:         float
-    @keyword end_gap_extend_penalty:    The optional penalty for extending a 
gap at the end of a sequence.
-    @type end_gap_extend_penalty:       float
-    @return:                            The two alignment strings and the 
gap matrix.
-    @rtype:                             str, str, numpy rank-2 int array
-    """
-
-    # The pairwise alignments.
-    N = len(sequences)
-    align1_list = []
-    align2_list = []
-    gap_list = []
-    for i in range(N):
-        # Pairwise alignment.
-        align1, align2, gaps = align_pairwise(reference_sequence, 
sequences[i], algorithm=algorithm, matrix=matrix, 
gap_open_penalty=gap_open_penalty, gap_extend_penalty=gap_extend_penalty, 
end_gap_open_penalty=end_gap_open_penalty, 
end_gap_extend_penalty=end_gap_extend_penalty)
-
-        # Store the results.
-        align1_list.append(align1)
-        align2_list.append(align2)
-        gap_list.append(gaps)
-
-    # Convert all sequence strings to lists.
-    ref_list = list(reference_sequence)
-    seq_list = []
-    for i in range(N):
-        seq_list.append(list(sequences[i]))
-
-    # Loop over the alignment elements.
-    strings = []
-    index = -1
-    offsets = zeros(N, int16)
-    while 1:
-        # Increment the index.
-        index += 1
-        print "\nIndex %i" % index
-
-        # Termination condition.
-        term = True
-        for i in range(N):
-            if index + offsets[i] < len(gap_list[i][0]):
-                term = False
-            else:
-                print "    At end in %i" % i
-        if term:
-            break
-
-        # A gap in one of the references.
-        gap = False
-        for i in range(N):
-            if index + offsets[i] >= len(gap_list[i][0]) or gap_list[i][0, 
index]:
-                print "    Gap in %i" % i
-                offsets[i] += 1
-                gap = True
-        print "    New offsets %s" % offsets
-
-        # No reference gap.
-        if not gap:
-            print "    No ref gap."
-            continue
-
-        # Insert the gap into the reference list.
-        print "    Inserting gap into ref list at %i" % index
-        ref_list.insert(index, '-')
-
-    for i in range(N):
-        seq = ''.join(seq_list[i])
-        strings.append(seq)
-
-    ref = ''.join(ref_list)
-
-    print ref_list
-    print seq_list
-
-    # Return the results.
-    return [ref] + strings, gap_list
 
 
 def align_pairwise(sequence1, sequence2, algorithm='NW70', 
matrix='BLOSUM62', gap_open_penalty=1.0, gap_extend_penalty=1.0, 
end_gap_open_penalty=0.0, end_gap_extend_penalty=0.0):

Copied: trunk/lib/sequence_alignment/msa.py (from r27349, 
trunk/lib/sequence_alignment/align_protein.py)
URL: 
http://svn.gna.org/viewcvs/relax/trunk/lib/sequence_alignment/msa.py?p2=trunk/lib/sequence_alignment/msa.py&p1=trunk/lib/sequence_alignment/align_protein.py&r1=27349&r2=27350&rev=27350&view=diff
==============================================================================
--- trunk/lib/sequence_alignment/align_protein.py       (original)
+++ trunk/lib/sequence_alignment/msa.py Thu Jan 29 13:37:46 2015
@@ -20,7 +20,7 @@
 
###############################################################################
 
 # Module docstring.
-"""General sequence alignment functions."""
+"""Multiple sequence alignment (MSA) algorithms."""
 
 # Python module imports.
 from numpy import int16, zeros
@@ -32,12 +32,10 @@
 from lib.sequence_alignment.substitution_matrices import BLOSUM62, 
BLOSUM62_SEQ, PAM250, PAM250_SEQ
 
 
-def align_multiple_from_pairwise(reference_sequence, sequences, 
algorithm='NW70', matrix='BLOSUM62', gap_open_penalty=1.0, 
gap_extend_penalty=1.0, end_gap_open_penalty=0.0, end_gap_extend_penalty=0.0):
+def central_star(sequences, algorithm='NW70', matrix='BLOSUM62', 
gap_open_penalty=1.0, gap_extend_penalty=1.0, end_gap_open_penalty=0.0, 
end_gap_extend_penalty=0.0):
     """Align multiple protein sequences to one reference by fusing multiple 
pairwise alignments.
 
-    @param reference_sequence:          The first protein sequence as one 
letter codes to use as the reference.
-    @type reference_sequence:           str
-    @param sequences:                   The list of remaining protein 
sequences as one letter codes.
+    @param sequences:                   The list of residue sequences as one 
letter codes.
     @type sequences:                    list of str
     @keyword algorithm:                 The pairwise sequence alignment 
algorithm to use.
     @type algorithm:                    str
@@ -123,73 +121,3 @@
 
     # Return the results.
     return [ref] + strings, gap_list
-
-
-def align_pairwise(sequence1, sequence2, algorithm='NW70', 
matrix='BLOSUM62', gap_open_penalty=1.0, gap_extend_penalty=1.0, 
end_gap_open_penalty=0.0, end_gap_extend_penalty=0.0):
-    """Align two protein sequences.
-
-    @param sequence1:                   The first protein sequence as one 
letter codes.
-    @type sequence1:                    str
-    @param sequence2:                   The second protein sequence as one 
letter codes.
-    @type sequence2:                    str
-    @keyword algorithm:                 The pairwise sequence alignment 
algorithm to use.
-    @type algorithm:                    str
-    @keyword matrix:                    The substitution matrix to use.
-    @type matrix:                       str
-    @keyword gap_open_penalty:          The penalty for introducing gaps, as 
a positive number.
-    @type gap_open_penalty:             float
-    @keyword gap_extend_penalty:        The penalty for extending a gap, as 
a positive number.
-    @type gap_extend_penalty:           float
-    @keyword end_gap_open_penalty:      The optional penalty for opening a 
gap at the end of a sequence.
-    @type end_gap_open_penalty:         float
-    @keyword end_gap_extend_penalty:    The optional penalty for extending a 
gap at the end of a sequence.
-    @type end_gap_extend_penalty:       float
-    @return:                            The two alignment strings and the 
gap matrix.
-    @rtype:                             str, str, numpy rank-2 int array
-    """
-
-    # Checks.
-    allowed_algor = ['NW70']
-    if algorithm not in allowed_algor:
-        raise RelaxError("The sequence alignment algorithm '%s' is unknown, 
it must be one of %s." % (algorithm, allowed_algor))
-    allowed_matrices = ['BLOSUM62', 'PAM250']
-    if matrix not in allowed_matrices:
-        raise RelaxError("The substitution matrix '%s' is unknown, it must 
be one of %s." % (matrix, allowed_matrices))
-
-    # Capitalise the sequences.
-    sequence1 = sequence1.upper()
-    sequence2 = sequence2.upper()
-
-    # Initial printout.
-    sys.stdout.write("\nPairwise protein alignment.\n")
-    sys.stdout.write("%-30s %s\n" % ("Substitution matrix:", matrix))
-    sys.stdout.write("%-30s %s\n" % ("Gap opening penalty:", 
gap_open_penalty))
-    sys.stdout.write("%-30s %s\n" % ("Gap extend penalty:", 
gap_extend_penalty))
-    sys.stdout.write("\n%-30s %s\n" % ("Input sequence 1:", sequence1))
-    sys.stdout.write("%-30s %s\n" % ("Input sequence 2:", sequence2))
-
-    # Select the substitution matrix.
-    if matrix == 'BLOSUM62':
-        sub_matrix = BLOSUM62
-        sub_seq = BLOSUM62_SEQ
-    elif matrix == 'PAM250':
-        sub_matrix = PAM250
-        sub_seq = PAM250_SEQ
-
-    # The alignment.
-    if algorithm == 'NW70':
-        align1, align2, gaps = needleman_wunsch_align(sequence1, sequence2, 
sub_matrix=sub_matrix, sub_seq=sub_seq, gap_open_penalty=gap_open_penalty, 
gap_extend_penalty=gap_extend_penalty, 
end_gap_open_penalty=end_gap_open_penalty, 
end_gap_extend_penalty=end_gap_extend_penalty)
-
-    # Final printout.
-    sys.stdout.write("\n%-30s %s\n" % ("Aligned sequence 1:", align1))
-    sys.stdout.write("%-30s %s\n" % ("Aligned sequence 2:", align2))
-    sys.stdout.write("%-30s " % "")
-    for i in range(len(align1)):
-        if align1[i] == align2[i]:
-            sys.stdout.write("*")
-        else:
-            sys.stdout.write(" ")
-    sys.stdout.write("\n\n")
-
-    # Return the results.
-    return align1, align2, gaps
r27350 - in /trunk/lib/sequence_alignment: init.py align_protein.py msa.py

Header

Content

Related Messages