Author: bugman Date: Thu Jan 29 13:34:38 2015 New Revision: 27349 URL: http://svn.gna.org/viewcvs/relax?rev=27349&view=rev Log: Added the lib.sequence_alignment.align_protein.align_multiple_from_pairwise() function. This should have been committed earlier. The function is only partly implemented. Modified: trunk/lib/sequence_alignment/align_protein.py Modified: trunk/lib/sequence_alignment/align_protein.py URL: http://svn.gna.org/viewcvs/relax/trunk/lib/sequence_alignment/align_protein.py?rev=27349&r1=27348&r2=27349&view=diff ============================================================================== --- trunk/lib/sequence_alignment/align_protein.py (original) +++ trunk/lib/sequence_alignment/align_protein.py Thu Jan 29 13:34:38 2015 @@ -23,12 +23,106 @@ """General sequence alignment functions.""" # Python module imports. +from numpy import int16, zeros import sys # relax module imports. from lib.errors import RelaxError from lib.sequence_alignment.needleman_wunsch import needleman_wunsch_align from lib.sequence_alignment.substitution_matrices import BLOSUM62, BLOSUM62_SEQ, PAM250, PAM250_SEQ + + +def align_multiple_from_pairwise(reference_sequence, sequences, algorithm='NW70', matrix='BLOSUM62', gap_open_penalty=1.0, gap_extend_penalty=1.0, end_gap_open_penalty=0.0, end_gap_extend_penalty=0.0): + """Align multiple protein sequences to one reference by fusing multiple pairwise alignments. + + @param reference_sequence: The first protein sequence as one letter codes to use as the reference. + @type reference_sequence: str + @param sequences: The list of remaining protein sequences as one letter codes. + @type sequences: list of str + @keyword algorithm: The pairwise sequence alignment algorithm to use. + @type algorithm: str + @keyword matrix: The substitution matrix to use. + @type matrix: str + @keyword gap_open_penalty: The penalty for introducing gaps, as a positive number. + @type gap_open_penalty: float + @keyword gap_extend_penalty: The penalty for extending a gap, as a positive number. + @type gap_extend_penalty: float + @keyword end_gap_open_penalty: The optional penalty for opening a gap at the end of a sequence. + @type end_gap_open_penalty: float + @keyword end_gap_extend_penalty: The optional penalty for extending a gap at the end of a sequence. + @type end_gap_extend_penalty: float + @return: The two alignment strings and the gap matrix. + @rtype: str, str, numpy rank-2 int array + """ + + # The pairwise alignments. + N = len(sequences) + align1_list = [] + align2_list = [] + gap_list = [] + for i in range(N): + # Pairwise alignment. + align1, align2, gaps = align_pairwise(reference_sequence, sequences[i], algorithm=algorithm, matrix=matrix, gap_open_penalty=gap_open_penalty, gap_extend_penalty=gap_extend_penalty, end_gap_open_penalty=end_gap_open_penalty, end_gap_extend_penalty=end_gap_extend_penalty) + + # Store the results. + align1_list.append(align1) + align2_list.append(align2) + gap_list.append(gaps) + + # Convert all sequence strings to lists. + ref_list = list(reference_sequence) + seq_list = [] + for i in range(N): + seq_list.append(list(sequences[i])) + + # Loop over the alignment elements. + strings = [] + index = -1 + offsets = zeros(N, int16) + while 1: + # Increment the index. + index += 1 + print "\nIndex %i" % index + + # Termination condition. + term = True + for i in range(N): + if index + offsets[i] < len(gap_list[i][0]): + term = False + else: + print " At end in %i" % i + if term: + break + + # A gap in one of the references. + gap = False + for i in range(N): + if index + offsets[i] >= len(gap_list[i][0]) or gap_list[i][0, index]: + print " Gap in %i" % i + offsets[i] += 1 + gap = True + print " New offsets %s" % offsets + + # No reference gap. + if not gap: + print " No ref gap." + continue + + # Insert the gap into the reference list. + print " Inserting gap into ref list at %i" % index + ref_list.insert(index, '-') + + for i in range(N): + seq = ''.join(seq_list[i]) + strings.append(seq) + + ref = ''.join(ref_list) + + print ref_list + print seq_list + + # Return the results. + return [ref] + strings, gap_list def align_pairwise(sequence1, sequence2, algorithm='NW70', matrix='BLOSUM62', gap_open_penalty=1.0, gap_extend_penalty=1.0, end_gap_open_penalty=0.0, end_gap_extend_penalty=0.0):