1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
 22   
 23  """General sequence alignment functions.""" 
 24   
 25   
 26  import sys 
 27   
 28   
 29  from lib.errors import RelaxError 
 30  from lib.sequence_alignment.needleman_wunsch import needleman_wunsch_align 
 31  from lib.sequence_alignment.substitution_matrices import BLOSUM62, BLOSUM62_SEQ, PAM250, PAM250_SEQ 
 32   
 33   
 34 -def align_pairwise(sequence1, sequence2, algorithm='NW70', matrix='BLOSUM62', gap_open_penalty=1.0, gap_extend_penalty=1.0, end_gap_open_penalty=0.0, end_gap_extend_penalty=0.0, verbosity=1): 
  35      """Align two protein sequences. 
 36   
 37      @param sequence1:                   The first protein sequence as one letter codes. 
 38      @type sequence1:                    str 
 39      @param sequence2:                   The second protein sequence as one letter codes. 
 40      @type sequence2:                    str 
 41      @keyword algorithm:                 The pairwise sequence alignment algorithm to use. 
 42      @type algorithm:                    str 
 43      @keyword matrix:                    The substitution matrix to use. 
 44      @type matrix:                       str 
 45      @keyword gap_open_penalty:          The penalty for introducing gaps, as a positive number. 
 46      @type gap_open_penalty:             float 
 47      @keyword gap_extend_penalty:        The penalty for extending a gap, as a positive number. 
 48      @type gap_extend_penalty:           float 
 49      @keyword end_gap_open_penalty:      The optional penalty for opening a gap at the end of a sequence. 
 50      @type end_gap_open_penalty:         float 
 51      @keyword end_gap_extend_penalty:    The optional penalty for extending a gap at the end of a sequence. 
 52      @type end_gap_extend_penalty:       float 
 53      @keyword verbosity:                 The level of verbosity.  Setting this to zero silences all printouts. 
 54      @type verbosity:                    int 
 55      @return:                            The alignment score, two alignment strings and the gap matrix. 
 56      @rtype:                             float, str, str, numpy rank-2 int array 
 57      """ 
 58   
 59       
 60      allowed_algor = ['NW70'] 
 61      if algorithm not in allowed_algor: 
 62          raise RelaxError("The sequence alignment algorithm '%s' is unknown, it must be one of %s." % (algorithm, allowed_algor)) 
 63      allowed_matrices = ['BLOSUM62', 'PAM250'] 
 64      if matrix not in allowed_matrices: 
 65          raise RelaxError("The substitution matrix '%s' is unknown, it must be one of %s." % (matrix, allowed_matrices)) 
 66   
 67       
 68      sequence1 = sequence1.upper() 
 69      sequence2 = sequence2.upper() 
 70   
 71       
 72      if verbosity: 
 73          sys.stdout.write("\nPairwise protein alignment.\n") 
 74          sys.stdout.write("%-30s %s\n" % ("Substitution matrix:", matrix)) 
 75          sys.stdout.write("%-30s %s\n" % ("Gap opening penalty:", gap_open_penalty)) 
 76          sys.stdout.write("%-30s %s\n" % ("Gap extend penalty:", gap_extend_penalty)) 
 77          sys.stdout.write("\n%-30s %s\n" % ("Input sequence 1:", sequence1)) 
 78          sys.stdout.write("%-30s %s\n" % ("Input sequence 2:", sequence2)) 
 79   
 80       
 81      if matrix == 'BLOSUM62': 
 82          sub_matrix = BLOSUM62 
 83          sub_seq = BLOSUM62_SEQ 
 84      elif matrix == 'PAM250': 
 85          sub_matrix = PAM250 
 86          sub_seq = PAM250_SEQ 
 87   
 88       
 89      if algorithm == 'NW70': 
 90          score, align1, align2, gaps = needleman_wunsch_align(sequence1, sequence2, sub_matrix=sub_matrix, sub_seq=sub_seq, gap_open_penalty=gap_open_penalty, gap_extend_penalty=gap_extend_penalty, end_gap_open_penalty=end_gap_open_penalty, end_gap_extend_penalty=end_gap_extend_penalty) 
 91   
 92       
 93      if verbosity: 
 94          sys.stdout.write("\n%-30s %s\n" % ("Aligned sequence 1:", align1)) 
 95          sys.stdout.write("%-30s %s\n" % ("Aligned sequence 2:", align2)) 
 96          sys.stdout.write("%-30s " % "") 
 97          for i in range(len(align1)): 
 98              if align1[i] == align2[i]: 
 99                  sys.stdout.write("*") 
100              else: 
101                  sys.stdout.write(" ") 
102          sys.stdout.write("\n\n") 
103   
104       
105      return score, align1, align2, gaps 
 106