Package lib :: Package sequence_alignment :: Module align_protein
[hide private]
[frames] | no frames]

Source Code for Module lib.sequence_alignment.align_protein

  1  ############################################################################### 
  2  #                                                                             # 
  3  # Copyright (C) 2015 Edward d'Auvergne                                        # 
  4  #                                                                             # 
  5  # This file is part of the program relax (http://www.nmr-relax.com).          # 
  6  #                                                                             # 
  7  # This program is free software: you can redistribute it and/or modify        # 
  8  # it under the terms of the GNU General Public License as published by        # 
  9  # the Free Software Foundation, either version 3 of the License, or           # 
 10  # (at your option) any later version.                                         # 
 11  #                                                                             # 
 12  # This program is distributed in the hope that it will be useful,             # 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 15  # GNU General Public License for more details.                                # 
 16  #                                                                             # 
 17  # You should have received a copy of the GNU General Public License           # 
 18  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
 19  #                                                                             # 
 20  ############################################################################### 
 21   
 22  # Module docstring. 
 23  """General sequence alignment functions.""" 
 24   
 25  # Python module imports. 
 26  import sys 
 27   
 28  # relax module imports. 
 29  from lib.errors import RelaxError 
 30  from lib.sequence_alignment.needleman_wunsch import needleman_wunsch_align 
 31  from lib.sequence_alignment.substitution_matrices import BLOSUM62, BLOSUM62_SEQ, PAM250, PAM250_SEQ 
 32   
 33   
34 -def align_pairwise(sequence1, sequence2, algorithm='NW70', matrix='BLOSUM62', gap_open_penalty=1.0, gap_extend_penalty=1.0, end_gap_open_penalty=0.0, end_gap_extend_penalty=0.0, verbosity=1):
35 """Align two protein sequences. 36 37 @param sequence1: The first protein sequence as one letter codes. 38 @type sequence1: str 39 @param sequence2: The second protein sequence as one letter codes. 40 @type sequence2: str 41 @keyword algorithm: The pairwise sequence alignment algorithm to use. 42 @type algorithm: str 43 @keyword matrix: The substitution matrix to use. 44 @type matrix: str 45 @keyword gap_open_penalty: The penalty for introducing gaps, as a positive number. 46 @type gap_open_penalty: float 47 @keyword gap_extend_penalty: The penalty for extending a gap, as a positive number. 48 @type gap_extend_penalty: float 49 @keyword end_gap_open_penalty: The optional penalty for opening a gap at the end of a sequence. 50 @type end_gap_open_penalty: float 51 @keyword end_gap_extend_penalty: The optional penalty for extending a gap at the end of a sequence. 52 @type end_gap_extend_penalty: float 53 @keyword verbosity: The level of verbosity. Setting this to zero silences all printouts. 54 @type verbosity: int 55 @return: The alignment score, two alignment strings and the gap matrix. 56 @rtype: float, str, str, numpy rank-2 int array 57 """ 58 59 # Checks. 60 allowed_algor = ['NW70'] 61 if algorithm not in allowed_algor: 62 raise RelaxError("The sequence alignment algorithm '%s' is unknown, it must be one of %s." % (algorithm, allowed_algor)) 63 allowed_matrices = ['BLOSUM62', 'PAM250'] 64 if matrix not in allowed_matrices: 65 raise RelaxError("The substitution matrix '%s' is unknown, it must be one of %s." % (matrix, allowed_matrices)) 66 67 # Capitalise the sequences. 68 sequence1 = sequence1.upper() 69 sequence2 = sequence2.upper() 70 71 # Initial printout. 72 if verbosity: 73 sys.stdout.write("\nPairwise protein alignment.\n") 74 sys.stdout.write("%-30s %s\n" % ("Substitution matrix:", matrix)) 75 sys.stdout.write("%-30s %s\n" % ("Gap opening penalty:", gap_open_penalty)) 76 sys.stdout.write("%-30s %s\n" % ("Gap extend penalty:", gap_extend_penalty)) 77 sys.stdout.write("\n%-30s %s\n" % ("Input sequence 1:", sequence1)) 78 sys.stdout.write("%-30s %s\n" % ("Input sequence 2:", sequence2)) 79 80 # Select the substitution matrix. 81 if matrix == 'BLOSUM62': 82 sub_matrix = BLOSUM62 83 sub_seq = BLOSUM62_SEQ 84 elif matrix == 'PAM250': 85 sub_matrix = PAM250 86 sub_seq = PAM250_SEQ 87 88 # The alignment. 89 if algorithm == 'NW70': 90 score, align1, align2, gaps = needleman_wunsch_align(sequence1, sequence2, sub_matrix=sub_matrix, sub_seq=sub_seq, gap_open_penalty=gap_open_penalty, gap_extend_penalty=gap_extend_penalty, end_gap_open_penalty=end_gap_open_penalty, end_gap_extend_penalty=end_gap_extend_penalty) 91 92 # Final printout. 93 if verbosity: 94 sys.stdout.write("\n%-30s %s\n" % ("Aligned sequence 1:", align1)) 95 sys.stdout.write("%-30s %s\n" % ("Aligned sequence 2:", align2)) 96 sys.stdout.write("%-30s " % "") 97 for i in range(len(align1)): 98 if align1[i] == align2[i]: 99 sys.stdout.write("*") 100 else: 101 sys.stdout.write(" ") 102 sys.stdout.write("\n\n") 103 104 # Return the results. 105 return score, align1, align2, gaps
106