1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
 22   
 23  from numpy import int16, zeros 
 24  from unittest import TestCase 
 25   
 26   
 27  from lib.sequence_alignment.align_protein import align_pairwise 
 28   
 29   
 31      """Unit tests for the lib.sequence_alignment.align_protein relax module.""" 
 32   
 34          """Test the Needleman-Wunsch sequence alignment for two protein sequences. 
 35   
 36          This uses the sequences: 
 37   
 38              - 'IHAAEEKDWKTAYSYbgFYEAFEGYdsidspkaitslkymllckimlntpedvqalvsgkla', 
 39              - 'LHAADEKDFKTAFSYabiggapFYEAFEGYdsvdekvsaltalkymllckvmldlpdevnsllsakl'. 
 40   
 41          From online servers, the results with a gap open penalty of 5 and gap extend of 1 should be:: 
 42   
 43              https://www.ebi.ac.uk/Tools/psa/emboss_needle/ 
 44              EMBOSS_001           IHAAEEKDWKTAYSY-B-G---FYEAFEGYDSIDSP-KAITSLKYMLLCKIMLNTPEDVQALVSGKLA 
 45                                   :|||:|||:|||:|| | |   ||||||||||:|.. .|:|:||||||||:||:.|::|.:|:|.|| 
 46              EMBOSS_001           LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDEKVSALTALKYMLLCKVMLDLPDEVNSLLSAKL- 
 47   
 48              http://web.expasy.org/cgi-bin/sim/sim.pl?prot 
 49              UserSeq1             IHAAEEKDWKTAYSY-B-G---FYEAFEGYDSIDSP-KAITSLKYMLLCKIMLNTPEDVQALVSGKL 
 50              UserSeq2             LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDEKVSALTALKYMLLCKVMLDLPDEVNSLLSAKL 
 51                                    *** *** *** ** * *   ********** *    * * ******** **  *  *  * * ** 
 52          """ 
 53   
 54           
 55          seq1 = 'IHAAEEKDWKTAYSYbgFYEAFEGYdsidspkaitslkymllckimlntpedvqalvsgkla' 
 56          seq2 = 'LHAADEKDFKTAFSYabiggapFYEAFEGYdsvdekvsaltalkymllckvmldlpdevnsllsakl' 
 57          print(seq1) 
 58          print(seq2) 
 59   
 60           
 61          score, align1, align2, gaps = align_pairwise(seq1, seq2, matrix='BLOSUM62', gap_open_penalty=5.0, gap_extend_penalty=1.0) 
 62          print(score) 
 63          print(align1) 
 64          print(align2) 
 65          print(gaps) 
 66   
 67           
 68          self.assertEqual(align1, 'IHAAEEKDWKTAYSY-B-G---FYEAFEGYDSIDSP-KAITSLKYMLLCKIMLNTPEDVQALVSGKLA') 
 69          self.assertEqual(align2, 'LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDEKVSALTALKYMLLCKVMLDLPDEVNSLLSAKL-') 
 70   
 71           
 72          real_gaps = zeros((2, 68), int16) 
 73          real_gaps[0, 15] = 1 
 74          real_gaps[0, 17] = 1 
 75          real_gaps[0, 19] = 1 
 76          real_gaps[0, 20] = 1 
 77          real_gaps[0, 21] = 1 
 78          real_gaps[0, 36] = 1 
 79          real_gaps[1, 67] = 1 
 80          for i in range(2): 
 81              for j in range(68): 
 82                  self.assertEqual(gaps[i, j], real_gaps[i][j]) 
  83   
 84   
 86          """Test the Needleman-Wunsch sequence alignment for two protein sequences using the PAM250 substitution matrix. 
 87   
 88          This uses the sequences: 
 89   
 90              - 'IHAAEEKDWKTAYSYbgFYEAFEGYdsidspkaitslkymllckimlntpedvqalvsgkla', 
 91              - 'LHAADEKDFKTAFSYabiggapFYEAFEGYdsvdekvsaltalkymllckvmldlpdevnsllsakl'. 
 92   
 93          From online servers, the results with a gap open penalty of 5 and gap extend of 0.5 should be:: 
 94   
 95              https://www.ebi.ac.uk/Tools/psa/emboss_needle/ 
 96              EMBOSS_001          IHAAEEKDWKTAYSYb--g---FYEAFEGYdsidspk--aitslkymllckimlntpedvqalvsgkla 
 97                                  :|||:|||.|||:||.  |   ||||||||||:|. |  |:|:||||||||:||:.|::|::|:|:||  
 98              EMBOSS_001          LHAADEKDFKTAFSYabiggapFYEAFEGYdsvde-kvsaltalkymllckvmldlpdevnsllsakl- 
 99   
100              http://web.expasy.org/cgi-bin/sim/sim.pl?prot 
101              UserSeq1            IHAAEEKDWKTAYSYBG-----FYEAFEGYDSIDSPK--AITSLKYMLLCKIMLNTPEDVQALVSGKL 
102              UserSeq2            LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDE-KVSALTALKYMLLCKVMLDLPDEVNSLLSAKL 
103                                   *** *** *** **       ********** *  *  * * ******** **  *  *  * * ** 
104          """ 
105   
106           
107          seq1 = 'IHAAEEKDWKTAYSYbgFYEAFEGYdsidspkaitslkymllckimlntpedvqalvsgkla' 
108          seq2 = 'LHAADEKDFKTAFSYabiggapFYEAFEGYdsvdekvsaltalkymllckvmldlpdevnsllsakl' 
109          print(seq1) 
110          print(seq2) 
111   
112           
113          score, align1, align2, gaps = align_pairwise(seq1, seq2, matrix='PAM250', gap_open_penalty=5.0, gap_extend_penalty=0.5) 
114          print(score) 
115          print(align1) 
116          print(align2) 
117          print(gaps) 
118   
119           
120          self.assertEqual(align1, 'IHAAEEKDWKTAYSYB--G---FYEAFEGYDSIDSPK--AITSLKYMLLCKIMLNTPEDVQALVSGKLA') 
121          self.assertEqual(align2, 'LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDE-KVSALTALKYMLLCKVMLDLPDEVNSLLSAKL-') 
122   
123           
124          real_gaps = zeros((2, 69), int16) 
125          real_gaps[0, 16] = 1 
126          real_gaps[0, 17] = 1 
127          real_gaps[0, 19] = 1 
128          real_gaps[0, 20] = 1 
129          real_gaps[0, 21] = 1 
130          real_gaps[0, 37] = 1 
131          real_gaps[0, 38] = 1 
132          real_gaps[1, 35] = 1 
133          real_gaps[1, 68] = 1 
134          for i in range(2): 
135              for j in range(68): 
136                  self.assertEqual(gaps[i, j], real_gaps[i][j]) 
  137