1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 from numpy import int16, zeros
24 from unittest import TestCase
25
26
27 from lib.sequence_alignment.align_protein import align_pairwise
28
29
31 """Unit tests for the lib.sequence_alignment.align_protein relax module."""
32
34 """Test the Needleman-Wunsch sequence alignment for two protein sequences.
35
36 This uses the sequences:
37
38 - 'IHAAEEKDWKTAYSYbgFYEAFEGYdsidspkaitslkymllckimlntpedvqalvsgkla',
39 - 'LHAADEKDFKTAFSYabiggapFYEAFEGYdsvdekvsaltalkymllckvmldlpdevnsllsakl'.
40
41 From online servers, the results with a gap open penalty of 5 and gap extend of 1 should be::
42
43 https://www.ebi.ac.uk/Tools/psa/emboss_needle/
44 EMBOSS_001 IHAAEEKDWKTAYSY-B-G---FYEAFEGYDSIDSP-KAITSLKYMLLCKIMLNTPEDVQALVSGKLA
45 :|||:|||:|||:|| | | ||||||||||:|.. .|:|:||||||||:||:.|::|.:|:|.||
46 EMBOSS_001 LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDEKVSALTALKYMLLCKVMLDLPDEVNSLLSAKL-
47
48 http://web.expasy.org/cgi-bin/sim/sim.pl?prot
49 UserSeq1 IHAAEEKDWKTAYSY-B-G---FYEAFEGYDSIDSP-KAITSLKYMLLCKIMLNTPEDVQALVSGKL
50 UserSeq2 LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDEKVSALTALKYMLLCKVMLDLPDEVNSLLSAKL
51 *** *** *** ** * * ********** * * * ******** ** * * * * **
52 """
53
54
55 seq1 = 'IHAAEEKDWKTAYSYbgFYEAFEGYdsidspkaitslkymllckimlntpedvqalvsgkla'
56 seq2 = 'LHAADEKDFKTAFSYabiggapFYEAFEGYdsvdekvsaltalkymllckvmldlpdevnsllsakl'
57 print(seq1)
58 print(seq2)
59
60
61 score, align1, align2, gaps = align_pairwise(seq1, seq2, matrix='BLOSUM62', gap_open_penalty=5.0, gap_extend_penalty=1.0)
62 print(score)
63 print(align1)
64 print(align2)
65 print(gaps)
66
67
68 self.assertEqual(align1, 'IHAAEEKDWKTAYSY-B-G---FYEAFEGYDSIDSP-KAITSLKYMLLCKIMLNTPEDVQALVSGKLA')
69 self.assertEqual(align2, 'LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDEKVSALTALKYMLLCKVMLDLPDEVNSLLSAKL-')
70
71
72 real_gaps = zeros((2, 68), int16)
73 real_gaps[0, 15] = 1
74 real_gaps[0, 17] = 1
75 real_gaps[0, 19] = 1
76 real_gaps[0, 20] = 1
77 real_gaps[0, 21] = 1
78 real_gaps[0, 36] = 1
79 real_gaps[1, 67] = 1
80 for i in range(2):
81 for j in range(68):
82 self.assertEqual(gaps[i, j], real_gaps[i][j])
83
84
86 """Test the Needleman-Wunsch sequence alignment for two protein sequences using the PAM250 substitution matrix.
87
88 This uses the sequences:
89
90 - 'IHAAEEKDWKTAYSYbgFYEAFEGYdsidspkaitslkymllckimlntpedvqalvsgkla',
91 - 'LHAADEKDFKTAFSYabiggapFYEAFEGYdsvdekvsaltalkymllckvmldlpdevnsllsakl'.
92
93 From online servers, the results with a gap open penalty of 5 and gap extend of 0.5 should be::
94
95 https://www.ebi.ac.uk/Tools/psa/emboss_needle/
96 EMBOSS_001 IHAAEEKDWKTAYSYb--g---FYEAFEGYdsidspk--aitslkymllckimlntpedvqalvsgkla
97 :|||:|||.|||:||. | ||||||||||:|. | |:|:||||||||:||:.|::|::|:|:||
98 EMBOSS_001 LHAADEKDFKTAFSYabiggapFYEAFEGYdsvde-kvsaltalkymllckvmldlpdevnsllsakl-
99
100 http://web.expasy.org/cgi-bin/sim/sim.pl?prot
101 UserSeq1 IHAAEEKDWKTAYSYBG-----FYEAFEGYDSIDSPK--AITSLKYMLLCKIMLNTPEDVQALVSGKL
102 UserSeq2 LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDE-KVSALTALKYMLLCKVMLDLPDEVNSLLSAKL
103 *** *** *** ** ********** * * * * ******** ** * * * * **
104 """
105
106
107 seq1 = 'IHAAEEKDWKTAYSYbgFYEAFEGYdsidspkaitslkymllckimlntpedvqalvsgkla'
108 seq2 = 'LHAADEKDFKTAFSYabiggapFYEAFEGYdsvdekvsaltalkymllckvmldlpdevnsllsakl'
109 print(seq1)
110 print(seq2)
111
112
113 score, align1, align2, gaps = align_pairwise(seq1, seq2, matrix='PAM250', gap_open_penalty=5.0, gap_extend_penalty=0.5)
114 print(score)
115 print(align1)
116 print(align2)
117 print(gaps)
118
119
120 self.assertEqual(align1, 'IHAAEEKDWKTAYSYB--G---FYEAFEGYDSIDSPK--AITSLKYMLLCKIMLNTPEDVQALVSGKLA')
121 self.assertEqual(align2, 'LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDE-KVSALTALKYMLLCKVMLDLPDEVNSLLSAKL-')
122
123
124 real_gaps = zeros((2, 69), int16)
125 real_gaps[0, 16] = 1
126 real_gaps[0, 17] = 1
127 real_gaps[0, 19] = 1
128 real_gaps[0, 20] = 1
129 real_gaps[0, 21] = 1
130 real_gaps[0, 37] = 1
131 real_gaps[0, 38] = 1
132 real_gaps[1, 35] = 1
133 real_gaps[1, 68] = 1
134 for i in range(2):
135 for j in range(68):
136 self.assertEqual(gaps[i, j], real_gaps[i][j])
137