Author: bugman Date: Wed Jan 28 18:24:15 2015 New Revision: 27344 URL: http://svn.gna.org/viewcvs/relax?rev=27344&view=rev Log: Created the Test_align_protein.test_align_multiple_from_pairwise unit test. This is in the _lib._sequence_alignment.test_align_protein unit test module. This test checks the operation of the lib.sequence_alignment.align_protein.align_multiple_from_pairwise() function, which does not yet exist. Modified: trunk/test_suite/unit_tests/_lib/_sequence_alignment/test_align_protein.py Modified: trunk/test_suite/unit_tests/_lib/_sequence_alignment/test_align_protein.py URL: http://svn.gna.org/viewcvs/relax/trunk/test_suite/unit_tests/_lib/_sequence_alignment/test_align_protein.py?rev=27344&r1=27343&r2=27344&view=diff ============================================================================== --- trunk/test_suite/unit_tests/_lib/_sequence_alignment/test_align_protein.py (original) +++ trunk/test_suite/unit_tests/_lib/_sequence_alignment/test_align_protein.py Wed Jan 28 18:24:15 2015 @@ -24,60 +24,59 @@ from unittest import TestCase # relax module imports. -from lib.sequence_alignment.align_protein import align_pairwise +from lib.sequence_alignment.align_protein import align_multiple_from_pairwise, align_pairwise class Test_align_protein(TestCase): """Unit tests for the lib.sequence_alignment.align_protein relax module.""" - def test_align_pairwise(self): - """Test the Needleman-Wunsch sequence alignment for two protein sequences. - - This uses the sequences: - - - 'IHAAEEKDWKTAYSYbgFYEAFEGYdsidspkaitslkymllckimlntpedvqalvsgkla', - - 'LHAADEKDFKTAFSYabiggapFYEAFEGYdsvdekvsaltalkymllckvmldlpdevnsllsakl'. - - From online servers, the results with a gap open penalty of 5 and gap extend of 1 should be:: - - https://www.ebi.ac.uk/Tools/psa/emboss_needle/ - EMBOSS_001 IHAAEEKDWKTAYSY-B-G---FYEAFEGYDSIDSP-KAITSLKYMLLCKIMLNTPEDVQALVSGKLA - :|||:|||:|||:|| | | ||||||||||:|.. 
.|:|:||||||||:||:.|::|.:|:|.|| - EMBOSS_001 LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDEKVSALTALKYMLLCKVMLDLPDEVNSLLSAKL- - - http://web.expasy.org/cgi-bin/sim/sim.pl?prot - UserSeq1 IHAAEEKDWKTAYSY-B-G---FYEAFEGYDSIDSP-KAITSLKYMLLCKIMLNTPEDVQALVSGKL - UserSeq2 LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDEKVSALTALKYMLLCKVMLDLPDEVNSLLSAKL - *** *** *** ** * * ********** * * * ******** ** * * * * ** - """ - - # The sequences. - seq1 = 'IHAAEEKDWKTAYSYbgFYEAFEGYdsidspkaitslkymllckimlntpedvqalvsgkla' - seq2 = 'LHAADEKDFKTAFSYabiggapFYEAFEGYdsvdekvsaltalkymllckvmldlpdevnsllsakl' - print(seq1) - print(seq2) - - # Perform the alignment. - align1, align2, gaps = align_pairwise(seq1, seq2, matrix='BLOSUM62', gap_open_penalty=5.0, gap_extend_penalty=1.0) - print(align1) - print(align2) - print(gaps) - - # Check the alignment. - self.assertEqual(align1, 'IHAAEEKDWKTAYSY-B-G---FYEAFEGYDSIDSP-KAITSLKYMLLCKIMLNTPEDVQALVSGKLA') - self.assertEqual(align2, 'LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDEKVSALTALKYMLLCKVMLDLPDEVNSLLSAKL-') - - # The gap matrix. - real_gaps = zeros((2, 68), int16) - real_gaps[0, 15] = 1 - real_gaps[0, 17] = 1 - real_gaps[0, 19] = 1 - real_gaps[0, 20] = 1 - real_gaps[0, 21] = 1 - real_gaps[0, 36] = 1 - real_gaps[1, 67] = 1 - for i in range(2): - for j in range(68): + def test_align_multiple_from_pairwise(self): + """Test the multiple protein sequence, fusing pairwise alignments via lib.sequence_alignment.align_protein.align_multiple_from_pairwise(). + + This uses the sequences: + + - 'TEEQVDADGGT', + - 'ADQLTEEQVDADGNGTIDFPEFLTMMARKM', + - 'LTEEQMINEVDAGNGTIDFPEFLTMMAR'. + + The result should be: + + - '----TEEQ----VDADG-GT--------------', + - 'ADQLTEEQ----VDADGNGTIDFPEFLTMMARKM', + - '---LTEEQMINEVDA-GNGTIDFPEFLTMMAR--'. + """ + + # The sequences. + seq1 = 'TEEQVDADGGT' + seq2 = 'ADQLTEEQVDADGNGTIDFPEFLTMMARKM' + seq3 = 'LTEEQMINEVDAGNGTIDFPEFLTMMAR' + print(seq1) + print(seq2) + print(seq3) + + # Perform the alignment. 
+ strings, gaps = align_multiple_from_pairwise(seq1, [seq2, seq3], matrix='BLOSUM62', gap_open_penalty=5.0, gap_extend_penalty=1.0) + print(strings[0]) + print(strings[1]) + print(strings[2]) + print(gaps) + + # Check the alignment. + self.assertEqual(strings[0], '----TEEQ----VDADG-GT--------------') + self.assertEqual(strings[1], 'ADQLTEEQ----VDADGNGTIDFPEFLTMMARKM') + self.assertEqual(strings[2], '---LTEEQMINEVDA-GNGTIDFPEFLTMMAR--') + + # The gap matrix. + real_gaps = zeros((3, 34), int16) + for i in (range(4) + range(8, 12) + [17] + range(20, 34)): + real_gaps[0, i] = 1 + for i in range(8, 12): + real_gaps[1, i] = 1 + for i in (range(3) + [15, 33, 34]): + real_gaps[2, i] = 1 + for i in range(3): + for j in range(34): self.assertEqual(gaps[i, j], real_gaps[i][j]) @@ -132,3 +131,105 @@ for i in range(2): for j in range(68): self.assertEqual(gaps[i, j], real_gaps[i][j]) + def test_align_pairwise(self): + """Test the Needleman-Wunsch sequence alignment for two protein sequences. + + This uses the sequences: + + - 'IHAAEEKDWKTAYSYbgFYEAFEGYdsidspkaitslkymllckimlntpedvqalvsgkla', + - 'LHAADEKDFKTAFSYabiggapFYEAFEGYdsvdekvsaltalkymllckvmldlpdevnsllsakl'. + + From online servers, the results with a gap open penalty of 5 and gap extend of 1 should be:: + + https://www.ebi.ac.uk/Tools/psa/emboss_needle/ + EMBOSS_001 IHAAEEKDWKTAYSY-B-G---FYEAFEGYDSIDSP-KAITSLKYMLLCKIMLNTPEDVQALVSGKLA + :|||:|||:|||:|| | | ||||||||||:|.. .|:|:||||||||:||:.|::|.:|:|.|| + EMBOSS_001 LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDEKVSALTALKYMLLCKVMLDLPDEVNSLLSAKL- + + http://web.expasy.org/cgi-bin/sim/sim.pl?prot + UserSeq1 IHAAEEKDWKTAYSY-B-G---FYEAFEGYDSIDSP-KAITSLKYMLLCKIMLNTPEDVQALVSGKL + UserSeq2 LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDEKVSALTALKYMLLCKVMLDLPDEVNSLLSAKL + *** *** *** ** * * ********** * * * ******** ** * * * * ** + """ + + # The sequences. 
+ seq1 = 'IHAAEEKDWKTAYSYbgFYEAFEGYdsidspkaitslkymllckimlntpedvqalvsgkla' + seq2 = 'LHAADEKDFKTAFSYabiggapFYEAFEGYdsvdekvsaltalkymllckvmldlpdevnsllsakl' + print(seq1) + print(seq2) + + # Perform the alignment. + align1, align2, gaps = align_pairwise(seq1, seq2, matrix='BLOSUM62', gap_open_penalty=5.0, gap_extend_penalty=1.0) + print(align1) + print(align2) + print(gaps) + + # Check the alignment. + self.assertEqual(align1, 'IHAAEEKDWKTAYSY-B-G---FYEAFEGYDSIDSP-KAITSLKYMLLCKIMLNTPEDVQALVSGKLA') + self.assertEqual(align2, 'LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDEKVSALTALKYMLLCKVMLDLPDEVNSLLSAKL-') + + # The gap matrix. + real_gaps = zeros((2, 68), int16) + real_gaps[0, 15] = 1 + real_gaps[0, 17] = 1 + real_gaps[0, 19] = 1 + real_gaps[0, 20] = 1 + real_gaps[0, 21] = 1 + real_gaps[0, 36] = 1 + real_gaps[1, 67] = 1 + for i in range(2): + for j in range(68): + self.assertEqual(gaps[i, j], real_gaps[i][j]) + + + def test_align_pairwise_PAM250(self): + """Test the Needleman-Wunsch sequence alignment for two protein sequences using the PAM250 substitution matrix. + + This uses the sequences: + + - 'IHAAEEKDWKTAYSYbgFYEAFEGYdsidspkaitslkymllckimlntpedvqalvsgkla', + - 'LHAADEKDFKTAFSYabiggapFYEAFEGYdsvdekvsaltalkymllckvmldlpdevnsllsakl'. + + From online servers, the results with a gap open penalty of 5 and gap extend of 0.5 should be:: + + https://www.ebi.ac.uk/Tools/psa/emboss_needle/ + EMBOSS_001 IHAAEEKDWKTAYSYb--g---FYEAFEGYdsidspk--aitslkymllckimlntpedvqalvsgkla + :|||:|||.|||:||. | ||||||||||:|. | |:|:||||||||:||:.|::|::|:|:|| + EMBOSS_001 LHAADEKDFKTAFSYabiggapFYEAFEGYdsvde-kvsaltalkymllckvmldlpdevnsllsakl- + + http://web.expasy.org/cgi-bin/sim/sim.pl?prot + UserSeq1 IHAAEEKDWKTAYSYBG-----FYEAFEGYDSIDSPK--AITSLKYMLLCKIMLNTPEDVQALVSGKL + UserSeq2 LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDE-KVSALTALKYMLLCKVMLDLPDEVNSLLSAKL + *** *** *** ** ********** * * * * ******** ** * * * * ** + """ + + # The sequences. 
+ seq1 = 'IHAAEEKDWKTAYSYbgFYEAFEGYdsidspkaitslkymllckimlntpedvqalvsgkla' + seq2 = 'LHAADEKDFKTAFSYabiggapFYEAFEGYdsvdekvsaltalkymllckvmldlpdevnsllsakl' + print(seq1) + print(seq2) + + # Perform the alignment. + align1, align2, gaps = align_pairwise(seq1, seq2, matrix='PAM250', gap_open_penalty=5.0, gap_extend_penalty=0.5) + print(align1) + print(align2) + print(gaps) + + # Check the alignment. + self.assertEqual(align1, 'IHAAEEKDWKTAYSYB--G---FYEAFEGYDSIDSPK--AITSLKYMLLCKIMLNTPEDVQALVSGKLA') + self.assertEqual(align2, 'LHAADEKDFKTAFSYABIGGAPFYEAFEGYDSVDE-KVSALTALKYMLLCKVMLDLPDEVNSLLSAKL-') + + # The gap matrix. + real_gaps = zeros((2, 69), int16) + real_gaps[0, 16] = 1 + real_gaps[0, 17] = 1 + real_gaps[0, 19] = 1 + real_gaps[0, 20] = 1 + real_gaps[0, 21] = 1 + real_gaps[0, 37] = 1 + real_gaps[0, 38] = 1 + real_gaps[1, 35] = 1 + real_gaps[1, 68] = 1 + for i in range(2): + for j in range(68): + self.assertEqual(gaps[i, j], real_gaps[i][j])