Author: bugman Date: Sat Jan 31 12:21:57 2015 New Revision: 27420 URL: http://svn.gna.org/viewcvs/relax?rev=27420&view=rev Log: Implemented the multiple sequence alignment method based on residue numbers. This is the new msa_residue_numbers() function in the lib.sequence_alignment.msa module. The logic is rather basic in that the alignment is based on a residue number range from the lowest residue number to the highest - i.e. it does not take into account gaps in common between all input sequences. Modified: trunk/lib/sequence_alignment/msa.py Modified: trunk/lib/sequence_alignment/msa.py URL: http://svn.gna.org/viewcvs/relax/trunk/lib/sequence_alignment/msa.py?rev=27420&r1=27419&r2=27420&view=diff ============================================================================== --- trunk/lib/sequence_alignment/msa.py (original) +++ trunk/lib/sequence_alignment/msa.py Sat Jan 31 12:21:57 2015 @@ -165,3 +165,66 @@ # Return the results. return strings, gaps + + +def msa_residue_numbers(sequences, residue_numbers=None): + """Align multiple sequences based on the residue numbering. + + @param sequences: The list of residue sequences as one letter codes. + @type sequences: list of str + @keyword residue_numbers: The list of residue numbers for each sequence. + @type residue_numbers: list of list of int + @return: The list of alignment strings and the gap matrix. + @rtype: list of str, numpy rank-2 int array + """ + + # Initialise. + N = len(sequences) + strings = [] + for i in range(N): + strings.append('') + + # Printout. + sys.stdout.write("\nResidue number based multiple sequence alignment.\n\n") + sys.stdout.write("Initial sequences:\n") + for i in range(N): + sys.stdout.write("%3i %s\n" % (i+1, sequences[i])) + + # The maximum and minimum residue numbers. + res_min = 1e100 + res_max = -1e100 + for i in range(N): + if min(residue_numbers[i]) < res_min: + res_min = min(residue_numbers[i]) + if max(residue_numbers[i]) > res_max: + res_max = max(residue_numbers[i]) + + # The total number of residues. + M = res_max - res_min + 1 + + # Loop over the molecules and residues and determine if the residue is present. + for i in range(N): + for res_num in range(res_min, res_max+1): + # The residue is present. + if res_num in residue_numbers[i]: + index = residue_numbers[i].index(res_num) + strings[i] += sequences[i][index] + + # A gap. + else: + strings[i] += '-' + + # Create the gap matrix. + gaps = zeros((N, M), int16) + for i in range(N): + for j in range(M): + if strings[i][j] == '-': + gaps[i, j] = 1 + + # Final printout. + sys.stdout.write("\nFinal MSA:\n") + for i in range(N): + sys.stdout.write("%3i %s\n" % (i+1, strings[i])) + + # Return the results. + return strings, gaps