Author: bugman Date: Sat Jan 31 17:56:51 2015 New Revision: 27433 URL: http://svn.gna.org/viewcvs/relax?rev=27433&view=rev Log: Creation of the lib.sequence_alignment.msa.msa_general() function. This consists of code from the structure.sequence_alignment user function backend function pipe_control.structure.main.sequence_alignment() for selecting between the different sequence alignment methods. Modified: trunk/lib/sequence_alignment/msa.py trunk/pipe_control/structure/main.py Modified: trunk/lib/sequence_alignment/msa.py URL: http://svn.gna.org/viewcvs/relax/trunk/lib/sequence_alignment/msa.py?rev=27433&r1=27432&r2=27433&view=diff ============================================================================== --- trunk/lib/sequence_alignment/msa.py (original) +++ trunk/lib/sequence_alignment/msa.py Sat Jan 31 17:56:51 2015 @@ -27,6 +27,7 @@ import sys # relax module imports. +from lib.errors import RelaxError from lib.sequence_alignment.align_protein import align_pairwise @@ -167,6 +168,68 @@ return strings, gaps +def msa_general(sequences, residue_numbers=None, msa_algorithm='Central Star', pairwise_algorithm='NW70', matrix='BLOSUM62', gap_open_penalty=1.0, gap_extend_penalty=1.0, end_gap_open_penalty=0.0, end_gap_extend_penalty=0.0): + """General interface for multiple sequence alignments (MSA). + + This can be used to select between the following MSA algorithms: + + - 'Central Star', to use the central_star() function. + - 'residue number', to use the msa_residue_numbers() function. + + + @param sequences: The list of residue sequences as one letter codes. + @type sequences: list of str + @keyword residue_numbers: The list of residue numbers for each sequence. + @type residue_numbers: list of list of int + @keyword msa_algorithm: The multiple sequence alignment (MSA) algorithm to use. + @type msa_algorithm: str + @keyword pairwise_algorithm: The pairwise sequence alignment algorithm to use. + @type pairwise_algorithm: str + @keyword matrix: The substitution matrix to use. + @type matrix: str + @keyword gap_open_penalty: The penalty for introducing gaps, as a positive number. + @type gap_open_penalty: float + @keyword gap_extend_penalty: The penalty for extending a gap, as a positive number. + @type gap_extend_penalty: float + @keyword end_gap_open_penalty: The optional penalty for opening a gap at the end of a sequence. + @type end_gap_open_penalty: float + @keyword end_gap_extend_penalty: The optional penalty for extending a gap at the end of a sequence. + @type end_gap_extend_penalty: float + @return: The list of alignment strings and the gap matrix. + @rtype: list of str, numpy rank-2 int array + """ + + # Check the MSA algorithm. + allowed_msa = ['Central Star', 'residue number'] + if msa_algorithm not in allowed_msa: + raise RelaxError("The MSA algorithm must be one of %s." % allowed_msa) + + # Check the penalty arguments. + if gap_open_penalty != None: + if gap_open_penalty < 0.0: + raise RelaxError("The gap opening penalty %s must be a positive number." % gap_open_penalty) + if gap_extend_penalty != None: + if gap_extend_penalty < 0.0: + raise RelaxError("The gap extension penalty %s must be a positive number." % gap_extend_penalty) + if end_gap_open_penalty != None: + if end_gap_open_penalty < 0.0: + raise RelaxError("The end gap opening penalty %s must be a positive number." % end_gap_open_penalty) + if end_gap_extend_penalty != None: + if end_gap_extend_penalty < 0.0: + raise RelaxError("The end gap extension penalty %s must be a positive number." % end_gap_extend_penalty) + + # Use the central star multiple alignment algorithm. + if msa_algorithm == 'Central Star': + strings, gaps = central_star(sequences, algorithm=pairwise_algorithm, matrix=matrix, gap_open_penalty=gap_open_penalty, gap_extend_penalty=gap_extend_penalty, end_gap_open_penalty=end_gap_open_penalty, end_gap_extend_penalty=end_gap_extend_penalty) + + # Alignment by residue number. + elif msa_algorithm == 'residue number': + strings, gaps = msa_residue_numbers(sequences, residue_numbers=residue_numbers) + + # Return the alignment strings and gap matrix. + return strings, gaps + + def msa_residue_numbers(sequences, residue_numbers=None): """Align multiple sequences based on the residue numbering. Modified: trunk/pipe_control/structure/main.py URL: http://svn.gna.org/viewcvs/relax/trunk/pipe_control/structure/main.py?rev=27433&r1=27432&r2=27433&view=diff ============================================================================== --- trunk/pipe_control/structure/main.py (original) +++ trunk/pipe_control/structure/main.py Sat Jan 31 17:56:51 2015 @@ -38,7 +38,7 @@ from lib.plotting.api import correlation_matrix from lib.selection import tokenise from lib.sequence import write_spin_data -from lib.sequence_alignment.msa import central_star, msa_residue_numbers, msa_residue_skipping +from lib.sequence_alignment.msa import msa_general, msa_residue_numbers, msa_residue_skipping from lib.structure.internal.coordinates import assemble_atomic_coordinates, assemble_coord_array, loop_coord_structures from lib.structure.internal.displacements import Displacements from lib.structure.internal.object import Internal @@ -1235,30 +1235,22 @@ @type end_gap_extend_penalty: float """ - # Check the penalty arguments. - if gap_open_penalty != None: - if gap_open_penalty < 0.0: - raise RelaxError("The gap opening penalty %s must be a positive number." % gap_open_penalty) - if gap_extend_penalty != None: - if gap_extend_penalty < 0.0: - raise RelaxError("The gap extension penalty %s must be a positive number." % gap_extend_penalty) - if end_gap_open_penalty != None: - if end_gap_open_penalty < 0.0: - raise RelaxError("The end gap opening penalty %s must be a positive number." % end_gap_open_penalty) - if end_gap_extend_penalty != None: - if end_gap_extend_penalty < 0.0: - raise RelaxError("The end gap extension penalty %s must be a positive number." % end_gap_extend_penalty) - # Assemble the structural objects. objects, object_names, pipes = assemble_structural_objects(pipes=pipes, models=models, molecules=molecules) # Assemble the atomic coordinates of all molecules. ids, object_id_list, model_list, molecule_list, atom_pos, mol_names, res_names, res_nums, atom_names, elements, one_letter_codes, num_mols = assemble_atomic_coordinates(objects=objects, object_names=object_names, molecules=molecules, models=models) + # Convert the residue number data structure. + res_num_list = [] + for mol_index in range(num_mols): + res_num_list.append([]) + for i in range(len(one_letter_codes[mol_index])): + key = res_nums[mol_index][i].keys()[0] + res_num_list[mol_index].append(res_nums[mol_index][i][key]) + # MSA. - if msa_algorithm == 'Central Star': - # Use the central star multiple alignment algorithm. - strings, gaps = central_star(one_letter_codes, algorithm=pairwise_algorithm, matrix=matrix, gap_open_penalty=gap_open_penalty, gap_extend_penalty=gap_extend_penalty, end_gap_open_penalty=end_gap_open_penalty, end_gap_extend_penalty=end_gap_extend_penalty) + strings, gaps = msa_general(one_letter_codes, residue_numbers=res_num_list, msa_algorithm=msa_algorithm, pairwise_algorithm=pairwise_algorithm, matrix=matrix, gap_open_penalty=gap_open_penalty, gap_extend_penalty=gap_extend_penalty, end_gap_open_penalty=end_gap_open_penalty, end_gap_extend_penalty=end_gap_extend_penalty) # Set up the data store object. if not hasattr(ds, 'sequence_alignments'):