data_store.seq

32 """The structure for holding all sequence alignments.""" 33

def __init__(self):
    """Set up the container for the sequence alignments.

    The base class is initialised first, then the generic name and description of this list are stored.
    """

    # Run the __init__() method of the base class before anything else.
    super(Sequence_alignments, self).__init__()

    # The generic name and description for this list object.
    self.list_name = 'sequence_alignments'
    self.list_desc = 'List of all multiple sequence alignments'

43 44

def add(self, object_ids=None, models=None, molecules=None, sequences=None, strings=None, gaps=None, msa_algorithm=None, pairwise_algorithm=None, matrix=None, gap_open_penalty=None, gap_extend_penalty=None, end_gap_open_penalty=None, end_gap_extend_penalty=None):
    """Create a new sequence alignment container and store it at the end of this list.

    All arguments are handed on unchanged, as keyword arguments, to the Alignment constructor.

    @keyword object_ids:                The list of IDs for each structural object in the alignment.  In most cases this will be the data pipe name.  This will be used to retrieve alignments.
    @type object_ids:                   list of str
    @keyword models:                    The list of model numbers used in the alignment.  This will be used to retrieve alignments.
    @type models:                       list of int
    @keyword molecules:                 The list of molecules used in the alignment.  This will be used to retrieve alignments.
    @type molecules:                    list of str
    @keyword sequences:                 The list of residue sequences for the alignment as one letter codes.
    @type sequences:                    list of str
    @keyword strings:                   The list of alignment strings.
    @type strings:                      list of str
    @keyword gaps:                      The alignment gap matrix.
    @type gaps:                         numpy rank-2 int array
    @keyword msa_algorithm:             The global multiple sequence alignment (MSA) algorithm.
    @type msa_algorithm:                str
    @keyword pairwise_algorithm:        The pairwise sequence alignment algorithm.
    @type pairwise_algorithm:           str
    @keyword matrix:                    The substitution matrix.
    @type matrix:                       str
    @keyword gap_open_penalty:          The penalty for introducing gaps, as a positive number.
    @type gap_open_penalty:             float
    @keyword gap_extend_penalty:        The penalty for extending a gap, as a positive number.
    @type gap_extend_penalty:           float
    @keyword end_gap_open_penalty:      The optional penalty for opening a gap at the end of a sequence.
    @type end_gap_open_penalty:         float
    @keyword end_gap_extend_penalty:    The optional penalty for extending a gap at the end of a sequence.
    @type end_gap_extend_penalty:       float
    """

    # Build the new alignment container from the supplied data.
    alignment = Alignment(
        object_ids=object_ids,
        molecules=molecules,
        models=models,
        sequences=sequences,
        strings=strings,
        gaps=gaps,
        msa_algorithm=msa_algorithm,
        pairwise_algorithm=pairwise_algorithm,
        matrix=matrix,
        gap_open_penalty=gap_open_penalty,
        gap_extend_penalty=gap_extend_penalty,
        end_gap_open_penalty=end_gap_open_penalty,
        end_gap_extend_penalty=end_gap_extend_penalty,
    )

    # Store it as the new last element of this list.
    self.append(alignment)

78 79

def find_alignment(self, object_ids=None, models=None, molecules=None, sequences=None, msa_algorithm=None, pairwise_algorithm=None, matrix=None, gap_open_penalty=None, gap_extend_penalty=None, end_gap_open_penalty=None, end_gap_extend_penalty=None):
    """Find any pre-existing sequence alignment.

    A stored alignment matches when it has the same number of molecules as the query, when the object ID, model, molecule and sequence of every molecule are equal, and when each alignment setting that has been supplied is equal to the stored value.  Settings left unset are not compared.  The object_ids, models, molecules and sequences lists are indexed in parallel, so each must have at least as many elements as object_ids.

    @keyword object_ids:                The list of IDs for each structural object in the alignment.  In most cases this will be the data pipe name.  This will be used to retrieve alignments.
    @type object_ids:                   list of str
    @keyword models:                    The list of model numbers used in the alignment.  This will be used to retrieve alignments.
    @type models:                       list of int
    @keyword molecules:                 The list of molecules used in the alignment.  This will be used to retrieve alignments.
    @type molecules:                    list of str
    @keyword sequences:                 The list of residue sequences for the alignment as one letter codes.
    @type sequences:                    list of str
    @keyword msa_algorithm:             The global multiple sequence alignment (MSA) algorithm.  This is only compared if it is a non-empty value.
    @type msa_algorithm:                str
    @keyword pairwise_algorithm:        The pairwise sequence alignment algorithm.  This is only compared if it is a non-empty value.
    @type pairwise_algorithm:           str
    @keyword matrix:                    The substitution matrix.  This is only compared if it is a non-empty value.
    @type matrix:                       str
    @keyword gap_open_penalty:          The penalty for introducing gaps, as a positive number.  This is only compared if it is not None.
    @type gap_open_penalty:             float
    @keyword gap_extend_penalty:        The penalty for extending a gap, as a positive number.  This is only compared if it is not None.
    @type gap_extend_penalty:           float
    @keyword end_gap_open_penalty:      The optional penalty for opening a gap at the end of a sequence.  This is only compared if it is not None.
    @type end_gap_open_penalty:         float
    @keyword end_gap_extend_penalty:    The optional penalty for extending a gap at the end of a sequence.  This is only compared if it is not None.
    @type end_gap_extend_penalty:       float
    @return:                            The first matching sequence alignment object, or None if nothing matches.
    @rtype:                             Alignment instance or None
    """

    # The number of molecules being searched for.
    num_mols = len(object_ids)

    # Loop over all current alignments.
    for alignment in self:
        # A different number of molecules can never be the same alignment.  Without this check, a larger
        # alignment would match on its first molecules alone, and a smaller one would cause an IndexError.
        if len(alignment.object_ids) != num_mols:
            continue

        # Loop over the molecules, stopping at the first mismatch to avoid unnecessary checks.
        found = True
        for j in range(num_mols):
            if alignment.object_ids[j] != object_ids[j] or alignment.models[j] != models[j] or alignment.molecules[j] != molecules[j] or alignment.sequences[j] != sequences[j]:
                found = False
                break

        # No match (skip the rest of the checks for speed).
        if not found:
            continue

        # Check the alignment algorithm and matrix settings for mismatches, if these have been supplied.
        if msa_algorithm and alignment.msa_algorithm != msa_algorithm:
            continue
        if pairwise_algorithm and alignment.pairwise_algorithm != pairwise_algorithm:
            continue
        if matrix and alignment.matrix != matrix:
            continue

        # Check the gap penalties for mismatches.  A value of zero is a real setting, so only None means unset.
        if gap_open_penalty is not None and alignment.gap_open_penalty != gap_open_penalty:
            continue
        if gap_extend_penalty is not None and alignment.gap_extend_penalty != gap_extend_penalty:
            continue
        if end_gap_open_penalty is not None and alignment.end_gap_open_penalty != end_gap_open_penalty:
            continue
        if end_gap_extend_penalty is not None and alignment.end_gap_extend_penalty != end_gap_extend_penalty:
            continue

        # No mismatches, so this must be the alignment.
        return alignment

    # Nothing matched.
    return None

152 153

def from_xml(self, sequence_alignments_node, file_version=1):
    """Recreate the sequence alignment data structure from the XML sequence alignments node.

    One alignment container is appended to this list for every 'sequence_alignment' element found in the node, and each container is then asked to restore itself from its own element.

    @param sequence_alignments_node:    The sequence alignments XML node.
    @type sequence_alignments_node:     xml.dom.minicompat.Element instance
    @keyword file_version:              The relax XML version of the XML file.
    @type file_version:                 int
    """

    # Loop over all of the alignment nodes.
    for align_node in sequence_alignments_node.getElementsByTagName('sequence_alignment'):
        # Add a blank alignment container, using an empty object ID list.
        self.append(Alignment(object_ids=[]))

        # Recreate the contents of the new container from its XML node.
        self[-1].from_xml(align_node, file_version=file_version)

177 """Container for an individual sequence alignment.""" 178

def __init__(self, object_ids=None, models=None, molecules=None, sequences=None, strings=None, gaps=None, msa_algorithm=None, pairwise_algorithm=None, matrix=None, gap_open_penalty=None, gap_extend_penalty=None, end_gap_open_penalty=None, end_gap_extend_penalty=None):
    """Set up the sequence alignment object.

    All arguments are stored unmodified as attributes of the same name.  A molecule ID is then generated for each element of object_ids and stored in the 'ids' list.  If object_ids is None or empty, the 'ids' list is left empty.  Otherwise the models and molecules lists are indexed in parallel with object_ids and must be at least as long.

    @keyword object_ids:                The list of IDs for each structural object in the alignment.  In most cases this will be the data pipe name.  This will be used to retrieve alignments.
    @type object_ids:                   list of str
    @keyword models:                    The list of model numbers used in the alignment.  This will be used to retrieve alignments.
    @type models:                       list of int
    @keyword molecules:                 The list of molecules used in the alignment.  This will be used to retrieve alignments.
    @type molecules:                    list of str
    @keyword sequences:                 The list of residue sequences for the alignment as one letter codes.
    @type sequences:                    list of str
    @keyword strings:                   The list of alignment strings.
    @type strings:                      list of str
    @keyword gaps:                      The alignment gap matrix.
    @type gaps:                         numpy rank-2 int array
    @keyword msa_algorithm:             The global multiple sequence alignment (MSA) algorithm.
    @type msa_algorithm:                str
    @keyword pairwise_algorithm:        The pairwise sequence alignment algorithm.
    @type pairwise_algorithm:           str
    @keyword matrix:                    The substitution matrix.
    @type matrix:                       str
    @keyword gap_open_penalty:          The penalty for introducing gaps, as a positive number.
    @type gap_open_penalty:             float
    @keyword gap_extend_penalty:        The penalty for extending a gap, as a positive number.
    @type gap_extend_penalty:           float
    @keyword end_gap_open_penalty:      The optional penalty for opening a gap at the end of a sequence.
    @type end_gap_open_penalty:         float
    @keyword end_gap_extend_penalty:    The optional penalty for extending a gap at the end of a sequence.
    @type end_gap_extend_penalty:       float
    @raises RelaxError:                 If two molecules in the alignment end up with the same ID.
    """

    # Execute the base class __init__() method.
    super(Alignment, self).__init__(name='sequence_alignment', desc='Multiple sequence alignment container.')

    # Store the values.
    self.object_ids = object_ids
    self.models = models
    self.molecules = molecules
    self.sequences = sequences
    self.strings = strings
    self.gaps = gaps
    self.msa_algorithm = msa_algorithm
    self.pairwise_algorithm = pairwise_algorithm
    self.matrix = matrix
    self.gap_open_penalty = gap_open_penalty
    self.gap_extend_penalty = gap_extend_penalty
    self.end_gap_open_penalty = end_gap_open_penalty
    self.end_gap_extend_penalty = end_gap_extend_penalty

    # Create a unique ID for each molecule.  The default of None for object_ids is treated as no molecules,
    # so that a blank container can be created without arguments.
    self.ids = []
    if self.object_ids is not None:
        for i in range(len(self.object_ids)):
            self.ids.append(generate_id(object_id=self.object_ids[i], model=self.models[i], molecule=self.molecules[i]))

    # Check the IDs for uniqueness.  Each ID only needs to be compared against those after it, as the
    # pairings with the earlier IDs have already been checked.
    for i, mol_id in enumerate(self.ids):
        if mol_id in self.ids[i+1:]:
            raise RelaxError("The molecule ID '%s' is not unique." % mol_id)

Source Code for Module data_store.seq_align