1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """Sequence alignment data structures."""
24
25
26 from data_store.data_classes import Element, RelaxListType
27 from lib.errors import RelaxError
28 from lib.structure.internal.coordinates import generate_id
29
30
32 """The structure for holding all sequence alignments."""
33
35 """Initialise some class variables."""
36
37
38 super(Sequence_alignments, self).__init__()
39
40
41 self.list_name = 'sequence_alignments'
42 self.list_desc = 'List of all multiple sequence alignments'
43
44
def add(self, object_ids=None, models=None, molecules=None, sequences=None, strings=None, gaps=None, msa_algorithm=None, pairwise_algorithm=None, matrix=None, gap_open_penalty=None, gap_extend_penalty=None, end_gap_open_penalty=None, end_gap_extend_penalty=None):
    """Create a new sequence alignment container and store it at the end of this list.

    All arguments are handed unmodified to the Alignment constructor.

    @keyword object_ids:                The list of IDs for each structural object in the alignment.  In most cases this will be the data pipe name.  This will be used to retrieve alignments.
    @type object_ids:                   list of str
    @keyword models:                    The list of model numbers used in the alignment.  This will be used to retrieve alignments.
    @type models:                       list of int
    @keyword molecules:                 The list of molecules used in the alignment.  This will be used to retrieve alignments.
    @type molecules:                    list of str
    @keyword sequences:                 The list of residue sequences for the alignment as one letter codes.
    @type sequences:                    list of str
    @keyword strings:                   The list of alignment strings.
    @type strings:                      list of str
    @keyword gaps:                      The alignment gap matrix.
    @type gaps:                         numpy rank-2 int array
    @keyword msa_algorithm:             The global multiple sequence alignment (MSA) algorithm.
    @type msa_algorithm:                str
    @keyword pairwise_algorithm:        The pairwise sequence alignment algorithm.
    @type pairwise_algorithm:           str
    @keyword matrix:                    The substitution matrix.
    @type matrix:                       str
    @keyword gap_open_penalty:          The penalty for introducing gaps, as a positive number.
    @type gap_open_penalty:             float
    @keyword gap_extend_penalty:        The penalty for extending a gap, as a positive number.
    @type gap_extend_penalty:           float
    @keyword end_gap_open_penalty:      The optional penalty for opening a gap at the end of a sequence.
    @type end_gap_open_penalty:         float
    @keyword end_gap_extend_penalty:    The optional penalty for extending a gap at the end of a sequence.
    @type end_gap_extend_penalty:       float
    """

    # Assemble the container from the supplied alignment data and settings.
    alignment = Alignment(
        object_ids=object_ids,
        molecules=molecules,
        models=models,
        sequences=sequences,
        strings=strings,
        gaps=gaps,
        msa_algorithm=msa_algorithm,
        pairwise_algorithm=pairwise_algorithm,
        matrix=matrix,
        gap_open_penalty=gap_open_penalty,
        gap_extend_penalty=gap_extend_penalty,
        end_gap_open_penalty=end_gap_open_penalty,
        end_gap_extend_penalty=end_gap_extend_penalty,
    )

    # Store the new container at the end of the list.
    self.append(alignment)
78
79
def find_alignment(self, object_ids=None, models=None, molecules=None, sequences=None, msa_algorithm=None, pairwise_algorithm=None, matrix=None, gap_open_penalty=None, gap_extend_penalty=None, end_gap_open_penalty=None, end_gap_extend_penalty=None):
    """Find any pre-existing sequence alignment.

    A stored alignment matches when it holds exactly the same number of molecules as the query and, for every molecule, the object ID, model, molecule name and sequence are all equal.  The alignment settings are only used as additional filters when they are supplied:  the algorithm and matrix names are ignored when None or empty, and the penalties are ignored when None.

    @keyword object_ids:                The list of IDs for each structural object in the alignment.  In most cases this will be the data pipe name.  This will be used to retrieve alignments.
    @type object_ids:                   list of str
    @keyword models:                    The list of model numbers used in the alignment.  This will be used to retrieve alignments.
    @type models:                       list of int
    @keyword molecules:                 The list of molecules used in the alignment.  This will be used to retrieve alignments.
    @type molecules:                    list of str
    @keyword sequences:                 The list of residue sequences for the alignment as one letter codes.
    @type sequences:                    list of str
    @keyword msa_algorithm:             The global multiple sequence alignment (MSA) algorithm.
    @type msa_algorithm:                str
    @keyword pairwise_algorithm:        The pairwise sequence alignment algorithm.
    @type pairwise_algorithm:           str
    @keyword matrix:                    The substitution matrix.
    @type matrix:                       str
    @keyword gap_open_penalty:          The penalty for introducing gaps, as a positive number.
    @type gap_open_penalty:             float
    @keyword gap_extend_penalty:        The penalty for extending a gap, as a positive number.
    @type gap_extend_penalty:           float
    @keyword end_gap_open_penalty:      The optional penalty for opening a gap at the end of a sequence.
    @type end_gap_open_penalty:         float
    @keyword end_gap_extend_penalty:    The optional penalty for extending a gap at the end of a sequence.
    @type end_gap_extend_penalty:       float
    @return:                            The first matching sequence alignment object, or None if nothing matches.
    @rtype:                             Alignment instance or None
    """

    # The number of molecules being searched for.
    num = len(object_ids)

    # Loop over all stored alignments.
    for alignment in self:
        # A different number of molecules can never be a match.  Without this check, a larger
        # alignment whose leading molecules match would be returned by mistake, and a smaller
        # one would cause an IndexError below.
        if len(alignment.object_ids) != num:
            continue

        # Compare the identifying data, molecule by molecule, stopping at the first difference.
        mismatch = any(
            alignment.object_ids[j] != object_ids[j]
            or alignment.models[j] != models[j]
            or alignment.molecules[j] != molecules[j]
            or alignment.sequences[j] != sequences[j]
            for j in range(num)
        )
        if mismatch:
            continue

        # Filter on the algorithm and matrix names, but only when these have been supplied.
        if msa_algorithm and alignment.msa_algorithm != msa_algorithm:
            continue
        if pairwise_algorithm and alignment.pairwise_algorithm != pairwise_algorithm:
            continue
        if matrix and alignment.matrix != matrix:
            continue

        # Filter on the penalties.  The test is against None as a penalty of 0.0 is a real setting.
        if gap_open_penalty is not None and alignment.gap_open_penalty != gap_open_penalty:
            continue
        if gap_extend_penalty is not None and alignment.gap_extend_penalty != gap_extend_penalty:
            continue
        if end_gap_open_penalty is not None and alignment.end_gap_open_penalty != end_gap_open_penalty:
            continue
        if end_gap_extend_penalty is not None and alignment.end_gap_extend_penalty != end_gap_extend_penalty:
            continue

        # Everything matches.
        return alignment

    # No matching alignment.
    return None
152
153
def from_xml(self, sequence_alignments_node, file_version=1):
    """Rebuild the list of sequence alignments from the sequence alignments XML node.

    One Alignment container is appended to this list for every 'sequence_alignment' element found, and the from_xml() method of that container is then called with the element.

    @param sequence_alignments_node:    The sequence alignments XML node.
    @type sequence_alignments_node:     xml.dom.minicompat.Element instance
    @keyword file_version:              The relax XML version of the XML file.
    @type file_version:                 int
    """

    # Loop over every individual sequence alignment element.
    for align_node in sequence_alignments_node.getElementsByTagName('sequence_alignment'):
        # Append an empty alignment container to the list.
        alignment = Alignment(object_ids=[])
        self.append(alignment)

        # Hand the XML element to the container so that it can restore its contents.
        alignment.from_xml(align_node, file_version=file_version)
173
174
175
177 """Container for an individual sequence alignment."""
178
def __init__(self, object_ids=None, models=None, molecules=None, sequences=None, strings=None, gaps=None, msa_algorithm=None, pairwise_algorithm=None, matrix=None, gap_open_penalty=None, gap_extend_penalty=None, end_gap_open_penalty=None, end_gap_extend_penalty=None):
    """Set up the sequence alignment object.

    All arguments are stored as attributes of the same name.  In addition, the list self.ids is created with one ID per molecule, built by generate_id() from the object ID, model and molecule name.

    @keyword object_ids:                The list of IDs for each structural object in the alignment.  In most cases this will be the data pipe name.  This will be used to retrieve alignments.  If None, the container is created without any molecule IDs.
    @type object_ids:                   list of str
    @keyword models:                    The list of model numbers used in the alignment.  This will be used to retrieve alignments.
    @type models:                       list of int
    @keyword molecules:                 The list of molecules used in the alignment.  This will be used to retrieve alignments.
    @type molecules:                    list of str
    @keyword sequences:                 The list of residue sequences for the alignment as one letter codes.
    @type sequences:                    list of str
    @keyword strings:                   The list of alignment strings.
    @type strings:                      list of str
    @keyword gaps:                      The alignment gap matrix.
    @type gaps:                         numpy rank-2 int array
    @keyword msa_algorithm:             The global multiple sequence alignment (MSA) algorithm.
    @type msa_algorithm:                str
    @keyword pairwise_algorithm:        The pairwise sequence alignment algorithm.
    @type pairwise_algorithm:           str
    @keyword matrix:                    The substitution matrix.
    @type matrix:                       str
    @keyword gap_open_penalty:          The penalty for introducing gaps, as a positive number.
    @type gap_open_penalty:             float
    @keyword gap_extend_penalty:        The penalty for extending a gap, as a positive number.
    @type gap_extend_penalty:           float
    @keyword end_gap_open_penalty:      The optional penalty for opening a gap at the end of a sequence.
    @type end_gap_open_penalty:         float
    @keyword end_gap_extend_penalty:    The optional penalty for extending a gap at the end of a sequence.
    @type end_gap_extend_penalty:       float
    @raises RelaxError:                 If the same molecule ID is generated more than once.
    """

    # Execute the base class method.
    super(Alignment, self).__init__(name='sequence_alignment', desc='Multiple sequence alignment container.')

    # Store the values.
    self.object_ids = object_ids
    self.models = models
    self.molecules = molecules
    self.sequences = sequences
    self.strings = strings
    self.gaps = gaps
    self.msa_algorithm = msa_algorithm
    self.pairwise_algorithm = pairwise_algorithm
    self.matrix = matrix
    self.gap_open_penalty = gap_open_penalty
    self.gap_extend_penalty = gap_extend_penalty
    self.end_gap_open_penalty = end_gap_open_penalty
    self.end_gap_extend_penalty = end_gap_extend_penalty

    # Create an ID for each molecule.  The default of object_ids=None is treated as an
    # alignment without molecules, rather than failing on len(None).
    self.ids = []
    if self.object_ids is not None:
        for i, object_id in enumerate(self.object_ids):
            self.ids.append(generate_id(object_id=object_id, model=self.models[i], molecule=self.molecules[i]))

    # Check for uniqueness, reporting the first ID in the list which occurs more than once.
    for mol_id in self.ids:
        if self.ids.count(mol_id) > 1:
            raise RelaxError("The molecule ID '%s' is not unique." % mol_id)
240