1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
 22   
 23  """Sequence alignment data structures.""" 
 24   
 25   
 26  from data_store.data_classes import Element, RelaxListType 
 27  from lib.errors import RelaxError 
 28  from lib.structure.internal.coordinates import generate_id 
 29   
 30   
 32      """The structure for holding all sequence alignments.""" 
 33   
 35          """Initialise some class variables.""" 
 36   
 37           
 38          super(Sequence_alignments, self).__init__() 
 39   
 40           
 41          self.list_name = 'sequence_alignments' 
 42          self.list_desc = 'List of all multiple sequence alignments' 
  43   
 44   
 45 -    def add(self, object_ids=None, models=None, molecules=None, sequences=None, strings=None, gaps=None, msa_algorithm=None, pairwise_algorithm=None, matrix=None, gap_open_penalty=None, gap_extend_penalty=None, end_gap_open_penalty=None, end_gap_extend_penalty=None): 
  46          """Add a new sequence alignment. 
 47   
 48          @keyword object_ids:                The list of IDs for each structural object in the alignment.  In most cases this will be the data pipe name.  This will be used to retrieve alignments. 
 49          @type object_ids:                   list of str 
 50          @keyword models:                    The list of model numbers used in the alignment.  This will be used to retrieve alignments. 
 51          @type models:                       list of int 
 52          @keyword molecules:                 The list of molecules used in the alignment.  This will be used to retrieve alignments. 
 53          @type molecules:                    list of str 
 54          @keyword sequences:                 The list of residue sequences for the alignment as one letter codes. 
 55          @type sequences:                    list of str 
 56          @keyword strings:                   The list of alignment strings. 
 57          @type strings:                      list of str 
 58          @keyword gaps:                      The alignment gap matrix. 
 59          @type gaps:                         numpy rank-2 int array 
 60          @keyword msa_algorithm:             The global multiple sequence alignment (MSA) algorithm. 
 61          @type msa_algorithm:                str 
 62          @keyword pairwise_algorithm:        The pairwise sequence alignment algorithm. 
 63          @type pairwise_algorithm:           str 
 64          @keyword matrix:                    The substitution matrix 
 65          @type matrix:                       str 
 66          @keyword gap_open_penalty:          The penalty for introducing gaps, as a positive number. 
 67          @type gap_open_penalty:             float 
 68          @keyword gap_extend_penalty:        The penalty for extending a gap, as a positive number. 
 69          @type gap_extend_penalty:           float 
 70          @keyword end_gap_open_penalty:      The optional penalty for opening a gap at the end of a sequence. 
 71          @type end_gap_open_penalty:         float 
 72          @keyword end_gap_extend_penalty:    The optional penalty for extending a gap at the end of a sequence. 
 73          @type end_gap_extend_penalty:       float 
 74          """ 
 75   
 76           
 77          self.append(Alignment(object_ids=object_ids, molecules=molecules, models=models, sequences=sequences, strings=strings, gaps=gaps, msa_algorithm=msa_algorithm, pairwise_algorithm=pairwise_algorithm, matrix=matrix, gap_open_penalty=gap_open_penalty, gap_extend_penalty=gap_extend_penalty, end_gap_open_penalty=end_gap_open_penalty, end_gap_extend_penalty=end_gap_extend_penalty)) 
  78   
 79   
 80 -    def find_alignment(self, object_ids=None, models=None, molecules=None, sequences=None, msa_algorithm=None, pairwise_algorithm=None, matrix=None, gap_open_penalty=None, gap_extend_penalty=None, end_gap_open_penalty=None, end_gap_extend_penalty=None): 
  81          """Find any pre-existing sequence alignment. 
 82   
 83          @keyword object_ids:                The list of IDs for each structural object in the alignment.  In most cases this will be the data pipe name.  This will be used to retrieve alignments. 
 84          @type object_ids:                   list of str 
 85          @keyword models:                    The list of model numbers used in the alignment.  This will be used to retrieve alignments. 
 86          @type models:                       list of int 
 87          @keyword molecules:                 The list of molecules used in the alignment.  This will be used to retrieve alignments. 
 88          @type molecules:                    list of str 
 89          @keyword sequences:                 The list of residue sequences for the alignment as one letter codes. 
 90          @type sequences:                    list of str 
 91          @keyword msa_algorithm:             The global multiple sequence alignment (MSA) algorithm. 
 92          @type msa_algorithm:                str 
 93          @keyword pairwise_algorithm:        The pairwise sequence alignment algorithm. 
 94          @type pairwise_algorithm:           str 
 95          @keyword matrix:                    The substitution matrix 
 96          @type matrix:                       str 
 97          @keyword gap_open_penalty:          The penalty for introducing gaps, as a positive number. 
 98          @type gap_open_penalty:             float 
 99          @keyword gap_extend_penalty:        The penalty for extending a gap, as a positive number. 
100          @type gap_extend_penalty:           float 
101          @keyword end_gap_open_penalty:      The optional penalty for opening a gap at the end of a sequence. 
102          @type end_gap_open_penalty:         float 
103          @keyword end_gap_extend_penalty:    The optional penalty for extending a gap at the end of a sequence. 
104          @type end_gap_extend_penalty:       float 
105          @return:                            Any matching sequence alignment object. 
106          @rtype:                             Alignment instance 
107          """ 
108   
109           
110          for i in range(len(self)): 
111               
112              found = True 
113   
114               
115              for j in range(len(object_ids)): 
116                   
117                  if self[i].object_ids[j] != object_ids[j]: 
118                      found = False 
119                      break 
120                  if self[i].models[j] != models[j]: 
121                      found = False 
122                      break 
123                  if self[i].molecules[j] != molecules[j]: 
124                      found = False 
125                      break 
126                  if self[i].sequences[j] != sequences[j]: 
127                      found = False 
128                      break 
129   
130               
131              if not found: 
132                  continue 
133   
134               
135              if msa_algorithm and self[i].msa_algorithm != msa_algorithm: 
136                  continue 
137              if pairwise_algorithm and self[i].pairwise_algorithm != pairwise_algorithm: 
138                  continue 
139              if matrix and self[i].matrix != matrix: 
140                  continue 
141              if gap_open_penalty != None and self[i].gap_open_penalty != gap_open_penalty: 
142                  continue 
143              if gap_extend_penalty != None and  self[i].gap_extend_penalty != gap_extend_penalty: 
144                  continue 
145              if end_gap_open_penalty != None and  self[i].end_gap_open_penalty != end_gap_open_penalty: 
146                  continue 
147              if end_gap_extend_penalty != None and  self[i].end_gap_extend_penalty != end_gap_extend_penalty: 
148                  continue 
149   
150               
151              return self[i] 
 152   
153   
154 -    def from_xml(self, sequence_alignments_node, file_version=1): 
 155          """Recreate the analyses data structure from the XML analyses node. 
156   
157          @param sequence_alignments_node:    The sequence alignments XML node. 
158          @type sequence_alignments_node:     xml.dom.minicompat.Element instance 
159          @keyword file_version:              The relax XML version of the XML file. 
160          @type file_version:                 int 
161          """ 
162   
163           
164          align_nodes = sequence_alignments_node.getElementsByTagName('sequence_alignment') 
165   
166           
167          for node in align_nodes: 
168               
169              self.append(Alignment(object_ids=[])) 
170   
171               
172              self[-1].from_xml(node, file_version=file_version) 
  173   
174   
175   
177      """Container for an individual sequence alignment.""" 
178   
179 -    def __init__(self, object_ids=None, models=None, molecules=None, sequences=None, strings=None, gaps=None, msa_algorithm=None, pairwise_algorithm=None, matrix=None, gap_open_penalty=None, gap_extend_penalty=None, end_gap_open_penalty=None, end_gap_extend_penalty=None): 
 180          """Set up the sequence alignment object. 
181   
182          @keyword object_ids:                The list of IDs for each structural object in the alignment.  In most cases this will be the data pipe name.  This will be used to retrieve alignments. 
183          @type object_ids:                   list of str 
184          @keyword models:                    The list of model numbers used in the alignment.  This will be used to retrieve alignments. 
185          @type models:                       list of int 
186          @keyword molecules:                 The list of molecules used in the alignment.  This will be used to retrieve alignments. 
187          @type molecules:                    list of str 
188          @keyword sequences:                 The list of residue sequences for the alignment as one letter codes. 
189          @type sequences:                    list of str 
190          @keyword strings:                   The list of alignment strings. 
191          @type strings:                      list of str 
192          @keyword gaps:                      The alignment gap matrix. 
193          @type gaps:                         numpy rank-2 int array 
194          @keyword msa_algorithm:             The global multiple sequence alignment (MSA) algorithm. 
195          @type msa_algorithm:                str 
196          @keyword pairwise_algorithm:        The pairwise sequence alignment algorithm. 
197          @type pairwise_algorithm:           str 
198          @keyword matrix:                    The substitution matrix 
199          @type matrix:                       str 
200          @keyword gap_open_penalty:          The penalty for introducing gaps, as a positive number. 
201          @type gap_open_penalty:             float 
202          @keyword gap_extend_penalty:        The penalty for extending a gap, as a positive number. 
203          @type gap_extend_penalty:           float 
204          @keyword end_gap_open_penalty:      The optional penalty for opening a gap at the end of a sequence. 
205          @type end_gap_open_penalty:         float 
206          @keyword end_gap_extend_penalty:    The optional penalty for extending a gap at the end of a sequence. 
207          @type end_gap_extend_penalty:       float 
208          """ 
209   
210           
211          super(Alignment, self).__init__(name='sequence_alignment', desc='Multiple sequence alignment container.') 
212   
213           
214          self.object_ids = object_ids 
215          self.models = models 
216          self.molecules = molecules 
217          self.sequences = sequences 
218          self.strings = strings 
219          self.gaps = gaps 
220          self.msa_algorithm = msa_algorithm 
221          self.pairwise_algorithm = pairwise_algorithm 
222          self.matrix = matrix 
223          self.gap_open_penalty = gap_open_penalty 
224          self.gap_extend_penalty = gap_extend_penalty 
225          self.end_gap_open_penalty = end_gap_open_penalty 
226          self.end_gap_extend_penalty = end_gap_extend_penalty 
227   
228           
229          self.ids = []  
230          for i in range(len(self.object_ids)): 
231              self.ids.append(generate_id(object_id=self.object_ids[i], model=self.models[i], molecule=self.molecules[i])) 
232   
233           
234          for i in range(len(self.ids)): 
235              for j in range(len(self.ids)): 
236                  if i == j: 
237                      continue 
238                  if self.ids[i] == self.ids[j]: 
239                      raise RelaxError("The molecule ID '%s' is not unique." % self.ids[i]) 
  240