1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """The objects representing molecules in the internal structural object."""
24
25
26 from re import search
27 from string import digits
28 from warnings import warn
29
30
31 from lib.errors import RelaxError, RelaxFromXMLNotEmptyError
32 from lib.periodic_table import periodic_table
33 from lib.structure import pdb_read
34 from lib.warnings import RelaxWarning
35 from lib.xml import fill_object_contents, xml_to_object
36
37
39 """The container for the molecular information.
40
41 The structural data object for this class is a container possessing a number of different arrays
42 corresponding to different structural information. These objects include:
43
44 - atom_num: The atom name.
45 - atom_name: The atom name.
46 - bonded: Each element an array of bonded atom indices.
47 - chain_id: The chain ID.
48 - element: The element symbol.
49 - pdb_record: The optional PDB record name (one of ATOM, HETATM, or TER).
50 - res_name: The residue name.
51 - res_num: The residue number.
52 - seg_id: The segment ID.
53 - x: The x coordinate of the atom.
54 - y: The y coordinate of the atom.
55 - z: The z coordinate of the atom.
56
57 All arrays should be of equal length so that an atom index can retrieve all the corresponding
58 data. Only the atom identification string is compulsory, all other arrays can contain None.
59 """
60
61
63 """Initialise the molecular container."""
64
65
66 self.atom_num = []
67
68
69 self.atom_name = []
70
71
72 self.bonded = []
73
74
75 self.chain_id = []
76
77
78 self.element = []
79
80
81 self.pdb_record = []
82
83
84 self.res_name = []
85
86
87 self.res_num = []
88
89
90 self.seg_id = []
91
92
93 self.x = []
94
95
96 self.y = []
97
98
99 self.z = []
100
101
103 """Find the atom index corresponding to the given atom number.
104
105 @param atom_num: The atom number to find the index of.
106 @type atom_num: int
107 @return: The atom index corresponding to the atom.
108 @rtype: int
109 """
110
111
112 for j in range(len(self.atom_num)):
113
114 if self.atom_num[j] == atom_num:
115 return j
116
117
119 """Try to determine the element from the PDB atom name.
120
121 @param atom_name: The PDB atom name.
122 @type atom_name: str
123 @return: The element name, or None if unsuccessful.
124 @rtype: str or None
125 """
126
127
128 element = atom_name.strip("'")
129
130
131 element = element.strip(digits)
132
133
134 table = {'C': ['CA', 'CB', 'CG', 'CD', 'CE', 'CH', 'CZ'],
135 'N': ['ND', 'NE', 'NH', 'NZ'],
136 'H': ['HA', 'HB', 'HG', 'HD', 'HE', 'HH', 'HT', 'HZ'],
137 'O': ['OG', 'OD', 'OE', 'OH', 'OT'],
138 'S': ['SD', 'SG']
139 }
140
141
142 for key in table:
143 if element in table[key]:
144 element = key
145 break
146
147
148 if periodic_table.has_element(symbol=element):
149 return element
150
151
152 warn(RelaxWarning("Cannot determine the element associated with atom '%s'." % atom_name))
153
154
156 """Parse the Gaussian log record string and return an array of the corresponding atomic information.
157
158 The format of the Gaussian log records is::
159 __________________________________________________________________________________________
160 | | | |
161 | Columns | Data type | Description |
162 |_________|______________|_______________________________________________________________|
163 | | | |
164 | 1 | int | "Center Number" - the sequential atom number. |
165 | 2 | int | "Atomic Number" - the atomic number. |
166 | 3 | int | "Atomic Type" - the atomic type? |
167 | 4 | float | X coordinate in Angstrom |
168 | 5 | float | Y coordinate in Angstrom |
169 | 6 | float | Z coordinate in Angstrom |
170 |_________|______________|_______________________________________________________________|
171
172
173 @param record: The single line Gaussian record.
174 @type record: str
175 @return: The list of atomic information
176 @rtype: list of str
177 """
178
179
180 if search("---------", record):
181 return None
182 if search("Center", record):
183 return None
184 if search("Number", record):
185 return None
186
187
188 word = record.split()
189
190
191 if len(word) == 6:
192
193 atom_number = int(word[0])
194 atomic_num = int(word[1])
195 x = float(word[3])
196 y = float(word[4])
197 z = float(word[5])
198
199
200 return atom_number, atomic_num, x, y, z
201
202
204 """Parse the XYZ record string and return an array of the corresponding atomic information.
205
206 The format of the XYZ records is::
207 __________________________________________________________________________________________
208 | | | | |
209 | Columns | Data type | Field | Definition |
210 |_________|______________|______________|________________________________________________|
211 | | | | |
212 | 1 | String | element | |
213 | 2 | Real | x | Orthogonal coordinates for X in Angstroms |
214 | 3 | Real | y | Orthogonal coordinates for Y in Angstroms |
215 | 4 | Real | z | Orthogonal coordinates for Z in Angstroms |
216 |_________|______________|______________|________________________________________________|
217
218
219 @param record: The single line PDB record.
220 @type record: str
221 @return: The list of atomic information
222 @rtype: list of str
223 """
224
225
226 fields = []
227 word = record.split()
228
229
230 if len(word)==4:
231
232 fields.append(word[0])
233 fields.append(word[1])
234 fields.append(word[2])
235 fields.append(word[3])
236
237
238 for i in range(len(fields)):
239
240 fields[i] = fields[i].strip()
241
242
243 if fields[i] == '':
244 fields[i] = None
245
246
247 if fields[1]:
248 fields[1] = float(fields[1])
249 if fields[2]:
250 fields[2] = float(fields[2])
251 if fields[3]:
252 fields[3] = float(fields[3])
253
254
255 return fields
256
257
259 """Sort all structural data."""
260
261
262 indices = list(range(len(self.atom_name)))
263 indices.sort(key=self._sort_key)
264
265
266 self.atom_num = [self.atom_num[i] for i in indices]
267 self.atom_name = [self.atom_name[i] for i in indices]
268 self.bonded = [self.bonded[i] for i in indices]
269 self.chain_id = [self.chain_id[i] for i in indices]
270 self.element = [self.element[i] for i in indices]
271 self.pdb_record = [self.pdb_record[i] for i in indices]
272 self.res_name = [self.res_name[i] for i in indices]
273 self.res_num = [self.res_num[i] for i in indices]
274 self.seg_id = [self.seg_id[i] for i in indices]
275 self.x = [self.x[i] for i in indices]
276 self.y = [self.y[i] for i in indices]
277 self.z = [self.z[i] for i in indices]
278
279
280 for i in range(len(self.bonded)):
281 for j in range(len(self.bonded[i])):
282 self.bonded[i][j] = indices.index(self.bonded[i][j])
283
284
286 """Return the information for sorting the sequence data."""
287
288
289 if self.res_num[i] == None:
290 return 0
291
292
293 return self.res_num[i]
294
295
296 - def atom_add(self, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):
297 """Method for adding an atom to the structural data object.
298
299 This method will create the key-value pair for the given atom.
300
301
302 @keyword atom_name: The atom name, e.g. 'H1'.
303 @type atom_name: str or None
304 @keyword res_name: The residue name.
305 @type res_name: str or None
306 @keyword res_num: The residue number.
307 @type res_num: int or None
308 @keyword pos: The position vector of coordinates.
309 @type pos: list (length = 3)
310 @keyword element: The element symbol.
311 @type element: str or None
312 @keyword atom_num: The atom number.
313 @type atom_num: int or None
314 @keyword chain_id: The chain identifier.
315 @type chain_id: str or None
316 @keyword segment_id: The segment identifier.
317 @type segment_id: str or None
318 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'.
319 @type pdb_record: str or None
320 @return: The index of the added atom.
321 @rtype: int
322 """
323
324
325 self.atom_num.append(atom_num)
326 self.atom_name.append(atom_name)
327 self.bonded.append([])
328 self.chain_id.append(chain_id)
329 self.element.append(element)
330 self.pdb_record.append(pdb_record)
331 self.res_name.append(res_name)
332 self.res_num.append(res_num)
333 self.seg_id.append(segment_id)
334 self.x.append(pos[0])
335 self.y.append(pos[1])
336 self.z.append(pos[2])
337
338
339 return len(self.atom_num) - 1
340
341
343 """Method for connecting two atoms within the data structure object.
344
345 This method will append index2 to the array at bonded[index1] and vice versa.
346
347
348 @keyword index1: The index of the first atom.
349 @type index1: int
350 @keyword index2: The index of the second atom.
351 @type index2: int
352 """
353
354
355 if index2 not in self.bonded[index1]:
356 self.bonded[index1].append(index2)
357 if index1 not in self.bonded[index2]:
358 self.bonded[index2].append(index1)
359
360
362 """Method for generating a complete Structure_container object from the given Gaussian log records.
363
364 @param records: A list of structural Gaussian log records.
365 @type records: list of str
366 """
367
368
369 for record in records:
370
371 data = self._parse_gaussian_record(record)
372
373
374 if data == None:
375 continue
376
377
378 atom_number, atomic_num, x, y, z = data
379
380
381 atom_name = periodic_table.lookup_symbol(atomic_num)
382
383
384 self.atom_add(atom_name=atom_name, atom_num=atom_number, pos=[x, y, z], element=atom_name)
385
386
388 """Method for generating a complete Structure_container object from the given PDB records.
389
390 @param records: A list of structural PDB records.
391 @type records: list of str
392 @keyword alt_loc_select: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use.
393 @type alt_loc_select: str or None
394 """
395
396
397 water = []
398 missing_connect = []
399 for record in records:
400
401 if not record or record == '\n':
402 continue
403
404
405 if record[:4] == 'ATOM' or record[:6] == 'HETATM':
406
407 if record[:4] == 'ATOM':
408 record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.atom(record)
409 if record[:6] == 'HETATM':
410 record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.hetatm(record)
411
412
413 if res_name == 'HOH':
414 water.append(res_seq)
415 continue
416
417
418 if alt_loc != None:
419
420 if alt_loc_select == None:
421 raise RelaxError("Multiple alternate location indicators are present in the PDB file, but the desired coordinate set has not been specified.")
422
423
424 if alt_loc != alt_loc_select:
425 continue
426
427
428 if not element:
429 element = self._det_pdb_element(name)
430
431
432 self.atom_add(pdb_record=record_type, atom_num=serial, atom_name=name, res_name=res_name, chain_id=chain_id, res_num=res_seq, pos=[x, y, z], element=element)
433
434
435 if record[:6] == 'CONECT':
436
437 record_type, serial, bonded1, bonded2, bonded3, bonded4 = pdb_read.conect(record)
438
439
440 for bonded in [bonded1, bonded2, bonded3, bonded4]:
441
442 if not bonded:
443 continue
444
445
446 serial_index = self._atom_index(serial)
447 bonded_index = self._atom_index(bonded)
448
449
450 if serial_index == None:
451 if serial not in missing_connect:
452 missing_connect.append(serial)
453 continue
454 if bonded_index == None:
455 if bonded not in missing_connect:
456 missing_connect.append(bonded)
457 continue
458
459
460 self.atom_connect(index1=serial_index, index2=bonded_index)
461
462
463 if len(missing_connect):
464 missing_connect.sort()
465 warn(RelaxWarning("The following atom numbers from the CONECT records cannot be found within the ATOM and HETATM records: %s." % missing_connect))
466 if len(water):
467 warn(RelaxWarning("Skipping the water molecules HOH %s." % water))
468
469
471 """Method for generating a complete Structure_container object from the given xyz records.
472
473 @param records: A list of structural xyz records.
474 @type records: list of str
475 """
476
477
478 atom_number = 1
479
480
481 for record in records:
482
483 record = self._parse_xyz_record(record)
484
485
486 if not record:
487 continue
488
489
490 if len(record) == 4:
491
492 self.atom_add(atom_name=record[0], atom_num=atom_number, pos=[record[1], record[2], record[3]], element=record[0])
493
494
495 atom_number = atom_number + 1
496
497
498 - def from_xml(self, mol_node, file_version=1):
499 """Recreate the MolContainer from the XML molecule node.
500
501 @param mol_node: The molecule XML node.
502 @type mol_node: xml.dom.minicompat.NodeList instance
503 @keyword file_version: The relax XML version of the XML file.
504 @type file_version: int
505 """
506
507
508 xml_to_object(mol_node, self, file_version=file_version)
509
510
512 """Check if the container is empty."""
513
514
515 if hasattr(self, 'file_name'): return False
516 if hasattr(self, 'file_path'): return False
517 if hasattr(self, 'file_mol_num'): return False
518 if hasattr(self, 'file_model'): return False
519
520
521 if not self.atom_num == []: return False
522 if not self.atom_name == []: return False
523 if not self.bonded == []: return False
524 if not self.chain_id == []: return False
525 if not self.element == []: return False
526 if not self.pdb_record == []: return False
527 if not self.res_name == []: return False
528 if not self.res_num == []: return False
529 if not self.seg_id == []: return False
530 if not self.x == []: return False
531 if not self.y == []: return False
532 if not self.z == []: return False
533
534
535 return True
536
537
539 """Return the number of the last residue.
540
541 @return: The last residue number.
542 @rtype: int
543 """
544
545
546 if not len(self.res_num):
547 return 0
548
549
550 return self.res_num[-1]
551
552
554 """Generator method for looping over the individual residues of the molecule.
555
556 @return: The residue name and number.
557 @rtype: str, int
558 """
559
560
561 last_res = None
562 for i in range(len(self.atom_num)):
563
564 if self.res_num[i] != last_res:
565 last_res = self.res_num[i]
566 yield self.res_name[i], self.res_num[i]
567
568
569 - def merge(self, mol_cont=None):
570 """Merge the contents of the given molecule container into here.
571
572 @keyword mol_cont: The data structure for the molecule to merge.
573 @type mol_cont: MolContainer instance
574 """
575
576
577 curr_index = len(self.atom_num)
578
579
580 for i in range(len(mol_cont.atom_num)):
581
582 self.atom_add(atom_num=curr_index+i+1, atom_name=mol_cont.atom_name[i], res_name=mol_cont.res_name[i], res_num=mol_cont.res_num[i], pos=[mol_cont.x[i], mol_cont.y[i], mol_cont.z[i]], element=mol_cont.element[i], chain_id=mol_cont.chain_id[i], pdb_record=mol_cont.pdb_record[i])
583
584
585 for j in range(len(mol_cont.bonded[i])):
586 self.atom_connect(index1=i+curr_index+1, index2=mol_cont.bonded[i][j]+curr_index+1)
587
588
589 - def to_xml(self, doc, element):
590 """Create XML elements for the contents of this molecule container.
591
592 @param doc: The XML document object.
593 @type doc: xml.dom.minidom.Document instance
594 @param element: The element to add the molecule XML elements to.
595 @type element: XML element object
596 """
597
598
599 mol_element = doc.createElement('mol_cont')
600 element.appendChild(mol_element)
601
602
603 mol_element.setAttribute('desc', 'Molecule container')
604 mol_element.setAttribute('name', str(self.mol_name))
605
606
607 fill_object_contents(doc, mol_element, object=self, blacklist=list(self.__class__.__dict__.keys()))
608
609
610
612 """List type data container for holding the different molecules of one model."""
613
615 """The string representation of the object.
616
617 Rather than using the standard Python conventions (either the string representation of the
618 value or the "<...desc...>" notation), a rich-formatted description of the object is given.
619 """
620
621 text = "Molecules.\n\n"
622 text = text + "%-8s%-8s" % ("Index", "Name") + "\n"
623 for i in range(len(self)):
624 text = text + "%-8i%-8s" % (i, self[i].mol_name) + "\n"
625 return text
626
627
628 - def add_item(self, mol_name=None, mol_cont=None):
629 """Append the given MolContainer instance to the MolList.
630
631 @keyword mol_name: The molecule number.
632 @type mol_name: int
633 @keyword mol_cont: The data structure for the molecule.
634 @type mol_cont: MolContainer instance
635 @return: The new molecule container.
636 @rtype: MolContainer instance
637 """
638
639
640 if len(self) and self.is_empty():
641 self[0].mol_name = mol_name
642
643
644 else:
645
646 for i in range(len(self)):
647 if self[i].mol_name == mol_name:
648 raise RelaxError("The molecule '%s' already exists." % mol_name)
649
650
651 self.append(mol_cont)
652
653
654 self[-1].mol_name = mol_name
655
656
657 return self[-1]
658
659
661 """Method for testing if this MolList object is empty.
662
663 @return: True if this list only has one MolContainer and the molecule name has not
664 been set, False otherwise.
665 @rtype: bool
666 """
667
668
669 if len(self) == 0:
670 return True
671
672
673 if len(self) == 1 and hasattr(self[0], 'is_empty') and self[0].is_empty():
674 return True
675
676
677 return False
678
679
680 - def from_xml(self, mol_nodes, file_version=1):
681 """Recreate a molecule list data structure from the XML molecule nodes.
682
683 @param mol_nodes: The molecule XML nodes.
684 @type mol_nodes: xml.dom.minicompat.NodeList instance
685 @keyword file_version: The relax XML version of the XML file.
686 @type file_version: int
687 """
688
689
690 if not self.is_empty():
691 raise RelaxFromXMLNotEmptyError(self.__class__.__name__)
692
693
694 for mol_node in mol_nodes:
695
696 mol_cont = MolContainer()
697
698
699 name = mol_node.getAttribute('name')
700 if name == 'None':
701 name = None
702
703
704 self.add_item(mol_name=name, mol_cont=mol_cont)
705
706
707 self[-1].from_xml(mol_node, file_version=file_version)
708
709
710 - def merge_item(self, mol_name=None, mol_cont=None):
711 """Mege the given MolContainer instance into a pre-existing molecule container.
712
713 @keyword mol_name: The molecule number.
714 @type mol_name: int
715 @keyword mol_cont: The data structure for the molecule.
716 @type mol_cont: MolContainer instance
717 @return: The new molecule container.
718 @rtype: MolContainer instance
719 """
720
721
722 index = None
723 for i in range(len(self)):
724 if self[i].mol_name == mol_name:
725 index = i
726 break
727
728
729 if index == None:
730 raise RelaxError("The molecule '%s' to merge with cannot be found." % mol_name)
731
732
733 self[index].merge(mol_cont)
734
735
736 return self[index]
737
738
739 - def to_xml(self, doc, element):
740 """Create XML elements for each molecule.
741
742 @param doc: The XML document object.
743 @type doc: xml.dom.minidom.Document instance
744 @param element: The element to add the molecule XML elements to.
745 @type element: XML element object
746 """
747
748
749 for i in range(len(self)):
750
751 self[i].to_xml(doc, element)
752