1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """The objects representing molecules in the internal structural object."""
24
25
26 from re import search
27 from string import digits
28 from warnings import warn
29
30
31 from lib.errors import RelaxError, RelaxFromXMLNotEmptyError
32 from lib.periodic_table import periodic_table
33 from lib.structure import pdb_read
34 from lib.warnings import RelaxWarning
35 from lib.xml import fill_object_contents, xml_to_object
36
37
39 """The container for the molecular information.
40
41 The structural data object for this class is a container possessing a number of different arrays
42 corresponding to different structural information. These objects include:
43
44 - atom_num: The atom name.
45 - atom_name: The atom name.
46 - bonded: Each element an array of bonded atom indices.
47 - chain_id: The chain ID.
48 - element: The element symbol.
49 - pdb_record: The optional PDB record name (one of ATOM, HETATM, or TER).
50 - res_name: The residue name.
51 - res_num: The residue number.
52 - seg_id: The segment ID.
53 - x: The x coordinate of the atom.
54 - y: The y coordinate of the atom.
55 - z: The z coordinate of the atom.
56
57 All arrays should be of equal length so that an atom index can retrieve all the corresponding
58 data. Only the atom identification string is compulsory, all other arrays can contain None.
59 """
60
61
63 """Initialise the molecular container."""
64
65
66 self.atom_num = []
67
68
69 self.atom_name = []
70
71
72 self.bonded = []
73
74
75 self.chain_id = []
76
77
78 self.element = []
79
80
81 self.pdb_record = []
82
83
84 self.res_name = []
85
86
87 self.res_num = []
88
89
90 self.seg_id = []
91
92
93 self.x = []
94
95
96 self.y = []
97
98
99 self.z = []
100
101
103 """Find the atom index corresponding to the given atom number.
104
105 @param atom_num: The atom number to find the index of.
106 @type atom_num: int
107 @return: The atom index corresponding to the atom.
108 @rtype: int
109 """
110
111
112 for j in range(len(self.atom_num)):
113
114 if self.atom_num[j] == atom_num:
115 return j
116
117
119 """Try to determine the element from the PDB atom name.
120
121 @param atom_name: The PDB atom name.
122 @type atom_name: str
123 @return: The element name, or None if unsuccessful.
124 @rtype: str or None
125 """
126
127
128 element = atom_name.strip("'")
129
130
131 element = element.strip(digits)
132
133
134 table = {'C': ['CA', 'CB', 'CG', 'CD', 'CE', 'CH', 'CZ'],
135 'N': ['ND', 'NE', 'NH', 'NZ'],
136 'H': ['HA', 'HB', 'HG', 'HD', 'HE', 'HH', 'HT', 'HZ'],
137 'O': ['OG', 'OD', 'OE', 'OH', 'OT'],
138 'S': ['SD', 'SG']
139 }
140
141
142 for key in table:
143 if element in table[key]:
144 element = key
145 break
146
147
148 if periodic_table.has_element(symbol=element):
149 return element
150
151
152 warn(RelaxWarning("Cannot determine the element associated with atom '%s'." % atom_name))
153
154
156 """Parse the Gaussian log record string and return an array of the corresponding atomic information.
157
158 The format of the Gaussian log records is::
159 __________________________________________________________________________________________
160 | | | |
161 | Columns | Data type | Description |
162 |_________|______________|_______________________________________________________________|
163 | | | |
164 | 1 | int | "Center Number" - the sequential atom number. |
165 | 2 | int | "Atomic Number" - the atomic number. |
166 | 3 | int | "Atomic Type" - the atomic type? |
167 | 4 | float | X coordinate in Angstrom |
168 | 5 | float | Y coordinate in Angstrom |
169 | 6 | float | Z coordinate in Angstrom |
170 |_________|______________|_______________________________________________________________|
171
172
173 @param record: The single line Gaussian record.
174 @type record: str
175 @return: The list of atomic information
176 @rtype: list of str
177 """
178
179
180 if search("---------", record):
181 return None
182 if search("Center", record):
183 return None
184 if search("Number", record):
185 return None
186
187
188 word = record.split()
189
190
191 if len(word) == 6:
192
193 atom_number = int(word[0])
194 atomic_num = int(word[1])
195 x = float(word[3])
196 y = float(word[4])
197 z = float(word[5])
198
199
200 return atom_number, atomic_num, x, y, z
201
202
204 """Parse the XYZ record string and return an array of the corresponding atomic information.
205
206 The format of the XYZ records is::
207 __________________________________________________________________________________________
208 | | | | |
209 | Columns | Data type | Field | Definition |
210 |_________|______________|______________|________________________________________________|
211 | | | | |
212 | 1 | String | element | |
213 | 2 | Real | x | Orthogonal coordinates for X in Angstroms |
214 | 3 | Real | y | Orthogonal coordinates for Y in Angstroms |
215 | 4 | Real | z | Orthogonal coordinates for Z in Angstroms |
216 |_________|______________|______________|________________________________________________|
217
218
219 @param record: The single line PDB record.
220 @type record: str
221 @return: The list of atomic information
222 @rtype: list of str
223 """
224
225
226 fields = []
227 word = record.split()
228
229
230 if len(word)==4:
231
232 fields.append(word[0])
233 fields.append(word[1])
234 fields.append(word[2])
235 fields.append(word[3])
236
237
238 for i in range(len(fields)):
239
240 fields[i] = fields[i].strip()
241
242
243 if fields[i] == '':
244 fields[i] = None
245
246
247 if fields[1]:
248 fields[1] = float(fields[1])
249 if fields[2]:
250 fields[2] = float(fields[2])
251 if fields[3]:
252 fields[3] = float(fields[3])
253
254
255 return fields
256
257
259 """Sort all structural data."""
260
261
262 indices = list(range(len(self.atom_name)))
263 indices.sort(key=self._sort_key)
264
265
266 self.atom_num = [self.atom_num[i] for i in indices]
267 self.atom_name = [self.atom_name[i] for i in indices]
268 self.bonded = [self.bonded[i] for i in indices]
269 self.chain_id = [self.chain_id[i] for i in indices]
270 self.element = [self.element[i] for i in indices]
271 self.pdb_record = [self.pdb_record[i] for i in indices]
272 self.res_name = [self.res_name[i] for i in indices]
273 self.res_num = [self.res_num[i] for i in indices]
274 self.seg_id = [self.seg_id[i] for i in indices]
275 self.x = [self.x[i] for i in indices]
276 self.y = [self.y[i] for i in indices]
277 self.z = [self.z[i] for i in indices]
278
279
280 for i in range(len(self.bonded)):
281 for j in range(len(self.bonded[i])):
282 self.bonded[i][j] = indices.index(self.bonded[i][j])
283
284
286 """Return the information for sorting the sequence data."""
287
288
289 if self.res_num[i] == None:
290 return 0
291
292
293 return self.res_num[i]
294
295
296 - def atom_add(self, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):
297 """Method for adding an atom to the structural data object.
298
299 This method will create the key-value pair for the given atom.
300
301
302 @keyword atom_name: The atom name, e.g. 'H1'.
303 @type atom_name: str or None
304 @keyword res_name: The residue name.
305 @type res_name: str or None
306 @keyword res_num: The residue number.
307 @type res_num: int or None
308 @keyword pos: The position vector of coordinates.
309 @type pos: list (length = 3)
310 @keyword element: The element symbol.
311 @type element: str or None
312 @keyword atom_num: The atom number.
313 @type atom_num: int or None
314 @keyword chain_id: The chain identifier.
315 @type chain_id: str or None
316 @keyword segment_id: The segment identifier.
317 @type segment_id: str or None
318 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'.
319 @type pdb_record: str or None
320 @return: The index of the added atom.
321 @rtype: int
322 """
323
324
325 self.atom_num.append(atom_num)
326 self.atom_name.append(atom_name)
327 self.bonded.append([])
328 self.chain_id.append(chain_id)
329 self.element.append(element)
330 self.pdb_record.append(pdb_record)
331 self.res_name.append(res_name)
332 self.res_num.append(res_num)
333 self.seg_id.append(segment_id)
334 self.x.append(pos[0])
335 self.y.append(pos[1])
336 self.z.append(pos[2])
337
338
339 return len(self.atom_num) - 1
340
341
343 """Method for connecting two atoms within the data structure object.
344
345 This method will append index2 to the array at bonded[index1] and vice versa.
346
347
348 @keyword index1: The index of the first atom.
349 @type index1: int
350 @keyword index2: The index of the second atom.
351 @type index2: int
352 """
353
354
355 if index2 not in self.bonded[index1]:
356 self.bonded[index1].append(index2)
357 if index1 not in self.bonded[index2]:
358 self.bonded[index2].append(index1)
359
360
362 """Method for generating a complete Structure_container object from the given Gaussian log records.
363
364 @param records: A list of structural Gaussian log records.
365 @type records: list of str
366 """
367
368
369 for record in records:
370
371 data = self._parse_gaussian_record(record)
372
373
374 if data == None:
375 continue
376
377
378 atom_number, atomic_num, x, y, z = data
379
380
381 atom_name = periodic_table.lookup_symbol(atomic_num)
382
383
384 self.atom_add(atom_name=atom_name, atom_num=atom_number, pos=[x, y, z], element=atom_name)
385
386
388 """Method for generating a complete Structure_container object from the given PDB records.
389
390 @param records: A list of structural PDB records.
391 @type records: list of str
392 @keyword alt_loc_select: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use.
393 @type alt_loc_select: str or None
394 @keyword reset_serial: A flag which if True will cause the first serial number (or atom number) to be reset to 1, and all other numbers shifted.
395 @type reset_serial: bool
396 """
397
398
399 water = []
400 missing_connect = []
401 number_offset = None
402 for record in records:
403
404 if not record or record == '\n':
405 continue
406
407
408 if record[:4] == 'ATOM' or record[:6] == 'HETATM':
409
410 if record[:4] == 'ATOM':
411 record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.atom(record)
412 if record[:6] == 'HETATM':
413 record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.hetatm(record)
414
415
416 if reset_serial:
417
418 if number_offset == None:
419 number_offset = serial - 1
420
421
422 serial -= number_offset
423
424
425 if res_name == 'HOH':
426 water.append(res_seq)
427 continue
428
429
430 if alt_loc != None:
431
432 if alt_loc_select == None:
433 raise RelaxError("Multiple alternate location indicators are present in the PDB file, but the desired coordinate set has not been specified.")
434
435
436 if alt_loc != alt_loc_select:
437 continue
438
439
440 if not element:
441 element = self._det_pdb_element(name)
442
443
444 self.atom_add(pdb_record=record_type, atom_num=serial, atom_name=name, res_name=res_name, res_num=res_seq, pos=[x, y, z], element=element)
445
446
447 if record[:6] == 'CONECT':
448
449 record_type, serial, bonded1, bonded2, bonded3, bonded4 = pdb_read.conect(record)
450
451
452 if reset_serial:
453 serial -= number_offset
454 bonded1 -= number_offset
455 if bonded2 != None:
456 bonded2 -= number_offset
457 if bonded3 != None:
458 bonded3 -= number_offset
459 if bonded4 != None:
460 bonded4 -= number_offset
461
462
463 for bonded in [bonded1, bonded2, bonded3, bonded4]:
464
465 if not bonded:
466 continue
467
468
469 serial_index = self._atom_index(serial)
470 bonded_index = self._atom_index(bonded)
471
472
473 if serial_index == None:
474 if serial not in missing_connect:
475 missing_connect.append(serial)
476 continue
477 if bonded_index == None:
478 if bonded not in missing_connect:
479 missing_connect.append(bonded)
480 continue
481
482
483 self.atom_connect(index1=serial_index, index2=bonded_index)
484
485
486 if len(missing_connect):
487 missing_connect.sort()
488 warn(RelaxWarning("The following atom numbers from the CONECT records cannot be found within the ATOM and HETATM records: %s." % missing_connect))
489 if len(water):
490 warn(RelaxWarning("Skipping the water molecules HOH %s." % water))
491
492
494 """Method for generating a complete Structure_container object from the given xyz records.
495
496 @param records: A list of structural xyz records.
497 @type records: list of str
498 """
499
500
501 atom_number = 1
502
503
504 for record in records:
505
506 record = self._parse_xyz_record(record)
507
508
509 if not record:
510 continue
511
512
513 if len(record) == 4:
514
515 self.atom_add(atom_name=record[0], atom_num=atom_number, pos=[record[1], record[2], record[3]], element=record[0])
516
517
518 atom_number = atom_number + 1
519
520
521 - def from_xml(self, mol_node, file_version=1):
522 """Recreate the MolContainer from the XML molecule node.
523
524 @param mol_node: The molecule XML node.
525 @type mol_node: xml.dom.minicompat.NodeList instance
526 @keyword file_version: The relax XML version of the XML file.
527 @type file_version: int
528 """
529
530
531 xml_to_object(mol_node, self, file_version=file_version)
532
533
535 """Check if the container is empty."""
536
537
538 if hasattr(self, 'file_name'): return False
539 if hasattr(self, 'file_path'): return False
540 if hasattr(self, 'file_mol_num'): return False
541 if hasattr(self, 'file_model'): return False
542
543
544 if not self.atom_num == []: return False
545 if not self.atom_name == []: return False
546 if not self.bonded == []: return False
547 if not self.chain_id == []: return False
548 if not self.element == []: return False
549 if not self.pdb_record == []: return False
550 if not self.res_name == []: return False
551 if not self.res_num == []: return False
552 if not self.seg_id == []: return False
553 if not self.x == []: return False
554 if not self.y == []: return False
555 if not self.z == []: return False
556
557
558 return True
559
560
562 """Return the number of the last residue.
563
564 @return: The last residue number.
565 @rtype: int
566 """
567
568
569 if not len(self.res_num):
570 return 0
571
572
573 return self.res_num[-1]
574
575
577 """Generator method for looping over the individual residues of the molecule.
578
579 @return: The residue name and number.
580 @rtype: str, int
581 """
582
583
584 last_res = None
585 for i in range(len(self.atom_num)):
586
587 if self.res_num[i] != last_res:
588 last_res = self.res_num[i]
589 yield self.res_name[i], self.res_num[i]
590
591
592 - def merge(self, mol_cont=None):
593 """Merge the contents of the given molecule container into here.
594
595 @keyword mol_cont: The data structure for the molecule to merge.
596 @type mol_cont: MolContainer instance
597 """
598
599
600 curr_index = len(self.atom_num)
601
602
603 for i in range(len(mol_cont.atom_num)):
604
605 self.atom_add(atom_num=curr_index+i+1, atom_name=mol_cont.atom_name[i], res_name=mol_cont.res_name[i], res_num=mol_cont.res_num[i], pos=[mol_cont.x[i], mol_cont.y[i], mol_cont.z[i]], element=mol_cont.element[i], chain_id=mol_cont.chain_id[i], pdb_record=mol_cont.pdb_record[i])
606
607
608 for j in range(len(mol_cont.bonded[i])):
609 self.atom_connect(index1=i+curr_index+1, index2=mol_cont.bonded[i][j]+curr_index+1)
610
611
612 - def to_xml(self, doc, element):
613 """Create XML elements for the contents of this molecule container.
614
615 @param doc: The XML document object.
616 @type doc: xml.dom.minidom.Document instance
617 @param element: The element to add the molecule XML elements to.
618 @type element: XML element object
619 """
620
621
622 mol_element = doc.createElement('mol_cont')
623 element.appendChild(mol_element)
624
625
626 mol_element.setAttribute('desc', 'Molecule container')
627 mol_element.setAttribute('name', str(self.mol_name))
628
629
630 fill_object_contents(doc, mol_element, object=self, blacklist=list(self.__class__.__dict__.keys()))
631
632
633
635 """List type data container for holding the different molecules of one model."""
636
638 """The string representation of the object.
639
640 Rather than using the standard Python conventions (either the string representation of the
641 value or the "<...desc...>" notation), a rich-formatted description of the object is given.
642 """
643
644 text = "Molecules.\n\n"
645 text = text + "%-8s%-8s" % ("Index", "Name") + "\n"
646 for i in range(len(self)):
647 text = text + "%-8i%-8s" % (i, self[i].mol_name) + "\n"
648 return text
649
650
651 - def add_item(self, mol_name=None, mol_cont=None):
652 """Append the given MolContainer instance to the MolList.
653
654 @keyword mol_name: The molecule number.
655 @type mol_name: int
656 @keyword mol_cont: The data structure for the molecule.
657 @type mol_cont: MolContainer instance
658 @return: The new molecule container.
659 @rtype: MolContainer instance
660 """
661
662
663 if len(self) and self.is_empty():
664 self[0].mol_name = mol_name
665
666
667 else:
668
669 for i in range(len(self)):
670 if self[i].mol_name == mol_name:
671 raise RelaxError("The molecule '%s' already exists." % mol_name)
672
673
674 self.append(mol_cont)
675
676
677 self[-1].mol_name = mol_name
678
679
680 return self[-1]
681
682
684 """Method for testing if this MolList object is empty.
685
686 @return: True if this list only has one MolContainer and the molecule name has not
687 been set, False otherwise.
688 @rtype: bool
689 """
690
691
692 if len(self) == 0:
693 return True
694
695
696 if len(self) == 1 and hasattr(self[0], 'is_empty') and self[0].is_empty():
697 return True
698
699
700 return False
701
702
703 - def from_xml(self, mol_nodes, file_version=1):
704 """Recreate a molecule list data structure from the XML molecule nodes.
705
706 @param mol_nodes: The molecule XML nodes.
707 @type mol_nodes: xml.dom.minicompat.NodeList instance
708 @keyword file_version: The relax XML version of the XML file.
709 @type file_version: int
710 """
711
712
713 if not self.is_empty():
714 raise RelaxFromXMLNotEmptyError(self.__class__.__name__)
715
716
717 for mol_node in mol_nodes:
718
719 mol_cont = MolContainer()
720
721
722 name = mol_node.getAttribute('name')
723 if name == 'None':
724 name = None
725
726
727 self.add_item(mol_name=name, mol_cont=mol_cont)
728
729
730 self[-1].from_xml(mol_node, file_version=file_version)
731
732
733 - def merge_item(self, mol_name=None, mol_cont=None):
734 """Mege the given MolContainer instance into a pre-existing molecule container.
735
736 @keyword mol_name: The molecule number.
737 @type mol_name: int
738 @keyword mol_cont: The data structure for the molecule.
739 @type mol_cont: MolContainer instance
740 @return: The new molecule container.
741 @rtype: MolContainer instance
742 """
743
744
745 index = None
746 for i in range(len(self)):
747 if self[i].mol_name == mol_name:
748 index = i
749 break
750
751
752 if index == None:
753 raise RelaxError("The molecule '%s' to merge with cannot be found." % mol_name)
754
755
756 self[index].merge(mol_cont)
757
758
759 return self[index]
760
761
762 - def to_xml(self, doc, element):
763 """Create XML elements for each molecule.
764
765 @param doc: The XML document object.
766 @type doc: xml.dom.minidom.Document instance
767 @param element: The element to add the molecule XML elements to.
768 @type element: XML element object
769 """
770
771
772 for i in range(len(self)):
773
774 self[i].to_xml(doc, element)
775