1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """The objects representing molecules in the internal structural object."""
24
25
26 from re import search
27 from string import digits
28 from warnings import warn
29
30
31 from lib.errors import RelaxError, RelaxFromXMLNotEmptyError
32 from lib.periodic_table import periodic_table
33 from lib.structure import pdb_read
34 from lib.warnings import RelaxWarning
35 from lib.xml import fill_object_contents, xml_to_object
36
37
39 """The container for the molecular information.
40
41 The structural data object for this class is a container possessing a number of different arrays
42 corresponding to different structural information. These objects include:
43
44 - atom_num: The atom name.
45 - atom_name: The atom name.
46 - bonded: Each element an array of bonded atom indices.
47 - chain_id: The chain ID.
48 - element: The element symbol.
49 - pdb_record: The optional PDB record name (one of ATOM, HETATM, or TER).
50 - res_name: The residue name.
51 - res_num: The residue number.
52 - seg_id: The segment ID.
53 - x: The x coordinate of the atom.
54 - y: The y coordinate of the atom.
55 - z: The z coordinate of the atom.
56
57 All arrays should be of equal length so that an atom index can retrieve all the corresponding
58 data. Only the atom identification string is compulsory, all other arrays can contain None.
59 """
60
61
63 """Initialise the molecular container."""
64
65
66 self.atom_num = []
67
68
69 self.atom_name = []
70
71
72 self.bonded = []
73
74
75 self.chain_id = []
76
77
78 self.element = []
79
80
81 self.pdb_record = []
82
83
84 self.res_name = []
85
86
87 self.res_num = []
88
89
90 self.seg_id = []
91
92
93 self.x = []
94
95
96 self.y = []
97
98
99 self.z = []
100
101
103 """Find the atom index corresponding to the given atom number.
104
105 @param atom_num: The atom number to find the index of.
106 @type atom_num: int
107 @return: The atom index corresponding to the atom.
108 @rtype: int
109 """
110
111
112 for j in range(len(self.atom_num)):
113
114 if self.atom_num[j] == atom_num:
115 return j
116
117
119 """Try to determine the element from the PDB atom name.
120
121 @param atom_name: The PDB atom name.
122 @type atom_name: str
123 @return: The element name, or None if unsuccessful.
124 @rtype: str or None
125 """
126
127
128 element = atom_name.strip("'")
129
130
131 element = element.strip(digits)
132
133
134 table = {'C': ['CA', 'CB', 'CG', 'CD', 'CE', 'CH', 'CZ'],
135 'N': ['ND', 'NE', 'NH', 'NZ'],
136 'H': ['HA', 'HB', 'HG', 'HD', 'HE', 'HH', 'HT', 'HZ'],
137 'O': ['OG', 'OD', 'OE', 'OH', 'OT'],
138 'S': ['SD', 'SG']
139 }
140
141
142 table['H'] += ['QA', 'QB', 'QD', 'QE', 'QG', 'QH', 'QQD', 'QQG', 'QR', 'QZ']
143
144
145 for key in table:
146 if element in table[key]:
147 element = key
148 break
149
150
151 if periodic_table.has_element(symbol=element):
152 return element
153
154
155 warn(RelaxWarning("Cannot determine the element associated with atom '%s'." % atom_name))
156
157
159 """Parse the Gaussian log record string and return an array of the corresponding atomic information.
160
161 The format of the Gaussian log records is::
162 __________________________________________________________________________________________
163 | | | |
164 | Columns | Data type | Description |
165 |_________|______________|_______________________________________________________________|
166 | | | |
167 | 1 | int | "Center Number" - the sequential atom number. |
168 | 2 | int | "Atomic Number" - the atomic number. |
169 | 3 | int | "Atomic Type" - the atomic type? |
170 | 4 | float | X coordinate in Angstrom |
171 | 5 | float | Y coordinate in Angstrom |
172 | 6 | float | Z coordinate in Angstrom |
173 |_________|______________|_______________________________________________________________|
174
175
176 @param record: The single line Gaussian record.
177 @type record: str
178 @return: The list of atomic information
179 @rtype: list of str
180 """
181
182
183 if search("---------", record):
184 return None
185 if search("Center", record):
186 return None
187 if search("Number", record):
188 return None
189
190
191 word = record.split()
192
193
194 if len(word) == 6:
195
196 atom_number = int(word[0])
197 atomic_num = int(word[1])
198 x = float(word[3])
199 y = float(word[4])
200 z = float(word[5])
201
202
203 return atom_number, atomic_num, x, y, z
204
205
207 """Parse the XYZ record string and return an array of the corresponding atomic information.
208
209 The format of the XYZ records is::
210 __________________________________________________________________________________________
211 | | | | |
212 | Columns | Data type | Field | Definition |
213 |_________|______________|______________|________________________________________________|
214 | | | | |
215 | 1 | String | element | |
216 | 2 | Real | x | Orthogonal coordinates for X in Angstroms |
217 | 3 | Real | y | Orthogonal coordinates for Y in Angstroms |
218 | 4 | Real | z | Orthogonal coordinates for Z in Angstroms |
219 |_________|______________|______________|________________________________________________|
220
221
222 @param record: The single line PDB record.
223 @type record: str
224 @return: The list of atomic information
225 @rtype: list of str
226 """
227
228
229 fields = []
230 word = record.split()
231
232
233 if len(word)==4:
234
235 fields.append(word[0])
236 fields.append(word[1])
237 fields.append(word[2])
238 fields.append(word[3])
239
240
241 for i in range(len(fields)):
242
243 fields[i] = fields[i].strip()
244
245
246 if fields[i] == '':
247 fields[i] = None
248
249
250 if fields[1]:
251 fields[1] = float(fields[1])
252 if fields[2]:
253 fields[2] = float(fields[2])
254 if fields[3]:
255 fields[3] = float(fields[3])
256
257
258 return fields
259
260
262 """Sort all structural data."""
263
264
265 indices = list(range(len(self.atom_name)))
266 indices.sort(key=self._sort_key)
267
268
269 self.atom_num = [self.atom_num[i] for i in indices]
270 self.atom_name = [self.atom_name[i] for i in indices]
271 self.bonded = [self.bonded[i] for i in indices]
272 self.chain_id = [self.chain_id[i] for i in indices]
273 self.element = [self.element[i] for i in indices]
274 self.pdb_record = [self.pdb_record[i] for i in indices]
275 self.res_name = [self.res_name[i] for i in indices]
276 self.res_num = [self.res_num[i] for i in indices]
277 self.seg_id = [self.seg_id[i] for i in indices]
278 self.x = [self.x[i] for i in indices]
279 self.y = [self.y[i] for i in indices]
280 self.z = [self.z[i] for i in indices]
281
282
283 for i in range(len(self.bonded)):
284 for j in range(len(self.bonded[i])):
285 self.bonded[i][j] = indices.index(self.bonded[i][j])
286
287
289 """Return the information for sorting the sequence data."""
290
291
292 if self.res_num[i] == None:
293 return 0
294
295
296 return self.res_num[i]
297
298
299 - def atom_add(self, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):
300 """Method for adding an atom to the structural data object.
301
302 This method will create the key-value pair for the given atom.
303
304
305 @keyword atom_name: The atom name, e.g. 'H1'.
306 @type atom_name: str or None
307 @keyword res_name: The residue name.
308 @type res_name: str or None
309 @keyword res_num: The residue number.
310 @type res_num: int or None
311 @keyword pos: The position vector of coordinates.
312 @type pos: list (length = 3)
313 @keyword element: The element symbol.
314 @type element: str or None
315 @keyword atom_num: The atom number.
316 @type atom_num: int or None
317 @keyword chain_id: The chain identifier.
318 @type chain_id: str or None
319 @keyword segment_id: The segment identifier.
320 @type segment_id: str or None
321 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'.
322 @type pdb_record: str or None
323 @return: The index of the added atom.
324 @rtype: int
325 """
326
327
328 self.atom_num.append(atom_num)
329 self.atom_name.append(atom_name)
330 self.bonded.append([])
331 self.chain_id.append(chain_id)
332 self.element.append(element)
333 self.pdb_record.append(pdb_record)
334 self.res_name.append(res_name)
335 self.res_num.append(res_num)
336 self.seg_id.append(segment_id)
337 self.x.append(pos[0])
338 self.y.append(pos[1])
339 self.z.append(pos[2])
340
341
342 return len(self.atom_num) - 1
343
344
346 """Method for connecting two atoms within the data structure object.
347
348 This method will append index2 to the array at bonded[index1] and vice versa.
349
350
351 @keyword index1: The index of the first atom.
352 @type index1: int
353 @keyword index2: The index of the second atom.
354 @type index2: int
355 """
356
357
358 if index2 not in self.bonded[index1]:
359 self.bonded[index1].append(index2)
360 if index1 not in self.bonded[index2]:
361 self.bonded[index2].append(index1)
362
363
365 """Method for generating a complete Structure_container object from the given Gaussian log records.
366
367 @param records: A list of structural Gaussian log records.
368 @type records: list of str
369 """
370
371
372 for record in records:
373
374 data = self._parse_gaussian_record(record)
375
376
377 if data == None:
378 continue
379
380
381 atom_number, atomic_num, x, y, z = data
382
383
384 atom_name = periodic_table.lookup_symbol(atomic_num)
385
386
387 self.atom_add(atom_name=atom_name, atom_num=atom_number, pos=[x, y, z], element=atom_name)
388
389
391 """Method for generating a complete Structure_container object from the given PDB records.
392
393 @param records: A list of structural PDB records.
394 @type records: list of str
395 @keyword alt_loc_select: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use.
396 @type alt_loc_select: str or None
397 @keyword reset_serial: A flag which if True will cause the first serial number (or atom number) to be reset to 1, and all other numbers shifted.
398 @type reset_serial: bool
399 """
400
401
402 water = []
403 missing_connect = []
404 number_offset = None
405 for record in records:
406
407 if not record or record == '\n':
408 continue
409
410
411 if record[:4] == 'ATOM' or record[:6] == 'HETATM':
412
413 if record[:4] == 'ATOM':
414 record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.atom(record)
415 if record[:6] == 'HETATM':
416 record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.hetatm(record)
417
418
419 if reset_serial:
420
421 if number_offset == None:
422 number_offset = serial - 1
423
424
425 serial -= number_offset
426
427
428 if res_name == 'HOH':
429 water.append(res_seq)
430 continue
431
432
433 if alt_loc != None:
434
435 if alt_loc_select == None:
436 raise RelaxError("Multiple alternate location indicators are present in the PDB file, but the desired coordinate set has not been specified.")
437
438
439 if alt_loc != alt_loc_select:
440 continue
441
442
443 if not element:
444 element = self._det_pdb_element(name)
445
446
447 self.atom_add(pdb_record=record_type, atom_num=serial, atom_name=name, res_name=res_name, res_num=res_seq, pos=[x, y, z], element=element)
448
449
450 if record[:6] == 'CONECT':
451
452 record_type, serial, bonded1, bonded2, bonded3, bonded4 = pdb_read.conect(record)
453
454
455 if reset_serial:
456 serial -= number_offset
457 bonded1 -= number_offset
458 if bonded2 != None:
459 bonded2 -= number_offset
460 if bonded3 != None:
461 bonded3 -= number_offset
462 if bonded4 != None:
463 bonded4 -= number_offset
464
465
466 for bonded in [bonded1, bonded2, bonded3, bonded4]:
467
468 if not bonded:
469 continue
470
471
472 serial_index = self._atom_index(serial)
473 bonded_index = self._atom_index(bonded)
474
475
476 if serial_index == None:
477 if serial not in missing_connect:
478 missing_connect.append(serial)
479 continue
480 if bonded_index == None:
481 if bonded not in missing_connect:
482 missing_connect.append(bonded)
483 continue
484
485
486 self.atom_connect(index1=serial_index, index2=bonded_index)
487
488
489 if len(missing_connect):
490 missing_connect.sort()
491 warn(RelaxWarning("The following atom numbers from the CONECT records cannot be found within the ATOM and HETATM records: %s." % missing_connect))
492 if len(water):
493 warn(RelaxWarning("Skipping the water molecules HOH %s." % water))
494
495
497 """Method for generating a complete Structure_container object from the given xyz records.
498
499 @param records: A list of structural xyz records.
500 @type records: list of str
501 """
502
503
504 atom_number = 1
505
506
507 for record in records:
508
509 record = self._parse_xyz_record(record)
510
511
512 if not record:
513 continue
514
515
516 if len(record) == 4:
517
518 self.atom_add(atom_name=record[0], atom_num=atom_number, pos=[record[1], record[2], record[3]], element=record[0])
519
520
521 atom_number = atom_number + 1
522
523
524 - def from_xml(self, mol_node, file_version=1):
525 """Recreate the MolContainer from the XML molecule node.
526
527 @param mol_node: The molecule XML node.
528 @type mol_node: xml.dom.minicompat.NodeList instance
529 @keyword file_version: The relax XML version of the XML file.
530 @type file_version: int
531 """
532
533
534 xml_to_object(mol_node, self, file_version=file_version)
535
536
538 """Check if the container is empty."""
539
540
541 if hasattr(self, 'file_name'): return False
542 if hasattr(self, 'file_path'): return False
543 if hasattr(self, 'file_mol_num'): return False
544 if hasattr(self, 'file_model'): return False
545
546
547 if not self.atom_num == []: return False
548 if not self.atom_name == []: return False
549 if not self.bonded == []: return False
550 if not self.chain_id == []: return False
551 if not self.element == []: return False
552 if not self.pdb_record == []: return False
553 if not self.res_name == []: return False
554 if not self.res_num == []: return False
555 if not self.seg_id == []: return False
556 if not self.x == []: return False
557 if not self.y == []: return False
558 if not self.z == []: return False
559
560
561 return True
562
563
565 """Return the number of the last residue.
566
567 @return: The last residue number.
568 @rtype: int
569 """
570
571
572 if not len(self.res_num):
573 return 0
574
575
576 return self.res_num[-1]
577
578
580 """Generator method for looping over the individual residues of the molecule.
581
582 @return: The residue name and number.
583 @rtype: str, int
584 """
585
586
587 last_res = None
588 for i in range(len(self.atom_num)):
589
590 if self.res_num[i] != last_res:
591 last_res = self.res_num[i]
592 yield self.res_name[i], self.res_num[i]
593
594
595 - def merge(self, mol_cont=None):
596 """Merge the contents of the given molecule container into here.
597
598 @keyword mol_cont: The data structure for the molecule to merge.
599 @type mol_cont: MolContainer instance
600 """
601
602
603 curr_index = len(self.atom_num)
604
605
606 for i in range(len(mol_cont.atom_num)):
607
608 self.atom_add(atom_num=curr_index+i+1, atom_name=mol_cont.atom_name[i], res_name=mol_cont.res_name[i], res_num=mol_cont.res_num[i], pos=[mol_cont.x[i], mol_cont.y[i], mol_cont.z[i]], element=mol_cont.element[i], chain_id=mol_cont.chain_id[i], pdb_record=mol_cont.pdb_record[i])
609
610
611 for j in range(len(mol_cont.bonded[i])):
612 self.atom_connect(index1=i+curr_index+1, index2=mol_cont.bonded[i][j]+curr_index+1)
613
614
615 - def to_xml(self, doc, element):
616 """Create XML elements for the contents of this molecule container.
617
618 @param doc: The XML document object.
619 @type doc: xml.dom.minidom.Document instance
620 @param element: The element to add the molecule XML elements to.
621 @type element: XML element object
622 """
623
624
625 mol_element = doc.createElement('mol_cont')
626 element.appendChild(mol_element)
627
628
629 mol_element.setAttribute('desc', 'Molecule container')
630 mol_element.setAttribute('name', str(self.mol_name))
631
632
633 fill_object_contents(doc, mol_element, object=self, blacklist=list(self.__class__.__dict__.keys()))
634
635
636
638 """List type data container for holding the different molecules of one model."""
639
641 """The string representation of the object.
642
643 Rather than using the standard Python conventions (either the string representation of the
644 value or the "<...desc...>" notation), a rich-formatted description of the object is given.
645 """
646
647 text = "Molecules.\n\n"
648 text = text + "%-8s%-8s" % ("Index", "Name") + "\n"
649 for i in range(len(self)):
650 text = text + "%-8i%-8s" % (i, self[i].mol_name) + "\n"
651 return text
652
653
654 - def add_item(self, mol_name=None, mol_cont=None):
655 """Append the given MolContainer instance to the MolList.
656
657 @keyword mol_name: The molecule number.
658 @type mol_name: int
659 @keyword mol_cont: The data structure for the molecule.
660 @type mol_cont: MolContainer instance
661 @return: The new molecule container.
662 @rtype: MolContainer instance
663 """
664
665
666 if len(self) and self.is_empty():
667 self[0].mol_name = mol_name
668
669
670 else:
671
672 for i in range(len(self)):
673 if self[i].mol_name == mol_name:
674 raise RelaxError("The molecule '%s' already exists." % mol_name)
675
676
677 self.append(mol_cont)
678
679
680 self[-1].mol_name = mol_name
681
682
683 return self[-1]
684
685
687 """Method for testing if this MolList object is empty.
688
689 @return: True if this list only has one MolContainer and the molecule name has not
690 been set, False otherwise.
691 @rtype: bool
692 """
693
694
695 if len(self) == 0:
696 return True
697
698
699 if len(self) == 1 and hasattr(self[0], 'is_empty') and self[0].is_empty():
700 return True
701
702
703 return False
704
705
706 - def from_xml(self, mol_nodes, file_version=1):
707 """Recreate a molecule list data structure from the XML molecule nodes.
708
709 @param mol_nodes: The molecule XML nodes.
710 @type mol_nodes: xml.dom.minicompat.NodeList instance
711 @keyword file_version: The relax XML version of the XML file.
712 @type file_version: int
713 """
714
715
716 if not self.is_empty():
717 raise RelaxFromXMLNotEmptyError(self.__class__.__name__)
718
719
720 for mol_node in mol_nodes:
721
722 mol_cont = MolContainer()
723
724
725 name = mol_node.getAttribute('name')
726 if name == 'None':
727 name = None
728
729
730 self.add_item(mol_name=name, mol_cont=mol_cont)
731
732
733 self[-1].from_xml(mol_node, file_version=file_version)
734
735
736 - def merge_item(self, mol_name=None, mol_cont=None):
737 """Mege the given MolContainer instance into a pre-existing molecule container.
738
739 @keyword mol_name: The molecule number.
740 @type mol_name: int
741 @keyword mol_cont: The data structure for the molecule.
742 @type mol_cont: MolContainer instance
743 @return: The new molecule container.
744 @rtype: MolContainer instance
745 """
746
747
748 index = None
749 for i in range(len(self)):
750 if self[i].mol_name == mol_name:
751 index = i
752 break
753
754
755 if index == None:
756 raise RelaxError("The molecule '%s' to merge with cannot be found." % mol_name)
757
758
759 self[index].merge(mol_cont)
760
761
762 return self[index]
763
764
765 - def to_xml(self, doc, element):
766 """Create XML elements for each molecule.
767
768 @param doc: The XML document object.
769 @type doc: xml.dom.minidom.Document instance
770 @param element: The element to add the molecule XML elements to.
771 @type element: XML element object
772 """
773
774
775 for i in range(len(self)):
776
777 self[i].to_xml(doc, element)
778