1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """Module containing the internal relax structural object."""
25
26
27 from numpy import array, dot, float64, linalg, zeros
28 import os
29 from os import F_OK, access
30 from re import search
31 from string import digits, split, strip, upper
32 from warnings import warn
33
34
35 from api_base import Base_struct_API, ModelList, Displacements
36 from data.relax_xml import fill_object_contents, xml_to_object
37 from generic_fns import pipes, relax_re
38 from generic_fns.mol_res_spin import spin_loop
39 from generic_fns.mol_res_spin import Selection
40 from relax_errors import RelaxError, RelaxNoneIntError, RelaxNoPdbError
41 from relax_io import file_root, open_read_file
42 from relax_warnings import RelaxWarning
43
44
45
47 """The internal relax structural data object.
48
49 The structural data object for this class is a container possessing a number of different arrays
50 corresponding to different structural information. These objects are described in the
51 structural container docstring.
52 """
53
54
55 id = 'internal'
56
57
59 """Find the atom named attached_atom directly bonded to the atom located at the index.
60
61 @param attached_atom: The name of the attached atom to return.
62 @type attached_atom: str
63 @param index: The index of the atom which the attached atom is attached to.
64 @type index: int
65 @param mol: The molecule container.
66 @type mol: MolContainer instance
67 @return: A tuple of information about the bonded atom.
68 @rtype: tuple consisting of the atom number (int), atom name (str), element
69 name (str), and atomic position (Numeric array of len 3)
70 """
71
72
73 bonded_found = False
74
75
76 if not mol.bonded[index]:
77
78 if not hasattr(mol, 'type'):
79 self._mol_type(mol)
80
81
82 if mol.type == 'protein':
83 self._protein_connect(mol)
84
85
86 else:
87 self._find_bonded_atoms(index, mol, radius=2)
88
89
90 matching_list = []
91 for bonded_index in mol.bonded[index]:
92 if relax_re.search(mol.atom_name[bonded_index], attached_atom):
93 matching_list.append(bonded_index)
94 num_attached = len(matching_list)
95
96
97 if num_attached > 1:
98
99 matching_names = []
100 for i in matching_list:
101 matching_names.append(mol.atom_name[i])
102
103
104 return None, None, None, None, None, 'More than one attached atom found: ' + repr(matching_names)
105
106
107 if num_attached == 0:
108 if relax_re.search('@*', attached_atom):
109 matching_list = []
110 bonded_num=[]
111 bonded_name=[]
112 element=[]
113 pos=[]
114 for spin, mol_name, res_num, res_name in spin_loop(selection=attached_atom, full_info=True):
115 bonded_num.append(spin.num)
116 bonded_name.append(spin.name)
117 element.append(spin.element)
118 pos.append(spin.pos)
119 if len(bonded_num) == 1:
120 return bonded_num[0], bonded_name[0], element[0], pos[0], attached_atom, None
121 elif len(bonded_num) > 1:
122
123 return None, None, None, None, None, 'More than one attached atom found: ' + repr(matching_names)
124 elif len(bonded_num) > 1:
125
126 return None, None, None, None, None, "No attached atom could be found"
127 else:
128 return None, None, None, None, None, "No attached atom could be found"
129
130
131 index = matching_list[0]
132 bonded_num = mol.atom_num[index]
133 bonded_name = mol.atom_name[index]
134 element = mol.element[index]
135 pos = [mol.x[index], mol.y[index], mol.z[index]]
136 attached_name = mol.atom_name[index]
137
138
139 return bonded_num, bonded_name, element, pos, attached_name, None
140
141
143 """Find all atoms within a sphere and say that they are attached to the central atom.
144
145 The found atoms will be added to the 'bonded' data structure.
146
147
148 @param index: The index of the central atom.
149 @type index: int
150 @param mol: The molecule container.
151 @type mol: MolContainer instance
152 """
153
154
155 centre = array([mol.x[index], mol.y[index], mol.z[index]], float64)
156
157
158 dist_list = []
159 connect_list = {}
160 element_list = {}
161 for i in xrange(len(mol.atom_num)):
162
163 if mol.element[index] == 'H' and mol.element[i] == 'H':
164 continue
165
166
167 pos = array([mol.x[i], mol.y[i], mol.z[i]], float64)
168
169
170 dist = linalg.norm(centre-pos)
171
172
173 if dist < radius:
174
175 dist_list.append(dist)
176
177
178 connect_list[dist] = i
179
180
181 element_list[dist] = mol.element[i]
182
183
184 max_conn = 1000
185 if mol.element[index] == 'H':
186 max_conn = 1
187 elif mol.element[index] == 'O':
188 max_conn = 2
189 elif mol.element[index] == 'N':
190 max_conn = 3
191 elif mol.element[index] == 'C':
192 max_conn = 4
193
194
195 dist_list.sort()
196
197
198 for i in range(min(max_conn, len(dist_list))):
199 mol.atom_connect(index, connect_list[dist_list[i]])
200
201
203 """Return the chemical name corresponding to the given residue ID.
204
205 The following names are currently returned::
206 ________________________________________________
207 | | |
208 | hetID | Chemical name |
209 |________|_____________________________________|
210 | | |
211 | TNS | Tensor |
212 | COM | Centre of mass |
213 | AXS | Tensor axes |
214 | SIM | Monte Carlo simulation tensor axes |
215 | PIV | Pivot point |
216 | CON | Cone object |
217 | AVE | Average vector |
218 |________|_____________________________________|
219
220 For any other residues, no description is returned.
221
222 @param hetID: The residue ID.
223 @type hetID: str
224 @return: The chemical name.
225 @rtype: str or None
226 """
227
228
229 if hetID == 'TNS':
230 return 'Tensor'
231
232
233 if hetID == 'COM':
234 return 'Centre of mass'
235
236
237 if hetID == 'AXS':
238 return 'Tensor axes'
239
240
241 if hetID == 'SIM':
242 return 'Monte Carlo simulation tensor axes'
243
244
245 if hetID == 'PIV':
246 return 'Pivot point'
247
248
249 if hetID == 'CON':
250 return 'Cone'
251
252
253 if hetID == 'AVE':
254 return 'Average vector'
255
256
258 """Generator function for looping over the models in the PDB file.
259
260 @param file_path: The full path of the PDB file.
261 @type file_path: str
262 @return: The model number and all the records for that model.
263 @rtype: tuple of int and array of str
264 """
265
266
267 file = open_read_file(file_path)
268 lines = file.readlines()
269 file.close()
270
271
272 if lines == []:
273 raise RelaxError("The PDB file is empty.")
274
275
276 model = None
277 records = []
278
279
280 for i in xrange(len(lines)):
281
282 if search('^MODEL', lines[i]):
283 try:
284 model = int(split(lines[i])[1])
285 except:
286 raise RelaxError("The MODEL record " + repr(lines[i]) + " is corrupt, cannot read the PDB file.")
287
288
289 if not (search('^ATOM', lines[i]) or search('^HETATM', lines[i])) and not len(records):
290 continue
291
292
293 if search('^ENDMDL', lines[i]):
294
295 yield model, records
296
297
298 records = []
299
300
301 continue
302
303
304 records.append(lines[i])
305
306
307 if len(records):
308 yield model, records
309
310
312 """Generator function for looping over the models in the XYZ file.
313
314 @param file_path: The full path of the XYZ file.
315 @type file_path: str
316 @return: The model number and all the records for that model.
317 @rtype: tuple of int and array of str
318 """
319
320
321 file = open_read_file(file_path)
322 lines = file.readlines()
323 file.close()
324
325
326 if lines == []:
327 raise RelaxError("The XYZ file is empty.")
328
329
330 total_atom = 0
331 model = 0
332 records = []
333
334
335 for i in xrange(len(lines)):
336 num=0
337 word=split(lines[i])
338
339 if (i==0) and (len(word)==1):
340 try:
341 total_atom = int(word[0])
342 num = 1
343 except:
344 raise RelaxError("The MODEL record " + repr(lines[i]) + " is corrupt, cannot read the XYZ file.")
345
346
347 if (len(records) == total_atom):
348
349 yield records
350
351
352 records = []
353
354
355 if (len(word) != 4):
356 continue
357
358
359 records.append(lines[i])
360
361
362 if len(records):
363 yield records
364
365
367 """Generator function for looping over the molecules in the PDB records of a model.
368
369 @param records: The list of PDB records for the model, or if no models exist the entire
370 PDB file.
371 @type records: list of str
372 @return: The molecule number and all the records for that molecule.
373 @rtype: tuple of int and list of str
374 """
375
376
377 if records == []:
378 raise RelaxError("There are no PDB records for this model.")
379
380
381 mol_num = 1
382 mol_records = []
383 end = False
384
385
386 for i in range(len(records)):
387
388 if search('^END', records[i]):
389 break
390
391
392 if search('^ENDMDL', records[i]):
393 end = True
394
395
396 elif i < len(records)-1 and search('^TER', records[i]) and not search('^HETATM', records[i+1]):
397 end = True
398
399
400 elif i < len(records)-1 and search('^HETATM', records[i]) and search('^ATOM', records[i+1]):
401 end = True
402
403
404 if end:
405
406 yield mol_num, mol_records
407
408
409 mol_records = []
410
411
412 mol_num = mol_num + 1
413
414
415 end = False
416
417
418 continue
419
420
421 mol_records.append(records[i])
422
423
424 if len(mol_records):
425 yield mol_num, mol_records
426
427
429 """Check the validity of the data arrays in the given structure object.
430
431 @param struct: The structural object.
432 @type struct: Structure_container instance
433 """
434
435
436 num = len(struct.atom_name)
437
438
439 if len(struct.bonded) != num and len(struct.chain_id) != num and len(struct.element) != num and len(struct.pdb_record) != num and len(struct.res_name) != num and len(struct.res_num) != num and len(struct.seg_id) != num and len(struct.x) != num and len(struct.y) != num and len(struct.z) != num:
440 raise RelaxError("The structural data is invalid.")
441
442
444 """Determine the type of molecule.
445
446 @param mol: The molecule data container.
447 @type mol: MolContainer instance
448 """
449
450
451 aa = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL']
452
453
454 mol.type = 'other'
455
456
457 for res in mol.res_name:
458
459 if res in aa:
460
461 mol.type = 'protein'
462 return
463
464
466 """Set up the connectivities for the protein.
467
468 @param mol: The molecule data container.
469 @type mol: MolContainer instance
470 """
471
472
473 curr_res_num = None
474 res_atoms = []
475
476
477 for i in range(len(mol.atom_num)):
478
479 if mol.res_num[i] != curr_res_num:
480
481 if len(res_atoms):
482 self._protein_intra_connect(mol, res_atoms)
483
484
485 curr_res_num = mol.res_num[i]
486
487
488 res_atoms = []
489
490
491 res_atoms.append(i)
492
493
494 if i == len(mol.atom_num) - 1 and len(res_atoms):
495 self._protein_intra_connect(mol, res_atoms)
496
497
499 """Set up the connectivities for the protein.
500
501 @param mol: The molecule data container.
502 @type mol: MolContainer instance
503 @param res_atoms: The list of atom indices corresponding to the residue.
504 @type res_atoms: list of int
505 """
506
507
508 indices = {
509 'N': None,
510 'C': None,
511 'O': None,
512 'CA': None,
513 'HN': None,
514 'H': None,
515 'HA': None
516 }
517
518
519 for index in res_atoms:
520 if indices.has_key(mol.atom_name[index]):
521 indices[mol.atom_name[index]] = index
522
523
524 pairs = [
525 ['N', 'HN'],
526 ['N', 'H'],
527 ['N', 'CA'],
528 ['CA', 'HA'],
529 ['CA', 'C'],
530 ['C', 'O']
531 ]
532
533
534 for pair in pairs:
535 if indices[pair[0]] != None and indices[pair[1]] != None:
536 mol.atom_connect(indices[pair[0]], indices[pair[1]])
537
538
540 """Convert the data into a format for writing to file.
541
542 @param data: The data to convert to the required format.
543 @type data: anything
544 @keyword format: The format to convert to. This can be 'str', 'float', or 'int'.
545 @type format: str
546 @return: The converted version of the data.
547 @rtype: str
548 """
549
550
551 if format == 'str':
552
553 if data == None:
554 data = ''
555
556
557 if not isinstance(data, str):
558 data = repr(data)
559
560
561 if format == 'float':
562
563 if data == None:
564 data = 0.0
565
566
567 if not isinstance(data, float):
568 data = float(data)
569
570
571 return data
572
573
574 - def add_atom(self, mol_name=None, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):
575 """Add a new atom to the structural data object.
576
577 @keyword mol_name: The name of the molecule.
578 @type mol_name: str
579 @keyword atom_name: The atom name, e.g. 'H1'.
580 @type atom_name: str or None
581 @keyword res_name: The residue name.
582 @type res_name: str or None
583 @keyword res_num: The residue number.
584 @type res_num: int or None
585 @keyword pos: The position vector of coordinates.
586 @type pos: list (length = 3)
587 @keyword element: The element symbol.
588 @type element: str or None
589 @keyword atom_num: The atom number.
590 @type atom_num: int or None
591 @keyword chain_id: The chain identifier.
592 @type chain_id: str or None
593 @keyword segment_id: The segment identifier.
594 @type segment_id: str or None
595 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'.
596 @type pdb_record: str or None
597 """
598
599
600 pipes.test()
601
602
603 if len(self.structural_data) == 0:
604 self.add_model()
605
606
607 for model in self.structural_data:
608
609 mol = self.get_molecule(mol_name, model=model.num)
610
611
612 if mol == None:
613 self.add_molecule(name=mol_name)
614 mol = self.get_molecule(mol_name, model=model.num)
615
616
617 mol.atom_add(atom_name=atom_name, res_name=res_name, res_num=res_num, pos=pos, element=element, atom_num=atom_num, chain_id=chain_id, segment_id=segment_id, pdb_record=pdb_record)
618
619
620 - def add_model(self, model=None, coords_from=None):
621 """Add a new model to the store.
622
623 The new model will be constructured with the structural information from the other models currently present. The coords_from argument allows the atomic positions to be taken from a certain model. If this argument is not set, then the atomic positions from the first model will be used.
624
625 @keyword model: The number of the model to create.
626 @type model: int or None
627 @keyword coords_from: The model number to take the coordinates from.
628 @type coords_from: int or None
629 @return: The model container.
630 @rtype: ModelContainer instance
631 """
632
633
634 if model != None:
635 for i in range(len(self.structural_data)):
636 if model == self.structural_data[i].num:
637 raise RelaxError("The model '%s' already exists." % model)
638
639
640 self.structural_data.add_item(model_num=model)
641
642
643 if coords_from == None:
644 coords_from = self.structural_data[0].num
645
646
647 for mol_name, res_num, res_name, atom_num, atom_name, element, pos in self.atom_loop(model_num=coords_from, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True):
648
649 self.add_atom(self, mol_name=mol_name, atom_name=atom_name, res_name=res_name, res_num=res_num, pos=pos, element=element, atom_num=atom_num)
650
651
652 return self.structural_data[-1]
653
654
656 """Add a new molecule to the store.
657
658 @keyword name: The molecule identifier string.
659 @type name: str
660 """
661
662
663 if len(self.structural_data) == 0:
664 self.add_model()
665
666
667 for i in range(len(self.structural_data)):
668
669 self.structural_data[i].mol.add_item(mol_name=name, mol_cont=MolContainer())
670
671
672 - def atom_loop(self, atom_id=None, str_id=None, model_num=None, model_num_flag=False, mol_name_flag=False, res_num_flag=False, res_name_flag=False, atom_num_flag=False, atom_name_flag=False, element_flag=False, pos_flag=False, ave=False):
673 """Generator function for looping over all atoms in the internal relax structural object.
674
675 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be yielded.
676 @type atom_id: str
677 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number. If None, then all structures will be looped over.
678 @type str_id: str, int, or None
679 @keyword model_num: Only loop over a specific model.
680 @type model_num: int or None
681 @keyword model_num_flag: A flag which if True will cause the model number to be yielded.
682 @type model_num_flag: bool
683 @keyword mol_name_flag: A flag which if True will cause the molecule name to be yielded.
684 @type mol_name_flag: bool
685 @keyword res_num_flag: A flag which if True will cause the residue number to be yielded.
686 @type res_num_flag: bool
687 @keyword res_name_flag: A flag which if True will cause the residue name to be yielded.
688 @type res_name_flag: bool
689 @keyword atom_num_flag: A flag which if True will cause the atom number to be yielded.
690 @type atom_num_flag: bool
691 @keyword atom_name_flag: A flag which if True will cause the atom name to be yielded.
692 @type atom_name_flag: bool
693 @keyword element_flag: A flag which if True will cause the element name to be yielded.
694 @type element_flag: bool
695 @keyword pos_flag: A flag which if True will cause the atomic position to be yielded.
696 @type pos_flag: bool
697 @keyword ave: A flag which if True will result in this method returning the average atom properties across all loaded structures.
698 @type ave: bool
699 @return: A tuple of atomic information, as described in the docstring.
700 @rtype: tuple consisting of optional molecule name (str), residue number (int), residue name (str), atom number (int), atom name(str), element name (str), and atomic position (array of len 3).
701 """
702
703
704 if not len(self.structural_data):
705 raise RelaxNoPdbError
706
707
708 sel_obj = None
709 if atom_id:
710 sel_obj = Selection(atom_id)
711
712
713 for model in self.model_loop(model_num):
714
715 for mol_index in range(len(model.mol)):
716 mol = model.mol[mol_index]
717
718
719 if sel_obj and not sel_obj.contains_mol(mol.mol_name):
720 continue
721
722
723 for i in xrange(len(mol.atom_name)):
724
725 if sel_obj and not sel_obj.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name):
726 continue
727
728
729 res_num = mol.res_num[i]
730 res_name = mol.res_name[i]
731 atom_num = mol.atom_num[i]
732 atom_name = mol.atom_name[i]
733 element = mol.element[i]
734 pos = zeros(3, float64)
735
736
737 if ave:
738
739 for model in self.model_loop():
740
741 mol = model.mol[mol_index]
742
743
744 if mol.atom_num[i] != atom_num:
745 raise RelaxError("The loaded structures do not contain the same atoms. The average structural properties can not be calculated.")
746
747
748 pos = pos + array([mol.x[i], mol.y[i], mol.z[i]], float64)
749
750
751 pos = pos / len(self.structural_data)
752 else:
753 pos = array([mol.x[i], mol.y[i], mol.z[i]], float64)
754
755
756 mol_name = mol.mol_name
757
758
759 atomic_tuple = ()
760 if model_num_flag:
761 if ave:
762 atomic_tuple = atomic_tuple + (None,)
763 else:
764 atomic_tuple = atomic_tuple + (model.num,)
765 if mol_name_flag:
766 atomic_tuple = atomic_tuple + (mol_name,)
767 if res_num_flag:
768 atomic_tuple = atomic_tuple + (res_num,)
769 if res_name_flag:
770 atomic_tuple = atomic_tuple + (res_name,)
771 if atom_num_flag:
772 atomic_tuple = atomic_tuple + (atom_num,)
773 if atom_name_flag:
774 atomic_tuple = atomic_tuple + (atom_name,)
775 if element_flag:
776 atomic_tuple = atomic_tuple + (element,)
777 if pos_flag:
778 atomic_tuple = atomic_tuple + (pos,)
779
780
781 yield atomic_tuple
782
783
784 if ave:
785 break
786
787
788 - def bond_vectors(self, attached_atom=None, model_num=None, mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, return_name=False, return_warnings=False):
789 """Find the bond vector between the atoms of 'attached_atom' and 'atom_id'.
790
791 @keyword attached_atom: The name of the bonded atom.
792 @type attached_atom: str
793 @keyword model_num: The model of which to return the vectors from. If not supplied and multiple models exist, then vectors from all models will be returned.
794 @type model_num: None or int
795 @keyword mol_name: The name of the molecule that attached_atom belongs to.
796 @type mol_name: str
797 @keyword res_num: The number of the residue that attached_atom belongs to.
798 @type res_num: str
799 @keyword res_name: The name of the residue that attached_atom belongs to.
800 @type res_name: str
801 @keyword spin_num: The number of the spin that attached_atom is attached to.
802 @type spin_num: str
803 @keyword spin_name: The name of the spin that attached_atom is attached to.
804 @type spin_name: str
805 @keyword return_name: A flag which if True will cause the name of the attached atom to be returned together with the bond vectors.
806 @type return_name: bool
807 @keyword return_warnings: A flag which if True will cause warning messages to be returned.
808 @type return_warnings: bool
809 @return: The list of bond vectors for each model.
810 @rtype: list of numpy arrays (or a tuple if return_name or return_warnings are set)
811 """
812
813
814 vectors = []
815 attached_name = None
816 warnings = None
817
818
819 for model in self.model_loop(model_num):
820
821 for mol in model.mol:
822
823 if mol_name and mol_name != mol.mol_name:
824 continue
825
826
827 index = None
828 for i in range(len(mol.atom_name)):
829
830 if (res_num != None and mol.res_num[i] != res_num) or (res_name != None and mol.res_name[i] != res_name):
831 continue
832
833
834 if (spin_num != None and mol.atom_num[i] != spin_num) or (spin_name != None and mol.atom_name[i] != spin_name):
835 continue
836
837
838 index = i
839 break
840
841
842 if index != None:
843
844 bonded_num, bonded_name, element, pos, attached_name, warnings = self._bonded_atom(attached_atom, index, mol)
845
846
847 if (bonded_num, bonded_name, element) == (None, None, None):
848 continue
849
850
851 vector = array(pos, float64) - array([mol.x[index], mol.y[index], mol.z[index]], float64)
852
853
854 vectors.append(vector)
855
856
857 else:
858 warnings = "Cannot find the atom in the structure"
859
860
861 data = (vectors,)
862 if return_name:
863 data = data + (attached_name,)
864 if return_warnings:
865 data = data + (warnings,)
866
867
868 return data
869
870
871 - def connect_atom(self, mol_name=None, index1=None, index2=None):
872 """Connect two atoms in the structural data object.
873
874 @keyword mol_name: The name of the molecule.
875 @type mol_name: str
876 @keyword index1: The global index of the first atom.
877 @type index1: str
878 @keyword index2: The global index of the first atom.
879 @type index2: str
880 """
881
882
883 pipes.test()
884
885
886 if self.get_molecule(mol_name) == None:
887 self.add_molecule(name=mol_name)
888
889
890 for model in self.structural_data:
891
892 mol = self.get_molecule(mol_name)
893
894
895 mol.atom_connect(index1=index1, index2=index2)
896
897
899 """Delete all the structural information."""
900
901
902 print("Deleting the following structural data:\n")
903 print(self.structural_data)
904
905
906 del self.structural_data
907
908
909 self.structural_data = ModelList()
910
911
913 """Return the molecule.
914
915 Only one model can be specified.
916
917
918 @param molecule: The molecule name.
919 @type molecule: int or None
920 @keyword model: The model number.
921 @type model: int or None
922 @raises RelaxError: If the model is not specified and there is more than one model loaded.
923 @return: The MolContainer corresponding to the molecule name and model number.
924 @rtype: MolContainer instance or None
925 """
926
927
928 if model == None and self.num_models() > 1:
929 raise RelaxError("The target molecule cannot be determined as there are %s models already present." % self.num_models())
930
931
932 if not isinstance(model, int) and not model == None:
933 raise RelaxNoneIntError
934
935
936 if not len(self.structural_data):
937 return
938
939
940 for model_cont in self.model_loop(model):
941
942 for mol in model_cont.mol:
943
944 if mol.mol_name == molecule:
945 return mol
946
947
948 - def load_pdb(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, verbosity=False):
949 """Method for loading structures from a PDB file.
950
951 @param file_path: The full path of the PDB file.
952 @type file_path: str
953 @keyword read_mol: The molecule(s) to read from the file, independent of model. The
954 molecules are determined differently by the different parsers, but
955 are numbered consecutively from 1. If set to None, then all
956 molecules will be loaded.
957 @type read_mol: None, int, or list of int
958 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None,
959 then the molecules will be automatically labelled based on the file
960 name or other information.
961 @type set_mol_name: None, str, or list of str
962 @keyword read_model: The PDB model to extract from the file. If set to None, then all
963 models will be loaded.
964 @type read_model: None, int, or list of int
965 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then
966 the PDB model numbers will be preserved, if they exist.
967 @type set_model_num: None, int, or list of int
968 @keyword verbosity: A flag which if True will cause messages to be printed.
969 @type verbosity: bool
970 @return: The status of the loading of the PDB file.
971 @rtype: bool
972 """
973
974
975 if verbosity:
976 print("\nInternal relax PDB parser.")
977
978
979 if not access(file_path, F_OK):
980
981 return False
982
983
984 path, file = os.path.split(file_path)
985
986
987 if read_mol and not isinstance(read_mol, list):
988 read_mol = [read_mol]
989 if set_mol_name and not isinstance(set_mol_name, list):
990 set_mol_name = [set_mol_name]
991 if read_model and not isinstance(read_model, list):
992 read_model = [read_model]
993 if set_model_num and not isinstance(set_model_num, list):
994 set_model_num = [set_model_num]
995
996
997 model_index = 0
998 orig_model_num = []
999 mol_conts = []
1000 for model_num, model_records in self._parse_models_pdb(file_path):
1001
1002 if read_model and model_num not in read_model:
1003 continue
1004
1005
1006 orig_model_num.append(model_num)
1007
1008
1009 mol_conts.append([])
1010 mol_index = 0
1011 orig_mol_num = []
1012 new_mol_name = []
1013 for mol_num, mol_records in self._parse_mols(model_records):
1014
1015 if read_mol and mol_num not in read_mol:
1016 continue
1017
1018
1019 if set_mol_name:
1020 new_mol_name.append(set_mol_name[mol_index])
1021 else:
1022
1023 num_struct = 0
1024 for model in self.structural_data:
1025 if not set_model_num or (model_index <= len(set_model_num) and set_model_num[model_index] == model.num):
1026 num_struct = len(model.mol)
1027
1028
1029 new_mol_name.append(file_root(file) + '_mol' + repr(mol_num+num_struct))
1030
1031
1032 orig_mol_num.append(mol_num)
1033
1034
1035 mol = MolContainer()
1036
1037
1038 mol.fill_object_from_pdb(mol_records)
1039
1040
1041 mol_conts[model_index].append(mol)
1042
1043
1044 mol_index = mol_index + 1
1045
1046
1047 model_index = model_index + 1
1048
1049
1050 if not len(mol_conts):
1051 warn(RelaxWarning("No structural data could be read from the file '%s'." % file_path))
1052 return False
1053
1054
1055 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=orig_mol_num, set_mol_name=new_mol_name, file_name=file, file_path=path)
1056
1057
1058 return True
1059
1060
1061 - def load_xyz(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, verbosity=False):
1062 """Method for loading structures from a XYZ file.
1063
1064 @param file_path: The full path of the XYZ file.
1065 @type file_path: str
1066 @keyword read_mol: The molecule(s) to read from the file, independent of model. The
1067 molecules are determined differently by the different parsers, but
1068 are numbered consecutively from 1. If set to None, then all
1069 molecules will be loaded.
1070 @type read_mol: None, int, or list of int
1071 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None,
1072 then the molecules will be automatically labelled based on the file
1073 name or other information.
1074 @type set_mol_name: None, str, or list of str
1075 @keyword read_model: The XYZ model to extract from the file. If set to None, then all
1076 models will be loaded.
1077 @type read_model: None, int, or list of int
1078 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then
1079 the XYZ model numbers will be preserved, if they exist.
1080 @type set_model_num: None, int, or list of int
1081 @keyword verbosity: A flag which if True will cause messages to be printed.
1082 @type verbosity: bool
1083 @return: The status of the loading of the XYZ file.
1084 @rtype: bool
1085 """
1086
1087
1088 if verbosity:
1089 print("\nInternal relax XYZ parser.")
1090
1091
1092 if not access(file_path, F_OK):
1093
1094 return False
1095
1096
1097 path, file = os.path.split(file_path)
1098
1099
1100 if read_mol and not isinstance(read_mol, list):
1101 read_mol = [read_mol]
1102 if set_mol_name and not isinstance(set_mol_name, list):
1103 set_mol_name = [set_mol_name]
1104 if read_model and not isinstance(read_model, list):
1105 read_model = [read_model]
1106 if set_model_num and not isinstance(set_model_num, list):
1107 set_model_num = [set_model_num]
1108
1109
1110 mol_index=0
1111 model_index = 0
1112 xyz_model_increment = 0
1113 orig_model_num = []
1114 mol_conts = []
1115 orig_mol_num = []
1116 new_mol_name = []
1117 for model_records in self._parse_models_xyz(file_path):
1118
1119 xyz_model_increment = xyz_model_increment +1
1120
1121
1122 if read_model and xyz_model_increment not in read_model:
1123 continue
1124
1125
1126 orig_model_num.append(model_index)
1127
1128
1129 if read_mol and mol_index not in read_mol:
1130 continue
1131
1132
1133 if set_mol_name:
1134 new_mol_name.append(set_mol_name[mol_index])
1135 else:
1136 if mol_index==0:
1137
1138 new_mol_name.append(file_root(file) + '_mol' + repr(mol_index+1))
1139
1140
1141 orig_mol_num.append(mol_index)
1142
1143
1144 mol = MolContainer()
1145
1146
1147 mol.fill_object_from_xyz(model_records)
1148
1149
1150 mol_conts.append([])
1151 mol_conts[model_index].append(mol)
1152
1153
1154 mol_index = mol_index + 1
1155
1156
1157 model_index = model_index + 1
1158
1159 orig_mol_num=[0]
1160
1161 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=orig_mol_num, set_mol_name=new_mol_name, file_name=file, file_path=path)
1162
1163
1164 return True
1165
1166
1167 - def rotate(self, R=None, origin=None, model=None, atom_id=None):
1168 """Rotate the structural information about the given origin.
1169
1170 @keyword R: The forwards rotation matrix.
1171 @type R: numpy 3D, rank-2 array
1172 @keyword origin: The origin of the rotation.
1173 @type origin: numpy 3D, rank-1 array
1174 @keyword model: The model to rotate. If None, all models will be rotated.
1175 @type model: int
1176 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used.
1177 @type atom_id: str or None
1178 """
1179
1180
1181 sel_obj = None
1182 if atom_id:
1183 sel_obj = Selection(atom_id)
1184
1185
1186 for model_cont in self.model_loop(model):
1187
1188 for mol in model_cont.mol:
1189
1190 if sel_obj and not sel_obj.contains_mol(mol.mol_name):
1191 continue
1192
1193
1194 for i in range(len(mol.atom_num)):
1195
1196 if sel_obj and not sel_obj.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name):
1197 continue
1198
1199
1200 vect = array([mol.x[i], mol.y[i], mol.z[i]], float64) - origin
1201
1202
1203 rot_vect = dot(R, vect)
1204
1205
1206 pos = rot_vect + origin
1207 mol.x[i] = pos[0]
1208 mol.y[i] = pos[1]
1209 mol.z[i] = pos[2]
1210
1211
1212 - def translate(self, T=None, model=None, atom_id=None):
1213 """Displace the structural information by the given translation vector.
1214
1215 @keyword T: The translation vector.
1216 @type T: numpy 3D, rank-1 array
1217 @keyword model: The model to rotate. If None, all models will be rotated.
1218 @type model: int
1219 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used.
1220 @type atom_id: str or None
1221 """
1222
1223
1224 sel_obj = None
1225 if atom_id:
1226 sel_obj = Selection(atom_id)
1227
1228
1229 for model_cont in self.model_loop(model):
1230
1231 for mol in model_cont.mol:
1232
1233 if sel_obj and not sel_obj.contains_mol(mol.mol_name):
1234 continue
1235
1236
1237 for i in range(len(mol.atom_num)):
1238
1239 if sel_obj and not sel_obj.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name):
1240 continue
1241
1242
1243 mol.x[i] = mol.x[i] + T[0]
1244 mol.y[i] = mol.y[i] + T[1]
1245 mol.z[i] = mol.z[i] + T[2]
1246
1247
1249 """Check that the models are consistent with each other.
1250
1251 This checks that the primary structure is identical between the models.
1252 """
1253
1254
1255 print("Validating models:")
1256
1257
1258 for i in range(len(self.structural_data)):
1259
1260 if len(self.structural_data[0].mol) != len(self.structural_data[i].mol):
1261 raise RelaxError("The number of molecules, %i, in model %i does not match the %i molecules of the first model." % (len(self.structural_data[i].mol), self.structural_data[i].num, len(self.structural_data[0].mol)))
1262
1263
1264 for j in range(len(self.structural_data[i].mol)):
1265
1266 mol = self.structural_data[i].mol[j]
1267 mol_ref = self.structural_data[0].mol[j]
1268
1269
1270 if mol.mol_name != mol_ref.mol_name:
1271 raise RelaxError("The molecule name '%s' of model %i does not match the name '%s' of the first model." % (mol.mol_name, self.structural_data[i].num, mol_ref.mol_name))
1272
1273
1274 for k in range(len(mol.atom_name)):
1275
1276 atom = "%-6s%5s %4s%1s%3s %1s%4s%1s %8s%8s%8s%6.2f%6.2f %4s%2s%2s" % ('ATOM', mol.atom_num[k], self._translate(mol.atom_name[k]), '', self._translate(mol.res_name[k]), self._translate(mol.chain_id[k]), self._translate(mol.res_num[k]), '', '#', '#', '#', 1.0, 0, self._translate(mol.seg_id[k]), self._translate(mol.element[k]), '')
1277 atom_ref = "%-6s%5s %4s%1s%3s %1s%4s%1s %8s%8s%8s%6.2f%6.2f %4s%2s%2s" % ('ATOM', mol_ref.atom_num[k], self._translate(mol_ref.atom_name[k]), '', self._translate(mol_ref.res_name[k]), self._translate(mol_ref.chain_id[k]), self._translate(mol_ref.res_num[k]), '', '#', '#', '#', 1.0, 0, self._translate(mol_ref.seg_id[k]), self._translate(mol_ref.element[k]), '')
1278
1279
1280 if atom != atom_ref:
1281 print(atom)
1282 print(atom_ref)
1283 raise RelaxError("The atoms of model %i do not match the first model." % self.structural_data[i].num)
1284
1285
1286 print("\tAll models are consistent")
1287
1288
1290 """Method for the creation of a PDB file from the structural data.
1291
1292 A number of PDB records including HET, HETNAM, FORMUL, HETATM, TER, CONECT, MASTER, and END
1293 are created. To create the non-standard residue records HET, HETNAM, and FORMUL, the data
1294 structure 'het_data' is created. It is an array of arrays where the first dimension
1295 corresponds to a different residue and the second dimension has the elements:
1296
1297 0. Residue number.
1298 1. Residue name.
1299 2. Chain ID.
1300 3. Total number of atoms in the residue.
1301 4. Number of H atoms in the residue.
1302 5. Number of C atoms in the residue.
1303
1304
1305 @param file: The PDB file object. This object must be writable.
1306 @type file: file object
1307 @keyword model_num: The model to place into the PDB file. If not supplied, then all
1308 models will be placed into the file.
1309 @type model_num: None or int
1310 """
1311
1312
1313 self.validate()
1314
1315
1316 num_hetatm = 0
1317 num_atom = 0
1318 num_ter = 0
1319 num_conect = 0
1320
1321
1322 print("\nCreating the PDB records\n")
1323
1324
1325 print("REMARK")
1326 file.write("REMARK 4 THIS FILE COMPLIES WITH FORMAT V. 3.1, 1-AUG-2007\n")
1327 file.write("REMARK 40 CREATED BY RELAX (HTTP://NMR-RELAX.COM)\n")
1328 num_remark = 2
1329
1330
1331 model_records = False
1332 for model in self.model_loop():
1333 if hasattr(model, 'num') and model.num != None:
1334 model_records = True
1335
1336
1337
1338
1339
1340
1341
1342 het_data = []
1343 het_data_coll = []
1344
1345
1346 index = 0
1347 for mol in self.structural_data[0].mol:
1348
1349 self._validate_data_arrays(mol)
1350
1351
1352 het_data.append([])
1353
1354
1355 for i in xrange(len(mol.atom_name)):
1356
1357 if mol.pdb_record[i] != 'HETATM' or mol.res_name[i] == None:
1358 continue
1359
1360
1361
1362 if not het_data[index] or not mol.res_num[i] == het_data[index][-1][0]:
1363 het_data[index].append([mol.res_num[i], mol.res_name[i], mol.chain_id[i], 0, []])
1364
1365
1366 if het_data[index][-1][2] == None:
1367 het_data[index][-1][2] = ''
1368
1369
1370 het_data[index][-1][3] = het_data[index][-1][3] + 1
1371
1372
1373 entry = False
1374 for j in xrange(len(het_data[index][-1][4])):
1375 if mol.element[i] == het_data[index][-1][4][j][0]:
1376 entry = True
1377
1378
1379 if not entry:
1380 het_data[index][-1][4].append([mol.element[i], 0])
1381
1382
1383 for j in xrange(len(het_data[index][-1][4])):
1384 if mol.element[i] == het_data[index][-1][4][j][0]:
1385 het_data[index][-1][4][j][1] = het_data[index][-1][4][j][1] + 1
1386
1387
1388 for i in xrange(len(het_data[index])):
1389
1390 found = False
1391 for j in xrange(len(het_data_coll)):
1392
1393 if het_data[index][i][0] == het_data_coll[j][0]:
1394
1395 found = True
1396
1397
1398 if het_data_coll[j][1] != het_data[index][i][1]:
1399 raise RelaxError("The " + repr(het_data[index][i][1]) + " residue name of hetrogen " + repr(het_data[index][i][0]) + " " + het_data[index][i][1] + " of structure " + repr(index) + " does not match the " + repr(het_data_coll[j][1]) + " name of the previous structures.")
1400
1401 elif het_data_coll[j][2] != het_data[index][i][2]:
1402 raise RelaxError("The hetrogen chain id " + repr(het_data[index][i][2]) + " does not match " + repr(het_data_coll[j][2]) + " of residue " + repr(het_data_coll[j][0]) + " " + het_data_coll[j][1] + " of the previous structures.")
1403
1404 elif het_data_coll[j][3] != het_data[index][i][3]:
1405 raise RelaxError("The " + repr(het_data[index][i][3]) + " atoms of hetrogen " + repr(het_data_coll[j][0]) + " " + het_data_coll[j][1] + " of structure " + repr(index) + " does not match the " + repr(het_data_coll[j][3]) + " of the previous structures.")
1406
1407 elif het_data_coll[j][4] != het_data[index][i][4]:
1408 raise RelaxError("The atom counts " + repr(het_data[index][i][4]) + " for the hetrogen residue " + repr(het_data_coll[j][0]) + " " + het_data_coll[j][1] + " of structure " + repr(index) + " do not match the counts " + repr(het_data_coll[j][4]) + " of the previous structures.")
1409
1410
1411 if not found:
1412 het_data_coll.append(het_data[index][i])
1413
1414
1415 index = index + 1
1416
1417
1418
1419
1420
1421
1422 print("HET")
1423
1424
1425 for het in het_data_coll:
1426 file.write("%-6s %3s %1s%4s%1s %5s %-40s\n" % ('HET', het[2], het[1], het[0], '', het[3], ''))
1427
1428
1429
1430
1431
1432
1433 print("HETNAM")
1434
1435
1436 residues = []
1437 for het in het_data_coll:
1438
1439 if het[1] in residues:
1440 continue
1441 else:
1442 residues.append(het[1])
1443
1444
1445 chemical_name = self._get_chemical_name(het[1])
1446 if not chemical_name:
1447 chemical_name = 'Unknown'
1448
1449
1450 file.write("%-6s %2s %3s %-55s\n" % ('HETNAM', '', het[1], chemical_name))
1451
1452
1453
1454
1455
1456
1457 print("FORMUL")
1458
1459
1460 residues = []
1461 for het in het_data_coll:
1462
1463 if het[1] in residues:
1464 continue
1465 else:
1466 residues.append(het[1])
1467
1468
1469 formula = ''
1470
1471
1472 for atom_count in het[4]:
1473 formula = formula + atom_count[0] + repr(atom_count[1])
1474
1475
1476 file.write("%-6s %2s %3s %2s%1s%-51s\n" % ('FORMUL', het[0], het[1], '', '', formula))
1477
1478
1479
1480
1481
1482
1483
1484 for model in self.model_loop(model_num):
1485
1486
1487
1488 if model_records:
1489
1490 print(("\nMODEL %s" % model.num))
1491
1492
1493 file.write("%-6s %4i\n" % ('MODEL', model.num))
1494
1495
1496
1497
1498
1499
1500 for mol in model.mol:
1501
1502 print("ATOM, HETATM, TER")
1503
1504
1505 atom_record = False
1506 for i in xrange(len(mol.atom_name)):
1507
1508 if mol.pdb_record[i] in [None, 'ATOM']:
1509 atom_record = True
1510
1511
1512 atom_num = mol.atom_num[i]
1513 if atom_num == None:
1514 atom_num = i + 1
1515
1516
1517 file.write("%-6s%5s %4s%1s%3s %1s%4s%1s %8.3f%8.3f%8.3f%6.2f%6.2f %4s%2s%2s\n" % ('ATOM', atom_num, self._translate(mol.atom_name[i]), '', self._translate(mol.res_name[i]), self._translate(mol.chain_id[i]), self._translate(mol.res_num[i]), '', self._translate(mol.x[i], 'float'), self._translate(mol.y[i], 'float'), self._translate(mol.z[i], 'float'), 1.0, 0, self._translate(mol.seg_id[i]), self._translate(mol.element[i]), ''))
1518 num_atom = num_atom + 1
1519
1520
1521 ter_num = atom_num + 1
1522 ter_name = mol.res_name[i]
1523 ter_chain_id = mol.chain_id[i]
1524 ter_res_num = mol.res_num[i]
1525
1526
1527 if atom_record:
1528 file.write("%-6s%5s %3s %1s%4s%1s\n" % ('TER', ter_num, self._translate(ter_name), self._translate(ter_chain_id), self._translate(ter_res_num), ''))
1529 num_ter = num_ter + 1
1530
1531
1532 count_shift = False
1533 for i in xrange(len(mol.atom_name)):
1534
1535 if mol.pdb_record[i] == 'HETATM':
1536
1537 atom_num = mol.atom_num[i]
1538 if atom_num == None:
1539 atom_num = i + 1
1540
1541
1542 if atom_record and atom_num == ter_num:
1543 count_shift = True
1544 if atom_record and count_shift:
1545 atom_num += 1
1546
1547
1548 file.write("%-6s%5s %4s%1s%3s %1s%4s%1s %8.3f%8.3f%8.3f%6.2f%6.2f %4s%2s%2s\n" % ('HETATM', atom_num, self._translate(mol.atom_name[i]), '', self._translate(mol.res_name[i]), self._translate(mol.chain_id[i]), self._translate(mol.res_num[i]), '', self._translate(mol.x[i], 'float'), self._translate(mol.y[i], 'float'), self._translate(mol.z[i], 'float'), 1.0, 0, self._translate(mol.seg_id[i]), self._translate(mol.element[i]), ''))
1549 num_hetatm = num_hetatm + 1
1550
1551
1552
1553
1554
1555 if model_records:
1556
1557 print("ENDMDL")
1558
1559
1560 file.write("%-6s\n" % 'ENDMDL')
1561
1562
1563
1564
1565
1566
1567 print("CONECT")
1568
1569
1570 for mol in self.structural_data[0].mol:
1571
1572 for i in xrange(len(mol.atom_name)):
1573
1574 if not len(mol.bonded[i]):
1575 continue
1576
1577
1578 flush = 0
1579 bonded_index = 0
1580 bonded = ['', '', '', '']
1581
1582
1583 for j in xrange(len(mol.bonded[i])):
1584
1585 if j == len(mol.bonded[i])-1:
1586 flush = True
1587
1588
1589 if bonded_index == 3:
1590 flush = True
1591
1592
1593 bonded[bonded_index] = mol.bonded[i][j]
1594
1595
1596 bonded_index = bonded_index + 1
1597
1598
1599 if flush:
1600
1601 for k in range(4):
1602 if bonded[k] != '':
1603 if mol.atom_num[bonded[k]] != None:
1604 bonded[k] = mol.atom_num[bonded[k]]
1605 else:
1606 bonded[k] = bonded[k] + 1
1607
1608
1609 file.write("%-6s%5s%5s%5s%5s%5s%5s%5s%5s%5s%5s%5s\n" % ('CONECT', i+1, bonded[0], bonded[1], bonded[2], bonded[3], '', '', '', '', '', ''))
1610
1611
1612 flush = False
1613 bonded_index = 0
1614 bonded = ['', '', '', '']
1615
1616
1617 num_conect = num_conect + 1
1618
1619
1620
1621
1622
1623
1624
1625 print("\nMASTER")
1626
1627
1628 file.write("%-6s %5s%5s%5s%5s%5s%5s%5s%5s%5s%5s%5s%5s\n" % ('MASTER', 0, 0, len(het_data_coll), 0, 0, 0, 0, 0, num_atom+num_hetatm, num_ter, num_conect, 0))
1629
1630
1631
1632
1633
1634
1635 print("END")
1636
1637
1638 file.write("END\n")
1639
1640
1642 """The container for the molecular information.
1643
1644 The structural data object for this class is a container possessing a number of different arrays
1645 corresponding to different structural information. These objects include:
1646
1647 - atom_num: The atom name.
1648 - atom_name: The atom name.
1649 - bonded: Each element an array of bonded atom indices.
1650 - chain_id: The chain ID.
1651 - element: The element symbol.
1652 - pdb_record: The optional PDB record name (one of ATOM, HETATM, or TER).
1653 - res_name: The residue name.
1654 - res_num: The residue number.
1655 - seg_id: The segment ID.
1656 - x: The x coordinate of the atom.
1657 - y: The y coordinate of the atom.
1658 - z: The z coordinate of the atom.
1659
1660 All arrays should be of equal length so that an atom index can retrieve all the corresponding
1661 data. Only the atom identification string is compulsory, all other arrays can contain None.
1662 """
1663
1664
1666 """Initialise the molecular container."""
1667
1668
1669 self.atom_num = []
1670
1671
1672 self.atom_name = []
1673
1674
1675 self.bonded = []
1676
1677
1678 self.chain_id = []
1679
1680
1681 self.element = []
1682
1683
1684 self.pdb_record = []
1685
1686
1687 self.res_name = []
1688
1689
1690 self.res_num = []
1691
1692
1693 self.seg_id = []
1694
1695
1696 self.x = []
1697
1698
1699 self.y = []
1700
1701
1702 self.z = []
1703
1704
1706 """Find the atom index corresponding to the given atom number.
1707
1708 @param atom_num: The atom number to find the index of.
1709 @type atom_num: int
1710 @return: The atom index corresponding to the atom.
1711 @rtype: int
1712 """
1713
1714
1715 for j in xrange(len(self.atom_num)):
1716
1717 if self.atom_num[j] == atom_num:
1718 return j
1719
1720
1721 warn(RelaxWarning("The atom number " + repr(atom_num) + " from the CONECT record cannot be found within the ATOM and HETATM records."))
1722
1723
1725 """Try to determine the element from the PDB atom name.
1726
1727 @param atom_name: The PDB atom name.
1728 @type atom_name: str
1729 @return: The element name, or None if unsuccessful.
1730 @rtype: str or None
1731 """
1732
1733
1734 element = strip(atom_name, "'")
1735
1736
1737 element = strip(element, digits)
1738
1739
1740 table = {'C': ['CA', 'CB', 'CG', 'CD', 'CE', 'CH', 'CZ'],
1741 'N': ['ND', 'NE', 'NH', 'NZ'],
1742 'H': ['HA', 'HB', 'HG', 'HD', 'HE', 'HH', 'HT', 'HZ'],
1743 'O': ['OG', 'OD', 'OE', 'OH', 'OT'],
1744 'S': ['SD', 'SG']
1745 }
1746
1747
1748 for key in list(table.keys()):
1749 if element in table[key]:
1750 element = key
1751 break
1752
1753
1754 elements = ['H', 'C', 'N', 'O', 'F', 'P', 'S']
1755
1756
1757 if element in elements:
1758 return element
1759
1760
1761 warn(RelaxWarning("Cannot determine the element associated with atom '%s'." % atom_name))
1762
1763
1765 """Parse the PDB record string and return an array of the corresponding atomic information.
1766
1767 The format of the ATOM and HETATM records is::
1768 __________________________________________________________________________________________
1769 | | | | |
1770 | Columns | Data type | Field | Definition |
1771 |_________|______________|______________|________________________________________________|
1772 | | | | |
1773 | 1 - 6 | Record name | "ATOM" | |
1774 | 7 - 11 | Integer | serial | Atom serial number. |
1775 | 13 - 16 | Atom | name | Atom name. |
1776 | 17 | Character | altLoc | Alternate location indicator. |
1777 | 18 - 20 | Residue name | resName | Residue name. |
1778 | 22 | Character | chainID | Chain identifier. |
1779 | 23 - 26 | Integer | resSeq | Residue sequence number. |
1780 | 27 | AChar | iCode | Code for insertion of residues. |
1781 | 31 - 38 | Real(8.3) | x | Orthogonal coordinates for X in Angstroms. |
1782 | 39 - 46 | Real(8.3) | y | Orthogonal coordinates for Y in Angstroms. |
1783 | 47 - 54 | Real(8.3) | z | Orthogonal coordinates for Z in Angstroms. |
1784 | 55 - 60 | Real(6.2) | occupancy | Occupancy. |
1785 | 61 - 66 | Real(6.2) | tempFactor | Temperature factor. |
1786 | 73 - 76 | LString(4) | segID | Segment identifier, left-justified. |
1787 | 77 - 78 | LString(2) | element | Element symbol, right-justified. |
1788 | 79 - 80 | LString(2) | charge | Charge on the atom. |
1789 |_________|______________|______________|________________________________________________|
1790
1791
1792 The format of the TER record is::
1793 __________________________________________________________________________________________
1794 | | | | |
1795 | Columns | Data type | Field | Definition |
1796 |_________|______________|______________|________________________________________________|
1797 | | | | |
1798 | 1 - 6 | Record name | "TER " | |
1799 | 7 - 11 | Integer | serial | Serial number. |
1800 | 18 - 20 | Residue name | resName | Residue name. |
1801 | 22 | Character | chainID | Chain identifier. |
1802 | 23 - 26 | Integer | resSeq | Residue sequence number. |
1803 | 27 | AChar | iCode | Insertion code. |
1804 |_________|______________|______________|________________________________________________|
1805
1806
1807 The format of the CONECT record is::
1808 __________________________________________________________________________________________
1809 | | | | |
1810 | Columns | Data type | Field | Definition |
1811 |_________|______________|______________|________________________________________________|
1812 | | | | |
1813 | 1 - 6 | Record name | "CONECT" | |
1814 | 7 - 11 | Integer | serial | Atom serial number |
1815 | 12 - 16 | Integer | serial | Serial number of bonded atom |
1816 | 17 - 21 | Integer | serial | Serial number of bonded atom |
1817 | 22 - 26 | Integer | serial | Serial number of bonded atom |
1818 | 27 - 31 | Integer | serial | Serial number of bonded atom |
1819 | 32 - 36 | Integer | serial | Serial number of hydrogen bonded atom |
1820 | 37 - 41 | Integer | serial | Serial number of hydrogen bonded atom |
1821 | 42 - 46 | Integer | serial | Serial number of salt bridged atom |
1822 | 47 - 51 | Integer | serial | Serial number of hydrogen bonded atom |
1823 | 52 - 56 | Integer | serial | Serial number of hydrogen bonded atom |
1824 | 57 - 61 | Integer | serial | Serial number of salt bridged atom |
1825 |_________|______________|______________|________________________________________________|
1826
1827
1828 @param record: The single line PDB record.
1829 @type record: str
1830 @return: The list of atomic information, each element corresponding to the PDB fields
1831 as defined in "Protein Data Bank Contents Guide: Atomic Coordinate Entry
1832 Format Description" version 2.1 (draft), October 25, 1996.
1833 @rtype: list of str
1834 """
1835
1836
1837 fields = []
1838
1839
1840 if search('^ATOM', record) or search('^HETATM', record):
1841
1842 fields.append(record[0:6])
1843 fields.append(record[6:11])
1844 fields.append(record[12:16])
1845 fields.append(record[16])
1846 fields.append(record[17:20])
1847 fields.append(record[21])
1848 fields.append(record[22:26])
1849 fields.append(record[26])
1850 fields.append(record[30:38])
1851 fields.append(record[38:46])
1852 fields.append(record[46:54])
1853 fields.append(record[54:60])
1854 fields.append(record[60:66])
1855 fields.append(record[72:76])
1856 fields.append(record[76:78])
1857 fields.append(record[78:80])
1858
1859
1860 for i in xrange(len(fields)):
1861
1862 fields[i] = strip(fields[i])
1863
1864
1865 if fields[i] == '':
1866 fields[i] = None
1867
1868
1869 if fields[1]:
1870 fields[1] = int(fields[1])
1871 if fields[6]:
1872 fields[6] = int(fields[6])
1873 if fields[8]:
1874 fields[8] = float(fields[8])
1875 if fields[9]:
1876 fields[9] = float(fields[9])
1877 if fields[10]:
1878 fields[10] = float(fields[10])
1879 if fields[11]:
1880 fields[11] = float(fields[11])
1881 if fields[12]:
1882 fields[12] = float(fields[12])
1883
1884
1885 if search('^CONECT', record):
1886
1887 fields.append(record[0:6])
1888 fields.append(record[6:11])
1889 fields.append(record[11:16])
1890 fields.append(record[16:21])
1891 fields.append(record[21:26])
1892 fields.append(record[26:31])
1893
1894
1895 for i in xrange(len(fields)):
1896
1897 fields[i] = strip(fields[i])
1898
1899
1900 if fields[i] == '':
1901 fields[i] = None
1902
1903
1904 if fields[1]:
1905 fields[1] = int(fields[1])
1906 if fields[2]:
1907 fields[2] = int(fields[2])
1908 if fields[3]:
1909 fields[3] = int(fields[3])
1910 if fields[4]:
1911 fields[4] = int(fields[4])
1912 if fields[5]:
1913 fields[5] = int(fields[5])
1914
1915
1916 return fields
1917
1918
1920 """Parse the XYZ record string and return an array of the corresponding atomic information.
1921
1922 The format of the XYZ records is::
1923 __________________________________________________________________________________________
1924 | | | | |
1925 | Columns | Data type | Field | Definition |
1926 |_________|______________|______________|________________________________________________|
1927 | | | | |
1928 | 1 | String | element | |
1929 | 2 | Real | x | Orthogonal coordinates for X in Angstroms |
1930 | 3 | Real | y | Orthogonal coordinates for Y in Angstroms |
1931 | 4 | Real | z | Orthogonal coordinates for Z in Angstroms |
1932 |_________|______________|______________|________________________________________________|
1933
1934
1935 @param record: The single line PDB record.
1936 @type record: str
1937 @return: The list of atomic information
1938 @rtype: list of str
1939 """
1940
1941
1942 fields = []
1943 word=split(record)
1944
1945
1946 if len(word)==4:
1947
1948 fields.append(word[0])
1949 fields.append(word[1])
1950 fields.append(word[2])
1951 fields.append(word[3])
1952
1953
1954 for i in xrange(len(fields)):
1955
1956 fields[i] = strip(fields[i])
1957
1958
1959 if fields[i] == '':
1960 fields[i] = None
1961
1962
1963 if fields[1]:
1964 fields[1] = float(fields[1])
1965 if fields[2]:
1966 fields[2] = float(fields[2])
1967 if fields[3]:
1968 fields[3] = float(fields[3])
1969
1970
1971 return fields
1972
1973
1974 - def atom_add(self, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):
1975 """Method for adding an atom to the structural data object.
1976
1977 This method will create the key-value pair for the given atom.
1978
1979
1980 @keyword atom_name: The atom name, e.g. 'H1'.
1981 @type atom_name: str or None
1982 @keyword res_name: The residue name.
1983 @type res_name: str or None
1984 @keyword res_num: The residue number.
1985 @type res_num: int or None
1986 @keyword pos: The position vector of coordinates.
1987 @type pos: list (length = 3)
1988 @keyword element: The element symbol.
1989 @type element: str or None
1990 @keyword atom_num: The atom number.
1991 @type atom_num: int or None
1992 @keyword chain_id: The chain identifier.
1993 @type chain_id: str or None
1994 @keyword segment_id: The segment identifier.
1995 @type segment_id: str or None
1996 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'.
1997 @type pdb_record: str or None
1998 """
1999
2000
2001 self.atom_num.append(atom_num)
2002 self.atom_name.append(atom_name)
2003 self.bonded.append([])
2004 self.chain_id.append(chain_id)
2005 self.element.append(element)
2006 self.pdb_record.append(pdb_record)
2007 self.res_name.append(res_name)
2008 self.res_num.append(res_num)
2009 self.seg_id.append(segment_id)
2010 self.x.append(pos[0])
2011 self.y.append(pos[1])
2012 self.z.append(pos[2])
2013
2014
2016 """Method for connecting two atoms within the data structure object.
2017
2018 This method will append index2 to the array at bonded[index1] and vice versa.
2019
2020
2021 @keyword index1: The index of the first atom.
2022 @type index1: int
2023 @keyword index2: The index of the second atom.
2024 @type index2: int
2025 """
2026
2027
2028 if index2 not in self.bonded[index1]:
2029 self.bonded[index1].append(index2)
2030 if index1 not in self.bonded[index2]:
2031 self.bonded[index2].append(index1)
2032
2033
2035 """Method for generating a complete Structure_container object from the given PDB records.
2036
2037 @param records: A list of structural PDB records.
2038 @type records: list of str
2039 """
2040
2041
2042 for record in records:
2043
2044 record = self._parse_pdb_record(record)
2045
2046
2047 if not record:
2048 continue
2049
2050
2051 if record[0] == 'ATOM' or record[0] == 'HETATM':
2052
2053 element = record[14]
2054 if not element:
2055 element = self._det_pdb_element(record[2])
2056
2057
2058 self.atom_add(pdb_record=record[0], atom_num=record[1], atom_name=record[2], res_name=record[4], chain_id=record[5], res_num=record[6], pos=[record[8], record[9], record[10]], segment_id=record[13], element=element)
2059
2060
2061 if record[0] == 'CONECT':
2062
2063 for i in xrange(len(record)-2):
2064
2065 if record[i+2] == None:
2066 continue
2067
2068
2069 self.atom_connect(index1=self._atom_index(record[1]), index2=self._atom_index(record[i+2]))
2070
2071
2073 """Method for generating a complete Structure_container object from the given xyz records.
2074
2075 @param records: A list of structural xyz records.
2076 @type records: list of str
2077 """
2078
2079
2080 atom_number = 1
2081
2082
2083 for record in records:
2084
2085 record = self._parse_xyz_record(record)
2086
2087
2088 if not record:
2089 continue
2090
2091
2092 if len(record) == 4:
2093
2094 element = record[0]
2095 if not element:
2096 element = self._det_pdb_element(record[2])
2097
2098
2099 self.atom_add(atom_num=atom_number, pos=[record[1], record[2], record[3]], element=element)
2100
2101
2102 atom_number = atom_number + 1
2103
2104
2105 - def from_xml(self, mol_node, file_version=1):
2106 """Recreate the MolContainer from the XML molecule node.
2107
2108 @param mol_node: The molecule XML node.
2109 @type mol_node: xml.dom.minicompat.NodeList instance
2110 @keyword file_version: The relax XML version of the XML file.
2111 @type file_version: int
2112 """
2113
2114
2115 xml_to_object(mol_node, self, file_version=file_version)
2116
2117
2119 """Check if the container is empty."""
2120
2121
2122 if hasattr(self, 'mol_name'): return False
2123 if hasattr(self, 'file_name'): return False
2124 if hasattr(self, 'file_path'): return False
2125 if hasattr(self, 'file_mol_num'): return False
2126 if hasattr(self, 'file_model'): return False
2127
2128
2129 if not self.atom_num == []: return False
2130 if not self.atom_name == []: return False
2131 if not self.bonded == []: return False
2132 if not self.chain_id == []: return False
2133 if not self.element == []: return False
2134 if not self.pdb_record == []: return False
2135 if not self.res_name == []: return False
2136 if not self.res_num == []: return False
2137 if not self.seg_id == []: return False
2138 if not self.x == []: return False
2139 if not self.y == []: return False
2140 if not self.z == []: return False
2141
2142
2143 return True
2144
2145
2146 - def to_xml(self, doc, element):
2147 """Create XML elements for the contents of this molecule container.
2148
2149 @param doc: The XML document object.
2150 @type doc: xml.dom.minidom.Document instance
2151 @param element: The element to add the molecule XML elements to.
2152 @type element: XML element object
2153 """
2154
2155
2156 mol_element = doc.createElement('mol_cont')
2157 element.appendChild(mol_element)
2158
2159
2160 mol_element.setAttribute('desc', 'Molecule container')
2161 mol_element.setAttribute('name', str(self.mol_name))
2162
2163
2164 fill_object_contents(doc, mol_element, object=self, blacklist=list(self.__class__.__dict__.keys()))
2165