1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """Module containing the Scientific Python PDB specific structural object class."""
24
25
26 import dep_check
27
28
29 from math import sqrt
30 from numpy import array, dot, float64, linalg, zeros
31 import os
32 from os import F_OK, access, sep
33 from extern.scientific_python.IO import PDB
34 import sys
35 from warnings import warn
36
37
38 from data.relax_xml import fill_object_contents, xml_to_object
39 from generic_fns import pipes, relax_re
40 from generic_fns.mol_res_spin import Selection, generate_spin_id, parse_token, tokenise
41 from generic_fns.structure.api_base import Base_struct_API
42 from relax_errors import RelaxError, RelaxPdbLoadError
43 from relax_io import file_root
44 from relax_warnings import RelaxWarning, RelaxNoAtomWarning, RelaxNoPDBFileWarning, RelaxZeroVectorWarning
45
46
48 """The Scientific Python specific data object."""
49
50
51 id = 'scientific'
52
58
59
61 """Find the atom named attached_atom directly bonded to the desired atom.
62
63 @param attached_atom: The name of the attached atom to return.
64 @type attached_atom: str
65 @param mol_type: The type of the molecule. This can be one of 'protein', 'nucleic acid',
66 or 'other'.
67 @type mol_type: str
68 @param res: The Scientific Python residue object.
69 @type res: Scientific Python residue instance
70 @return: A tuple of information about the bonded atom.
71 @rtype: tuple consisting of the atom number (int), atom name (str), element
72 name (str), and atomic position (Numeric array of len 3)
73 """
74
75
76 bonded_found = False
77
78
79 matching_list = []
80 for atom in list(res.atoms.keys()):
81 if relax_re.search(atom, attached_atom):
82 matching_list.append(atom)
83 num_attached = len(matching_list)
84
85
86 if num_attached > 1:
87 return None, None, None, None, None, 'More than one attached atom found: ' + repr(matching_list)
88
89
90 if num_attached == 0:
91 return None, None, None, None, None, "No attached atom could be found"
92
93
94 bonded = res[attached_atom]
95
96
97 bonded_num = bonded.properties['serial_number']
98 bonded_name = bonded.name
99 element = bonded.properties['element']
100 pos = bonded.position.array
101 attached_name = matching_list[0]
102
103
104 return bonded_num, bonded_name, element, pos, attached_name, None
105
106
108 """Generator function for looping over all residues in the Scientific PDB data objects.
109
110 @param mol: The individual molecule Scientific Python PDB data object.
111 @type mol: Scientific Python PDB object
112 @keyword sel_obj: The selection object.
113 @type sel_obj: instance of generic_fns.mol_res_spin.Selection
114 @return: A tuple of the Scientific Python PDB object representing a single
115 residue, the residue number, and residue name.
116 @rtype: (Scientific Python PDB object, str, str)
117 """
118
119
120 if mol.mol_type != 'other':
121
122 res_index = -1
123 for res in mol.data.residues:
124
125 if mol.mol_type == 'nucleic acid':
126 res_name = res.name[-1]
127 else:
128 res_name = res.name
129 res_num = res.number
130
131
132 res_index = res_index + 1
133
134
135 if sel_obj and not sel_obj.contains_res(res_num, res_name, mol.mol_name):
136 continue
137
138
139 yield res, res_num, res_name, res_index
140
141
142 else:
143 res_index = -1
144 for res in mol.data:
145
146 res_index = res_index + 1
147
148
149 if sel_obj and not sel_obj.contains_res(res.number, res.name, mol.mol_name):
150 continue
151
152
153 yield res, res.number, res.name, res_index
154
155
156 - def are_bonded(self, atom_id1=None, atom_id2=None):
157 """Determine if two atoms are directly bonded to each other.
158
159 @keyword atom_id1: The molecule, residue, and atom identifier string of the first atom.
160 @type atom_id1: str
161 @keyword atom_id2: The molecule, residue, and atom identifier string of the second atom.
162 @type atom_id2: str
163 @return: True if the atoms are directly bonded.
164 @rtype: bool
165 """
166
167
168 for pos1 in self.atom_loop(atom_id=atom_id1, pos_flag=True):
169 for pos2 in self.atom_loop(atom_id=atom_id2, pos_flag=True):
170
171 dist = linalg.norm(pos2[0]-pos1[0])
172
173
174 if dist < 2.0:
175 return True
176
177
178 return False
179
180
181 - def atom_loop(self, atom_id=None, str_id=None, model_num=None, model_num_flag=False, mol_name_flag=False, res_num_flag=False, res_name_flag=False, atom_num_flag=False, atom_name_flag=False, element_flag=False, pos_flag=False, ave=False):
182 """Generator function for looping over all atoms in the Scientific Python data objects.
183
184 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms
185 matching this selection will be yielded.
186 @type atom_id: str
187 @keyword str_id: The structure identifier. This can be the file name, model
188 number, or structure number. If None, then all structures will
189 be looped over.
190 @type str_id: str, int, or None
191 @keyword model_num: Only loop over a specific model.
192 @type model_num: int or None
193 @keyword model_num_flag: A flag which if True will cause the model number to be yielded.
194 @type model_num_flag: bool
195 @keyword mol_name_flag: A flag which if True will cause the molecule name to be yielded.
196 @type mol_name_flag: bool
197 @keyword res_num_flag: A flag which if True will cause the residue number to be
198 yielded.
199 @type res_num_flag: bool
200 @keyword res_name_flag: A flag which if True will cause the residue name to be yielded.
201 @type res_name_flag: bool
202 @keyword atom_num_flag: A flag which if True will cause the atom number to be yielded.
203 @type atom_num_flag: bool
204 @keyword atom_name_flag: A flag which if True will cause the atom name to be yielded.
205 @type atom_name_flag: bool
206 @keyword element_flag: A flag which if True will cause the element name to be yielded.
207 @type element_flag: bool
208 @keyword pos_flag: A flag which if True will cause the atomic position to be
209 yielded.
210 @type pos_flag: bool
211 average atom properties across all loaded structures.
212 @type ave: bool
213 @return: A tuple of atomic information, as described in the docstring.
214 @rtype: tuple consisting of optional molecule name (str), residue number
215 (int), residue name (str), atom number (int), atom name(str),
216 element name (str), and atomic position (array of len 3).
217 """
218
219
220 sel_obj = Selection(atom_id)
221
222
223 for model in self.model_loop():
224
225 for mol_index in range(len(model.mol)):
226 mol = model.mol[mol_index]
227
228
229 if sel_obj and not sel_obj.contains_mol(mol.mol_name):
230 continue
231
232
233 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj):
234
235 atom_index = -1
236 for atom in res:
237
238 atom_num = atom.properties['serial_number']
239 atom_name = atom.name
240 element = atom.properties['element']
241 atom_index = atom_index + 1
242
243
244 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name):
245 continue
246
247
248 if ave:
249 pos = self.__ave_atom_pos(mol_index, res_index, atom_index)
250 else:
251 pos = atom.position.array
252
253
254 mol_name = mol.mol_name
255
256
257 if not mol.mol_name:
258 mol_name = None
259 if not res_num:
260 res_num = None
261 if not res_name:
262 res_name = None
263 if not atom_num:
264 atom_num = None
265 if not atom_name:
266 atom_name = None
267
268
269 atomic_tuple = ()
270 if model_num_flag:
271 if ave:
272 atomic_tuple = atomic_tuple + (None,)
273 else:
274 atomic_tuple = atomic_tuple + (model.num,)
275 if mol_name_flag:
276 atomic_tuple = atomic_tuple + (mol_name,)
277 if res_num_flag:
278 atomic_tuple = atomic_tuple + (res_num,)
279 if res_name_flag:
280 atomic_tuple = atomic_tuple + (res_name,)
281 if atom_num_flag:
282 atomic_tuple = atomic_tuple + (atom_num,)
283 if atom_name_flag:
284 atomic_tuple = atomic_tuple + (atom_name,)
285 if element_flag:
286 atomic_tuple = atomic_tuple + (element,)
287 if pos_flag:
288 atomic_tuple = atomic_tuple + (pos,)
289
290
291 yield atomic_tuple
292
293
294 if ave:
295 break
296
297
298 - def attached_atom(self, atom_id=None, attached_atom=None, model=None):
299 """Find the atom corresponding to 'attached_atom' which is bonded to the atom of 'atom_id'.
300
301 @keyword atom_id: The molecule, residue, and atom identifier string. This must
302 correspond to a single atom in the system.
303 @type atom_id: str
304 @keyword attached_atom: The name of the attached atom to return.
305 @type attached_atom: str
306 @keyword model: The model to return the positional information from. If not
307 supplied and multiple models exist, then the returned atomic
308 position will be a list of the positions in each model.
309 @type model: None or int
310 @return: A tuple of information about the bonded atom.
311 @rtype: tuple consisting of the atom number (int), atom name (str), element
312 name (str), and atomic positions for each model (list of numpy
313 arrays)
314 """
315
316
317 sel_obj = Selection(atom_id)
318
319
320 bonded_num = None
321 bonded_name = None
322 element = None
323 pos_array = []
324
325
326 for model in self.model_loop(model):
327
328 atom_found = False
329
330
331 for mol in model.mol:
332
333 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj):
334
335 for atom in res:
336
337 atom_num = atom.properties['serial_number']
338 atom_name = atom.name
339
340
341 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name):
342 continue
343
344
345 if atom_found:
346 raise RelaxError("The atom_id argument " + repr(atom_id) + " must correspond to a single atom.")
347
348
349 atom_found = True
350 atom_num_match = atom_num
351 atom_name_match = atom_name
352 mol_type_match = mol.mol_type
353 res_match = res
354
355
356 if atom_found:
357
358 bonded_num, bonded_name, element, pos = self.__find_bonded_atom(attached_atom, mol_type_match, res_match)
359
360
361 pos_array.append(array(pos, float64))
362
363
364 return bonded_num, bonded_name, element, pos_array
365
366
368 """Determine the average atom position across all models.
369
370 @param mol_index: The molecule index.
371 @type mol_index: int
372 @param res_index: The residue index.
373 @type res_index: int
374 @param atom_index: The atom index.
375 @type atom_index: int
376 """
377
378
379 pos = zeros(3, float64)
380
381
382 for model in self.model_loop():
383
384 mol = model.mol[mol_index]
385
386
387 if mol.mol_type != 'other':
388 res = mol.data.residues[res_index]
389 else:
390 res = mol.data[res_index]
391
392
393 atom = res[atom_index]
394
395
396 pos = pos + atom.position.array
397
398
399 pos = pos / len(self.structural_data)
400
401
402 return pos
403
404
405 - def bond_vectors(self, attached_atom=None, model_num=None, mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, return_name=False, return_warnings=False):
406 """Find the bond vectors between the atoms of 'attached_atom' and 'atom_id'.
407
408 @keyword attached_atom: The name of the bonded atom.
409 @type attached_atom: str
410 @keyword model_num: The model of which to return the vectors from. If not supplied
411 and multiple models exist, then vectors from all models will be
412 returned.
413 @type model_num: None or int
414 @keyword mol_name: The name of the molecule that attached_atom belongs to.
415 @type mol_name: str
416 @keyword res_num: The number of the residue that attached_atom belongs to.
417 @type res_num: str
418 @keyword res_name: The name of the residue that attached_atom belongs to.
419 @type res_name: str
420 @keyword spin_num: The number of the spin that attached_atom is attached to.
421 @type spin_num: str
422 @keyword spin_name: The name of the spin that attached_atom is attached to.
423 @type spin_name: str
424 @keyword return_name: A flag which if True will cause the name of the attached atom to
425 be returned together with the bond vectors.
426 @type return_name: bool
427 @keyword return_warnings: A flag which if True will cause warning messages to be returned.
428 @type return_warnings: bool
429 @return: The list of bond vectors for each structure.
430 @rtype: list of numpy arrays
431 """
432
433
434 sel_obj = Selection(generate_spin_id(mol_name, res_num, res_name, spin_num, spin_name))
435
436
437 vectors = []
438 attached_name = None
439 warnings = None
440
441
442 for model in self.model_loop(model_num):
443
444 atom_found = False
445
446
447 for mol in model.mol:
448
449 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj):
450
451 for atom in res:
452
453 atom_num = atom.properties['serial_number']
454 atom_name = atom.name
455
456
457 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name):
458 continue
459
460
461 if atom_found:
462 raise RelaxError("The atom_id argument " + repr(atom_id) + " must correspond to a single atom.")
463
464
465 atom_found = True
466 pos_match = atom.position.array
467 mol_type_match = mol.mol_type
468 res_match = res
469
470
471 if atom_found:
472
473 bonded_num, bonded_name, element, pos, attached_name, warnings = self.__find_bonded_atom(attached_atom, mol_type_match, res_match)
474
475
476 if (bonded_num, bonded_name, element) == (None, None, None):
477 continue
478
479
480 vector = pos - pos_match
481
482
483 vectors.append(array(vector, float64))
484
485
486 data = (vectors,)
487 if return_name:
488 data = data + (attached_name,)
489 if return_warnings:
490 data = data + (warnings,)
491
492
493 return data
494
495
497 """Return the molecule.
498
499 Only one model can be specified.
500
501
502 @param molecule: The molecule name.
503 @type molecule: int or None
504 @keyword model: The model number.
505 @type model: int or None
506 @raises RelaxError: If the model is not specified and there is more than one model loaded.
507 @return: The MolContainer corresponding to the molecule name and model number.
508 @rtype: MolContainer instance or None
509 """
510
511
512 if model == None and self.num_models() > 1:
513 raise RelaxError("The target molecule cannot be determined as there are %s models already present." % self.num_models())
514
515
516 if not isinstance(model, int) and not model == None:
517 raise RelaxNoneIntError
518
519
520 if not len(self.structural_data):
521 return
522
523
524 for model_cont in self.model_loop(model):
525
526 for mol in model_cont.mol:
527
528 if mol.mol_name == molecule:
529 return mol
530
531
532 - def load_pdb(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, verbosity=False):
533 """Function for loading the structures from the PDB file.
534
535 @param file_path: The full path of the file.
536 @type file_path: str
537 @keyword read_mol: The molecule(s) to read from the file, independent of model. The
538 molecules are determined differently by the different parsers, but
539 are numbered consecutively from 1. If set to None, then all
540 molecules will be loaded.
541 @type read_mol: None, int, or list of int
542 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None,
543 then the molecules will be automatically labelled based on the file
544 name or other information.
545 @type set_mol_name: None, str, or list of str
546 @keyword read_model: The PDB model to extract from the file. If set to None, then all
547 models will be loaded.
548 @type read_model: None, int, or list of int
549 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then
550 the PDB model numbers will be preserved, if they exist.
551 @type set_model_num: None, int, or list of int
552 @keyword verbosity: A flag which if True will cause messages to be printed.
553 @type verbosity: bool
554 @return: The status of the loading of the PDB file.
555 @rtype: bool
556 """
557
558
559 if verbosity:
560 print("\nScientific Python PDB parser.")
561
562
563 if not access(file_path, F_OK):
564
565 return False
566
567
568 path, file = os.path.split(file_path)
569
570
571 if read_mol and not isinstance(read_mol, list):
572 read_mol = [read_mol]
573 if set_mol_name and not isinstance(set_mol_name, list):
574 set_mol_name = [set_mol_name]
575 if read_model and not isinstance(read_model, list):
576 read_model = [read_model]
577 if set_model_num and not isinstance(set_model_num, list):
578 set_model_num = [set_model_num]
579
580
581 model_flag = True
582 model_num = 1
583 model_load_num = 1
584 orig_model_num = []
585 mol_conts = []
586 while True:
587
588 if read_model:
589
590 if model_num > max(read_model):
591 break
592
593
594 if model_num not in read_model:
595
596 model_num = model_num + 1
597
598
599 continue
600
601
602 model = PDB.Structure(file_path, model_num)
603
604
605 if not len(model) and not len(mol_conts):
606
607 model = PDB.Structure(file_path)
608 model_flag = False
609
610
611 if not len(model):
612 raise RelaxPdbLoadError(file_path)
613
614
615 if not len(model):
616 del model
617 break
618
619
620 mol_conts.append([])
621 mol_index = 0
622 new_mol_name = []
623
624
625 mol_offset = 0
626 for i in range(len(self.structural_data)):
627 model_index = model_load_num - 1
628 if not set_model_num or (model_index <= len(set_model_num) and set_model_num[model_index] == self.structural_data[i].num):
629 mol_offset = len(self.structural_data[i].mol)
630
631
632 orig_model_num.append(model_num)
633
634
635 if verbosity:
636 print(model)
637
638
639 if hasattr(model, 'peptide_chains'):
640 for mol in model.peptide_chains:
641
642 if read_mol and mol_index+1 not in read_mol:
643 mol_index = mol_index + 1
644 continue
645
646 mol.mol_type = 'protein'
647 mol_conts[-1].append(MolContainer())
648 mol_conts[-1][-1].data = mol
649 mol_conts[-1][-1].mol_type = 'protein'
650 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file)
651 mol_index = mol_index + 1
652
653
654 if hasattr(model, 'nucleotide_chains'):
655 for mol in model.nucleotide_chains:
656
657 if read_mol and mol_index+1 not in read_mol:
658 mol_index = mol_index + 1
659 continue
660
661 mol_conts[-1].append(MolContainer())
662 mol_conts[-1][-1].data = mol
663 mol_conts[-1][-1].mol_type = 'nucleic acid'
664 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file)
665 mol_index = mol_index + 1
666
667
668 if hasattr(model, 'molecules'):
669 for key in list(model.molecules.keys()):
670
671 if read_mol and mol_index+1 not in read_mol:
672 mol_index = mol_index + 1
673 continue
674
675
676 mol_conts[-1].append(MolContainer())
677 mol_conts[-1][-1].mol_type = 'other'
678
679
680 mol_conts[-1][-1].data = []
681 for mol in model.molecules[key]:
682 mol_conts[-1][-1].data.append(mol)
683
684
685 if set_mol_name and mol_index >= len(set_mol_name):
686 raise RelaxError("The %s molecules read exceeds the number of molecule names supplied in %s." % (mol_index+1, set_mol_name))
687
688
689 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file)
690 mol_index = mol_index + 1
691
692
693 model_num = model_num + 1
694 model_load_num = model_load_num + 1
695
696
697 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=list(range(1, len(mol_conts[0])+1)), set_mol_name=new_mol_name, file_name=file, file_path=path)
698
699
700 return True
701
702
704 """The empty list-type container for the non-protein and non-RNA molecular information."""
705
706
707 - def from_xml(self, mol_node, file_version=1):
708 """Recreate the MolContainer from the XML molecule node.
709
710 @param mol_node: The molecule XML node.
711 @type mol_node: xml.dom.minicompat.NodeList instance
712 @keyword file_version: The relax XML version of the XML file.
713 @type file_version: int
714 """
715
716
717 xml_to_object(mol_node, self, file_version=file_version)
718
719
720 self.reload_pdb()
721
722
724 """Reload the PDB from the original file."""
725
726
727 if self.file_path:
728 file_path = self.file_path + sep + self.file_name
729 else:
730 file_path = self.file_name
731
732
733 if not access(file_path, F_OK):
734 found = False
735
736
737 for dir in sys.path:
738 if access(dir + sep + file_path, F_OK):
739
740 file_path = dir + sep + file_path
741 found = True
742 break
743
744
745 if not found:
746 warn(RelaxNoPDBFileWarning(file_path))
747 return
748
749
750 model = PDB.Structure(file_path, self.file_model)
751
752
753 print("\n" + repr(model))
754
755
756 mol_num = 1
757
758
759 if hasattr(model, 'peptide_chains'):
760 for mol in model.peptide_chains:
761
762 if mol_num == self.file_mol_num:
763 self.data = mol
764 return
765
766 mol_num = mol_num + 1
767
768
769 if hasattr(model, 'nucleotide_chains'):
770 for mol in model.nucleotide_chains:
771
772 if mol_num == self.file_mol_num:
773 self.data = mol
774 return
775
776 mol_num = mol_num + 1
777
778
779 if hasattr(model, 'molecules'):
780 for key in list(model.molecules.keys()):
781
782 if mol_num == self.file_mol_num:
783
784 self.data = []
785 for mol in model.molecules[key]:
786 self.data.append(mol)
787 return
788
789 mol_num = mol_num + 1
790
791
792 - def to_xml(self, doc, element):
793 """Create XML elements for the contents of this molecule container.
794
795 @param doc: The XML document object.
796 @type doc: xml.dom.minidom.Document instance
797 @param element: The element to add the molecule XML elements to.
798 @type element: XML element object
799 """
800
801
802 mol_element = doc.createElement('mol_cont')
803 element.appendChild(mol_element)
804
805
806 mol_element.setAttribute('desc', 'Molecule container')
807 mol_element.setAttribute('name', str(self.mol_name))
808
809
810 fill_object_contents(doc, mol_element, object=self, blacklist=['data'] + list(self.__class__.__dict__.keys()))
811