1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """Module containing the Scientific Python PDB specific structural object class."""
24
25
26 import dep_check
27
28
29 from math import sqrt
30 from numpy import array, dot, float64, linalg, zeros
31 import os
32 from os import F_OK, access, sep
33 from extern.scientific_python.IO import PDB
34 import sys
35 from warnings import warn
36
37
38 from data.relax_xml import fill_object_contents, xml_to_object
39 from generic_fns import pipes, relax_re
40 from generic_fns.mol_res_spin import Selection, generate_spin_id, parse_token, tokenise
41 from generic_fns.structure.api_base import Base_struct_API
42 from relax_errors import RelaxError, RelaxPdbLoadError
43 from relax_io import file_root
44 from relax_warnings import RelaxWarning, RelaxNoAtomWarning, RelaxNoPDBFileWarning, RelaxZeroVectorWarning
45
46
48 """The Scientific Python specific data object."""
49
50
51 id = 'scientific'
52
58
59
61 """Find the atom named attached_atom directly bonded to the desired atom.
62
63 @param attached_atom: The name of the attached atom to return.
64 @type attached_atom: str
65 @param mol_type: The type of the molecule. This can be one of 'protein', 'nucleic acid',
66 or 'other'.
67 @type mol_type: str
68 @param res: The Scientific Python residue object.
69 @type res: Scientific Python residue instance
70 @return: A tuple of information about the bonded atom.
71 @rtype: tuple consisting of the atom number (int), atom name (str), element
72 name (str), and atomic position (Numeric array of len 3)
73 """
74
75
76 bonded_found = False
77
78
79 matching_list = []
80 for atom in list(res.atoms.keys()):
81 if relax_re.search(atom, attached_atom):
82 matching_list.append(atom)
83 num_attached = len(matching_list)
84
85
86 if num_attached > 1:
87 return None, None, None, None, None, 'More than one attached atom found: ' + repr(matching_list)
88
89
90 if num_attached == 0:
91 return None, None, None, None, None, "No attached atom could be found"
92
93
94 bonded = res[attached_atom]
95
96
97 bonded_num = bonded.properties['serial_number']
98 bonded_name = bonded.name
99 element = bonded.properties['element']
100 pos = bonded.position.array
101 attached_name = matching_list[0]
102
103
104 return bonded_num, bonded_name, element, pos, attached_name, None
105
106
108 """Generator function for looping over all residues in the Scientific PDB data objects.
109
110 @param mol: The individual molecule Scientific Python PDB data object.
111 @type mol: Scientific Python PDB object
112 @keyword sel_obj: The selection object.
113 @type sel_obj: instance of generic_fns.mol_res_spin.Selection
114 @return: A tuple of the Scientific Python PDB object representing a single
115 residue, the residue number, and residue name.
116 @rtype: (Scientific Python PDB object, str, str)
117 """
118
119
120 if mol.mol_type != 'other':
121
122 res_index = -1
123 for res in mol.data.residues:
124
125 if mol.mol_type == 'nucleic acid':
126 res_name = res.name[-1]
127 else:
128 res_name = res.name
129 res_num = res.number
130
131
132 res_index = res_index + 1
133
134
135 if sel_obj and not sel_obj.contains_res(res_num, res_name, mol.mol_name):
136 continue
137
138
139 yield res, res_num, res_name, res_index
140
141
142 else:
143 res_index = -1
144 for res in mol.data:
145
146 res_index = res_index + 1
147
148
149 if sel_obj and not sel_obj.contains_res(res.number, res.name, mol.mol_name):
150 continue
151
152
153 yield res, res.number, res.name, res_index
154
155
156 - def are_bonded(self, atom_id1=None, atom_id2=None):
157 """Determine if two atoms are directly bonded to each other.
158
159 @keyword atom_id1: The molecule, residue, and atom identifier string of the first atom.
160 @type atom_id1: str
161 @keyword atom_id2: The molecule, residue, and atom identifier string of the second atom.
162 @type atom_id2: str
163 @return: True if the atoms are directly bonded.
164 @rtype: bool
165 """
166
167
168 for pos1 in self.atom_loop(atom_id=atom_id1, pos_flag=True):
169 for pos2 in self.atom_loop(atom_id=atom_id2, pos_flag=True):
170
171 dist = linalg.norm(pos2-pos1)
172
173
174 if dist < 2.0:
175 return True
176
177
178 return False
179
180
181 - def atom_loop(self, atom_id=None, str_id=None, model_num=None, model_num_flag=False, mol_name_flag=False, res_num_flag=False, res_name_flag=False, atom_num_flag=False, atom_name_flag=False, element_flag=False, pos_flag=False, index_flag=False, ave=False):
182 """Generator function for looping over all atoms in the Scientific Python data objects.
183
184 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be yielded.
185 @type atom_id: str
186 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number. If None, then all structures will be looped over.
187 @type str_id: str, int, or None
188 @keyword model_num: Only loop over a specific model.
189 @type model_num: int or None
190 @keyword model_num_flag: A flag which if True will cause the model number to be yielded.
191 @type model_num_flag: bool
192 @keyword mol_name_flag: A flag which if True will cause the molecule name to be yielded.
193 @type mol_name_flag: bool
194 @keyword res_num_flag: A flag which if True will cause the residue number to be yielded.
195 @type res_num_flag: bool
196 @keyword res_name_flag: A flag which if True will cause the residue name to be yielded.
197 @type res_name_flag: bool
198 @keyword atom_num_flag: A flag which if True will cause the atom number to be yielded.
199 @type atom_num_flag: bool
200 @keyword atom_name_flag: A flag which if True will cause the atom name to be yielded.
201 @type atom_name_flag: bool
202 @keyword element_flag: A flag which if True will cause the element name to be yielded.
203 @type element_flag: bool
204 @keyword pos_flag: A flag which if True will cause the atomic position to be yielded.
205 @type pos_flag: bool
206 @keyword index_flag: A flag which if True will cause the atomic index to be yielded.
207 @type index_flag: bool
208 @keyword ave: A flag which if True will result in this method returning the average atom properties across all loaded structures.
209 @type ave: bool
210 @return: A tuple of atomic information, as described in the docstring.
211 @rtype: tuple consisting of optional molecule name (str), residue number (int), residue name (str), atom number (int), atom name(str), element name (str), and atomic position (array of len 3).
212 """
213
214
215 sel_obj = Selection(atom_id)
216
217
218 for model in self.model_loop():
219
220 for mol_index in range(len(model.mol)):
221 mol = model.mol[mol_index]
222
223
224 if sel_obj and not sel_obj.contains_mol(mol.mol_name):
225 continue
226
227
228 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj):
229
230 atom_index = -1
231 for atom in res:
232
233 atom_num = atom.properties['serial_number']
234 atom_name = atom.name
235 element = atom.properties['element']
236 atom_index = atom_index + 1
237
238
239 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name):
240 continue
241
242
243 if ave:
244 pos = self.__ave_atom_pos(mol_index, res_index, atom_index)
245 else:
246 pos = atom.position.array
247
248
249 mol_name = mol.mol_name
250
251
252 if not mol.mol_name:
253 mol_name = None
254 if not res_num:
255 res_num = None
256 if not res_name:
257 res_name = None
258 if not atom_num:
259 atom_num = None
260 if not atom_name:
261 atom_name = None
262
263
264 atomic_tuple = ()
265 if model_num_flag:
266 if ave:
267 atomic_tuple = atomic_tuple + (None,)
268 else:
269 atomic_tuple = atomic_tuple + (model.num,)
270 if mol_name_flag:
271 atomic_tuple = atomic_tuple + (mol_name,)
272 if res_num_flag:
273 atomic_tuple = atomic_tuple + (res_num,)
274 if res_name_flag:
275 atomic_tuple = atomic_tuple + (res_name,)
276 if atom_num_flag:
277 atomic_tuple = atomic_tuple + (atom_num,)
278 if atom_name_flag:
279 atomic_tuple = atomic_tuple + (atom_name,)
280 if element_flag:
281 atomic_tuple = atomic_tuple + (element,)
282 if pos_flag:
283 atomic_tuple = atomic_tuple + (pos,)
284 if index_flag:
285 atomic_tuple += (atom_index,)
286
287
288 if len(atomic_tuple) == 1:
289 atomic_tuple = atomic_tuple[0]
290 yield atomic_tuple
291
292
293 if ave:
294 break
295
296
297 - def attached_atom(self, atom_id=None, attached_atom=None, model=None):
298 """Find the atom corresponding to 'attached_atom' which is bonded to the atom of 'atom_id'.
299
300 @keyword atom_id: The molecule, residue, and atom identifier string. This must
301 correspond to a single atom in the system.
302 @type atom_id: str
303 @keyword attached_atom: The name of the attached atom to return.
304 @type attached_atom: str
305 @keyword model: The model to return the positional information from. If not
306 supplied and multiple models exist, then the returned atomic
307 position will be a list of the positions in each model.
308 @type model: None or int
309 @return: A tuple of information about the bonded atom.
310 @rtype: tuple consisting of the atom number (int), atom name (str), element
311 name (str), and atomic positions for each model (list of numpy
312 arrays)
313 """
314
315
316 sel_obj = Selection(atom_id)
317
318
319 bonded_num = None
320 bonded_name = None
321 element = None
322 pos_array = []
323
324
325 for model in self.model_loop(model):
326
327 atom_found = False
328
329
330 for mol in model.mol:
331
332 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj):
333
334 for atom in res:
335
336 atom_num = atom.properties['serial_number']
337 atom_name = atom.name
338
339
340 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name):
341 continue
342
343
344 if atom_found:
345 raise RelaxError("The atom_id argument " + repr(atom_id) + " must correspond to a single atom.")
346
347
348 atom_found = True
349 atom_num_match = atom_num
350 atom_name_match = atom_name
351 mol_type_match = mol.mol_type
352 res_match = res
353
354
355 if atom_found:
356
357 bonded_num, bonded_name, element, pos = self.__find_bonded_atom(attached_atom, mol_type_match, res_match)
358
359
360 pos_array.append(array(pos, float64))
361
362
363 return bonded_num, bonded_name, element, pos_array
364
365
367 """Determine the average atom position across all models.
368
369 @param mol_index: The molecule index.
370 @type mol_index: int
371 @param res_index: The residue index.
372 @type res_index: int
373 @param atom_index: The atom index.
374 @type atom_index: int
375 """
376
377
378 pos = zeros(3, float64)
379
380
381 for model in self.model_loop():
382
383 mol = model.mol[mol_index]
384
385
386 if mol.mol_type != 'other':
387 res = mol.data.residues[res_index]
388 else:
389 res = mol.data[res_index]
390
391
392 atom = res[atom_index]
393
394
395 pos = pos + atom.position.array
396
397
398 pos = pos / len(self.structural_data)
399
400
401 return pos
402
403
404 - def bond_vectors(self, attached_atom=None, model_num=None, mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, return_name=False, return_warnings=False):
405 """Find the bond vectors between the atoms of 'attached_atom' and 'atom_id'.
406
407 @keyword attached_atom: The name of the bonded atom.
408 @type attached_atom: str
409 @keyword model_num: The model of which to return the vectors from. If not supplied
410 and multiple models exist, then vectors from all models will be
411 returned.
412 @type model_num: None or int
413 @keyword mol_name: The name of the molecule that attached_atom belongs to.
414 @type mol_name: str
415 @keyword res_num: The number of the residue that attached_atom belongs to.
416 @type res_num: str
417 @keyword res_name: The name of the residue that attached_atom belongs to.
418 @type res_name: str
419 @keyword spin_num: The number of the spin that attached_atom is attached to.
420 @type spin_num: str
421 @keyword spin_name: The name of the spin that attached_atom is attached to.
422 @type spin_name: str
423 @keyword return_name: A flag which if True will cause the name of the attached atom to
424 be returned together with the bond vectors.
425 @type return_name: bool
426 @keyword return_warnings: A flag which if True will cause warning messages to be returned.
427 @type return_warnings: bool
428 @return: The list of bond vectors for each structure.
429 @rtype: list of numpy arrays
430 """
431
432
433 sel_obj = Selection(generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name))
434
435
436 vectors = []
437 attached_name = None
438 warnings = None
439
440
441 for model in self.model_loop(model_num):
442
443 atom_found = False
444
445
446 for mol in model.mol:
447
448 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj):
449
450 for atom in res:
451
452 atom_num = atom.properties['serial_number']
453 atom_name = atom.name
454
455
456 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name):
457 continue
458
459
460 if atom_found:
461 raise RelaxError("The atom_id argument " + repr(atom_id) + " must correspond to a single atom.")
462
463
464 atom_found = True
465 pos_match = atom.position.array
466 mol_type_match = mol.mol_type
467 res_match = res
468
469
470 if atom_found:
471
472 bonded_num, bonded_name, element, pos, attached_name, warnings = self.__find_bonded_atom(attached_atom, mol_type_match, res_match)
473
474
475 if (bonded_num, bonded_name, element) == (None, None, None):
476 continue
477
478
479 vector = pos - pos_match
480
481
482 vectors.append(array(vector, float64))
483
484
485 data = (vectors,)
486 if return_name:
487 data = data + (attached_name,)
488 if return_warnings:
489 data = data + (warnings,)
490
491
492 return data
493
494
496 """Return the molecule.
497
498 Only one model can be specified.
499
500
501 @param molecule: The molecule name.
502 @type molecule: int or None
503 @keyword model: The model number.
504 @type model: int or None
505 @raises RelaxError: If the model is not specified and there is more than one model loaded.
506 @return: The MolContainer corresponding to the molecule name and model number.
507 @rtype: MolContainer instance or None
508 """
509
510
511 if model == None and self.num_models() > 1:
512 raise RelaxError("The target molecule cannot be determined as there are %s models already present." % self.num_models())
513
514
515 if not isinstance(model, int) and not model == None:
516 raise RelaxNoneIntError
517
518
519 if not len(self.structural_data):
520 return
521
522
523 for model_cont in self.model_loop(model):
524
525 for mol in model_cont.mol:
526
527 if mol.mol_name == molecule:
528 return mol
529
530
531 - def load_pdb(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, alt_loc=None, verbosity=False, merge=False):
532 """Function for loading the structures from the PDB file.
533
534 @param file_path: The full path of the file.
535 @type file_path: str
536 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are determined differently by the different parsers, but are numbered consecutively from 1. If set to None, then all molecules will be loaded.
537 @type read_mol: None, int, or list of int
538 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information.
539 @type set_mol_name: None, str, or list of str
540 @keyword read_model: The PDB model to extract from the file. If set to None, then all models will be loaded.
541 @type read_model: None, int, or list of int
542 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the PDB model numbers will be preserved, if they exist.
543 @type set_model_num: None, int, or list of int
544 @keyword alt_loc: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use.
545 @type alt_loc: str or None
546 @keyword verbosity: A flag which if True will cause messages to be printed.
547 @type verbosity: bool
548 @keyword merge: A flag which if set to True will try to merge the PDB structure into the currently loaded structures.
549 @type merge: bool
550 @return: The status of the loading of the PDB file.
551 @rtype: bool
552 """
553
554
555 if verbosity:
556 print("\nScientific Python PDB parser.")
557
558
559 if not access(file_path, F_OK):
560
561 return False
562
563
564 if alt_loc:
565 warn(RelaxWarning("The alt_loc argument will be ignored for the ScientificPython PDB parser."))
566
567
568 path, file = os.path.split(file_path)
569
570
571 if read_mol and not isinstance(read_mol, list):
572 read_mol = [read_mol]
573 if set_mol_name and not isinstance(set_mol_name, list):
574 set_mol_name = [set_mol_name]
575 if read_model and not isinstance(read_model, list):
576 read_model = [read_model]
577 if set_model_num and not isinstance(set_model_num, list):
578 set_model_num = [set_model_num]
579
580
581 model_flag = True
582 model_num = 1
583 model_load_num = 1
584 orig_model_num = []
585 mol_conts = []
586 while True:
587
588 if read_model:
589
590 if model_num > max(read_model):
591 break
592
593
594 if model_num not in read_model:
595
596 model_num = model_num + 1
597
598
599 continue
600
601
602 model = PDB.Structure(file_path, model_num)
603
604
605 if not len(model) and not len(mol_conts):
606
607 model = PDB.Structure(file_path)
608 model_flag = False
609
610
611 if not len(model):
612 raise RelaxPdbLoadError(file_path)
613
614
615 if not len(model):
616 del model
617 break
618
619
620 mol_conts.append([])
621 mol_index = 0
622 new_mol_name = []
623
624
625 mol_offset = 0
626 for i in range(len(self.structural_data)):
627 model_index = model_load_num - 1
628 if not set_model_num or (model_index <= len(set_model_num) and set_model_num[model_index] == self.structural_data[i].num):
629 mol_offset = len(self.structural_data[i].mol)
630
631
632 orig_model_num.append(model_num)
633
634
635 if verbosity:
636 print(model)
637
638
639 if hasattr(model, 'peptide_chains'):
640 for mol in model.peptide_chains:
641
642 if read_mol and mol_index+1 not in read_mol:
643 mol_index = mol_index + 1
644 continue
645
646 mol.mol_type = 'protein'
647 mol_conts[-1].append(MolContainer())
648 mol_conts[-1][-1].data = mol
649 mol_conts[-1][-1].mol_type = 'protein'
650 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file)
651 mol_index = mol_index + 1
652
653
654 if hasattr(model, 'nucleotide_chains'):
655 for mol in model.nucleotide_chains:
656
657 if read_mol and mol_index+1 not in read_mol:
658 mol_index = mol_index + 1
659 continue
660
661 mol_conts[-1].append(MolContainer())
662 mol_conts[-1][-1].data = mol
663 mol_conts[-1][-1].mol_type = 'nucleic acid'
664 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file)
665 mol_index = mol_index + 1
666
667
668 if hasattr(model, 'molecules'):
669 for key in list(model.molecules.keys()):
670
671 if read_mol and mol_index+1 not in read_mol:
672 mol_index = mol_index + 1
673 continue
674
675
676 mol_conts[-1].append(MolContainer())
677 mol_conts[-1][-1].mol_type = 'other'
678
679
680 mol_conts[-1][-1].data = []
681 for mol in model.molecules[key]:
682 mol_conts[-1][-1].data.append(mol)
683
684
685 if set_mol_name and mol_index >= len(set_mol_name):
686 raise RelaxError("The %s molecules read exceeds the number of molecule names supplied in %s." % (mol_index+1, set_mol_name))
687
688
689 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file)
690 mol_index = mol_index + 1
691
692
693 model_num = model_num + 1
694 model_load_num = model_load_num + 1
695
696
697 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=list(range(1, len(mol_conts[0])+1)), set_mol_name=new_mol_name, file_name=file, file_path=path, merge=merge)
698
699
700 return True
701
702
704 """The empty list-type container for the non-protein and non-RNA molecular information."""
705
706
707 - def from_xml(self, mol_node, file_version=1):
708 """Recreate the MolContainer from the XML molecule node.
709
710 @param mol_node: The molecule XML node.
711 @type mol_node: xml.dom.minicompat.NodeList instance
712 @keyword file_version: The relax XML version of the XML file.
713 @type file_version: int
714 """
715
716
717 xml_to_object(mol_node, self, file_version=file_version)
718
719
720 self.reload_pdb()
721
722
724 """Reload the PDB from the original file."""
725
726
727 if self.file_path:
728 file_path = self.file_path + sep + self.file_name
729 else:
730 file_path = self.file_name
731
732
733 if not access(file_path, F_OK):
734 found = False
735
736
737 for dir in sys.path:
738 if access(dir + sep + file_path, F_OK):
739
740 file_path = dir + sep + file_path
741 found = True
742 break
743
744
745 if not found:
746 warn(RelaxNoPDBFileWarning(file_path))
747 return
748
749
750 model = PDB.Structure(file_path, self.file_model)
751
752
753 print("\n" + repr(model))
754
755
756 mol_num = 1
757
758
759 if hasattr(model, 'peptide_chains'):
760 for mol in model.peptide_chains:
761
762 if mol_num == self.file_mol_num:
763 self.data = mol
764 return
765
766 mol_num = mol_num + 1
767
768
769 if hasattr(model, 'nucleotide_chains'):
770 for mol in model.nucleotide_chains:
771
772 if mol_num == self.file_mol_num:
773 self.data = mol
774 return
775
776 mol_num = mol_num + 1
777
778
779 if hasattr(model, 'molecules'):
780 for key in list(model.molecules.keys()):
781
782 if mol_num == self.file_mol_num:
783
784 self.data = []
785 for mol in model.molecules[key]:
786 self.data.append(mol)
787 return
788
789 mol_num = mol_num + 1
790
791
792 - def to_xml(self, doc, element):
793 """Create XML elements for the contents of this molecule container.
794
795 @param doc: The XML document object.
796 @type doc: xml.dom.minidom.Document instance
797 @param element: The element to add the molecule XML elements to.
798 @type element: XML element object
799 """
800
801
802 mol_element = doc.createElement('mol_cont')
803 element.appendChild(mol_element)
804
805
806 mol_element.setAttribute('desc', 'Molecule container')
807 mol_element.setAttribute('name', str(self.mol_name))
808
809
810 fill_object_contents(doc, mol_element, object=self, blacklist=['data'] + list(self.__class__.__dict__.keys()))
811