1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """Module containing the Scientific Python PDB specific structural object class."""
25
26
27 import dep_check
28
29
30 from math import sqrt
31 from numpy import array, dot, float64, zeros
32 import os
33 from os import F_OK, access, sep
34 from extern.scientific_python.IO import PDB
35 import sys
36 from warnings import warn
37
38
39 from api_base import Base_struct_API
40 from data.relax_xml import fill_object_contents, xml_to_object
41 from generic_fns import pipes, relax_re
42 from generic_fns.mol_res_spin import Selection, generate_spin_id, parse_token, tokenise
43 from relax_errors import RelaxError, RelaxPdbLoadError
44 from relax_io import file_root
45 from relax_warnings import RelaxWarning, RelaxNoAtomWarning, RelaxNoPDBFileWarning, RelaxZeroVectorWarning
46
47
49 """The Scientific Python specific data object."""
50
51
52 id = 'scientific'
53
59
60
62 """Find the atom named attached_atom directly bonded to the desired atom.
63
64 @param attached_atom: The name of the attached atom to return.
65 @type attached_atom: str
66 @param mol_type: The type of the molecule. This can be one of 'protein', 'nucleic acid',
67 or 'other'.
68 @type mol_type: str
69 @param res: The Scientific Python residue object.
70 @type res: Scientific Python residue instance
71 @return: A tuple of information about the bonded atom.
72 @rtype: tuple consisting of the atom number (int), atom name (str), element
73 name (str), and atomic position (Numeric array of len 3)
74 """
75
76
77 bonded_found = False
78
79
80 matching_list = []
81 for atom in list(res.atoms.keys()):
82 if relax_re.search(atom, attached_atom):
83 matching_list.append(atom)
84 num_attached = len(matching_list)
85
86
87 if num_attached > 1:
88 return None, None, None, None, None, 'More than one attached atom found: ' + repr(matching_list)
89
90
91 if num_attached == 0:
92 return None, None, None, None, None, "No attached atom could be found"
93
94
95 bonded = res[attached_atom]
96
97
98 bonded_num = bonded.properties['serial_number']
99 bonded_name = bonded.name
100 element = bonded.properties['element']
101 pos = bonded.position.array
102 attached_name = matching_list[0]
103
104
105 return bonded_num, bonded_name, element, pos, attached_name, None
106
107
109 """Generator function for looping over all residues in the Scientific PDB data objects.
110
111 @param mol: The individual molecule Scientific Python PDB data object.
112 @type mol: Scientific Python PDB object
113 @keyword sel_obj: The selection object.
114 @type sel_obj: instance of generic_fns.mol_res_spin.Selection
115 @return: A tuple of the Scientific Python PDB object representing a single
116 residue, the residue number, and residue name.
117 @rtype: (Scientific Python PDB object, str, str)
118 """
119
120
121 if mol.mol_type != 'other':
122
123 res_index = -1
124 for res in mol.data.residues:
125
126 if mol.mol_type == 'nucleic acid':
127 res_name = res.name[-1]
128 else:
129 res_name = res.name
130 res_num = res.number
131
132
133 res_index = res_index + 1
134
135
136 if sel_obj and not sel_obj.contains_res(res_num, res_name, mol.mol_name):
137 continue
138
139
140 yield res, res_num, res_name, res_index
141
142
143 else:
144 res_index = -1
145 for res in mol.data:
146
147 res_index = res_index + 1
148
149
150 if sel_obj and not sel_obj.contains_res(res.number, res.name, mol.mol_name):
151 continue
152
153
154 yield res, res.number, res.name, res_index
155
156
157 - def atom_loop(self, atom_id=None, str_id=None, model_num=None, model_num_flag=False, mol_name_flag=False, res_num_flag=False, res_name_flag=False, atom_num_flag=False, atom_name_flag=False, element_flag=False, pos_flag=False, ave=False):
158 """Generator function for looping over all atoms in the Scientific Python data objects.
159
160 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms
161 matching this selection will be yielded.
162 @type atom_id: str
163 @keyword str_id: The structure identifier. This can be the file name, model
164 number, or structure number. If None, then all structures will
165 be looped over.
166 @type str_id: str, int, or None
167 @keyword model_num: Only loop over a specific model.
168 @type model_num: int or None
169 @keyword model_num_flag: A flag which if True will cause the model number to be yielded.
170 @type model_num_flag: bool
171 @keyword mol_name_flag: A flag which if True will cause the molecule name to be yielded.
172 @type mol_name_flag: bool
173 @keyword res_num_flag: A flag which if True will cause the residue number to be
174 yielded.
175 @type res_num_flag: bool
176 @keyword res_name_flag: A flag which if True will cause the residue name to be yielded.
177 @type res_name_flag: bool
178 @keyword atom_num_flag: A flag which if True will cause the atom number to be yielded.
179 @type atom_num_flag: bool
180 @keyword atom_name_flag: A flag which if True will cause the atom name to be yielded.
181 @type atom_name_flag: bool
182 @keyword element_flag: A flag which if True will cause the element name to be yielded.
183 @type element_flag: bool
184 @keyword pos_flag: A flag which if True will cause the atomic position to be
185 yielded.
186 @type pos_flag: bool
187 average atom properties across all loaded structures.
188 @type ave: bool
189 @return: A tuple of atomic information, as described in the docstring.
190 @rtype: tuple consisting of optional molecule name (str), residue number
191 (int), residue name (str), atom number (int), atom name(str),
192 element name (str), and atomic position (array of len 3).
193 """
194
195
196 sel_obj = Selection(atom_id)
197
198
199 for model in self.model_loop():
200
201 for mol_index in range(len(model.mol)):
202 mol = model.mol[mol_index]
203
204
205 if sel_obj and not sel_obj.contains_mol(mol.mol_name):
206 continue
207
208
209 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj):
210
211 atom_index = -1
212 for atom in res:
213
214 atom_num = atom.properties['serial_number']
215 atom_name = atom.name
216 element = atom.properties['element']
217 atom_index = atom_index + 1
218
219
220 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name):
221 continue
222
223
224 if ave:
225 pos = self.__ave_atom_pos(mol_index, res_index, atom_index)
226 else:
227 pos = atom.position.array
228
229
230 mol_name = mol.mol_name
231
232
233 if not mol.mol_name:
234 mol_name = None
235 if not res_num:
236 res_num = None
237 if not res_name:
238 res_name = None
239 if not atom_num:
240 atom_num = None
241 if not atom_name:
242 atom_name = None
243
244
245 atomic_tuple = ()
246 if model_num_flag:
247 if ave:
248 atomic_tuple = atomic_tuple + (None,)
249 else:
250 atomic_tuple = atomic_tuple + (model.num,)
251 if mol_name_flag:
252 atomic_tuple = atomic_tuple + (mol_name,)
253 if res_num_flag:
254 atomic_tuple = atomic_tuple + (res_num,)
255 if res_name_flag:
256 atomic_tuple = atomic_tuple + (res_name,)
257 if atom_num_flag:
258 atomic_tuple = atomic_tuple + (atom_num,)
259 if atom_name_flag:
260 atomic_tuple = atomic_tuple + (atom_name,)
261 if element_flag:
262 atomic_tuple = atomic_tuple + (element,)
263 if pos_flag:
264 atomic_tuple = atomic_tuple + (pos,)
265
266
267 yield atomic_tuple
268
269
270 if ave:
271 break
272
273
274 - def attached_atom(self, atom_id=None, attached_atom=None, model=None):
275 """Find the atom corresponding to 'attached_atom' which is bonded to the atom of 'atom_id'.
276
277 @keyword atom_id: The molecule, residue, and atom identifier string. This must
278 correspond to a single atom in the system.
279 @type atom_id: str
280 @keyword attached_atom: The name of the attached atom to return.
281 @type attached_atom: str
282 @keyword model: The model to return the positional information from. If not
283 supplied and multiple models exist, then the returned atomic
284 position will be a list of the positions in each model.
285 @type model: None or int
286 @return: A tuple of information about the bonded atom.
287 @rtype: tuple consisting of the atom number (int), atom name (str), element
288 name (str), and atomic positions for each model (list of numpy
289 arrays)
290 """
291
292
293 sel_obj = Selection(atom_id)
294
295
296 bonded_num = None
297 bonded_name = None
298 element = None
299 pos_array = []
300
301
302 for model in self.model_loop(model):
303
304 atom_found = False
305
306
307 for mol in model.mol:
308
309 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj):
310
311 for atom in res:
312
313 atom_num = atom.properties['serial_number']
314 atom_name = atom.name
315
316
317 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name):
318 continue
319
320
321 if atom_found:
322 raise RelaxError("The atom_id argument " + repr(atom_id) + " must correspond to a single atom.")
323
324
325 atom_found = True
326 atom_num_match = atom_num
327 atom_name_match = atom_name
328 mol_type_match = mol.mol_type
329 res_match = res
330
331
332 if atom_found:
333
334 bonded_num, bonded_name, element, pos = self.__find_bonded_atom(attached_atom, mol_type_match, res_match)
335
336
337 pos_array.append(array(pos, float64))
338
339
340 return bonded_num, bonded_name, element, pos_array
341
342
344 """Determine the average atom position across all models.
345
346 @param mol_index: The molecule index.
347 @type mol_index: int
348 @param res_index: The residue index.
349 @type res_index: int
350 @param atom_index: The atom index.
351 @type atom_index: int
352 """
353
354
355 pos = zeros(3, float64)
356
357
358 for model in self.model_loop():
359
360 mol = model.mol[mol_index]
361
362
363 if mol.mol_type != 'other':
364 res = mol.data.residues[res_index]
365 else:
366 res = mol.data[res_index]
367
368
369 atom = res[atom_index]
370
371
372 pos = pos + atom.position.array
373
374
375 pos = pos / len(self.structural_data)
376
377
378 return pos
379
380
381 - def bond_vectors(self, attached_atom=None, model_num=None, mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, return_name=False, return_warnings=False):
382 """Find the bond vectors between the atoms of 'attached_atom' and 'atom_id'.
383
384 @keyword attached_atom: The name of the bonded atom.
385 @type attached_atom: str
386 @keyword model_num: The model of which to return the vectors from. If not supplied
387 and multiple models exist, then vectors from all models will be
388 returned.
389 @type model_num: None or int
390 @keyword mol_name: The name of the molecule that attached_atom belongs to.
391 @type mol_name: str
392 @keyword res_num: The number of the residue that attached_atom belongs to.
393 @type res_num: str
394 @keyword res_name: The name of the residue that attached_atom belongs to.
395 @type res_name: str
396 @keyword spin_num: The number of the spin that attached_atom is attached to.
397 @type spin_num: str
398 @keyword spin_name: The name of the spin that attached_atom is attached to.
399 @type spin_name: str
400 @keyword return_name: A flag which if True will cause the name of the attached atom to
401 be returned together with the bond vectors.
402 @type return_name: bool
403 @keyword return_warnings: A flag which if True will cause warning messages to be returned.
404 @type return_warnings: bool
405 @return: The list of bond vectors for each structure.
406 @rtype: list of numpy arrays
407 """
408
409
410 sel_obj = Selection(generate_spin_id(mol_name, res_num, res_name, spin_num, spin_name))
411
412
413 vectors = []
414 attached_name = None
415 warnings = None
416
417
418 for model in self.model_loop(model_num):
419
420 atom_found = False
421
422
423 for mol in model.mol:
424
425 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj):
426
427 for atom in res:
428
429 atom_num = atom.properties['serial_number']
430 atom_name = atom.name
431
432
433 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name):
434 continue
435
436
437 if atom_found:
438 raise RelaxError("The atom_id argument " + repr(atom_id) + " must correspond to a single atom.")
439
440
441 atom_found = True
442 pos_match = atom.position.array
443 mol_type_match = mol.mol_type
444 res_match = res
445
446
447 if atom_found:
448
449 bonded_num, bonded_name, element, pos, attached_name, warnings = self.__find_bonded_atom(attached_atom, mol_type_match, res_match)
450
451
452 if (bonded_num, bonded_name, element) == (None, None, None):
453 continue
454
455
456 vector = pos - pos_match
457
458
459 vectors.append(array(vector, float64))
460
461
462 data = (vectors,)
463 if return_name:
464 data = data + (attached_name,)
465 if return_warnings:
466 data = data + (warnings,)
467
468
469 return data
470
471
472 - def load_pdb(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, verbosity=False):
473 """Function for loading the structures from the PDB file.
474
475 @param file_path: The full path of the file.
476 @type file_path: str
477 @keyword read_mol: The molecule(s) to read from the file, independent of model. The
478 molecules are determined differently by the different parsers, but
479 are numbered consecutively from 1. If set to None, then all
480 molecules will be loaded.
481 @type read_mol: None, int, or list of int
482 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None,
483 then the molecules will be automatically labelled based on the file
484 name or other information.
485 @type set_mol_name: None, str, or list of str
486 @keyword read_model: The PDB model to extract from the file. If set to None, then all
487 models will be loaded.
488 @type read_model: None, int, or list of int
489 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then
490 the PDB model numbers will be preserved, if they exist.
491 @type set_model_num: None, int, or list of int
492 @keyword verbosity: A flag which if True will cause messages to be printed.
493 @type verbosity: bool
494 @return: The status of the loading of the PDB file.
495 @rtype: bool
496 """
497
498
499 if verbosity:
500 print("\nScientific Python PDB parser.")
501
502
503 if not access(file_path, F_OK):
504
505 return False
506
507
508 path, file = os.path.split(file_path)
509
510
511 if read_mol and not isinstance(read_mol, list):
512 read_mol = [read_mol]
513 if set_mol_name and not isinstance(set_mol_name, list):
514 set_mol_name = [set_mol_name]
515 if read_model and not isinstance(read_model, list):
516 read_model = [read_model]
517 if set_model_num and not isinstance(set_model_num, list):
518 set_model_num = [set_model_num]
519
520
521 model_flag = True
522 model_num = 1
523 model_load_num = 1
524 orig_model_num = []
525 mol_conts = []
526 while True:
527
528 if read_model:
529
530 if model_num > max(read_model):
531 break
532
533
534 if model_num not in read_model:
535
536 model_num = model_num + 1
537
538
539 continue
540
541
542 model = PDB.Structure(file_path, model_num)
543
544
545 if not len(model) and not len(mol_conts):
546
547 model = PDB.Structure(file_path)
548 model_flag = False
549
550
551 if not len(model):
552 raise RelaxPdbLoadError(file_path)
553
554
555 if not len(model):
556 del model
557 break
558
559
560 mol_conts.append([])
561 mol_index = 0
562 new_mol_name = []
563
564
565 mol_offset = 0
566 for i in range(len(self.structural_data)):
567 model_index = model_load_num - 1
568 if not set_model_num or (model_index <= len(set_model_num) and set_model_num[model_index] == self.structural_data[i].num):
569 mol_offset = len(self.structural_data[i].mol)
570
571
572 orig_model_num.append(model_num)
573
574
575 if verbosity:
576 print(model)
577
578
579 if hasattr(model, 'peptide_chains'):
580 for mol in model.peptide_chains:
581
582 if read_mol and mol_index+1 not in read_mol:
583 mol_index = mol_index + 1
584 continue
585
586 mol.mol_type = 'protein'
587 mol_conts[-1].append(MolContainer())
588 mol_conts[-1][-1].data = mol
589 mol_conts[-1][-1].mol_type = 'protein'
590 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file)
591 mol_index = mol_index + 1
592
593
594 if hasattr(model, 'nucleotide_chains'):
595 for mol in model.nucleotide_chains:
596
597 if read_mol and mol_index+1 not in read_mol:
598 mol_index = mol_index + 1
599 continue
600
601 mol_conts[-1].append(MolContainer())
602 mol_conts[-1][-1].data = mol
603 mol_conts[-1][-1].mol_type = 'nucleic acid'
604 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file)
605 mol_index = mol_index + 1
606
607
608 if hasattr(model, 'molecules'):
609 for key in list(model.molecules.keys()):
610
611 if read_mol and mol_index+1 not in read_mol:
612 mol_index = mol_index + 1
613 continue
614
615
616 mol_conts[-1].append(MolContainer())
617 mol_conts[-1][-1].mol_type = 'other'
618
619
620 mol_conts[-1][-1].data = []
621 for mol in model.molecules[key]:
622 mol_conts[-1][-1].data.append(mol)
623
624
625 if set_mol_name and mol_index >= len(set_mol_name):
626 raise RelaxError("The %s molecules read exceeds the number of molecule names supplied in %s." % (mol_index+1, set_mol_name))
627
628
629 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file)
630 mol_index = mol_index + 1
631
632
633 model_num = model_num + 1
634 model_load_num = model_load_num + 1
635
636
637 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=range(1, len(mol_conts[0])+1), set_mol_name=new_mol_name, file_name=file, file_path=path)
638
639
640 return True
641
642
644 """The empty list-type container for the non-protein and non-RNA molecular information."""
645
646
647 - def from_xml(self, mol_node, file_version=1):
648 """Recreate the MolContainer from the XML molecule node.
649
650 @param mol_node: The molecule XML node.
651 @type mol_node: xml.dom.minicompat.NodeList instance
652 @keyword file_version: The relax XML version of the XML file.
653 @type file_version: int
654 """
655
656
657 xml_to_object(mol_node, self, file_version=file_version)
658
659
660 self.reload_pdb()
661
662
664 """Reload the PDB from the original file."""
665
666
667 if self.file_path:
668 file_path = self.file_path + sep + self.file_name
669 else:
670 file_path = self.file_name
671
672
673 if not access(file_path, F_OK):
674 found = False
675
676
677 for dir in sys.path:
678 if access(dir + sep + file_path, F_OK):
679
680 file_path = dir + sep + file_path
681 found = True
682 break
683
684
685 if not found:
686 warn(RelaxNoPDBFileWarning(file_path))
687 return
688
689
690 model = PDB.Structure(file_path, self.file_model)
691
692
693 print(("\n" + repr(model)))
694
695
696 mol_num = 1
697
698
699 if hasattr(model, 'peptide_chains'):
700 for mol in model.peptide_chains:
701
702 if mol_num == self.file_mol_num:
703 self.data = mol
704 return
705
706 mol_num = mol_num + 1
707
708
709 if hasattr(model, 'nucleotide_chains'):
710 for mol in model.nucleotide_chains:
711
712 if mol_num == self.file_mol_num:
713 self.data = mol
714 return
715
716 mol_num = mol_num + 1
717
718
719 if hasattr(model, 'molecules'):
720 for key in list(model.molecules.keys()):
721
722 if mol_num == self.file_mol_num:
723
724 self.data = []
725 for mol in model.molecules[key]:
726 self.data.append(mol)
727 return
728
729 mol_num = mol_num + 1
730
731
732 - def to_xml(self, doc, element):
733 """Create XML elements for the contents of this molecule container.
734
735 @param doc: The XML document object.
736 @type doc: xml.dom.minidom.Document instance
737 @param element: The element to add the molecule XML elements to.
738 @type element: XML element object
739 """
740
741
742 mol_element = doc.createElement('mol_cont')
743 element.appendChild(mol_element)
744
745
746 mol_element.setAttribute('desc', 'Molecule container')
747 mol_element.setAttribute('name', str(self.mol_name))
748
749
750 fill_object_contents(doc, mol_element, object=self, blacklist=['data'] + list(self.__class__.__dict__.keys()))
751