1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
 22   
 23   
 24  """Module containing the Scientific Python PDB specific structural object class.""" 
 25   
 26   
 27  import dep_check 
 28   
 29   
 30  from math import sqrt 
 31  from numpy import array, dot, float64, zeros 
 32  import os 
 33  from os import F_OK, access, sep 
 34  from extern.scientific_python.IO import PDB 
 35  import sys 
 36  from warnings import warn 
 37   
 38   
 39  from api_base import Base_struct_API 
 40  from data.relax_xml import fill_object_contents, xml_to_object 
 41  from generic_fns import pipes, relax_re 
 42  from generic_fns.mol_res_spin import Selection, generate_spin_id, parse_token, tokenise 
 43  from relax_errors import RelaxError, RelaxPdbLoadError 
 44  from relax_io import file_root 
 45  from relax_warnings import RelaxWarning, RelaxNoAtomWarning, RelaxNoPDBFileWarning, RelaxZeroVectorWarning 
 46   
 47   
 49      """The Scientific Python specific data object.""" 
 50   
 51       
 52      id = 'scientific' 
 53   
 59   
 60   
 62          """Find the atom named attached_atom directly bonded to the desired atom. 
 63   
 64          @param attached_atom:   The name of the attached atom to return. 
 65          @type attached_atom:    str 
 66          @param mol_type:        The type of the molecule.  This can be one of 'protein', 'nucleic acid', 
 67                                  or 'other'. 
 68          @type mol_type:         str 
 69          @param res:             The Scientific Python residue object. 
 70          @type res:              Scientific Python residue instance 
 71          @return:                A tuple of information about the bonded atom. 
 72          @rtype:                 tuple consisting of the atom number (int), atom name (str), element 
 73                                  name (str), and atomic position (Numeric array of len 3) 
 74          """ 
 75   
 76           
 77          bonded_found = False 
 78   
 79           
 80          matching_list = [] 
 81          for atom in list(res.atoms.keys()): 
 82              if relax_re.search(atom, attached_atom): 
 83                  matching_list.append(atom) 
 84          num_attached = len(matching_list) 
 85   
 86           
 87          if num_attached > 1: 
 88              return None, None, None, None, None, 'More than one attached atom found: ' + repr(matching_list) 
 89   
 90           
 91          if num_attached == 0: 
 92              return None, None, None, None, None, "No attached atom could be found" 
 93   
 94           
 95          bonded = res[attached_atom] 
 96   
 97           
 98          bonded_num = bonded.properties['serial_number'] 
 99          bonded_name = bonded.name 
100          element = bonded.properties['element'] 
101          pos = bonded.position.array 
102          attached_name = matching_list[0] 
103   
104           
105          return bonded_num, bonded_name, element, pos, attached_name, None 
 106   
107   
109          """Generator function for looping over all residues in the Scientific PDB data objects. 
110   
111          @param mol:         The individual molecule Scientific Python PDB data object. 
112          @type mol:          Scientific Python PDB object 
113          @keyword sel_obj:   The selection object. 
114          @type sel_obj:      instance of generic_fns.mol_res_spin.Selection 
115          @return:            A tuple of the Scientific Python PDB object representing a single 
116                              residue, the residue number, and residue name. 
117          @rtype:             (Scientific Python PDB object, str, str) 
118          """ 
119   
120           
121          if mol.mol_type != 'other': 
122               
123              res_index = -1 
124              for res in mol.data.residues: 
125                   
126                  if mol.mol_type == 'nucleic acid': 
127                      res_name = res.name[-1] 
128                  else: 
129                      res_name = res.name 
130                  res_num = res.number 
131   
132                   
133                  res_index = res_index + 1 
134   
135                   
136                  if sel_obj and not sel_obj.contains_res(res_num, res_name, mol.mol_name): 
137                      continue 
138   
139                   
140                  yield res, res_num, res_name, res_index 
141   
142           
143          else: 
144              res_index = -1 
145              for res in mol.data: 
146                   
147                  res_index = res_index + 1 
148   
149                   
150                  if sel_obj and not sel_obj.contains_res(res.number, res.name, mol.mol_name): 
151                      continue 
152   
153                   
154                  yield res, res.number, res.name, res_index 
 155   
156   
157 -    def atom_loop(self, atom_id=None, str_id=None, model_num=None, model_num_flag=False, mol_name_flag=False, res_num_flag=False, res_name_flag=False, atom_num_flag=False, atom_name_flag=False, element_flag=False, pos_flag=False, ave=False): 
 158          """Generator function for looping over all atoms in the Scientific Python data objects. 
159   
160          @keyword atom_id:           The molecule, residue, and atom identifier string.  Only atoms 
161                                      matching this selection will be yielded. 
162          @type atom_id:              str 
163          @keyword str_id:            The structure identifier.  This can be the file name, model 
164                                      number, or structure number.  If None, then all structures will 
165                                      be looped over. 
166          @type str_id:               str, int, or None 
167          @keyword model_num:         Only loop over a specific model. 
168          @type model_num:            int or None 
169          @keyword model_num_flag:    A flag which if True will cause the model number to be yielded. 
170          @type model_num_flag:       bool 
171          @keyword mol_name_flag:     A flag which if True will cause the molecule name to be yielded. 
172          @type mol_name_flag:        bool 
173          @keyword res_num_flag:      A flag which if True will cause the residue number to be 
174                                      yielded. 
175          @type res_num_flag:         bool 
176          @keyword res_name_flag:     A flag which if True will cause the residue name to be yielded. 
177          @type res_name_flag:        bool 
178          @keyword atom_num_flag:     A flag which if True will cause the atom number to be yielded. 
179          @type atom_num_flag:        bool 
180          @keyword atom_name_flag:    A flag which if True will cause the atom name to be yielded. 
181          @type atom_name_flag:       bool 
182          @keyword element_flag:      A flag which if True will cause the element name to be yielded. 
183          @type element_flag:         bool 
184          @keyword pos_flag:          A flag which if True will cause the atomic position to be 
185                                      yielded. 
186          @type pos_flag:             bool 
187                                      average atom properties across all loaded structures. 
188          @type ave:                  bool 
189          @return:                    A tuple of atomic information, as described in the docstring. 
190          @rtype:                     tuple consisting of optional molecule name (str), residue number 
191                                      (int), residue name (str), atom number (int), atom name(str), 
192                                      element name (str), and atomic position (array of len 3). 
193          """ 
194   
195           
196          sel_obj = Selection(atom_id) 
197   
198           
199          for model in self.model_loop(): 
200               
201              for mol_index in range(len(model.mol)): 
202                  mol = model.mol[mol_index] 
203   
204                   
205                  if sel_obj and not sel_obj.contains_mol(mol.mol_name): 
206                      continue 
207   
208                   
209                  for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj): 
210                       
211                      atom_index = -1 
212                      for atom in res: 
213                           
214                          atom_num = atom.properties['serial_number'] 
215                          atom_name = atom.name 
216                          element = atom.properties['element'] 
217                          atom_index = atom_index + 1 
218   
219                           
220                          if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name): 
221                              continue 
222   
223                           
224                          if ave: 
225                              pos = self.__ave_atom_pos(mol_index, res_index, atom_index) 
226                          else: 
227                              pos = atom.position.array 
228   
229                           
230                          mol_name = mol.mol_name 
231   
232                           
233                          if not mol.mol_name: 
234                              mol_name = None 
235                          if not res_num: 
236                              res_num = None 
237                          if not res_name: 
238                              res_name = None 
239                          if not atom_num: 
240                              atom_num = None 
241                          if not atom_name: 
242                              atom_name = None 
243   
244                           
245                          atomic_tuple = () 
246                          if model_num_flag: 
247                              if ave: 
248                                  atomic_tuple = atomic_tuple + (None,) 
249                              else: 
250                                  atomic_tuple = atomic_tuple + (model.num,) 
251                          if mol_name_flag: 
252                              atomic_tuple = atomic_tuple + (mol_name,) 
253                          if res_num_flag: 
254                              atomic_tuple = atomic_tuple + (res_num,) 
255                          if res_name_flag: 
256                              atomic_tuple = atomic_tuple + (res_name,) 
257                          if atom_num_flag: 
258                              atomic_tuple = atomic_tuple + (atom_num,) 
259                          if atom_name_flag: 
260                              atomic_tuple = atomic_tuple + (atom_name,) 
261                          if element_flag: 
262                              atomic_tuple = atomic_tuple + (element,) 
263                          if pos_flag: 
264                              atomic_tuple = atomic_tuple + (pos,) 
265   
266                           
267                          yield atomic_tuple 
268   
269               
270              if ave: 
271                  break 
 272   
273   
274 -    def attached_atom(self, atom_id=None, attached_atom=None, model=None): 
 275          """Find the atom corresponding to 'attached_atom' which is bonded to the atom of 'atom_id'. 
276   
277          @keyword atom_id:       The molecule, residue, and atom identifier string.  This must 
278                                  correspond to a single atom in the system. 
279          @type atom_id:          str 
280          @keyword attached_atom: The name of the attached atom to return. 
281          @type attached_atom:    str 
282          @keyword model:         The model to return the positional information from.  If not 
283                                  supplied and multiple models exist, then the returned atomic 
284                                  position will be a list of the positions in each model. 
285          @type model:            None or int 
286          @return:                A tuple of information about the bonded atom. 
287          @rtype:                 tuple consisting of the atom number (int), atom name (str), element 
288                                  name (str), and atomic positions for each model (list of numpy 
289                                  arrays) 
290          """ 
291   
292           
293          sel_obj = Selection(atom_id) 
294   
295           
296          bonded_num = None 
297          bonded_name = None 
298          element = None 
299          pos_array = [] 
300   
301           
302          for model in self.model_loop(model): 
303               
304              atom_found = False 
305   
306               
307              for mol in model.mol: 
308                   
309                  for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj): 
310                       
311                      for atom in res: 
312                           
313                          atom_num = atom.properties['serial_number'] 
314                          atom_name = atom.name 
315   
316                           
317                          if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name): 
318                              continue 
319   
320                           
321                          if atom_found: 
322                              raise RelaxError("The atom_id argument " + repr(atom_id) + " must correspond to a single atom.") 
323   
324                           
325                          atom_found = True 
326                          atom_num_match = atom_num 
327                          atom_name_match = atom_name 
328                          mol_type_match = mol.mol_type 
329                          res_match = res 
330   
331               
332              if atom_found: 
333                   
334                  bonded_num, bonded_name, element, pos = self.__find_bonded_atom(attached_atom, mol_type_match, res_match) 
335   
336                   
337                  pos_array.append(array(pos, float64)) 
338   
339           
340          return bonded_num, bonded_name, element, pos_array 
 341   
342   
344          """Determine the average atom position across all models. 
345   
346          @param mol_index:   The molecule index. 
347          @type mol_index:    int 
348          @param res_index:   The residue index. 
349          @type res_index:    int 
350          @param atom_index:  The atom index. 
351          @type atom_index:   int 
352          """ 
353   
354           
355          pos = zeros(3, float64) 
356   
357           
358          for model in self.model_loop(): 
359               
360              mol = model.mol[mol_index] 
361   
362               
363              if mol.mol_type != 'other': 
364                  res = mol.data.residues[res_index] 
365              else: 
366                  res = mol.data[res_index] 
367   
368               
369              atom = res[atom_index] 
370   
371               
372              pos = pos + atom.position.array 
373   
374           
375          pos = pos / len(self.structural_data) 
376   
377           
378          return pos 
 379   
380   
381 -    def bond_vectors(self, attached_atom=None, model_num=None, mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, return_name=False, return_warnings=False): 
 382          """Find the bond vectors between the atoms of 'attached_atom' and 'atom_id'. 
383   
384          @keyword attached_atom:     The name of the bonded atom. 
385          @type attached_atom:        str 
386          @keyword model_num:         The model of which to return the vectors from.  If not supplied 
387                                      and multiple models exist, then vectors from all models will be 
388                                      returned. 
389          @type model_num:            None or int 
390          @keyword mol_name:          The name of the molecule that attached_atom belongs to. 
391          @type mol_name:             str 
392          @keyword res_num:           The number of the residue that attached_atom belongs to. 
393          @type res_num:              str 
394          @keyword res_name:          The name of the residue that attached_atom belongs to. 
395          @type res_name:             str 
396          @keyword spin_num:          The number of the spin that attached_atom is attached to. 
397          @type spin_num:             str 
398          @keyword spin_name:         The name of the spin that attached_atom is attached to. 
399          @type spin_name:            str 
400          @keyword return_name:       A flag which if True will cause the name of the attached atom to 
401                                      be returned together with the bond vectors. 
402          @type return_name:          bool 
403          @keyword return_warnings:   A flag which if True will cause warning messages to be returned. 
404          @type return_warnings:      bool 
405          @return:                    The list of bond vectors for each structure. 
406          @rtype:                     list of numpy arrays 
407          """ 
408   
409           
410          sel_obj = Selection(generate_spin_id(mol_name, res_num, res_name, spin_num, spin_name)) 
411   
412           
413          vectors = [] 
414          attached_name = None 
415          warnings = None 
416   
417           
418          for model in self.model_loop(model_num): 
419               
420              atom_found = False 
421   
422               
423              for mol in model.mol: 
424                   
425                  for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj): 
426                       
427                      for atom in res: 
428                           
429                          atom_num = atom.properties['serial_number'] 
430                          atom_name = atom.name 
431   
432                           
433                          if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name): 
434                              continue 
435   
436                           
437                          if atom_found: 
438                              raise RelaxError("The atom_id argument " + repr(atom_id) + " must correspond to a single atom.") 
439   
440                           
441                          atom_found = True 
442                          pos_match = atom.position.array 
443                          mol_type_match = mol.mol_type 
444                          res_match = res 
445   
446               
447              if atom_found: 
448                   
449                  bonded_num, bonded_name, element, pos, attached_name, warnings = self.__find_bonded_atom(attached_atom, mol_type_match, res_match) 
450   
451                   
452                  if (bonded_num, bonded_name, element) == (None, None, None): 
453                      continue 
454   
455                   
456                  vector = pos - pos_match 
457   
458                   
459                  vectors.append(array(vector, float64)) 
460   
461           
462          data = (vectors,) 
463          if return_name: 
464              data = data + (attached_name,) 
465          if return_warnings: 
466              data = data + (warnings,) 
467   
468           
469          return data 
 470   
471   
472 -    def load_pdb(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, verbosity=False): 
 473          """Function for loading the structures from the PDB file. 
474   
475          @param file_path:       The full path of the file. 
476          @type file_path:        str 
477          @keyword read_mol:      The molecule(s) to read from the file, independent of model.  The 
478                                  molecules are determined differently by the different parsers, but 
479                                  are numbered consecutively from 1.  If set to None, then all 
480                                  molecules will be loaded. 
481          @type read_mol:         None, int, or list of int 
482          @keyword set_mol_name:  Set the names of the molecules which are loaded.  If set to None, 
483                                  then the molecules will be automatically labelled based on the file 
484                                  name or other information. 
485          @type set_mol_name:     None, str, or list of str 
486          @keyword read_model:    The PDB model to extract from the file.  If set to None, then all 
487                                  models will be loaded. 
488          @type read_model:       None, int, or list of int 
489          @keyword set_model_num: Set the model number of the loaded molecule.  If set to None, then 
490                                  the PDB model numbers will be preserved, if they exist. 
491          @type set_model_num:    None, int, or list of int 
492          @keyword verbosity:     A flag which if True will cause messages to be printed. 
493          @type verbosity:        bool 
494          @return:                The status of the loading of the PDB file. 
495          @rtype:                 bool 
496          """ 
497   
498           
499          if verbosity: 
500              print("\nScientific Python PDB parser.") 
501   
502           
503          if not access(file_path, F_OK): 
504               
505              return False 
506   
507           
508          path, file = os.path.split(file_path) 
509   
510           
511          if read_mol and not isinstance(read_mol, list): 
512              read_mol = [read_mol] 
513          if set_mol_name and not isinstance(set_mol_name, list): 
514              set_mol_name = [set_mol_name] 
515          if read_model and not isinstance(read_model, list): 
516              read_model = [read_model] 
517          if set_model_num and not isinstance(set_model_num, list): 
518              set_model_num = [set_model_num] 
519   
520           
521          model_flag = True 
522          model_num = 1 
523          model_load_num = 1 
524          orig_model_num = [] 
525          mol_conts = [] 
526          while True: 
527               
528              if read_model: 
529                   
530                  if model_num > max(read_model): 
531                      break 
532   
533                   
534                  if model_num not in read_model: 
535                       
536                      model_num = model_num + 1 
537   
538                       
539                      continue 
540   
541               
542              model = PDB.Structure(file_path, model_num) 
543   
544               
545              if not len(model) and not len(mol_conts): 
546                   
547                  model = PDB.Structure(file_path) 
548                  model_flag = False 
549   
550                   
551                  if not len(model): 
552                      raise RelaxPdbLoadError(file_path) 
553   
554               
555              if not len(model): 
556                  del model 
557                  break 
558   
559               
560              mol_conts.append([]) 
561              mol_index = 0 
562              new_mol_name = [] 
563   
564               
565              mol_offset = 0 
566              for i in range(len(self.structural_data)): 
567                  model_index = model_load_num - 1 
568                  if not set_model_num or (model_index <= len(set_model_num) and set_model_num[model_index] == self.structural_data[i].num): 
569                      mol_offset = len(self.structural_data[i].mol) 
570   
571               
572              orig_model_num.append(model_num) 
573   
574               
575              if verbosity: 
576                  print(model) 
577   
578               
579              if hasattr(model, 'peptide_chains'): 
580                  for mol in model.peptide_chains: 
581                       
582                      if read_mol and mol_index+1 not in read_mol: 
583                          mol_index = mol_index + 1 
584                          continue 
585   
586                      mol.mol_type = 'protein' 
587                      mol_conts[-1].append(MolContainer()) 
588                      mol_conts[-1][-1].data = mol 
589                      mol_conts[-1][-1].mol_type = 'protein' 
590                      self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file) 
591                      mol_index = mol_index + 1 
592   
593               
594              if hasattr(model, 'nucleotide_chains'): 
595                  for mol in model.nucleotide_chains: 
596                       
597                      if read_mol and mol_index+1 not in read_mol: 
598                          mol_index = mol_index + 1 
599                          continue 
600   
601                      mol_conts[-1].append(MolContainer()) 
602                      mol_conts[-1][-1].data = mol 
603                      mol_conts[-1][-1].mol_type = 'nucleic acid' 
604                      self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file) 
605                      mol_index = mol_index + 1 
606   
607               
608              if hasattr(model, 'molecules'): 
609                  for key in list(model.molecules.keys()): 
610                       
611                      if read_mol and mol_index+1 not in read_mol: 
612                          mol_index = mol_index + 1 
613                          continue 
614   
615                       
616                      mol_conts[-1].append(MolContainer()) 
617                      mol_conts[-1][-1].mol_type = 'other' 
618   
619                       
620                      mol_conts[-1][-1].data = [] 
621                      for mol in model.molecules[key]: 
622                          mol_conts[-1][-1].data.append(mol) 
623   
624                       
625                      if set_mol_name and mol_index >= len(set_mol_name): 
626                          raise RelaxError("The %s molecules read exceeds the number of molecule names supplied in %s." % (mol_index+1, set_mol_name)) 
627   
628                       
629                      self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file) 
630                      mol_index = mol_index + 1 
631   
632               
633              model_num = model_num + 1 
634              model_load_num = model_load_num + 1 
635   
636           
637          self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=range(1, len(mol_conts[0])+1), set_mol_name=new_mol_name, file_name=file, file_path=path) 
638   
639           
640          return True 
  641   
642   
644      """The empty list-type container for the non-protein and non-RNA molecular information.""" 
645   
646   
648          """Recreate the MolContainer from the XML molecule node. 
649   
650          @param mol_node:    The molecule XML node. 
651          @type mol_node:     xml.dom.minicompat.NodeList instance 
652          """ 
653   
654           
655          xml_to_object(mol_node, self) 
656   
657           
658          self.reload_pdb() 
 659   
660   
662          """Reload the PDB from the original file.""" 
663   
664           
665          if self.file_path: 
666              file_path = self.file_path + sep + self.file_name 
667          else: 
668              file_path = self.file_name 
669   
670           
671          if not access(file_path, F_OK): 
672              found = False 
673   
674               
675              for dir in sys.path: 
676                  if access(dir + sep + file_path, F_OK): 
677                       
678                      file_path = dir + sep + file_path 
679                      found = True 
680                      break 
681   
682               
683              if not found: 
684                  warn(RelaxNoPDBFileWarning(file_path)) 
685                  return 
686   
687           
688          model = PDB.Structure(file_path, self.file_model) 
689   
690           
691          print(("\n" + repr(model))) 
692   
693           
694          mol_num = 1 
695   
696           
697          if hasattr(model, 'peptide_chains'): 
698              for mol in model.peptide_chains: 
699                   
700                  if mol_num == self.file_mol_num: 
701                      self.data = mol 
702                      return 
703   
704                  mol_num = mol_num + 1 
705   
706           
707          if hasattr(model, 'nucleotide_chains'): 
708              for mol in model.nucleotide_chains: 
709                   
710                  if mol_num == self.file_mol_num: 
711                      self.data = mol 
712                      return 
713   
714                  mol_num = mol_num + 1 
715   
716           
717          if hasattr(model, 'molecules'): 
718              for key in list(model.molecules.keys()): 
719                   
720                  if mol_num == self.file_mol_num: 
721                       
722                      self.data = [] 
723                      for mol in model.molecules[key]: 
724                          self.data.append(mol) 
725                      return 
726   
727                  mol_num = mol_num + 1 
 728   
729   
730 -    def to_xml(self, doc, element): 
 731          """Create XML elements for the contents of this molecule container. 
732   
733          @param doc:     The XML document object. 
734          @type doc:      xml.dom.minidom.Document instance 
735          @param element: The element to add the molecule XML elements to. 
736          @type element:  XML element object 
737          """ 
738   
739           
740          mol_element = doc.createElement('mol_cont') 
741          element.appendChild(mol_element) 
742   
743           
744          mol_element.setAttribute('desc', 'Molecule container') 
745          mol_element.setAttribute('name', str(self.mol_name)) 
746   
747           
748          fill_object_contents(doc, mol_element, object=self, blacklist=['data'] + list(self.__class__.__dict__.keys())) 
  749