generic_fns.structure.scientific

48 """The Scientific Python specific data object.""" 49 50 # Identification string. 51 id = 'scientific' 52

53 - def __init__(self):

54 """Initialise the class.""" 55 56 # Execute the base class __init__() method. 57 Base_struct_API.__init__(self)

58 59

60 - def __find_bonded_atom(self, attached_atom, mol_type, res):

61 """Find the atom named attached_atom directly bonded to the desired atom. 62 63 @param attached_atom: The name of the attached atom to return. 64 @type attached_atom: str 65 @param mol_type: The type of the molecule. This can be one of 'protein', 'nucleic acid', 66 or 'other'. 67 @type mol_type: str 68 @param res: The Scientific Python residue object. 69 @type res: Scientific Python residue instance 70 @return: A tuple of information about the bonded atom. 71 @rtype: tuple consisting of the atom number (int), atom name (str), element 72 name (str), and atomic position (Numeric array of len 3) 73 """ 74 75 # Init. 76 bonded_found = False 77 78 # The find the attached atom in the residue (FIXME). 79 matching_list = [] 80 for atom in list(res.atoms.keys()): 81 if relax_re.search(atom, attached_atom): 82 matching_list.append(atom) 83 num_attached = len(matching_list) 84 85 # Problem. 86 if num_attached > 1: 87 return None, None, None, None, None, 'More than one attached atom found: ' + repr(matching_list) 88 89 # No attached atoms. 90 if num_attached == 0: 91 return None, None, None, None, None, "No attached atom could be found" 92 93 # The bonded atom object. 94 bonded = res[attached_atom] 95 96 # The bonded atom info. 97 bonded_num = bonded.properties['serial_number'] 98 bonded_name = bonded.name 99 element = bonded.properties['element'] 100 pos = bonded.position.array 101 attached_name = matching_list[0] 102 103 # Return the information. 104 return bonded_num, bonded_name, element, pos, attached_name, None

105 106

107 - def __residue_loop(self, mol, sel_obj=None):

108 """Generator function for looping over all residues in the Scientific PDB data objects. 109 110 @param mol: The individual molecule Scientific Python PDB data object. 111 @type mol: Scientific Python PDB object 112 @keyword sel_obj: The selection object. 113 @type sel_obj: instance of generic_fns.mol_res_spin.Selection 114 @return: A tuple of the Scientific Python PDB object representing a single 115 residue, the residue number, and residue name. 116 @rtype: (Scientific Python PDB object, str, str) 117 """ 118 119 # Proteins, RNA, and DNA. 120 if mol.mol_type != 'other': 121 # Loop over the residues of the protein in the PDB file. 122 res_index = -1 123 for res in mol.data.residues: 124 # Residue number and name. 125 if mol.mol_type == 'nucleic acid': 126 res_name = res.name[-1] 127 else: 128 res_name = res.name 129 res_num = res.number 130 131 # Residue index. 132 res_index = res_index + 1 133 134 # Skip non-matching residues. 135 if sel_obj and not sel_obj.contains_res(res_num, res_name, mol.mol_name): 136 continue 137 138 # Yield the residue info. 139 yield res, res_num, res_name, res_index 140 141 # Other molecules. 142 else: 143 res_index = -1 144 for res in mol.data: 145 # Residue index. 146 res_index = res_index + 1 147 148 # Skip non-matching residues. 149 if sel_obj and not sel_obj.contains_res(res.number, res.name, mol.mol_name): 150 continue 151 152 # Yield the residue info. 153 yield res, res.number, res.name, res_index

154 155

156 - def are_bonded(self, atom_id1=None, atom_id2=None):

157 """Determine if two atoms are directly bonded to each other. 158 159 @keyword atom_id1: The molecule, residue, and atom identifier string of the first atom. 160 @type atom_id1: str 161 @keyword atom_id2: The molecule, residue, and atom identifier string of the second atom. 162 @type atom_id2: str 163 @return: True if the atoms are directly bonded. 164 @rtype: bool 165 """ 166 167 # Loop over the atoms. 168 for pos1 in self.atom_loop(atom_id=atom_id1, pos_flag=True): 169 for pos2 in self.atom_loop(atom_id=atom_id2, pos_flag=True): 170 # The interatomic distance. 171 dist = linalg.norm(pos2-pos1) 172 173 # The atom is within the radius of 2.0 Angstrom. 174 if dist < 2.0: 175 return True 176 177 # Not bonded. 178 return False

179 180

181 - def atom_loop(self, atom_id=None, str_id=None, model_num=None, model_num_flag=False, mol_name_flag=False, res_num_flag=False, res_name_flag=False, atom_num_flag=False, atom_name_flag=False, element_flag=False, pos_flag=False, index_flag=False, ave=False):

182 """Generator function for looping over all atoms in the Scientific Python data objects. 183 184 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be yielded. 185 @type atom_id: str 186 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number. If None, then all structures will be looped over. 187 @type str_id: str, int, or None 188 @keyword model_num: Only loop over a specific model. 189 @type model_num: int or None 190 @keyword model_num_flag: A flag which if True will cause the model number to be yielded. 191 @type model_num_flag: bool 192 @keyword mol_name_flag: A flag which if True will cause the molecule name to be yielded. 193 @type mol_name_flag: bool 194 @keyword res_num_flag: A flag which if True will cause the residue number to be yielded. 195 @type res_num_flag: bool 196 @keyword res_name_flag: A flag which if True will cause the residue name to be yielded. 197 @type res_name_flag: bool 198 @keyword atom_num_flag: A flag which if True will cause the atom number to be yielded. 199 @type atom_num_flag: bool 200 @keyword atom_name_flag: A flag which if True will cause the atom name to be yielded. 201 @type atom_name_flag: bool 202 @keyword element_flag: A flag which if True will cause the element name to be yielded. 203 @type element_flag: bool 204 @keyword pos_flag: A flag which if True will cause the atomic position to be yielded. 205 @type pos_flag: bool 206 @keyword index_flag: A flag which if True will cause the atomic index to be yielded. 207 @type index_flag: bool 208 @keyword ave: A flag which if True will result in this method returning the average atom properties across all loaded structures. 209 @type ave: bool 210 @return: A tuple of atomic information, as described in the docstring. 211 @rtype: tuple consisting of optional molecule name (str), residue number (int), residue name (str), atom number (int), atom name(str), element name (str), and atomic position (array of len 3). 212 """ 213 214 # Generate the selection object. 215 sel_obj = Selection(atom_id) 216 217 # Model loop. 218 for model in self.model_loop(): 219 # Loop over the molecules. 220 for mol_index in range(len(model.mol)): 221 mol = model.mol[mol_index] 222 223 # Skip non-matching molecules. 224 if sel_obj and not sel_obj.contains_mol(mol.mol_name): 225 continue 226 227 # Loop over the residues. 228 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj): 229 # Loop over the atoms of the residue. 230 atom_index = -1 231 for atom in res: 232 # Atom number, name, index, and element type. 233 atom_num = atom.properties['serial_number'] 234 atom_name = atom.name 235 element = atom.properties['element'] 236 atom_index = atom_index + 1 237 238 # Skip non-matching atoms. 239 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name): 240 continue 241 242 # The atom position. 243 if ave: 244 pos = self.__ave_atom_pos(mol_index, res_index, atom_index) 245 else: 246 pos = atom.position.array 247 248 # The molecule name. 249 mol_name = mol.mol_name 250 251 # Replace empty variables with None. 252 if not mol.mol_name: 253 mol_name = None 254 if not res_num: 255 res_num = None 256 if not res_name: 257 res_name = None 258 if not atom_num: 259 atom_num = None 260 if not atom_name: 261 atom_name = None 262 263 # Build the tuple to be yielded. 264 atomic_tuple = () 265 if model_num_flag: 266 if ave: 267 atomic_tuple = atomic_tuple + (None,) 268 else: 269 atomic_tuple = atomic_tuple + (model.num,) 270 if mol_name_flag: 271 atomic_tuple = atomic_tuple + (mol_name,) 272 if res_num_flag: 273 atomic_tuple = atomic_tuple + (res_num,) 274 if res_name_flag: 275 atomic_tuple = atomic_tuple + (res_name,) 276 if atom_num_flag: 277 atomic_tuple = atomic_tuple + (atom_num,) 278 if atom_name_flag: 279 atomic_tuple = atomic_tuple + (atom_name,) 280 if element_flag: 281 atomic_tuple = atomic_tuple + (element,) 282 if pos_flag: 283 atomic_tuple = atomic_tuple + (pos,) 284 if index_flag: 285 atomic_tuple += (atom_index,) 286 287 # Yield the information. 288 if len(atomic_tuple) == 1: 289 atomic_tuple = atomic_tuple[0] 290 yield atomic_tuple 291 292 # Break out of the loop if the ave flag is set, as data from only one model is used. 293 if ave: 294 break

295 296

297 - def attached_atom(self, atom_id=None, attached_atom=None, model=None):

298 """Find the atom corresponding to 'attached_atom' which is bonded to the atom of 'atom_id'. 299 300 @keyword atom_id: The molecule, residue, and atom identifier string. This must 301 correspond to a single atom in the system. 302 @type atom_id: str 303 @keyword attached_atom: The name of the attached atom to return. 304 @type attached_atom: str 305 @keyword model: The model to return the positional information from. If not 306 supplied and multiple models exist, then the returned atomic 307 position will be a list of the positions in each model. 308 @type model: None or int 309 @return: A tuple of information about the bonded atom. 310 @rtype: tuple consisting of the atom number (int), atom name (str), element 311 name (str), and atomic positions for each model (list of numpy 312 arrays) 313 """ 314 315 # Generate the selection object. 316 sel_obj = Selection(atom_id) 317 318 # Initialise some objects. 319 bonded_num = None 320 bonded_name = None 321 element = None 322 pos_array = [] 323 324 # Loop over the models. 325 for model in self.model_loop(model): 326 # Init. 327 atom_found = False 328 329 # Loop over each individual molecule. 330 for mol in model.mol: 331 # Loop over the residues of the protein in the PDB file. 332 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj): 333 # Loop over the atoms of the residue. 334 for atom in res: 335 # Atom number, name, and position. 336 atom_num = atom.properties['serial_number'] 337 atom_name = atom.name 338 339 # Skip non-matching atoms. 340 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name): 341 continue 342 343 # More than one matching atom! 344 if atom_found: 345 raise RelaxError("The atom_id argument " + repr(atom_id) + " must correspond to a single atom.") 346 347 # The atom has been found, so store some info. 348 atom_found = True 349 atom_num_match = atom_num 350 atom_name_match = atom_name 351 mol_type_match = mol.mol_type 352 res_match = res 353 354 # Found the atom. 355 if atom_found: 356 # Find the atom bonded to this model/molecule/residue/atom. 357 bonded_num, bonded_name, element, pos = self.__find_bonded_atom(attached_atom, mol_type_match, res_match) 358 359 # Append the position to the position array (converting from a Numeric array to a numpy array). 360 pos_array.append(array(pos, float64)) 361 362 # Return the atom info. 363 return bonded_num, bonded_name, element, pos_array

364 365

366 - def __ave_atom_pos(self, mol_index, res_index, atom_index):

367 """Determine the average atom position across all models. 368 369 @param mol_index: The molecule index. 370 @type mol_index: int 371 @param res_index: The residue index. 372 @type res_index: int 373 @param atom_index: The atom index. 374 @type atom_index: int 375 """ 376 377 # Init. 378 pos = zeros(3, float64) 379 380 # Loop over the models. 381 for model in self.model_loop(): 382 # The exact molecule. 383 mol = model.mol[mol_index] 384 385 # The residue. 386 if mol.mol_type != 'other': 387 res = mol.data.residues[res_index] 388 else: 389 res = mol.data[res_index] 390 391 # The atom. 392 atom = res[atom_index] 393 394 # Sum the position. 395 pos = pos + atom.position.array 396 397 # Average the position array. 398 pos = pos / len(self.structural_data) 399 400 # Return the ave position. 401 return pos

402 403

404 - def bond_vectors(self, attached_atom=None, model_num=None, mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, return_name=False, return_warnings=False):

405 """Find the bond vectors between the atoms of 'attached_atom' and 'atom_id'. 406 407 @keyword attached_atom: The name of the bonded atom. 408 @type attached_atom: str 409 @keyword model_num: The model of which to return the vectors from. If not supplied 410 and multiple models exist, then vectors from all models will be 411 returned. 412 @type model_num: None or int 413 @keyword mol_name: The name of the molecule that attached_atom belongs to. 414 @type mol_name: str 415 @keyword res_num: The number of the residue that attached_atom belongs to. 416 @type res_num: str 417 @keyword res_name: The name of the residue that attached_atom belongs to. 418 @type res_name: str 419 @keyword spin_num: The number of the spin that attached_atom is attached to. 420 @type spin_num: str 421 @keyword spin_name: The name of the spin that attached_atom is attached to. 422 @type spin_name: str 423 @keyword return_name: A flag which if True will cause the name of the attached atom to 424 be returned together with the bond vectors. 425 @type return_name: bool 426 @keyword return_warnings: A flag which if True will cause warning messages to be returned. 427 @type return_warnings: bool 428 @return: The list of bond vectors for each structure. 429 @rtype: list of numpy arrays 430 """ 431 432 # Generate the selection object. 433 sel_obj = Selection(generate_spin_id(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=spin_num, spin_name=spin_name)) 434 435 # Initialise some objects. 436 vectors = [] 437 attached_name = None 438 warnings = None 439 440 # Loop over the models. 441 for model in self.model_loop(model_num): 442 # Init. 443 atom_found = False 444 445 # Loop over each individual molecule. 446 for mol in model.mol: 447 # Loop over the residues of the protein in the PDB file. 448 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj): 449 # Loop over the atoms of the residue. 450 for atom in res: 451 # Atom number, name, and position. 452 atom_num = atom.properties['serial_number'] 453 atom_name = atom.name 454 455 # Skip non-matching atoms. 456 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name): 457 continue 458 459 # More than one matching atom! 460 if atom_found: 461 raise RelaxError("The atom_id argument " + repr(atom_id) + " must correspond to a single atom.") 462 463 # The atom has been found, so store some info. 464 atom_found = True 465 pos_match = atom.position.array 466 mol_type_match = mol.mol_type 467 res_match = res 468 469 # Found the atom. 470 if atom_found: 471 # Find the atom bonded to this structure/molecule/residue/atom. 472 bonded_num, bonded_name, element, pos, attached_name, warnings = self.__find_bonded_atom(attached_atom, mol_type_match, res_match) 473 474 # No bonded atom. 475 if (bonded_num, bonded_name, element) == (None, None, None): 476 continue 477 478 # The bond vector. 479 vector = pos - pos_match 480 481 # Append the vector to the vectors array (converting from a Numeric array to a numpy array). 482 vectors.append(array(vector, float64)) 483 484 # Build the tuple to be yielded. 485 data = (vectors,) 486 if return_name: 487 data = data + (attached_name,) 488 if return_warnings: 489 data = data + (warnings,) 490 491 # Return the data. 492 return data

493 494

495 - def get_molecule(self, molecule, model=None):

496 """Return the molecule. 497 498 Only one model can be specified. 499 500 501 @param molecule: The molecule name. 502 @type molecule: int or None 503 @keyword model: The model number. 504 @type model: int or None 505 @raises RelaxError: If the model is not specified and there is more than one model loaded. 506 @return: The MolContainer corresponding to the molecule name and model number. 507 @rtype: MolContainer instance or None 508 """ 509 510 # Check if the target is a single molecule. 511 if model == None and self.num_models() > 1: 512 raise RelaxError("The target molecule cannot be determined as there are %s models already present." % self.num_models()) 513 514 # Check the model argument. 515 if not isinstance(model, int) and not model == None: 516 raise RelaxNoneIntError 517 518 # No models. 519 if not len(self.structural_data): 520 return 521 522 # Loop over the models. 523 for model_cont in self.model_loop(model): 524 # Loop over the molecules. 525 for mol in model_cont.mol: 526 # Return the matching molecule. 527 if mol.mol_name == molecule: 528 return mol

529 530

531 - def load_pdb(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, alt_loc=None, verbosity=False, merge=False):

532 """Function for loading the structures from the PDB file. 533 534 @param file_path: The full path of the file. 535 @type file_path: str 536 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are determined differently by the different parsers, but are numbered consecutively from 1. If set to None, then all molecules will be loaded. 537 @type read_mol: None, int, or list of int 538 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 539 @type set_mol_name: None, str, or list of str 540 @keyword read_model: The PDB model to extract from the file. If set to None, then all models will be loaded. 541 @type read_model: None, int, or list of int 542 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the PDB model numbers will be preserved, if they exist. 543 @type set_model_num: None, int, or list of int 544 @keyword alt_loc: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use. 545 @type alt_loc: str or None 546 @keyword verbosity: A flag which if True will cause messages to be printed. 547 @type verbosity: bool 548 @keyword merge: A flag which if set to True will try to merge the PDB structure into the currently loaded structures. 549 @type merge: bool 550 @return: The status of the loading of the PDB file. 551 @rtype: bool 552 """ 553 554 # Initial printout. 555 if verbosity: 556 print("\nScientific Python PDB parser.") 557 558 # Test if the file exists. 559 if not access(file_path, F_OK): 560 # Exit indicating failure. 561 return False 562 563 # Alt loc warning. 564 if alt_loc: 565 warn(RelaxWarning("The alt_loc argument will be ignored for the ScientificPython PDB parser.")) 566 567 # Separate the file name and path. 568 path, file = os.path.split(file_path) 569 570 # Convert the structure reading args into lists. 571 if read_mol and not isinstance(read_mol, list): 572 read_mol = [read_mol] 573 if set_mol_name and not isinstance(set_mol_name, list): 574 set_mol_name = [set_mol_name] 575 if read_model and not isinstance(read_model, list): 576 read_model = [read_model] 577 if set_model_num and not isinstance(set_model_num, list): 578 set_model_num = [set_model_num] 579 580 # Load all models. 581 model_flag = True 582 model_num = 1 583 model_load_num = 1 584 orig_model_num = [] 585 mol_conts = [] 586 while True: 587 # Only load the desired model. 588 if read_model: 589 # No more models to read. 590 if model_num > max(read_model): 591 break 592 593 # Skip the model if not in the list. 594 if model_num not in read_model: 595 # Increment the model counter. 596 model_num = model_num + 1 597 598 # Jump to the next model. 599 continue 600 601 # Load the PDB file. 602 model = PDB.Structure(file_path, model_num) 603 604 # No model 1. 605 if not len(model) and not len(mol_conts): 606 # Load the PDB without a model number. 607 model = PDB.Structure(file_path) 608 model_flag = False 609 610 # Ok, nothing is loadable from this file. 611 if not len(model): 612 raise RelaxPdbLoadError(file_path) 613 614 # Test if the last structure has been reached. 615 if not len(model): 616 del model 617 break 618 619 # Append an empty list to the molecule container structure for a new model, set the molecule index, and initialise the target name structure. 620 mol_conts.append([]) 621 mol_index = 0 622 new_mol_name = [] 623 624 # Set the target molecule number offset (if molecules already exist). 625 mol_offset = 0 626 for i in range(len(self.structural_data)): 627 model_index = model_load_num - 1 628 if not set_model_num or (model_index <= len(set_model_num) and set_model_num[model_index] == self.structural_data[i].num): 629 mol_offset = len(self.structural_data[i].mol) 630 631 # Store the original model number. 632 orig_model_num.append(model_num) 633 634 # Print the PDB info. 635 if verbosity: 636 print(model) 637 638 # First add the peptide chains (generating the molecule names and incrementing the molecule index). 639 if hasattr(model, 'peptide_chains'): 640 for mol in model.peptide_chains: 641 # Only read the required molecule. 642 if read_mol and mol_index+1 not in read_mol: 643 mol_index = mol_index + 1 644 continue 645 646 mol.mol_type = 'protein' 647 mol_conts[-1].append(MolContainer()) 648 mol_conts[-1][-1].data = mol 649 mol_conts[-1][-1].mol_type = 'protein' 650 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file) 651 mol_index = mol_index + 1 652 653 # Then the nucleotide chains (generating the molecule names and incrementing the molecule index). 654 if hasattr(model, 'nucleotide_chains'): 655 for mol in model.nucleotide_chains: 656 # Only read the required molecule. 657 if read_mol and mol_index+1 not in read_mol: 658 mol_index = mol_index + 1 659 continue 660 661 mol_conts[-1].append(MolContainer()) 662 mol_conts[-1][-1].data = mol 663 mol_conts[-1][-1].mol_type = 'nucleic acid' 664 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file) 665 mol_index = mol_index + 1 666 667 # Finally all other molecules (generating the molecule names and incrementing the molecule index). 668 if hasattr(model, 'molecules'): 669 for key in list(model.molecules.keys()): 670 # Only read the required molecule. 671 if read_mol and mol_index+1 not in read_mol: 672 mol_index = mol_index + 1 673 continue 674 675 # Add an empty list-type container. 676 mol_conts[-1].append(MolContainer()) 677 mol_conts[-1][-1].mol_type = 'other' 678 679 # Loop over the molecules. 680 mol_conts[-1][-1].data = [] 681 for mol in model.molecules[key]: 682 mol_conts[-1][-1].data.append(mol) 683 684 # Check. 685 if set_mol_name and mol_index >= len(set_mol_name): 686 raise RelaxError("The %s molecules read exceeds the number of molecule names supplied in %s." % (mol_index+1, set_mol_name)) 687 688 # Update structures. 689 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file) 690 mol_index = mol_index + 1 691 692 # Increment the model counters. 693 model_num = model_num + 1 694 model_load_num = model_load_num + 1 695 696 # Create the structural data data structures. 697 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=list(range(1, len(mol_conts[0])+1)), set_mol_name=new_mol_name, file_name=file, file_path=path, merge=merge) 698 699 # Loading worked. 700 return True

Source Code for Module generic_fns.structure.scientific