generic_fns.structure.scientific

48 """The Scientific Python specific data object.""" 49 50 # Identification string. 51 id = 'scientific' 52

53 - def __init__(self):

54 """Initialise the class.""" 55 56 # Execute the base class __init__() method. 57 Base_struct_API.__init__(self)

58 59

60 - def __find_bonded_atom(self, attached_atom, mol_type, res):

61 """Find the atom named attached_atom directly bonded to the desired atom. 62 63 @param attached_atom: The name of the attached atom to return. 64 @type attached_atom: str 65 @param mol_type: The type of the molecule. This can be one of 'protein', 'nucleic acid', 66 or 'other'. 67 @type mol_type: str 68 @param res: The Scientific Python residue object. 69 @type res: Scientific Python residue instance 70 @return: A tuple of information about the bonded atom. 71 @rtype: tuple consisting of the atom number (int), atom name (str), element 72 name (str), and atomic position (Numeric array of len 3) 73 """ 74 75 # Init. 76 bonded_found = False 77 78 # The find the attached atom in the residue (FIXME). 79 matching_list = [] 80 for atom in list(res.atoms.keys()): 81 if relax_re.search(atom, attached_atom): 82 matching_list.append(atom) 83 num_attached = len(matching_list) 84 85 # Problem. 86 if num_attached > 1: 87 return None, None, None, None, None, 'More than one attached atom found: ' + repr(matching_list) 88 89 # No attached atoms. 90 if num_attached == 0: 91 return None, None, None, None, None, "No attached atom could be found" 92 93 # The bonded atom object. 94 bonded = res[attached_atom] 95 96 # The bonded atom info. 97 bonded_num = bonded.properties['serial_number'] 98 bonded_name = bonded.name 99 element = bonded.properties['element'] 100 pos = bonded.position.array 101 attached_name = matching_list[0] 102 103 # Return the information. 104 return bonded_num, bonded_name, element, pos, attached_name, None

105 106

107 - def __residue_loop(self, mol, sel_obj=None):

108 """Generator function for looping over all residues in the Scientific PDB data objects. 109 110 @param mol: The individual molecule Scientific Python PDB data object. 111 @type mol: Scientific Python PDB object 112 @keyword sel_obj: The selection object. 113 @type sel_obj: instance of generic_fns.mol_res_spin.Selection 114 @return: A tuple of the Scientific Python PDB object representing a single 115 residue, the residue number, and residue name. 116 @rtype: (Scientific Python PDB object, str, str) 117 """ 118 119 # Proteins, RNA, and DNA. 120 if mol.mol_type != 'other': 121 # Loop over the residues of the protein in the PDB file. 122 res_index = -1 123 for res in mol.data.residues: 124 # Residue number and name. 125 if mol.mol_type == 'nucleic acid': 126 res_name = res.name[-1] 127 else: 128 res_name = res.name 129 res_num = res.number 130 131 # Residue index. 132 res_index = res_index + 1 133 134 # Skip non-matching residues. 135 if sel_obj and not sel_obj.contains_res(res_num, res_name, mol.mol_name): 136 continue 137 138 # Yield the residue info. 139 yield res, res_num, res_name, res_index 140 141 # Other molecules. 142 else: 143 res_index = -1 144 for res in mol.data: 145 # Residue index. 146 res_index = res_index + 1 147 148 # Skip non-matching residues. 149 if sel_obj and not sel_obj.contains_res(res.number, res.name, mol.mol_name): 150 continue 151 152 # Yield the residue info. 153 yield res, res.number, res.name, res_index

154 155

156 - def are_bonded(self, atom_id1=None, atom_id2=None):

157 """Determine if two atoms are directly bonded to each other. 158 159 @keyword atom_id1: The molecule, residue, and atom identifier string of the first atom. 160 @type atom_id1: str 161 @keyword atom_id2: The molecule, residue, and atom identifier string of the second atom. 162 @type atom_id2: str 163 @return: True if the atoms are directly bonded. 164 @rtype: bool 165 """ 166 167 # Loop over the atoms. 168 for pos1 in self.atom_loop(atom_id=atom_id1, pos_flag=True): 169 for pos2 in self.atom_loop(atom_id=atom_id2, pos_flag=True): 170 # The interatomic distance. 171 dist = linalg.norm(pos2[0]-pos1[0]) 172 173 # The atom is within the radius of 2.0 Angstrom. 174 if dist < 2.0: 175 return True 176 177 # Not bonded. 178 return False

179 180

181 - def atom_loop(self, atom_id=None, str_id=None, model_num=None, model_num_flag=False, mol_name_flag=False, res_num_flag=False, res_name_flag=False, atom_num_flag=False, atom_name_flag=False, element_flag=False, pos_flag=False, ave=False):

182 """Generator function for looping over all atoms in the Scientific Python data objects. 183 184 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms 185 matching this selection will be yielded. 186 @type atom_id: str 187 @keyword str_id: The structure identifier. This can be the file name, model 188 number, or structure number. If None, then all structures will 189 be looped over. 190 @type str_id: str, int, or None 191 @keyword model_num: Only loop over a specific model. 192 @type model_num: int or None 193 @keyword model_num_flag: A flag which if True will cause the model number to be yielded. 194 @type model_num_flag: bool 195 @keyword mol_name_flag: A flag which if True will cause the molecule name to be yielded. 196 @type mol_name_flag: bool 197 @keyword res_num_flag: A flag which if True will cause the residue number to be 198 yielded. 199 @type res_num_flag: bool 200 @keyword res_name_flag: A flag which if True will cause the residue name to be yielded. 201 @type res_name_flag: bool 202 @keyword atom_num_flag: A flag which if True will cause the atom number to be yielded. 203 @type atom_num_flag: bool 204 @keyword atom_name_flag: A flag which if True will cause the atom name to be yielded. 205 @type atom_name_flag: bool 206 @keyword element_flag: A flag which if True will cause the element name to be yielded. 207 @type element_flag: bool 208 @keyword pos_flag: A flag which if True will cause the atomic position to be 209 yielded. 210 @type pos_flag: bool 211 average atom properties across all loaded structures. 212 @type ave: bool 213 @return: A tuple of atomic information, as described in the docstring. 214 @rtype: tuple consisting of optional molecule name (str), residue number 215 (int), residue name (str), atom number (int), atom name(str), 216 element name (str), and atomic position (array of len 3). 217 """ 218 219 # Generate the selection object. 220 sel_obj = Selection(atom_id) 221 222 # Model loop. 223 for model in self.model_loop(): 224 # Loop over the molecules. 225 for mol_index in range(len(model.mol)): 226 mol = model.mol[mol_index] 227 228 # Skip non-matching molecules. 229 if sel_obj and not sel_obj.contains_mol(mol.mol_name): 230 continue 231 232 # Loop over the residues. 233 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj): 234 # Loop over the atoms of the residue. 235 atom_index = -1 236 for atom in res: 237 # Atom number, name, index, and element type. 238 atom_num = atom.properties['serial_number'] 239 atom_name = atom.name 240 element = atom.properties['element'] 241 atom_index = atom_index + 1 242 243 # Skip non-matching atoms. 244 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name): 245 continue 246 247 # The atom position. 248 if ave: 249 pos = self.__ave_atom_pos(mol_index, res_index, atom_index) 250 else: 251 pos = atom.position.array 252 253 # The molecule name. 254 mol_name = mol.mol_name 255 256 # Replace empty variables with None. 257 if not mol.mol_name: 258 mol_name = None 259 if not res_num: 260 res_num = None 261 if not res_name: 262 res_name = None 263 if not atom_num: 264 atom_num = None 265 if not atom_name: 266 atom_name = None 267 268 # Build the tuple to be yielded. 269 atomic_tuple = () 270 if model_num_flag: 271 if ave: 272 atomic_tuple = atomic_tuple + (None,) 273 else: 274 atomic_tuple = atomic_tuple + (model.num,) 275 if mol_name_flag: 276 atomic_tuple = atomic_tuple + (mol_name,) 277 if res_num_flag: 278 atomic_tuple = atomic_tuple + (res_num,) 279 if res_name_flag: 280 atomic_tuple = atomic_tuple + (res_name,) 281 if atom_num_flag: 282 atomic_tuple = atomic_tuple + (atom_num,) 283 if atom_name_flag: 284 atomic_tuple = atomic_tuple + (atom_name,) 285 if element_flag: 286 atomic_tuple = atomic_tuple + (element,) 287 if pos_flag: 288 atomic_tuple = atomic_tuple + (pos,) 289 290 # Yield the information. 291 yield atomic_tuple 292 293 # Break out of the loop if the ave flag is set, as data from only one model is used. 294 if ave: 295 break

296 297

298 - def attached_atom(self, atom_id=None, attached_atom=None, model=None):

299 """Find the atom corresponding to 'attached_atom' which is bonded to the atom of 'atom_id'. 300 301 @keyword atom_id: The molecule, residue, and atom identifier string. This must 302 correspond to a single atom in the system. 303 @type atom_id: str 304 @keyword attached_atom: The name of the attached atom to return. 305 @type attached_atom: str 306 @keyword model: The model to return the positional information from. If not 307 supplied and multiple models exist, then the returned atomic 308 position will be a list of the positions in each model. 309 @type model: None or int 310 @return: A tuple of information about the bonded atom. 311 @rtype: tuple consisting of the atom number (int), atom name (str), element 312 name (str), and atomic positions for each model (list of numpy 313 arrays) 314 """ 315 316 # Generate the selection object. 317 sel_obj = Selection(atom_id) 318 319 # Initialise some objects. 320 bonded_num = None 321 bonded_name = None 322 element = None 323 pos_array = [] 324 325 # Loop over the models. 326 for model in self.model_loop(model): 327 # Init. 328 atom_found = False 329 330 # Loop over each individual molecule. 331 for mol in model.mol: 332 # Loop over the residues of the protein in the PDB file. 333 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj): 334 # Loop over the atoms of the residue. 335 for atom in res: 336 # Atom number, name, and position. 337 atom_num = atom.properties['serial_number'] 338 atom_name = atom.name 339 340 # Skip non-matching atoms. 341 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name): 342 continue 343 344 # More than one matching atom! 345 if atom_found: 346 raise RelaxError("The atom_id argument " + repr(atom_id) + " must correspond to a single atom.") 347 348 # The atom has been found, so store some info. 349 atom_found = True 350 atom_num_match = atom_num 351 atom_name_match = atom_name 352 mol_type_match = mol.mol_type 353 res_match = res 354 355 # Found the atom. 356 if atom_found: 357 # Find the atom bonded to this model/molecule/residue/atom. 358 bonded_num, bonded_name, element, pos = self.__find_bonded_atom(attached_atom, mol_type_match, res_match) 359 360 # Append the position to the position array (converting from a Numeric array to a numpy array). 361 pos_array.append(array(pos, float64)) 362 363 # Return the atom info. 364 return bonded_num, bonded_name, element, pos_array

365 366

367 - def __ave_atom_pos(self, mol_index, res_index, atom_index):

368 """Determine the average atom position across all models. 369 370 @param mol_index: The molecule index. 371 @type mol_index: int 372 @param res_index: The residue index. 373 @type res_index: int 374 @param atom_index: The atom index. 375 @type atom_index: int 376 """ 377 378 # Init. 379 pos = zeros(3, float64) 380 381 # Loop over the models. 382 for model in self.model_loop(): 383 # The exact molecule. 384 mol = model.mol[mol_index] 385 386 # The residue. 387 if mol.mol_type != 'other': 388 res = mol.data.residues[res_index] 389 else: 390 res = mol.data[res_index] 391 392 # The atom. 393 atom = res[atom_index] 394 395 # Sum the position. 396 pos = pos + atom.position.array 397 398 # Average the position array. 399 pos = pos / len(self.structural_data) 400 401 # Return the ave position. 402 return pos

403 404

405 - def bond_vectors(self, attached_atom=None, model_num=None, mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, return_name=False, return_warnings=False):

406 """Find the bond vectors between the atoms of 'attached_atom' and 'atom_id'. 407 408 @keyword attached_atom: The name of the bonded atom. 409 @type attached_atom: str 410 @keyword model_num: The model of which to return the vectors from. If not supplied 411 and multiple models exist, then vectors from all models will be 412 returned. 413 @type model_num: None or int 414 @keyword mol_name: The name of the molecule that attached_atom belongs to. 415 @type mol_name: str 416 @keyword res_num: The number of the residue that attached_atom belongs to. 417 @type res_num: str 418 @keyword res_name: The name of the residue that attached_atom belongs to. 419 @type res_name: str 420 @keyword spin_num: The number of the spin that attached_atom is attached to. 421 @type spin_num: str 422 @keyword spin_name: The name of the spin that attached_atom is attached to. 423 @type spin_name: str 424 @keyword return_name: A flag which if True will cause the name of the attached atom to 425 be returned together with the bond vectors. 426 @type return_name: bool 427 @keyword return_warnings: A flag which if True will cause warning messages to be returned. 428 @type return_warnings: bool 429 @return: The list of bond vectors for each structure. 430 @rtype: list of numpy arrays 431 """ 432 433 # Generate the selection object. 434 sel_obj = Selection(generate_spin_id(mol_name, res_num, res_name, spin_num, spin_name)) 435 436 # Initialise some objects. 437 vectors = [] 438 attached_name = None 439 warnings = None 440 441 # Loop over the models. 442 for model in self.model_loop(model_num): 443 # Init. 444 atom_found = False 445 446 # Loop over each individual molecule. 447 for mol in model.mol: 448 # Loop over the residues of the protein in the PDB file. 449 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj): 450 # Loop over the atoms of the residue. 451 for atom in res: 452 # Atom number, name, and position. 453 atom_num = atom.properties['serial_number'] 454 atom_name = atom.name 455 456 # Skip non-matching atoms. 457 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name): 458 continue 459 460 # More than one matching atom! 461 if atom_found: 462 raise RelaxError("The atom_id argument " + repr(atom_id) + " must correspond to a single atom.") 463 464 # The atom has been found, so store some info. 465 atom_found = True 466 pos_match = atom.position.array 467 mol_type_match = mol.mol_type 468 res_match = res 469 470 # Found the atom. 471 if atom_found: 472 # Find the atom bonded to this structure/molecule/residue/atom. 473 bonded_num, bonded_name, element, pos, attached_name, warnings = self.__find_bonded_atom(attached_atom, mol_type_match, res_match) 474 475 # No bonded atom. 476 if (bonded_num, bonded_name, element) == (None, None, None): 477 continue 478 479 # The bond vector. 480 vector = pos - pos_match 481 482 # Append the vector to the vectors array (converting from a Numeric array to a numpy array). 483 vectors.append(array(vector, float64)) 484 485 # Build the tuple to be yielded. 486 data = (vectors,) 487 if return_name: 488 data = data + (attached_name,) 489 if return_warnings: 490 data = data + (warnings,) 491 492 # Return the data. 493 return data

494 495

496 - def get_molecule(self, molecule, model=None):

497 """Return the molecule. 498 499 Only one model can be specified. 500 501 502 @param molecule: The molecule name. 503 @type molecule: int or None 504 @keyword model: The model number. 505 @type model: int or None 506 @raises RelaxError: If the model is not specified and there is more than one model loaded. 507 @return: The MolContainer corresponding to the molecule name and model number. 508 @rtype: MolContainer instance or None 509 """ 510 511 # Check if the target is a single molecule. 512 if model == None and self.num_models() > 1: 513 raise RelaxError("The target molecule cannot be determined as there are %s models already present." % self.num_models()) 514 515 # Check the model argument. 516 if not isinstance(model, int) and not model == None: 517 raise RelaxNoneIntError 518 519 # No models. 520 if not len(self.structural_data): 521 return 522 523 # Loop over the models. 524 for model_cont in self.model_loop(model): 525 # Loop over the molecules. 526 for mol in model_cont.mol: 527 # Return the matching molecule. 528 if mol.mol_name == molecule: 529 return mol

530 531

532 - def load_pdb(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, verbosity=False):

533 """Function for loading the structures from the PDB file. 534 535 @param file_path: The full path of the file. 536 @type file_path: str 537 @keyword read_mol: The molecule(s) to read from the file, independent of model. The 538 molecules are determined differently by the different parsers, but 539 are numbered consecutively from 1. If set to None, then all 540 molecules will be loaded. 541 @type read_mol: None, int, or list of int 542 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, 543 then the molecules will be automatically labelled based on the file 544 name or other information. 545 @type set_mol_name: None, str, or list of str 546 @keyword read_model: The PDB model to extract from the file. If set to None, then all 547 models will be loaded. 548 @type read_model: None, int, or list of int 549 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then 550 the PDB model numbers will be preserved, if they exist. 551 @type set_model_num: None, int, or list of int 552 @keyword verbosity: A flag which if True will cause messages to be printed. 553 @type verbosity: bool 554 @return: The status of the loading of the PDB file. 555 @rtype: bool 556 """ 557 558 # Initial printout. 559 if verbosity: 560 print("\nScientific Python PDB parser.") 561 562 # Test if the file exists. 563 if not access(file_path, F_OK): 564 # Exit indicating failure. 565 return False 566 567 # Separate the file name and path. 568 path, file = os.path.split(file_path) 569 570 # Convert the structure reading args into lists. 571 if read_mol and not isinstance(read_mol, list): 572 read_mol = [read_mol] 573 if set_mol_name and not isinstance(set_mol_name, list): 574 set_mol_name = [set_mol_name] 575 if read_model and not isinstance(read_model, list): 576 read_model = [read_model] 577 if set_model_num and not isinstance(set_model_num, list): 578 set_model_num = [set_model_num] 579 580 # Load all models. 581 model_flag = True 582 model_num = 1 583 model_load_num = 1 584 orig_model_num = [] 585 mol_conts = [] 586 while True: 587 # Only load the desired model. 588 if read_model: 589 # No more models to read. 590 if model_num > max(read_model): 591 break 592 593 # Skip the model if not in the list. 594 if model_num not in read_model: 595 # Increment the model counter. 596 model_num = model_num + 1 597 598 # Jump to the next model. 599 continue 600 601 # Load the PDB file. 602 model = PDB.Structure(file_path, model_num) 603 604 # No model 1. 605 if not len(model) and not len(mol_conts): 606 # Load the PDB without a model number. 607 model = PDB.Structure(file_path) 608 model_flag = False 609 610 # Ok, nothing is loadable from this file. 611 if not len(model): 612 raise RelaxPdbLoadError(file_path) 613 614 # Test if the last structure has been reached. 615 if not len(model): 616 del model 617 break 618 619 # Append an empty list to the molecule container structure for a new model, set the molecule index, and initialise the target name structure. 620 mol_conts.append([]) 621 mol_index = 0 622 new_mol_name = [] 623 624 # Set the target molecule number offset (if molecules already exist). 625 mol_offset = 0 626 for i in range(len(self.structural_data)): 627 model_index = model_load_num - 1 628 if not set_model_num or (model_index <= len(set_model_num) and set_model_num[model_index] == self.structural_data[i].num): 629 mol_offset = len(self.structural_data[i].mol) 630 631 # Store the original model number. 632 orig_model_num.append(model_num) 633 634 # Print the PDB info. 635 if verbosity: 636 print(model) 637 638 # First add the peptide chains (generating the molecule names and incrementing the molecule index). 639 if hasattr(model, 'peptide_chains'): 640 for mol in model.peptide_chains: 641 # Only read the required molecule. 642 if read_mol and mol_index+1 not in read_mol: 643 mol_index = mol_index + 1 644 continue 645 646 mol.mol_type = 'protein' 647 mol_conts[-1].append(MolContainer()) 648 mol_conts[-1][-1].data = mol 649 mol_conts[-1][-1].mol_type = 'protein' 650 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file) 651 mol_index = mol_index + 1 652 653 # Then the nucleotide chains (generating the molecule names and incrementing the molecule index). 654 if hasattr(model, 'nucleotide_chains'): 655 for mol in model.nucleotide_chains: 656 # Only read the required molecule. 657 if read_mol and mol_index+1 not in read_mol: 658 mol_index = mol_index + 1 659 continue 660 661 mol_conts[-1].append(MolContainer()) 662 mol_conts[-1][-1].data = mol 663 mol_conts[-1][-1].mol_type = 'nucleic acid' 664 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file) 665 mol_index = mol_index + 1 666 667 # Finally all other molecules (generating the molecule names and incrementing the molecule index). 668 if hasattr(model, 'molecules'): 669 for key in list(model.molecules.keys()): 670 # Only read the required molecule. 671 if read_mol and mol_index+1 not in read_mol: 672 mol_index = mol_index + 1 673 continue 674 675 # Add an empty list-type container. 676 mol_conts[-1].append(MolContainer()) 677 mol_conts[-1][-1].mol_type = 'other' 678 679 # Loop over the molecules. 680 mol_conts[-1][-1].data = [] 681 for mol in model.molecules[key]: 682 mol_conts[-1][-1].data.append(mol) 683 684 # Check. 685 if set_mol_name and mol_index >= len(set_mol_name): 686 raise RelaxError("The %s molecules read exceeds the number of molecule names supplied in %s." % (mol_index+1, set_mol_name)) 687 688 # Update structures. 689 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file) 690 mol_index = mol_index + 1 691 692 # Increment the model counters. 693 model_num = model_num + 1 694 model_load_num = model_load_num + 1 695 696 # Create the structural data data structures. 697 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=list(range(1, len(mol_conts[0])+1)), set_mol_name=new_mol_name, file_name=file, file_path=path) 698 699 # Loading worked. 700 return True

Source Code for Module generic_fns.structure.scientific