generic_fns.structure.scientific

49 """The Scientific Python specific data object.""" 50 51 # Identification string. 52 id = 'scientific' 53

54 - def __init__(self):

55 """Initialise the class.""" 56 57 # Execute the base class __init__() method. 58 Base_struct_API.__init__(self)

59 60

61 - def __find_bonded_atom(self, attached_atom, mol_type, res):

62 """Find the atom named attached_atom directly bonded to the desired atom. 63 64 @param attached_atom: The name of the attached atom to return. 65 @type attached_atom: str 66 @param mol_type: The type of the molecule. This can be one of 'protein', 'nucleic acid', 67 or 'other'. 68 @type mol_type: str 69 @param res: The Scientific Python residue object. 70 @type res: Scientific Python residue instance 71 @return: A tuple of information about the bonded atom. 72 @rtype: tuple consisting of the atom number (int), atom name (str), element 73 name (str), and atomic position (Numeric array of len 3) 74 """ 75 76 # Init. 77 bonded_found = False 78 79 # The find the attached atom in the residue (FIXME). 80 matching_list = [] 81 for atom in list(res.atoms.keys()): 82 if relax_re.search(atom, attached_atom): 83 matching_list.append(atom) 84 num_attached = len(matching_list) 85 86 # Problem. 87 if num_attached > 1: 88 return None, None, None, None, None, 'More than one attached atom found: ' + repr(matching_list) 89 90 # No attached atoms. 91 if num_attached == 0: 92 return None, None, None, None, None, "No attached atom could be found" 93 94 # The bonded atom object. 95 bonded = res[attached_atom] 96 97 # The bonded atom info. 98 bonded_num = bonded.properties['serial_number'] 99 bonded_name = bonded.name 100 element = bonded.properties['element'] 101 pos = bonded.position.array 102 attached_name = matching_list[0] 103 104 # Return the information. 105 return bonded_num, bonded_name, element, pos, attached_name, None

106 107

108 - def __residue_loop(self, mol, sel_obj=None):

109 """Generator function for looping over all residues in the Scientific PDB data objects. 110 111 @param mol: The individual molecule Scientific Python PDB data object. 112 @type mol: Scientific Python PDB object 113 @keyword sel_obj: The selection object. 114 @type sel_obj: instance of generic_fns.mol_res_spin.Selection 115 @return: A tuple of the Scientific Python PDB object representing a single 116 residue, the residue number, and residue name. 117 @rtype: (Scientific Python PDB object, str, str) 118 """ 119 120 # Proteins, RNA, and DNA. 121 if mol.mol_type != 'other': 122 # Loop over the residues of the protein in the PDB file. 123 res_index = -1 124 for res in mol.data.residues: 125 # Residue number and name. 126 if mol.mol_type == 'nucleic acid': 127 res_name = res.name[-1] 128 else: 129 res_name = res.name 130 res_num = res.number 131 132 # Residue index. 133 res_index = res_index + 1 134 135 # Skip non-matching residues. 136 if sel_obj and not sel_obj.contains_res(res_num, res_name, mol.mol_name): 137 continue 138 139 # Yield the residue info. 140 yield res, res_num, res_name, res_index 141 142 # Other molecules. 143 else: 144 res_index = -1 145 for res in mol.data: 146 # Residue index. 147 res_index = res_index + 1 148 149 # Skip non-matching residues. 150 if sel_obj and not sel_obj.contains_res(res.number, res.name, mol.mol_name): 151 continue 152 153 # Yield the residue info. 154 yield res, res.number, res.name, res_index

155 156

157 - def atom_loop(self, atom_id=None, str_id=None, model_num=None, model_num_flag=False, mol_name_flag=False, res_num_flag=False, res_name_flag=False, atom_num_flag=False, atom_name_flag=False, element_flag=False, pos_flag=False, ave=False):

158 """Generator function for looping over all atoms in the Scientific Python data objects. 159 160 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms 161 matching this selection will be yielded. 162 @type atom_id: str 163 @keyword str_id: The structure identifier. This can be the file name, model 164 number, or structure number. If None, then all structures will 165 be looped over. 166 @type str_id: str, int, or None 167 @keyword model_num: Only loop over a specific model. 168 @type model_num: int or None 169 @keyword model_num_flag: A flag which if True will cause the model number to be yielded. 170 @type model_num_flag: bool 171 @keyword mol_name_flag: A flag which if True will cause the molecule name to be yielded. 172 @type mol_name_flag: bool 173 @keyword res_num_flag: A flag which if True will cause the residue number to be 174 yielded. 175 @type res_num_flag: bool 176 @keyword res_name_flag: A flag which if True will cause the residue name to be yielded. 177 @type res_name_flag: bool 178 @keyword atom_num_flag: A flag which if True will cause the atom number to be yielded. 179 @type atom_num_flag: bool 180 @keyword atom_name_flag: A flag which if True will cause the atom name to be yielded. 181 @type atom_name_flag: bool 182 @keyword element_flag: A flag which if True will cause the element name to be yielded. 183 @type element_flag: bool 184 @keyword pos_flag: A flag which if True will cause the atomic position to be 185 yielded. 186 @type pos_flag: bool 187 average atom properties across all loaded structures. 188 @type ave: bool 189 @return: A tuple of atomic information, as described in the docstring. 190 @rtype: tuple consisting of optional molecule name (str), residue number 191 (int), residue name (str), atom number (int), atom name(str), 192 element name (str), and atomic position (array of len 3). 193 """ 194 195 # Generate the selection object. 196 sel_obj = Selection(atom_id) 197 198 # Model loop. 199 for model in self.model_loop(): 200 # Loop over the molecules. 201 for mol_index in range(len(model.mol)): 202 mol = model.mol[mol_index] 203 204 # Skip non-matching molecules. 205 if sel_obj and not sel_obj.contains_mol(mol.mol_name): 206 continue 207 208 # Loop over the residues. 209 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj): 210 # Loop over the atoms of the residue. 211 atom_index = -1 212 for atom in res: 213 # Atom number, name, index, and element type. 214 atom_num = atom.properties['serial_number'] 215 atom_name = atom.name 216 element = atom.properties['element'] 217 atom_index = atom_index + 1 218 219 # Skip non-matching atoms. 220 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name): 221 continue 222 223 # The atom position. 224 if ave: 225 pos = self.__ave_atom_pos(mol_index, res_index, atom_index) 226 else: 227 pos = atom.position.array 228 229 # The molecule name. 230 mol_name = mol.mol_name 231 232 # Replace empty variables with None. 233 if not mol.mol_name: 234 mol_name = None 235 if not res_num: 236 res_num = None 237 if not res_name: 238 res_name = None 239 if not atom_num: 240 atom_num = None 241 if not atom_name: 242 atom_name = None 243 244 # Build the tuple to be yielded. 245 atomic_tuple = () 246 if model_num_flag: 247 if ave: 248 atomic_tuple = atomic_tuple + (None,) 249 else: 250 atomic_tuple = atomic_tuple + (model.num,) 251 if mol_name_flag: 252 atomic_tuple = atomic_tuple + (mol_name,) 253 if res_num_flag: 254 atomic_tuple = atomic_tuple + (res_num,) 255 if res_name_flag: 256 atomic_tuple = atomic_tuple + (res_name,) 257 if atom_num_flag: 258 atomic_tuple = atomic_tuple + (atom_num,) 259 if atom_name_flag: 260 atomic_tuple = atomic_tuple + (atom_name,) 261 if element_flag: 262 atomic_tuple = atomic_tuple + (element,) 263 if pos_flag: 264 atomic_tuple = atomic_tuple + (pos,) 265 266 # Yield the information. 267 yield atomic_tuple 268 269 # Break out of the loop if the ave flag is set, as data from only one model is used. 270 if ave: 271 break

272 273

274 - def attached_atom(self, atom_id=None, attached_atom=None, model=None):

275 """Find the atom corresponding to 'attached_atom' which is bonded to the atom of 'atom_id'. 276 277 @keyword atom_id: The molecule, residue, and atom identifier string. This must 278 correspond to a single atom in the system. 279 @type atom_id: str 280 @keyword attached_atom: The name of the attached atom to return. 281 @type attached_atom: str 282 @keyword model: The model to return the positional information from. If not 283 supplied and multiple models exist, then the returned atomic 284 position will be a list of the positions in each model. 285 @type model: None or int 286 @return: A tuple of information about the bonded atom. 287 @rtype: tuple consisting of the atom number (int), atom name (str), element 288 name (str), and atomic positions for each model (list of numpy 289 arrays) 290 """ 291 292 # Generate the selection object. 293 sel_obj = Selection(atom_id) 294 295 # Initialise some objects. 296 bonded_num = None 297 bonded_name = None 298 element = None 299 pos_array = [] 300 301 # Loop over the models. 302 for model in self.model_loop(model): 303 # Init. 304 atom_found = False 305 306 # Loop over each individual molecule. 307 for mol in model.mol: 308 # Loop over the residues of the protein in the PDB file. 309 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj): 310 # Loop over the atoms of the residue. 311 for atom in res: 312 # Atom number, name, and position. 313 atom_num = atom.properties['serial_number'] 314 atom_name = atom.name 315 316 # Skip non-matching atoms. 317 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name): 318 continue 319 320 # More than one matching atom! 321 if atom_found: 322 raise RelaxError("The atom_id argument " + repr(atom_id) + " must correspond to a single atom.") 323 324 # The atom has been found, so store some info. 325 atom_found = True 326 atom_num_match = atom_num 327 atom_name_match = atom_name 328 mol_type_match = mol.mol_type 329 res_match = res 330 331 # Found the atom. 332 if atom_found: 333 # Find the atom bonded to this model/molecule/residue/atom. 334 bonded_num, bonded_name, element, pos = self.__find_bonded_atom(attached_atom, mol_type_match, res_match) 335 336 # Append the position to the position array (converting from a Numeric array to a numpy array). 337 pos_array.append(array(pos, float64)) 338 339 # Return the atom info. 340 return bonded_num, bonded_name, element, pos_array

341 342

343 - def __ave_atom_pos(self, mol_index, res_index, atom_index):

344 """Determine the average atom position across all models. 345 346 @param mol_index: The molecule index. 347 @type mol_index: int 348 @param res_index: The residue index. 349 @type res_index: int 350 @param atom_index: The atom index. 351 @type atom_index: int 352 """ 353 354 # Init. 355 pos = zeros(3, float64) 356 357 # Loop over the models. 358 for model in self.model_loop(): 359 # The exact molecule. 360 mol = model.mol[mol_index] 361 362 # The residue. 363 if mol.mol_type != 'other': 364 res = mol.data.residues[res_index] 365 else: 366 res = mol.data[res_index] 367 368 # The atom. 369 atom = res[atom_index] 370 371 # Sum the position. 372 pos = pos + atom.position.array 373 374 # Average the position array. 375 pos = pos / len(self.structural_data) 376 377 # Return the ave position. 378 return pos

379 380

381 - def bond_vectors(self, attached_atom=None, model_num=None, mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, return_name=False, return_warnings=False):

382 """Find the bond vectors between the atoms of 'attached_atom' and 'atom_id'. 383 384 @keyword attached_atom: The name of the bonded atom. 385 @type attached_atom: str 386 @keyword model_num: The model of which to return the vectors from. If not supplied 387 and multiple models exist, then vectors from all models will be 388 returned. 389 @type model_num: None or int 390 @keyword mol_name: The name of the molecule that attached_atom belongs to. 391 @type mol_name: str 392 @keyword res_num: The number of the residue that attached_atom belongs to. 393 @type res_num: str 394 @keyword res_name: The name of the residue that attached_atom belongs to. 395 @type res_name: str 396 @keyword spin_num: The number of the spin that attached_atom is attached to. 397 @type spin_num: str 398 @keyword spin_name: The name of the spin that attached_atom is attached to. 399 @type spin_name: str 400 @keyword return_name: A flag which if True will cause the name of the attached atom to 401 be returned together with the bond vectors. 402 @type return_name: bool 403 @keyword return_warnings: A flag which if True will cause warning messages to be returned. 404 @type return_warnings: bool 405 @return: The list of bond vectors for each structure. 406 @rtype: list of numpy arrays 407 """ 408 409 # Generate the selection object. 410 sel_obj = Selection(generate_spin_id(mol_name, res_num, res_name, spin_num, spin_name)) 411 412 # Initialise some objects. 413 vectors = [] 414 attached_name = None 415 warnings = None 416 417 # Loop over the models. 418 for model in self.model_loop(model_num): 419 # Init. 420 atom_found = False 421 422 # Loop over each individual molecule. 423 for mol in model.mol: 424 # Loop over the residues of the protein in the PDB file. 425 for res, res_num, res_name, res_index in self.__residue_loop(mol, sel_obj): 426 # Loop over the atoms of the residue. 427 for atom in res: 428 # Atom number, name, and position. 429 atom_num = atom.properties['serial_number'] 430 atom_name = atom.name 431 432 # Skip non-matching atoms. 433 if sel_obj and not sel_obj.contains_spin(atom_num, atom_name, res_num, res_name, mol.mol_name): 434 continue 435 436 # More than one matching atom! 437 if atom_found: 438 raise RelaxError("The atom_id argument " + repr(atom_id) + " must correspond to a single atom.") 439 440 # The atom has been found, so store some info. 441 atom_found = True 442 pos_match = atom.position.array 443 mol_type_match = mol.mol_type 444 res_match = res 445 446 # Found the atom. 447 if atom_found: 448 # Find the atom bonded to this structure/molecule/residue/atom. 449 bonded_num, bonded_name, element, pos, attached_name, warnings = self.__find_bonded_atom(attached_atom, mol_type_match, res_match) 450 451 # No bonded atom. 452 if (bonded_num, bonded_name, element) == (None, None, None): 453 continue 454 455 # The bond vector. 456 vector = pos - pos_match 457 458 # Append the vector to the vectors array (converting from a Numeric array to a numpy array). 459 vectors.append(array(vector, float64)) 460 461 # Build the tuple to be yielded. 462 data = (vectors,) 463 if return_name: 464 data = data + (attached_name,) 465 if return_warnings: 466 data = data + (warnings,) 467 468 # Return the data. 469 return data

470 471

472 - def load_pdb(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, verbosity=False):

473 """Function for loading the structures from the PDB file. 474 475 @param file_path: The full path of the file. 476 @type file_path: str 477 @keyword read_mol: The molecule(s) to read from the file, independent of model. The 478 molecules are determined differently by the different parsers, but 479 are numbered consecutively from 1. If set to None, then all 480 molecules will be loaded. 481 @type read_mol: None, int, or list of int 482 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, 483 then the molecules will be automatically labelled based on the file 484 name or other information. 485 @type set_mol_name: None, str, or list of str 486 @keyword read_model: The PDB model to extract from the file. If set to None, then all 487 models will be loaded. 488 @type read_model: None, int, or list of int 489 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then 490 the PDB model numbers will be preserved, if they exist. 491 @type set_model_num: None, int, or list of int 492 @keyword verbosity: A flag which if True will cause messages to be printed. 493 @type verbosity: bool 494 @return: The status of the loading of the PDB file. 495 @rtype: bool 496 """ 497 498 # Initial print out. 499 if verbosity: 500 print("\nScientific Python PDB parser.") 501 502 # Test if the file exists. 503 if not access(file_path, F_OK): 504 # Exit indicating failure. 505 return False 506 507 # Separate the file name and path. 508 path, file = os.path.split(file_path) 509 510 # Convert the structure reading args into lists. 511 if read_mol and not isinstance(read_mol, list): 512 read_mol = [read_mol] 513 if set_mol_name and not isinstance(set_mol_name, list): 514 set_mol_name = [set_mol_name] 515 if read_model and not isinstance(read_model, list): 516 read_model = [read_model] 517 if set_model_num and not isinstance(set_model_num, list): 518 set_model_num = [set_model_num] 519 520 # Load all models. 521 model_flag = True 522 model_num = 1 523 model_load_num = 1 524 orig_model_num = [] 525 mol_conts = [] 526 while True: 527 # Only load the desired model. 528 if read_model: 529 # No more models to read. 530 if model_num > max(read_model): 531 break 532 533 # Skip the model if not in the list. 534 if model_num not in read_model: 535 # Increment the model counter. 536 model_num = model_num + 1 537 538 # Jump to the next model. 539 continue 540 541 # Load the PDB file. 542 model = PDB.Structure(file_path, model_num) 543 544 # No model 1. 545 if not len(model) and not len(mol_conts): 546 # Load the PDB without a model number. 547 model = PDB.Structure(file_path) 548 model_flag = False 549 550 # Ok, nothing is loadable from this file. 551 if not len(model): 552 raise RelaxPdbLoadError(file_path) 553 554 # Test if the last structure has been reached. 555 if not len(model): 556 del model 557 break 558 559 # Append an empty list to the molecule container structure for a new model, set the molecule index, and initialise the target name structure. 560 mol_conts.append([]) 561 mol_index = 0 562 new_mol_name = [] 563 564 # Set the target molecule number offset (if molecules already exist). 565 mol_offset = 0 566 for i in range(len(self.structural_data)): 567 model_index = model_load_num - 1 568 if not set_model_num or (model_index <= len(set_model_num) and set_model_num[model_index] == self.structural_data[i].num): 569 mol_offset = len(self.structural_data[i].mol) 570 571 # Store the original model number. 572 orig_model_num.append(model_num) 573 574 # Print the PDB info. 575 if verbosity: 576 print(model) 577 578 # First add the peptide chains (generating the molecule names and incrementing the molecule index). 579 if hasattr(model, 'peptide_chains'): 580 for mol in model.peptide_chains: 581 # Only read the required molecule. 582 if read_mol and mol_index+1 not in read_mol: 583 mol_index = mol_index + 1 584 continue 585 586 mol.mol_type = 'protein' 587 mol_conts[-1].append(MolContainer()) 588 mol_conts[-1][-1].data = mol 589 mol_conts[-1][-1].mol_type = 'protein' 590 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file) 591 mol_index = mol_index + 1 592 593 # Then the nucleotide chains (generating the molecule names and incrementing the molecule index). 594 if hasattr(model, 'nucleotide_chains'): 595 for mol in model.nucleotide_chains: 596 # Only read the required molecule. 597 if read_mol and mol_index+1 not in read_mol: 598 mol_index = mol_index + 1 599 continue 600 601 mol_conts[-1].append(MolContainer()) 602 mol_conts[-1][-1].data = mol 603 mol_conts[-1][-1].mol_type = 'nucleic acid' 604 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file) 605 mol_index = mol_index + 1 606 607 # Finally all other molecules (generating the molecule names and incrementing the molecule index). 608 if hasattr(model, 'molecules'): 609 for key in list(model.molecules.keys()): 610 # Only read the required molecule. 611 if read_mol and mol_index+1 not in read_mol: 612 mol_index = mol_index + 1 613 continue 614 615 # Add an empty list-type container. 616 mol_conts[-1].append(MolContainer()) 617 mol_conts[-1][-1].mol_type = 'other' 618 619 # Loop over the molecules. 620 mol_conts[-1][-1].data = [] 621 for mol in model.molecules[key]: 622 mol_conts[-1][-1].data.append(mol) 623 624 # Check. 625 if set_mol_name and mol_index >= len(set_mol_name): 626 raise RelaxError("The %s molecules read exceeds the number of molecule names supplied in %s." % (mol_index+1, set_mol_name)) 627 628 # Update structures. 629 self.target_mol_name(set=set_mol_name, target=new_mol_name, index=mol_index, mol_num=mol_index+1+mol_offset, file=file) 630 mol_index = mol_index + 1 631 632 # Increment the model counters. 633 model_num = model_num + 1 634 model_load_num = model_load_num + 1 635 636 # Create the structural data data structures. 637 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=range(1, len(mol_conts[0])+1), set_mol_name=new_mol_name, file_name=file, file_path=path) 638 639 # Loading worked. 640 return True

Source Code for Module generic_fns.structure.scientific