Package generic_fns :: Package structure :: Module internal
[hide private]
[frames] | no frames]

Source Code for Module generic_fns.structure.internal

   1  ############################################################################### 
   2  #                                                                             # 
   3  # Copyright (C) 2003-2013 Edward d'Auvergne                                   # 
   4  #                                                                             # 
   5  # This file is part of the program relax (http://www.nmr-relax.com).          # 
   6  #                                                                             # 
   7  # This program is free software: you can redistribute it and/or modify        # 
   8  # it under the terms of the GNU General Public License as published by        # 
   9  # the Free Software Foundation, either version 3 of the License, or           # 
  10  # (at your option) any later version.                                         # 
  11  #                                                                             # 
  12  # This program is distributed in the hope that it will be useful,             # 
  13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
  14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
  15  # GNU General Public License for more details.                                # 
  16  #                                                                             # 
  17  # You should have received a copy of the GNU General Public License           # 
  18  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
  19  #                                                                             # 
  20  ############################################################################### 
  21   
  22  # Module docstring. 
  23  """Module containing the internal relax structural object.""" 
  24   
  25  # Python module imports. 
  26  from copy import deepcopy 
  27  from numpy import array, dot, float64, linalg, zeros 
  28  import os 
  29  from os import F_OK, access 
  30  from string import digits, ascii_uppercase 
  31  from warnings import warn 
  32   
  33  # relax module imports. 
  34  from check_types import is_float 
  35  from data.relax_xml import fill_object_contents, xml_to_object 
  36  from generic_fns import pipes, relax_re 
  37  from generic_fns.mol_res_spin import spin_loop 
  38  from generic_fns.mol_res_spin import Selection 
  39  from generic_fns.structure import pdb_read, pdb_write 
  40  from generic_fns.structure.api_base import Base_struct_API, ModelList, Displacements 
  41  from relax_errors import RelaxError, RelaxNoneIntError, RelaxNoPdbError 
  42  from relax_io import file_root, open_read_file 
  43  from relax_warnings import RelaxWarning 
  44   
  45   
  46   
47 -class Internal(Base_struct_API):
48 """The internal relax structural data object. 49 50 The structural data object for this class is a container possessing a number of different arrays 51 corresponding to different structural information. These objects are described in the 52 structural container docstring. 53 """ 54 55 # Identification string. 56 id = 'internal' 57 58
59 - def _bonded_atom(self, attached_atom, index, mol):
60 """Find the atom named attached_atom directly bonded to the atom located at the index. 61 62 @param attached_atom: The name of the attached atom to return. 63 @type attached_atom: str 64 @param index: The index of the atom which the attached atom is attached to. 65 @type index: int 66 @param mol: The molecule container. 67 @type mol: MolContainer instance 68 @return: A tuple of information about the bonded atom. 69 @rtype: tuple consisting of the atom number (int), atom name (str), element 70 name (str), and atomic position (Numeric array of len 3) 71 """ 72 73 # Init. 74 bonded_found = False 75 76 # No bonded atoms, so determine the connectivities. 77 if not mol.bonded[index]: 78 # Determine the molecule type if needed. 79 if not hasattr(mol, 'type'): 80 self._mol_type(mol) 81 82 # Protein. 83 if mol.type == 'protein': 84 self._protein_connect(mol) 85 86 # Find everything within 2 Angstroms and say they are bonded. 87 else: 88 self._find_bonded_atoms(index, mol, radius=2) 89 90 # Loop over the bonded atoms. 91 matching_list = [] 92 for bonded_index in mol.bonded[index]: 93 if relax_re.search(mol.atom_name[bonded_index], attached_atom): 94 matching_list.append(bonded_index) 95 num_attached = len(matching_list) 96 97 # Problem. 98 if num_attached > 1: 99 # Get the atom names. 100 matching_names = [] 101 for i in matching_list: 102 matching_names.append(mol.atom_name[i]) 103 104 # Return nothing but a warning. 105 return None, None, None, None, None, 'More than one attached atom found: ' + repr(matching_names) 106 107 # No attached atoms. 108 if num_attached == 0: 109 if relax_re.search('@*', attached_atom): 110 matching_list = [] 111 bonded_num=[] 112 bonded_name=[] 113 element=[] 114 pos=[] 115 for spin, mol_name, res_num, res_name in spin_loop(selection=attached_atom, full_info=True): 116 bonded_num.append(spin.num) 117 bonded_name.append(spin.name) 118 element.append(spin.element) 119 pos.append(spin.pos) 120 if len(bonded_num) == 1: 121 return bonded_num[0], bonded_name[0], element[0], pos[0], attached_atom, None 122 elif len(bonded_num) > 1: 123 # Return nothing but a warning. 124 return None, None, None, None, None, 'More than one attached atom found: ' + repr(matching_names) 125 elif len(bonded_num) > 1: 126 # Return nothing but a warning. 127 return None, None, None, None, None, "No attached atom could be found" 128 else: 129 return None, None, None, None, None, "No attached atom could be found" 130 131 # The bonded atom info. 132 index = matching_list[0] 133 bonded_num = mol.atom_num[index] 134 bonded_name = mol.atom_name[index] 135 element = mol.element[index] 136 pos = [mol.x[index], mol.y[index], mol.z[index]] 137 attached_name = mol.atom_name[index] 138 139 # Return the information. 140 return bonded_num, bonded_name, element, pos, attached_name, None
141 142
143 - def _find_bonded_atoms(self, index, mol, radius=1.2):
144 """Find all atoms within a sphere and say that they are attached to the central atom. 145 146 The found atoms will be added to the 'bonded' data structure. 147 148 149 @param index: The index of the central atom. 150 @type index: int 151 @param mol: The molecule container. 152 @type mol: MolContainer instance 153 """ 154 155 # Central atom info. 156 centre = array([mol.x[index], mol.y[index], mol.z[index]], float64) 157 158 # Atom loop. 159 dist_list = [] 160 connect_list = {} 161 element_list = {} 162 for i in range(len(mol.atom_num)): 163 # Skip proton to proton bonds! 164 if mol.element[index] == 'H' and mol.element[i] == 'H': 165 continue 166 167 # The atom's position. 168 pos = array([mol.x[i], mol.y[i], mol.z[i]], float64) 169 170 # The distance from the centre. 171 dist = linalg.norm(centre-pos) 172 173 # The atom is within the radius. 174 if dist < radius: 175 # Store the distance. 176 dist_list.append(dist) 177 178 # Store the atom index. 179 connect_list[dist] = i 180 181 # Store the element type. 182 element_list[dist] = mol.element[i] 183 184 # The maximum number of allowed covalent bonds. 185 max_conn = 1000 # Ridiculous default! 186 if mol.element[index] == 'H': 187 max_conn = 1 188 elif mol.element[index] == 'O': 189 max_conn = 2 190 elif mol.element[index] == 'N': 191 max_conn = 3 192 elif mol.element[index] == 'C': 193 max_conn = 4 194 195 # Sort. 196 dist_list.sort() 197 198 # Loop over the max number of connections (or the number of connected atoms, if less). 199 for i in range(min(max_conn, len(dist_list))): 200 mol.atom_connect(index, connect_list[dist_list[i]])
201 202
203 - def _get_chemical_name(self, hetID):
204 """Return the chemical name corresponding to the given residue ID. 205 206 The following names are currently returned:: 207 ________________________________________________ 208 | | | 209 | hetID | Chemical name | 210 |________|_____________________________________| 211 | | | 212 | TNS | Tensor | 213 | COM | Centre of mass | 214 | AXS | Tensor axes | 215 | SIM | Monte Carlo simulation tensor axes | 216 | PIV | Pivot point | 217 | CON | Cone object | 218 | AVE | Average vector | 219 |________|_____________________________________| 220 221 For any other residues, no description is returned. 222 223 @param hetID: The residue ID. 224 @type hetID: str 225 @return: The chemical name. 226 @rtype: str or None 227 """ 228 229 # Tensor. 230 if hetID == 'TNS': 231 return 'Tensor' 232 233 # Centre of mass. 234 if hetID == 'COM': 235 return 'Centre of mass' 236 237 # Tensor axes. 238 if hetID == 'AXS': 239 return 'Tensor axes' 240 241 # Monte Carlo simulation tensor axes. 242 if hetID == 'SIM': 243 return 'Monte Carlo simulation tensor axes' 244 245 # Pivot point. 246 if hetID == 'PIV': 247 return 'Pivot point' 248 249 # Cone object. 250 if hetID == 'CON': 251 return 'Cone' 252 253 # Average vector. 254 if hetID == 'AVE': 255 return 'Average vector'
256 257
258 - def _parse_pdb_connectivity_annotation(self, lines):
259 """Loop over and parse the PDB connectivity annotation records. 260 261 These are the records identified in the U{PDB version 3.30 documentation<http://www.wwpdb.org/documentation/format33/sect6.html>}. 262 263 264 @param lines: The lines of the PDB file excluding the sections prior to the connectivity annotation section. 265 @type lines: list of str 266 @return: The remaining PDB lines with the connectivity annotation records stripped. 267 @rtype: list of str 268 """ 269 270 # The ordered list of record names in the connectivity annotation section. 271 records = [ 272 'SSBOND', 273 'LINK ', 274 'CISPEP' 275 ] 276 277 # Loop over the lines. 278 for i in range(len(lines)): 279 # No match, therefore assume to be out of the connectivity annotation section. 280 if lines[i][:6] not in records: 281 break 282 283 # Return the remaining lines. 284 return lines[i:]
285 286
287 - def _parse_pdb_coord(self, lines):
288 """Generator function for looping over the models in the PDB file. 289 290 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect9.html}. 291 292 293 @param lines: The lines of the coordinate section. 294 @type lines: list of str 295 @return: The model number and all the records for that model. 296 @rtype: tuple of int and array of str 297 """ 298 299 # Init. 300 model = None 301 records = [] 302 303 # Loop over the data. 304 for i in range(len(lines)): 305 # A new model record. 306 if lines[i][:5] == 'MODEL': 307 try: 308 model = int(lines[i].split()[1]) 309 except: 310 raise RelaxError("The MODEL record " + repr(lines[i]) + " is corrupt, cannot read the PDB file.") 311 312 # Skip all records prior to the first ATOM or HETATM record. 313 if not (lines[i][:4] == 'ATOM' or lines[i][:6] == 'HETATM') and not len(records): 314 continue 315 316 # End of the model. 317 if lines[i][:6] == 'ENDMDL': 318 # Yield the info. 319 yield model, records 320 321 # Reset the records. 322 records = [] 323 324 # Skip the rest of this loop. 325 continue 326 327 # Append the line as a record of the model. 328 records.append(lines[i]) 329 330 # If records is not empty then there are no models, so yield the lot. 331 if len(records): 332 yield model, records
333 334
335 - def _parse_pdb_hetrogen(self, lines):
336 """Loop over and parse the PDB hetrogen records. 337 338 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect4.html}. 339 340 341 @param lines: The lines of the PDB file excluding the sections prior to the hetrogen section. 342 @type lines: list of str 343 @return: The remaining PDB lines with the hetrogen records stripped. 344 @rtype: list of str 345 """ 346 347 # The ordered list of record names in the hetrogen section. 348 records = [ 349 'HET ', 350 'FORMUL', 351 'HETNAM', 352 'HETSYN' 353 ] 354 355 # Loop over the lines. 356 for i in range(len(lines)): 357 # No match, therefore assume to be out of the hetrogen section. 358 if lines[i][:6] not in records: 359 break 360 361 # Return the remaining lines. 362 return lines[i:]
363 364
365 - def _parse_pdb_misc(self, lines):
366 """Loop over and parse the PDB miscellaneous records. 367 368 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect7.html}. 369 370 371 @param lines: The lines of the PDB file excluding the sections prior to the miscellaneous section. 372 @type lines: list of str 373 @return: The remaining PDB lines with the miscellaneous records stripped. 374 @rtype: list of str 375 """ 376 377 # The ordered list of record names in the miscellaneous section. 378 records = [ 379 'SITE ' 380 ] 381 382 # Loop over the lines. 383 for i in range(len(lines)): 384 # No match, therefore assume to be out of the miscellaneous section. 385 if lines[i][:6] not in records: 386 break 387 388 # Return the remaining lines. 389 return lines[i:]
390 391
392 - def _parse_pdb_prim_struct(self, lines):
393 """Loop over and parse the PDB primary structure records. 394 395 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect3.html}. 396 397 398 @param lines: The lines of the PDB file excluding the title section. 399 @type lines: list of str 400 @return: The remaining PDB lines with the primary structure records stripped. 401 @rtype: list of str 402 """ 403 404 # The ordered list of record names in the primary structure section. 405 records = [ 406 'DBREF ', 407 'DBREF1', 408 'DBREF2', 409 'SEQADV', 410 'SEQRES', 411 'MODRES' 412 ] 413 414 # Loop over the lines. 415 for i in range(len(lines)): 416 # No match, therefore assume to be out of the primary structure section. 417 if lines[i][:6] not in records: 418 break 419 420 # Return the remaining lines. 421 return lines[i:]
422 423
424 - def _parse_pdb_ss(self, lines):
425 """Loop over and parse the PDB secondary structure records. 426 427 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect5.html}. 428 429 430 @param lines: The lines of the PDB file excluding the sections prior to the secondary structure section. 431 @type lines: list of str 432 @return: The remaining PDB lines with the secondary structure records stripped. 433 @rtype: list of str 434 """ 435 436 # The ordered list of record names in the secondary structure section (the depreciated TURN record is also included to handle old PDB files). 437 records = [ 438 'HELIX ', 439 'SHEET ', 440 'TURN ' 441 ] 442 443 # Loop over the lines. 444 for i in range(len(lines)): 445 # No match, therefore assume to be out of the secondary structure section. 446 if lines[i][:6] not in records: 447 break 448 449 # A helix. 450 if lines[i][:5] == 'HELIX': 451 # Parse the record. 452 record_type, ser_num, helix_id, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, helix_class, comment, length = pdb_read.helix(lines[i]) 453 454 # Store the data. 455 if not hasattr(self, 'helices'): 456 self.helices = [] 457 self.helices.append([helix_id, init_chain_id, init_res_name, init_seq_num, end_chain_id, end_res_name, end_seq_num, helix_class, length]) 458 459 # A sheet. 460 if lines[i][:5] == 'SHEET': 461 # Parse the record. 462 record_type, strand, sheet_id, num_strands, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, sense, cur_atom, cur_res_name, cur_chain_id, cur_res_seq, cur_icode, prev_atom, prev_res_name, prev_chain_id, prev_res_seq, prev_icode = pdb_read.sheet(lines[i]) 463 464 # Store the data. 465 if not hasattr(self, 'sheets'): 466 self.sheets = [] 467 self.sheets.append([strand, sheet_id, num_strands, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, sense, cur_atom, cur_res_name, cur_chain_id, cur_res_seq, cur_icode, prev_atom, prev_res_name, prev_chain_id, prev_res_seq, prev_icode]) 468 469 # Return the remaining lines. 470 return lines[i:]
471 472
473 - def _parse_pdb_title(self, lines):
474 """Loop over and parse the PDB title records. 475 476 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect2.html}. 477 478 479 @param lines: All lines of the PDB file. 480 @type lines: list of str 481 @return: The remaining PDB lines with the title records stripped. 482 @rtype: list of str 483 """ 484 485 # The ordered list of (sometimes truncated) record names in the title section. 486 records = [ 487 'HEADER', 488 'OBSLTE', 489 'TITLE ', 490 'SPLT ', 491 'CAVEAT', 492 'COMPND', 493 'SOURCE', 494 'KEYWDS', 495 'EXPDTA', 496 'NUMMDL', 497 'MDLTYP', 498 'AUTHOR', 499 'REVDAT', 500 'SPRSDE', 501 'JRNL ', 502 'REMARK' 503 ] 504 505 # Loop over the lines. 506 for i in range(len(lines)): 507 # No match, therefore assume to be out of the title section. 508 if lines[i][:6] not in records: 509 break 510 511 # Return the remaining lines. 512 return lines[i:]
513 514
515 - def _parse_pdb_transform(self, lines):
516 """Loop over and parse the PDB transform records. 517 518 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect8.html}. 519 520 521 @param lines: The lines of the PDB file excluding the sections prior to the transform section. 522 @type lines: list of str 523 @return: The remaining PDB lines with the transform records stripped. 524 @rtype: list of str 525 """ 526 527 # The ordered list of record names in the transform section. 528 records = [ 529 'CRYST', 530 'MTRIX', 531 'ORIGX', 532 'SCALE', 533 ] 534 535 # Loop over the lines. 536 for i in range(len(lines)): 537 # No match, therefore assume to be out of the transform section. 538 if lines[i][0: 5] not in records: 539 break 540 541 # Return the remaining lines. 542 return lines[i:]
543 544
545 - def _parse_models_xyz(self, file_path):
546 """Generator function for looping over the models in the XYZ file. 547 548 @param file_path: The full path of the XYZ file. 549 @type file_path: str 550 @return: The model number and all the records for that model. 551 @rtype: tuple of int and array of str 552 """ 553 554 # Open the file. 555 file = open_read_file(file_path) 556 lines = file.readlines() 557 file.close() 558 559 # Check for empty files. 560 if lines == []: 561 raise RelaxError("The XYZ file is empty.") 562 563 # Init. 564 total_atom = 0 565 model = 0 566 records = [] 567 568 # Loop over the data. 569 for i in range(len(lines)): 570 num=0 571 word = lines[i].split() 572 # Find the total atom number and the first model. 573 if (i==0) and (len(word)==1): 574 try: 575 total_atom = int(word[0]) 576 num = 1 577 except: 578 raise RelaxError("The MODEL record " + repr(lines[i]) + " is corrupt, cannot read the XYZ file.") 579 580 # End of the model. 581 if (len(records) == total_atom): 582 # Yield the info 583 yield records 584 585 # Reset the records. 586 records = [] 587 588 # Skip all records prior to atom coordinates record. 589 if (len(word) != 4): 590 continue 591 592 # Append the line as a record of the model. 593 records.append(lines[i]) 594 595 # If records is not empty then there are no models, so yield the lot. 596 if len(records): 597 yield records
598 599
600 - def _parse_mols(self, records):
601 """Generator function for looping over the molecules in the PDB records of a model. 602 603 @param records: The list of PDB records for the model, or if no models exist the entire 604 PDB file. 605 @type records: list of str 606 @return: The molecule number and all the records for that molecule. 607 @rtype: tuple of int and list of str 608 """ 609 610 # Check for empty records. 611 if records == []: 612 raise RelaxError("There are no PDB records for this model.") 613 614 # Init. 615 mol_num = 1 616 mol_records = [] 617 end = False 618 619 # Loop over the data. 620 for i in range(len(records)): 621 # A PDB termination record. 622 if records[i][:3] == 'END': 623 break 624 625 # A master record, so we are done. 626 if records[i][:6] == 'MASTER': 627 break 628 629 # A model termination record. 630 if records[i][:6] == 'ENDMDL': 631 end = True 632 633 # A molecule termination record with no trailing HETATM. 634 elif i < len(records)-1 and records[i][:3] == 'TER' and not records[i+1][:6] == 'HETATM': 635 end = True 636 637 # A HETATM followed by an ATOM record. 638 elif i < len(records)-1 and records[i][:6] == 'HETATM' and records[i+1][:4] == 'ATOM': 639 end = True 640 641 # End. 642 if end: 643 # Yield the info. 644 yield mol_num, mol_records 645 646 # Reset the records. 647 mol_records = [] 648 649 # Increment the molecule number. 650 mol_num = mol_num + 1 651 652 # Reset the flag. 653 end = False 654 655 # Skip the rest of this loop. 656 continue 657 658 # Append the line as a record of the molecule. 659 mol_records.append(records[i]) 660 661 # If records is not empty then there is only a single molecule, so yield the lot. 662 if len(mol_records): 663 yield mol_num, mol_records
664 665
666 - def _pdb_chain_id_to_mol_index(self, chain_id=None):
667 """Convert the PDB chain ID into the molecule index in a regular way. 668 669 @keyword chain_id: The PDB chain ID string. 670 @type chain_id: str 671 @return: The corresponding molecule index. 672 @rtype: int 673 """ 674 675 # Initialise. 676 mol_index = 0 677 678 # Convert to the molecule index. 679 if chain_id: 680 mol_index = ascii_uppercase.index(chain_id) 681 682 # Return the index. 683 return mol_index
684 685
686 - def _residue_data(self, res_nums=None, res_names=None):
687 """Convert the residue info into a dictionary of unique residues with numbers as keys. 688 689 @keyword res_nums: The list of residue numbers. 690 @type res_nums: list of int 691 @keyword res_names: The list of residue names matching the numbers. 692 @type res_names: list of str 693 @return: The dictionary of residue names with residue numbers as keys. 694 @rtype: dict of str 695 """ 696 697 # Initialise. 698 data = {} 699 700 # Loop over the data. 701 for i in range(len(res_nums)): 702 # The residue data already exists. 703 if res_nums[i] in data: 704 continue 705 706 # Add the data. 707 data[res_nums[i]] = res_names[i] 708 709 # Return the dictionary. 710 return data
711 712
713 - def _validate_data_arrays(self, struct):
714 """Check the validity of the data arrays in the given structure object. 715 716 @param struct: The structural object. 717 @type struct: Structure_container instance 718 """ 719 720 # The number of atoms. 721 num = len(struct.atom_name) 722 723 # Check the other lengths. 724 if len(struct.bonded) != num and len(struct.chain_id) != num and len(struct.element) != num and len(struct.pdb_record) != num and len(struct.res_name) != num and len(struct.res_num) != num and len(struct.seg_id) != num and len(struct.x) != num and len(struct.y) != num and len(struct.z) != num: 725 raise RelaxError("The structural data is invalid.")
726 727
728 - def _mol_type(self, mol):
729 """Determine the type of molecule. 730 731 @param mol: The molecule data container. 732 @type mol: MolContainer instance 733 """ 734 735 # Amino acids. 736 aa = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'] 737 738 # Set the molecule type to default to 'other'. 739 mol.type = 'other' 740 741 # Loop over the residues. 742 for res in mol.res_name: 743 # Protein. 744 if res in aa: 745 # Set the molecule type and return. 746 mol.type = 'protein' 747 return
748 749
750 - def _protein_connect(self, mol):
751 """Set up the connectivities for the protein. 752 753 @param mol: The molecule data container. 754 @type mol: MolContainer instance 755 """ 756 757 # Initialise some residue data. 758 curr_res_num = None 759 res_atoms = [] 760 761 # Loop over all atoms. 762 for i in range(len(mol.atom_num)): 763 # New residue. 764 if mol.res_num[i] != curr_res_num: 765 # Intra-residue connectivites. 766 if len(res_atoms): 767 self._protein_intra_connect(mol, res_atoms) 768 769 # Update the residue number. 770 curr_res_num = mol.res_num[i] 771 772 # Reset the residue atom index list. 773 res_atoms = [] 774 775 # Add the atom index to the list. 776 res_atoms.append(i) 777 778 # Last atom. 779 if i == len(mol.atom_num) - 1 and len(res_atoms): 780 self._protein_intra_connect(mol, res_atoms)
781 782
783 - def _protein_intra_connect(self, mol, res_atoms):
784 """Set up the connectivities for the protein. 785 786 @param mol: The molecule data container. 787 @type mol: MolContainer instance 788 @param res_atoms: The list of atom indices corresponding to the residue. 789 @type res_atoms: list of int 790 """ 791 792 # Back bond connectivity. 793 indices = { 794 'N': None, 795 'C': None, 796 'O': None, 797 'CA': None, 798 'HN': None, 799 'H': None, # Same as HN. 800 'HA': None 801 } 802 803 # Loop over all atoms to find the indices. 804 for index in res_atoms: 805 if mol.atom_name[index] in indices: 806 indices[mol.atom_name[index]] = index 807 808 # Connect the atom pairs. 809 pairs = [ 810 ['N', 'HN'], 811 ['N', 'H'], 812 ['N', 'CA'], 813 ['CA', 'HA'], 814 ['CA', 'C'], 815 ['C', 'O'] 816 ] 817 818 # Loop over the atoms pairs and connect them. 819 for pair in pairs: 820 if indices[pair[0]] != None and indices[pair[1]] != None: 821 mol.atom_connect(indices[pair[0]], indices[pair[1]])
822 823
824 - def _translate(self, data, format='str'):
825 """Convert the data into a format for writing to file. 826 827 @param data: The data to convert to the required format. 828 @type data: anything 829 @keyword format: The format to convert to. This can be 'str', 'float', or 'int'. 830 @type format: str 831 @return: The converted version of the data. 832 @rtype: str 833 """ 834 835 # Conversion to string. 836 if format == 'str': 837 # None values. 838 if data == None: 839 data = '' 840 841 # Force convert to string. 842 if not isinstance(data, str): 843 data = repr(data) 844 845 # Conversion to float. 846 if format == 'float': 847 # None values. 848 if data == None: 849 data = 0.0 850 851 # Force convert to float. 852 if not isinstance(data, float): 853 data = float(data) 854 855 # Return the converted data. 856 return data
857 858
859 - def _trim_helix(self, helix=None, trim_res_list=[], res_data=None):
860 """Trim the given helix based on the list of deleted residue numbers. 861 862 @keyword helix: The single helix metadata structure. 863 @type helix: list 864 @keyword trim_res_list: The list of residue numbers which no longer exist. 865 @type trim_res_list: list of int 866 @keyword res_data: The dictionary of residue names with residue numbers as keys. 867 @type res_data: dict of str 868 @return: The trimmed helix metadata structure, or None if the whole helix is to be deleted. 869 @rtype: list or None 870 """ 871 872 # Unpack the helix residue numbers. 873 start_res = helix[3] 874 end_res = helix[6] 875 876 # The reverse residue list. 877 trim_res_list_rev = deepcopy(trim_res_list) 878 trim_res_list_rev.reverse() 879 880 # The helix residues. 881 helix_res = list(range(start_res, end_res+1)) 882 883 # Trim forwards. 884 for res_num in trim_res_list: 885 if res_num == start_res: 886 # Remove the residue. 887 helix_res.pop(0) 888 889 # No helix left. 890 if len(helix_res) == 0: 891 break 892 893 # Realias the starting residue. 894 start_res = helix_res[0] 895 896 # No helix left. 897 if len(helix_res) == 0: 898 return None 899 900 # Trim backwards. 901 for res_num in trim_res_list_rev: 902 if res_num == end_res: 903 helix_res.pop(-1) 904 end_res = helix_res[-1] 905 906 # Replace the starting and ending residues. 907 if start_res != helix[3]: 908 helix[3] = start_res 909 helix[2] = res_data[start_res] 910 if end_res != helix[6]: 911 helix[6] = end_res 912 helix[5] = res_data[end_res] 913 914 # The helix length. 915 helix[-1] = len(helix_res) 916 917 # Return the modified helix. 918 return helix
919 920
921 - def _trim_sheet(self, sheet=None, trim_res_list=[], res_data=None):
922 """Trim the given sheet based on the list of deleted residue numbers. 923 924 @keyword sheet: The single sheet metadata structure. 925 @type sheet: list 926 @keyword trim_res_list: The list of residue numbers which no longer exist. 927 @type trim_res_list: list of int 928 @keyword res_data: The dictionary of residue names with residue numbers as keys. 929 @type res_data: dict of str 930 @return: The trimmed sheet metadata structure, or None if the whole sheet is to be deleted. 931 @rtype: list or None 932 """ 933 934 # Unpack the sheet residue numbers. 935 start_res = sheet[5] 936 end_res = sheet[9] 937 938 # The reverse residue list. 939 trim_res_list_rev = deepcopy(trim_res_list) 940 trim_res_list_rev.reverse() 941 942 # The sheet residues. 943 sheet_res = list(range(start_res, end_res+1)) 944 945 # Trim forwards. 946 for res_num in trim_res_list: 947 if res_num == start_res: 948 # Remove the residue. 949 sheet_res.pop(0) 950 951 # No sheet left. 952 if len(sheet_res) == 0: 953 break 954 955 # Realias the starting residue. 956 start_res = sheet_res[0] 957 958 # No sheet left. 959 if len(sheet_res) == 0: 960 return None 961 962 # Trim backwards. 963 for res_num in trim_res_list_rev: 964 if res_num == end_res: 965 sheet_res.pop(-1) 966 end_res = sheet_res[-1] 967 968 # Replace the starting and ending residues. 969 if start_res != sheet[5]: 970 sheet[5] = start_res 971 sheet[3] = res_data[start_res] 972 if end_res != sheet[9]: 973 sheet[9] = end_res 974 sheet[7] = res_data[end_res] 975 976 # Return the modified sheet. 977 return sheet
978 979
980 - def add_atom(self, mol_name=None, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):
981 """Add a new atom to the structural data object. 982 983 @keyword mol_name: The name of the molecule. 984 @type mol_name: str 985 @keyword atom_name: The atom name, e.g. 'H1'. 986 @type atom_name: str or None 987 @keyword res_name: The residue name. 988 @type res_name: str or None 989 @keyword res_num: The residue number. 990 @type res_num: int or None 991 @keyword pos: The position vector of coordinates. If a rank-2 array is supplied, the length of the first dimension must match the number of models. 992 @type pos: rank-1 or rank-2 array or list of float 993 @keyword element: The element symbol. 994 @type element: str or None 995 @keyword atom_num: The atom number. 996 @type atom_num: int or None 997 @keyword chain_id: The chain identifier. 998 @type chain_id: str or None 999 @keyword segment_id: The segment identifier. 1000 @type segment_id: str or None 1001 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'. 1002 @type pdb_record: str or None 1003 """ 1004 1005 # Test if the current data pipe exists. 1006 pipes.test() 1007 1008 # Add a model if not present. 1009 if len(self.structural_data) == 0: 1010 self.add_model() 1011 1012 # Check the position. 1013 if is_float(pos[0]): 1014 if len(pos) != 3: 1015 raise RelaxError("The single atomic position %s must be a 3D list." % pos) 1016 else: 1017 if len(pos) != len(self.structural_data): 1018 raise RelaxError("The %s atomic positions does not match the %s models present." % (len(pos), len(self.structural_data))) 1019 1020 # Loop over each model. 1021 for i in range(len(self.structural_data)): 1022 # Alias the model. 1023 model = self.structural_data[i] 1024 1025 # Specific molecule. 1026 mol = self.get_molecule(mol_name, model=model.num) 1027 1028 # Add the molecule, if it does not exist. 1029 if mol == None: 1030 self.add_molecule(name=mol_name) 1031 mol = self.get_molecule(mol_name, model=model.num) 1032 1033 # Split up the position if needed. 1034 if is_float(pos[0]): 1035 model_pos = pos 1036 else: 1037 model_pos = pos[i] 1038 1039 # Add the atom. 1040 mol.atom_add(atom_name=atom_name, res_name=res_name, res_num=res_num, pos=model_pos, element=element, atom_num=atom_num, chain_id=chain_id, segment_id=segment_id, pdb_record=pdb_record)
1041 1042
1043 - def add_model(self, model=None, coords_from=None):
1044 """Add a new model to the store. 1045 1046 The new model will be constructured with the structural information from the other models currently present. The coords_from argument allows the atomic positions to be taken from a certain model. If this argument is not set, then the atomic positions from the first model will be used. 1047 1048 @keyword model: The number of the model to create. 1049 @type model: int or None 1050 @keyword coords_from: The model number to take the coordinates from. 1051 @type coords_from: int or None 1052 @return: The model container. 1053 @rtype: ModelContainer instance 1054 """ 1055 1056 # Check if the model currently exists. 1057 if model != None: 1058 for i in range(len(self.structural_data)): 1059 if model == self.structural_data[i].num: 1060 raise RelaxError("The model '%s' already exists." % model) 1061 1062 # Add a new model. 1063 self.structural_data.add_item(model_num=model) 1064 1065 # The model to duplicate. 1066 if coords_from == None: 1067 coords_from = self.structural_data[0].num 1068 1069 # Construct the structural data for the model from the other models. 1070 for mol_name, res_num, res_name, atom_num, atom_name, element, pos in self.atom_loop(model_num=coords_from, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True): 1071 # Add the atom. 1072 self.add_atom(self, mol_name=mol_name, atom_name=atom_name, res_name=res_name, res_num=res_num, pos=pos, element=element, atom_num=atom_num) 1073 1074 # Return the model. 1075 return self.structural_data[-1]
1076 1077
1078 - def add_molecule(self, name=None):
1079 """Add a new molecule to the store. 1080 1081 @keyword name: The molecule identifier string. 1082 @type name: str 1083 """ 1084 1085 # Add a model if necessary. 1086 if len(self.structural_data) == 0: 1087 self.add_model() 1088 1089 # Loop over the models. 1090 for i in range(len(self.structural_data)): 1091 # Add the molecule. 1092 self.structural_data[i].mol.add_item(mol_name=name, mol_cont=MolContainer())
1093 1094
1095 - def are_bonded(self, atom_id1=None, atom_id2=None):
1096 """Determine if two atoms are directly bonded to each other. 1097 1098 @keyword atom_id1: The molecule, residue, and atom identifier string of the first atom. 1099 @type atom_id1: str 1100 @keyword atom_id2: The molecule, residue, and atom identifier string of the second atom. 1101 @type atom_id2: str 1102 @return: True if the atoms are directly bonded. 1103 @rtype: bool 1104 """ 1105 1106 # Generate the selection objects. 1107 sel_obj1 = Selection(atom_id1) 1108 sel_obj2 = Selection(atom_id2) 1109 1110 # Build the connectivities if needed. 1111 for mol in self.structural_data[0].mol: 1112 for i in range(len(mol.atom_num)): 1113 if not len(mol.bonded[i]): 1114 self._find_bonded_atoms(i, mol, radius=2) 1115 1116 # Loop over the molecules. 1117 for mol in self.structural_data[0].mol: 1118 # Skip non-matching molecules. 1119 if not sel_obj1.contains_mol(mol.mol_name): 1120 continue 1121 if not sel_obj2.contains_mol(mol.mol_name): 1122 continue 1123 1124 # Find the first atom. 1125 index1 = None 1126 for i in range(len(mol.atom_num)): 1127 # Skip a non-matching first atom. 1128 if sel_obj1.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 1129 index1 = i 1130 break 1131 1132 # Find the second atom. 1133 index2 = None 1134 for i in range(len(mol.atom_num)): 1135 # Skip a non-matching first atom. 1136 if sel_obj2.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 1137 index2 = i 1138 break 1139 1140 # Connectivities exist. 1141 if index1 < len(mol.bonded): 1142 if index2 in mol.bonded[index1]: 1143 return True 1144 else: 1145 return False
1146 1147
1148 - def atom_loop(self, atom_id=None, str_id=None, model_num=None, model_num_flag=False, mol_name_flag=False, res_num_flag=False, res_name_flag=False, atom_num_flag=False, atom_name_flag=False, element_flag=False, pos_flag=False, index_flag=False, ave=False):
1149 """Generator function for looping over all atoms in the internal relax structural object. 1150 1151 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be yielded. 1152 @type atom_id: str 1153 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number. If None, then all structures will be looped over. 1154 @type str_id: str, int, or None 1155 @keyword model_num: Only loop over a specific model. 1156 @type model_num: int or None 1157 @keyword model_num_flag: A flag which if True will cause the model number to be yielded. 1158 @type model_num_flag: bool 1159 @keyword mol_name_flag: A flag which if True will cause the molecule name to be yielded. 1160 @type mol_name_flag: bool 1161 @keyword res_num_flag: A flag which if True will cause the residue number to be yielded. 1162 @type res_num_flag: bool 1163 @keyword res_name_flag: A flag which if True will cause the residue name to be yielded. 1164 @type res_name_flag: bool 1165 @keyword atom_num_flag: A flag which if True will cause the atom number to be yielded. 1166 @type atom_num_flag: bool 1167 @keyword atom_name_flag: A flag which if True will cause the atom name to be yielded. 1168 @type atom_name_flag: bool 1169 @keyword element_flag: A flag which if True will cause the element name to be yielded. 1170 @type element_flag: bool 1171 @keyword pos_flag: A flag which if True will cause the atomic position to be yielded. 1172 @type pos_flag: bool 1173 @keyword index_flag: A flag which if True will cause the atomic index to be yielded. 1174 @type index_flag: bool 1175 @keyword ave: A flag which if True will result in this method returning the average atom properties across all loaded structures. 1176 @type ave: bool 1177 @return: A tuple of atomic information, as described in the docstring. 1178 @rtype: tuple consisting of optional molecule name (str), residue number (int), residue name (str), atom number (int), atom name(str), element name (str), and atomic position (array of len 3). 1179 """ 1180 1181 # Check that the structure is loaded. 1182 if not len(self.structural_data): 1183 raise RelaxNoPdbError 1184 1185 # Generate the selection object. 1186 sel_obj = None 1187 if atom_id: 1188 sel_obj = Selection(atom_id) 1189 1190 # Model loop. 1191 for model in self.model_loop(model_num): 1192 # Loop over the molecules. 1193 for mol_index in range(len(model.mol)): 1194 mol = model.mol[mol_index] 1195 1196 # Skip non-matching molecules. 1197 if sel_obj and not sel_obj.contains_mol(mol.mol_name): 1198 continue 1199 1200 # Loop over all atoms. 1201 for i in range(len(mol.atom_name)): 1202 # Skip non-matching atoms. 1203 if sel_obj and not sel_obj.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 1204 continue 1205 1206 # Initialise. 1207 res_num = mol.res_num[i] 1208 res_name = mol.res_name[i] 1209 atom_num = mol.atom_num[i] 1210 atom_name = mol.atom_name[i] 1211 element = mol.element[i] 1212 pos = zeros(3, float64) 1213 1214 # The atom position. 1215 if ave: 1216 # Loop over the models. 1217 for model in self.model_loop(): 1218 # Alias. 1219 mol = model.mol[mol_index] 1220 1221 # Some sanity checks. 1222 if mol.atom_num[i] != atom_num: 1223 raise RelaxError("The loaded structures do not contain the same atoms. The average structural properties can not be calculated.") 1224 1225 # Sum the atom positions. 1226 pos = pos + array([mol.x[i], mol.y[i], mol.z[i]], float64) 1227 1228 # Average the position array (divide by the number of models). 1229 pos = pos / len(self.structural_data) 1230 else: 1231 pos = array([mol.x[i], mol.y[i], mol.z[i]], float64) 1232 1233 # The molecule name. 1234 mol_name = mol.mol_name 1235 1236 # Build the tuple to be yielded. 1237 atomic_tuple = () 1238 if model_num_flag: 1239 if ave: 1240 atomic_tuple = atomic_tuple + (None,) 1241 else: 1242 atomic_tuple = atomic_tuple + (model.num,) 1243 if mol_name_flag: 1244 atomic_tuple = atomic_tuple + (mol_name,) 1245 if res_num_flag: 1246 atomic_tuple = atomic_tuple + (res_num,) 1247 if res_name_flag: 1248 atomic_tuple = atomic_tuple + (res_name,) 1249 if atom_num_flag: 1250 atomic_tuple = atomic_tuple + (atom_num,) 1251 if atom_name_flag: 1252 atomic_tuple = atomic_tuple + (atom_name,) 1253 if element_flag: 1254 atomic_tuple = atomic_tuple + (element,) 1255 if pos_flag: 1256 atomic_tuple = atomic_tuple + (pos,) 1257 if index_flag: 1258 atomic_tuple += (i,) 1259 1260 # Yield the information. 1261 if len(atomic_tuple) == 1: 1262 atomic_tuple = atomic_tuple[0] 1263 yield atomic_tuple 1264 1265 # Break out of the loop if the ave flag is set, as data from only one model is used. 1266 if ave: 1267 break
1268 1269
1270 - def bond_vectors(self, attached_atom=None, model_num=None, mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, return_name=False, return_warnings=False):
1271 """Find the bond vector between the atoms of 'attached_atom' and 'atom_id'. 1272 1273 @keyword attached_atom: The name of the bonded atom. 1274 @type attached_atom: str 1275 @keyword model_num: The model of which to return the vectors from. If not supplied and multiple models exist, then vectors from all models will be returned. 1276 @type model_num: None or int 1277 @keyword mol_name: The name of the molecule that attached_atom belongs to. 1278 @type mol_name: str 1279 @keyword res_num: The number of the residue that attached_atom belongs to. 1280 @type res_num: str 1281 @keyword res_name: The name of the residue that attached_atom belongs to. 1282 @type res_name: str 1283 @keyword spin_num: The number of the spin that attached_atom is attached to. 1284 @type spin_num: str 1285 @keyword spin_name: The name of the spin that attached_atom is attached to. 1286 @type spin_name: str 1287 @keyword return_name: A flag which if True will cause the name of the attached atom to be returned together with the bond vectors. 1288 @type return_name: bool 1289 @keyword return_warnings: A flag which if True will cause warning messages to be returned. 1290 @type return_warnings: bool 1291 @return: The list of bond vectors for each model. 1292 @rtype: list of numpy arrays (or a tuple if return_name or return_warnings are set) 1293 """ 1294 1295 # Initialise some objects. 1296 vectors = [] 1297 attached_name = None 1298 warnings = None 1299 1300 # Loop over the models. 1301 for model in self.model_loop(model_num): 1302 # Loop over the molecules. 1303 for mol in model.mol: 1304 # Skip non-matching molecules. 1305 if mol_name and mol_name != mol.mol_name: 1306 continue 1307 1308 # Find the atomic index of the base atom. 1309 index = None 1310 for i in range(len(mol.atom_name)): 1311 # Residues don't match. 1312 if (res_num != None and mol.res_num[i] != res_num) or (res_name != None and mol.res_name[i] != res_name): 1313 continue 1314 1315 # Atoms don't match. 1316 if (spin_num != None and mol.atom_num[i] != spin_num) or (spin_name != None and mol.atom_name[i] != spin_name): 1317 continue 1318 1319 # Update the index and stop searching. 1320 index = i 1321 break 1322 1323 # Found the atom. 1324 if index != None: 1325 # Get the atom bonded to this model/molecule/residue/atom. 1326 bonded_num, bonded_name, element, pos, attached_name, warnings = self._bonded_atom(attached_atom, index, mol) 1327 1328 # No bonded atom. 1329 if (bonded_num, bonded_name, element) == (None, None, None): 1330 continue 1331 1332 # The bond vector. 1333 vector = array(pos, float64) - array([mol.x[index], mol.y[index], mol.z[index]], float64) 1334 1335 # Append the vector to the vectors array. 1336 vectors.append(vector) 1337 1338 # Not found. 1339 else: 1340 warnings = "Cannot find the atom in the structure" 1341 1342 # Build the tuple to be yielded. 1343 data = (vectors,) 1344 if return_name: 1345 data = data + (attached_name,) 1346 if return_warnings: 1347 data = data + (warnings,) 1348 1349 # Return the data. 1350 return data
1351 1352
1353 - def connect_atom(self, mol_name=None, index1=None, index2=None):
1354 """Connect two atoms in the structural data object. 1355 1356 @keyword mol_name: The name of the molecule. 1357 @type mol_name: str 1358 @keyword index1: The global index of the first atom. 1359 @type index1: str 1360 @keyword index2: The global index of the first atom. 1361 @type index2: str 1362 """ 1363 1364 # Test if the current data pipe exists. 1365 pipes.test() 1366 1367 # Add the molecule, if it does not exist. 1368 if self.get_molecule(mol_name) == None: 1369 self.add_molecule(name=mol_name) 1370 1371 # Loop over each model. 1372 for model in self.structural_data: 1373 # Specific molecule. 1374 mol = self.get_molecule(mol_name) 1375 1376 # Add the atom. 1377 mol.atom_connect(index1=index1, index2=index2)
1378 1379
1380 - def delete(self, atom_id=None):
1381 """Deletion of structural information. 1382 1383 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. If not given, then all structural data will be deleted. 1384 @type atom_id: str or None 1385 """ 1386 1387 # All data. 1388 if atom_id == None: 1389 # Print out. 1390 print("Deleting the following structural data:\n") 1391 print(self.structural_data) 1392 1393 # Delete the structural data. 1394 del self.structural_data 1395 1396 # Initialise the empty model list. 1397 self.structural_data = ModelList() 1398 1399 # Atom subset deletion. 1400 else: 1401 # Generate the selection object. 1402 sel_obj = None 1403 if atom_id: 1404 sel_obj = Selection(atom_id) 1405 1406 # Loop over the models. 1407 del_res_nums = [] 1408 for model in self.model_loop(): 1409 # Loop over the molecules. 1410 for mol_index in range(len(model.mol)): 1411 mol = model.mol[mol_index] 1412 1413 # Skip non-matching molecules. 1414 if sel_obj and not sel_obj.contains_mol(mol.mol_name): 1415 continue 1416 1417 # Loop over the atoms. 1418 indices = [] 1419 for i in self.atom_loop(atom_id=atom_id, model_num=model.num, index_flag=True): 1420 indices.append(i) 1421 1422 # Generate a residue data dictionary for the metadata trimming (prior to atom deletion). 1423 res_data = self._residue_data(res_nums=mol.res_num, res_names=mol.res_name) 1424 1425 # Loop over the reverse indices and pop out the data. 1426 indices.reverse() 1427 for i in indices: 1428 mol.atom_num.pop(i) 1429 mol.atom_name.pop(i) 1430 mol.bonded.pop(i) 1431 mol.chain_id.pop(i) 1432 mol.element.pop(i) 1433 mol.pdb_record.pop(i) 1434 mol.res_name.pop(i) 1435 res_num = mol.res_num.pop(i) 1436 mol.seg_id.pop(i) 1437 mol.x.pop(i) 1438 mol.y.pop(i) 1439 mol.z.pop(i) 1440 1441 # The residue no longer exists. 1442 if res_num not in mol.res_num and res_num not in del_res_nums: 1443 del_res_nums.append(res_num) 1444 1445 # Nothing more to do. 1446 if not len(del_res_nums): 1447 return 1448 1449 # Fix the deleted residue number order. 1450 del_res_nums.reverse() 1451 1452 # Handle the helix metadata. 1453 del_helix_indices = [] 1454 for i in range(len(self.helices)): 1455 # Trim the helix. 1456 helix = self._trim_helix(helix=self.helices[i], trim_res_list=del_res_nums, res_data=res_data) 1457 1458 # Trimmed helix. 1459 if helix != None: 1460 self.helices[i] = helix 1461 1462 # No helix left. 1463 else: 1464 del_helix_indices.append(i) 1465 1466 # Loop over the reverse helix indices and pop out the data. 1467 del_helix_indices.reverse() 1468 for i in del_helix_indices: 1469 self.helices.pop(i) 1470 1471 # Handle the sheet metadata. 1472 del_sheet_indices = [] 1473 for i in range(len(self.sheets)): 1474 # Trim the sheet. 1475 sheet = self._trim_sheet(sheet=self.sheets[i], trim_res_list=del_res_nums, res_data=res_data) 1476 1477 # Trimmed sheet. 1478 if sheet != None: 1479 self.sheets[i] = sheet 1480 1481 # No sheet left. 1482 else: 1483 del_sheet_indices.append(i) 1484 1485 # Loop over the reverse sheet indices and pop out the data. 1486 del_sheet_indices.reverse() 1487 for i in del_sheet_indices: 1488 self.sheets.pop(i)
1489 1490
1491 - def get_molecule(self, molecule, model=None):
1492 """Return the molecule. 1493 1494 Only one model can be specified. 1495 1496 1497 @param molecule: The molecule name. 1498 @type molecule: int or None 1499 @keyword model: The model number. 1500 @type model: int or None 1501 @raises RelaxError: If the model is not specified and there is more than one model loaded. 1502 @return: The MolContainer corresponding to the molecule name and model number. 1503 @rtype: MolContainer instance or None 1504 """ 1505 1506 # Check if the target is a single molecule. 1507 if model == None and self.num_models() > 1: 1508 raise RelaxError("The target molecule cannot be determined as there are %s models already present." % self.num_models()) 1509 1510 # Check the model argument. 1511 if not isinstance(model, int) and not model == None: 1512 raise RelaxNoneIntError 1513 1514 # No models. 1515 if not len(self.structural_data): 1516 return 1517 1518 # Loop over the models. 1519 for model_cont in self.model_loop(model): 1520 # Loop over the molecules. 1521 for mol in model_cont.mol: 1522 # Return the matching molecule. 1523 if mol.mol_name == molecule: 1524 return mol
1525 1526
1527 - def load_pdb(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, alt_loc=None, verbosity=False, merge=False):
1528 """Method for loading structures from a PDB file. 1529 1530 @param file_path: The full path of the PDB file. 1531 @type file_path: str 1532 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are determined differently by the different parsers, but are numbered consecutively from 1. If set to None, then all molecules will be loaded. 1533 @type read_mol: None, int, or list of int 1534 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 1535 @type set_mol_name: None, str, or list of str 1536 @keyword read_model: The PDB model to extract from the file. If set to None, then all models will be loaded. 1537 @type read_model: None, int, or list of int 1538 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the PDB model numbers will be preserved, if they exist. 1539 @type set_model_num: None, int, or list of int 1540 @keyword alt_loc: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use. 1541 @type alt_loc: str or None 1542 @keyword verbosity: A flag which if True will cause messages to be printed. 1543 @type verbosity: bool 1544 @keyword merge: A flag which if set to True will try to merge the PDB structure into the currently loaded structures. 1545 @type merge: bool 1546 @return: The status of the loading of the PDB file. 1547 @rtype: bool 1548 """ 1549 1550 # Initial printout. 1551 if verbosity: 1552 print("\nInternal relax PDB parser.") 1553 1554 # Test if the file exists. 1555 if not access(file_path, F_OK): 1556 # Exit indicating failure. 1557 return False 1558 1559 # Separate the file name and path. 1560 path, file = os.path.split(file_path) 1561 1562 # Convert the structure reading args into lists. 1563 if read_mol and not isinstance(read_mol, list): 1564 read_mol = [read_mol] 1565 if set_mol_name and not isinstance(set_mol_name, list): 1566 set_mol_name = [set_mol_name] 1567 if read_model and not isinstance(read_model, list): 1568 read_model = [read_model] 1569 if set_model_num and not isinstance(set_model_num, list): 1570 set_model_num = [set_model_num] 1571 1572 # Open the PDB file. 1573 pdb_file = open_read_file(file_path) 1574 pdb_lines = pdb_file.readlines() 1575 pdb_file.close() 1576 1577 # Check for empty files. 1578 if pdb_lines == []: 1579 raise RelaxError("The PDB file is empty.") 1580 1581 # Process the different sections. 1582 pdb_lines = self._parse_pdb_title(pdb_lines) 1583 pdb_lines = self._parse_pdb_prim_struct(pdb_lines) 1584 pdb_lines = self._parse_pdb_hetrogen(pdb_lines) 1585 pdb_lines = self._parse_pdb_ss(pdb_lines) 1586 pdb_lines = self._parse_pdb_connectivity_annotation(pdb_lines) 1587 pdb_lines = self._parse_pdb_misc(pdb_lines) 1588 pdb_lines = self._parse_pdb_transform(pdb_lines) 1589 1590 # Loop over all models in the PDB file. 1591 model_index = 0 1592 orig_model_num = [] 1593 mol_conts = [] 1594 for model_num, model_records in self._parse_pdb_coord(pdb_lines): 1595 # Only load the desired model. 1596 if read_model and model_num not in read_model: 1597 continue 1598 1599 # Store the original model number. 1600 orig_model_num.append(model_num) 1601 1602 # Loop over the molecules of the model. 1603 mol_conts.append([]) 1604 mol_index = 0 1605 orig_mol_num = [] 1606 new_mol_name = [] 1607 for mol_num, mol_records in self._parse_mols(model_records): 1608 # Only load the desired model. 1609 if read_mol and mol_num not in read_mol: 1610 continue 1611 1612 # Set the target molecule name. 1613 if set_mol_name: 1614 new_mol_name.append(set_mol_name[mol_index]) 1615 else: 1616 # Number of structures already present for the model. 1617 num_struct = 0 1618 for model in self.structural_data: 1619 if not set_model_num or (model_index <= len(set_model_num) and set_model_num[model_index] == model.num): 1620 num_struct = len(model.mol) 1621 1622 # Set the name to the file name plus the structure number. 1623 new_mol_name.append(file_root(file) + '_mol' + repr(mol_num+num_struct)) 1624 1625 # Store the original mol number. 1626 orig_mol_num.append(mol_num) 1627 1628 # Generate the molecule container. 1629 mol = MolContainer() 1630 1631 # Fill the molecular data object. 1632 mol.fill_object_from_pdb(mol_records, alt_loc_select=alt_loc) 1633 1634 # Store the molecule container. 1635 mol_conts[model_index].append(mol) 1636 1637 # Increment the molecule index. 1638 mol_index = mol_index + 1 1639 1640 # Increment the model index. 1641 model_index = model_index + 1 1642 1643 # No data, so throw a warning and exit. 1644 if not len(mol_conts): 1645 warn(RelaxWarning("No structural data could be read from the file '%s'." % file_path)) 1646 return False 1647 1648 # Create the structural data data structures. 1649 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=orig_mol_num, set_mol_name=new_mol_name, file_name=file, file_path=path, merge=merge) 1650 1651 # Loading worked. 1652 return True
1653 1654
1655 - def load_xyz(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, verbosity=False):
1656 """Method for loading structures from a XYZ file. 1657 1658 @param file_path: The full path of the XYZ file. 1659 @type file_path: str 1660 @keyword read_mol: The molecule(s) to read from the file, independent of model. The 1661 molecules are determined differently by the different parsers, but 1662 are numbered consecutively from 1. If set to None, then all 1663 molecules will be loaded. 1664 @type read_mol: None, int, or list of int 1665 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, 1666 then the molecules will be automatically labelled based on the file 1667 name or other information. 1668 @type set_mol_name: None, str, or list of str 1669 @keyword read_model: The XYZ model to extract from the file. If set to None, then all 1670 models will be loaded. 1671 @type read_model: None, int, or list of int 1672 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then 1673 the XYZ model numbers will be preserved, if they exist. 1674 @type set_model_num: None, int, or list of int 1675 @keyword verbosity: A flag which if True will cause messages to be printed. 1676 @type verbosity: bool 1677 @return: The status of the loading of the XYZ file. 1678 @rtype: bool 1679 """ 1680 1681 # Initial printout. 1682 if verbosity: 1683 print("\nInternal relax XYZ parser.") 1684 1685 # Test if the file exists. 1686 if not access(file_path, F_OK): 1687 # Exit indicating failure. 1688 return False 1689 1690 # Separate the file name and path. 1691 path, file = os.path.split(file_path) 1692 1693 # Convert the structure reading args into lists. 1694 if read_mol and not isinstance(read_mol, list): 1695 read_mol = [read_mol] 1696 if set_mol_name and not isinstance(set_mol_name, list): 1697 set_mol_name = [set_mol_name] 1698 if read_model and not isinstance(read_model, list): 1699 read_model = [read_model] 1700 if set_model_num and not isinstance(set_model_num, list): 1701 set_model_num = [set_model_num] 1702 1703 # Loop over all models in the XYZ file. 1704 mol_index=0 1705 model_index = 0 1706 xyz_model_increment = 0 1707 orig_model_num = [] 1708 mol_conts = [] 1709 orig_mol_num = [] 1710 new_mol_name = [] 1711 for model_records in self._parse_models_xyz(file_path): 1712 # Increment the xyz_model_increment 1713 xyz_model_increment = xyz_model_increment +1 1714 1715 # Only load the desired model. 1716 if read_model and xyz_model_increment not in read_model: 1717 continue 1718 1719 # Store the original model number. 1720 orig_model_num.append(model_index) 1721 1722 # Loop over the molecules of the model. 1723 if read_mol and mol_index not in read_mol: 1724 continue 1725 1726 # Set the target molecule name. 1727 if set_mol_name: 1728 new_mol_name.append(set_mol_name[mol_index]) 1729 else: 1730 if mol_index==0: 1731 #Set the name to the file name plus the structure number. 1732 new_mol_name.append(file_root(file) + '_mol' + repr(mol_index+1)) 1733 1734 # Store the original mol number. 1735 orig_mol_num.append(mol_index) 1736 1737 # Generate the molecule container. 1738 mol = MolContainer() 1739 1740 # Fill the molecular data object. 1741 mol.fill_object_from_xyz(model_records) 1742 1743 # Store the molecule container. 1744 mol_conts.append([]) 1745 mol_conts[model_index].append(mol) 1746 1747 # Increment the molecule index. 1748 mol_index = mol_index + 1 1749 1750 # Increment the model index. 1751 model_index = model_index + 1 1752 1753 orig_mol_num=[0] 1754 # Create the structural data data structures. 1755 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=orig_mol_num, set_mol_name=new_mol_name, file_name=file, file_path=path) 1756 1757 # Loading worked. 1758 return True
1759 1760
1761 - def rotate(self, R=None, origin=None, model=None, atom_id=None):
1762 """Rotate the structural information about the given origin. 1763 1764 @keyword R: The forwards rotation matrix. 1765 @type R: numpy 3D, rank-2 array 1766 @keyword origin: The origin of the rotation. 1767 @type origin: numpy 3D, rank-1 array 1768 @keyword model: The model to rotate. If None, all models will be rotated. 1769 @type model: int 1770 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 1771 @type atom_id: str or None 1772 """ 1773 1774 # Generate the selection object. 1775 sel_obj = None 1776 if atom_id: 1777 sel_obj = Selection(atom_id) 1778 1779 # Loop over the models. 1780 for model_cont in self.model_loop(model): 1781 # Loop over the molecules. 1782 for mol in model_cont.mol: 1783 # Skip non-matching molecules. 1784 if sel_obj and not sel_obj.contains_mol(mol.mol_name): 1785 continue 1786 1787 # Loop over the atoms. 1788 for i in range(len(mol.atom_num)): 1789 # Skip non-matching atoms. 1790 if sel_obj and not sel_obj.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 1791 continue 1792 1793 # The origin to atom vector. 1794 vect = array([mol.x[i], mol.y[i], mol.z[i]], float64) - origin 1795 1796 # Rotation. 1797 rot_vect = dot(R, vect) 1798 1799 # The new position. 1800 pos = rot_vect + origin 1801 mol.x[i] = pos[0] 1802 mol.y[i] = pos[1] 1803 mol.z[i] = pos[2]
1804 1805
1806 - def translate(self, T=None, model=None, atom_id=None):
1807 """Displace the structural information by the given translation vector. 1808 1809 @keyword T: The translation vector. 1810 @type T: numpy 3D, rank-1 array 1811 @keyword model: The model to rotate. If None, all models will be rotated. 1812 @type model: int 1813 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 1814 @type atom_id: str or None 1815 """ 1816 1817 # Generate the selection object. 1818 sel_obj = None 1819 if atom_id: 1820 sel_obj = Selection(atom_id) 1821 1822 # Loop over the models. 1823 for model_cont in self.model_loop(model): 1824 # Loop over the molecules. 1825 for mol in model_cont.mol: 1826 # Skip non-matching molecules. 1827 if sel_obj and not sel_obj.contains_mol(mol.mol_name): 1828 continue 1829 1830 # Loop over the atoms. 1831 for i in range(len(mol.atom_num)): 1832 # Skip non-matching atoms. 1833 if sel_obj and not sel_obj.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 1834 continue 1835 1836 # Translate. 1837 mol.x[i] = mol.x[i] + T[0] 1838 mol.y[i] = mol.y[i] + T[1] 1839 mol.z[i] = mol.z[i] + T[2]
1840 1841
1842 - def validate_models(self):
1843 """Check that the models are consistent with each other. 1844 1845 This checks that the primary structure is identical between the models. 1846 """ 1847 1848 # Print out. 1849 print("Validating models:") 1850 1851 # Loop over the models. 1852 for i in range(len(self.structural_data)): 1853 # Check the molecules. 1854 if len(self.structural_data[0].mol) != len(self.structural_data[i].mol): 1855 raise RelaxError("The number of molecules, %i, in model %i does not match the %i molecules of the first model." % (len(self.structural_data[i].mol), self.structural_data[i].num, len(self.structural_data[0].mol))) 1856 1857 # Loop over the molecules. 1858 for j in range(len(self.structural_data[i].mol)): 1859 # Alias the molecules. 1860 mol = self.structural_data[i].mol[j] 1861 mol_ref = self.structural_data[0].mol[j] 1862 1863 # Check the names. 1864 if mol.mol_name != mol_ref.mol_name: 1865 raise RelaxError("The molecule name '%s' of model %i does not match the name '%s' of the first model." % (mol.mol_name, self.structural_data[i].num, mol_ref.mol_name)) 1866 1867 # Loop over the atoms. 1868 for k in range(len(mol.atom_name)): 1869 # Create pseudo-pdb formatted records (with no atomic coordinates). 1870 atom = "%-6s%5s %4s%1s%3s %1s%4s%1s %8s%8s%8s%6.2f%6.2f %4s%2s%2s" % ('ATOM', mol.atom_num[k], self._translate(mol.atom_name[k]), '', self._translate(mol.res_name[k]), self._translate(mol.chain_id[k]), self._translate(mol.res_num[k]), '', '#', '#', '#', 1.0, 0, self._translate(mol.seg_id[k]), self._translate(mol.element[k]), '') 1871 atom_ref = "%-6s%5s %4s%1s%3s %1s%4s%1s %8s%8s%8s%6.2f%6.2f %4s%2s%2s" % ('ATOM', mol_ref.atom_num[k], self._translate(mol_ref.atom_name[k]), '', self._translate(mol_ref.res_name[k]), self._translate(mol_ref.chain_id[k]), self._translate(mol_ref.res_num[k]), '', '#', '#', '#', 1.0, 0, self._translate(mol_ref.seg_id[k]), self._translate(mol_ref.element[k]), '') 1872 1873 # Check the atom info. 1874 if atom != atom_ref: 1875 print(atom) 1876 print(atom_ref) 1877 raise RelaxError("The atoms of model %i do not match the first model." % self.structural_data[i].num) 1878 1879 # Final printout. 1880 print("\tAll models are consistent")
1881 1882
1883 - def write_pdb(self, file, model_num=None):
1884 """Method for the creation of a PDB file from the structural data. 1885 1886 A number of PDB records including HET, HETNAM, FORMUL, HETATM, TER, CONECT, MASTER, and END 1887 are created. To create the non-standard residue records HET, HETNAM, and FORMUL, the data 1888 structure 'het_data' is created. It is an array of arrays where the first dimension 1889 corresponds to a different residue and the second dimension has the elements: 1890 1891 0. Residue number. 1892 1. Residue name. 1893 2. Chain ID. 1894 3. Total number of atoms in the residue. 1895 4. Number of H atoms in the residue. 1896 5. Number of C atoms in the residue. 1897 1898 1899 @param file: The PDB file object. This object must be writable. 1900 @type file: file object 1901 @keyword model_num: The model to place into the PDB file. If not supplied, then all 1902 models will be placed into the file. 1903 @type model_num: None or int 1904 """ 1905 1906 # Validate the structural data. 1907 self.validate() 1908 1909 # Initialise record counts. 1910 num_hetatm = 0 1911 num_atom = 0 1912 num_ter = 0 1913 num_conect = 0 1914 1915 # Print out. 1916 print("\nCreating the PDB records\n") 1917 1918 # Write some initial remarks. 1919 print("REMARK") 1920 pdb_write.remark(file, num=4, remark="This file complies with format v. 3.30, Jul-2011.") 1921 pdb_write.remark(file, num=40, remark="Created by relax (http://nmr-relax.com).") 1922 num_remark = 2 1923 1924 # Determine if model records will be created. 1925 model_records = False 1926 for model in self.model_loop(): 1927 if hasattr(model, 'num') and model.num != None: 1928 model_records = True 1929 1930 1931 #################### 1932 # Hetrogen section # 1933 #################### 1934 1935 # Initialise the hetrogen info array. 1936 het_data = [] 1937 het_data_coll = [] 1938 1939 # Loop over the molecules of the first model. 1940 index = 0 1941 for mol in self.structural_data[0].mol: 1942 # Check the validity of the data. 1943 self._validate_data_arrays(mol) 1944 1945 # Append an empty array for this molecule. 1946 het_data.append([]) 1947 1948 # Collect the non-standard residue info. 1949 for i in range(len(mol.atom_name)): 1950 # Skip non-HETATM records and HETATM records with no residue info. 1951 if mol.pdb_record[i] != 'HETATM' or mol.res_name[i] == None: 1952 continue 1953 1954 # If the residue is not already stored initialise a new het_data element. 1955 # (residue number, residue name, chain ID, number of atoms, atom count array). 1956 if not het_data[index] or not mol.res_num[i] == het_data[index][-1][0]: 1957 het_data[index].append([mol.res_num[i], mol.res_name[i], mol.chain_id[i], 0, []]) 1958 1959 # Catch missing chain_ids. 1960 if het_data[index][-1][2] == None: 1961 het_data[index][-1][2] = '' 1962 1963 # Total atom count. 1964 het_data[index][-1][3] = het_data[index][-1][3] + 1 1965 1966 # Find if the atom has already a count entry. 1967 entry = False 1968 for j in range(len(het_data[index][-1][4])): 1969 if mol.element[i] == het_data[index][-1][4][j][0]: 1970 entry = True 1971 1972 # Create a new specific atom count entry. 1973 if not entry: 1974 het_data[index][-1][4].append([mol.element[i], 0]) 1975 1976 # Increment the specific atom count. 1977 for j in range(len(het_data[index][-1][4])): 1978 if mol.element[i] == het_data[index][-1][4][j][0]: 1979 het_data[index][-1][4][j][1] = het_data[index][-1][4][j][1] + 1 1980 1981 # Create the collective hetrogen info data structure. 1982 for i in range(len(het_data[index])): 1983 # Find the entry in the collective structure. 1984 found = False 1985 for j in range(len(het_data_coll)): 1986 # Matching residue numbers. 1987 if het_data[index][i][0] == het_data_coll[j][0]: 1988 # Change the flag. 1989 found = True 1990 1991 # The checks. 1992 if het_data_coll[j][1] != het_data[index][i][1]: 1993 raise RelaxError("The " + repr(het_data[index][i][1]) + " residue name of hetrogen " + repr(het_data[index][i][0]) + " " + het_data[index][i][1] + " of structure " + repr(index) + " does not match the " + repr(het_data_coll[j][1]) + " name of the previous structures.") 1994 1995 elif het_data_coll[j][2] != het_data[index][i][2]: 1996 raise RelaxError("The hetrogen chain id " + repr(het_data[index][i][2]) + " does not match " + repr(het_data_coll[j][2]) + " of residue " + repr(het_data_coll[j][0]) + " " + het_data_coll[j][1] + " of the previous structures.") 1997 1998 elif het_data_coll[j][3] != het_data[index][i][3]: 1999 raise RelaxError("The " + repr(het_data[index][i][3]) + " atoms of hetrogen " + repr(het_data_coll[j][0]) + " " + het_data_coll[j][1] + " of structure " + repr(index) + " does not match the " + repr(het_data_coll[j][3]) + " of the previous structures.") 2000 2001 elif het_data_coll[j][4] != het_data[index][i][4]: 2002 raise RelaxError("The atom counts " + repr(het_data[index][i][4]) + " for the hetrogen residue " + repr(het_data_coll[j][0]) + " " + het_data_coll[j][1] + " of structure " + repr(index) + " do not match the counts " + repr(het_data_coll[j][4]) + " of the previous structures.") 2003 2004 # If there is no match, add the new residue to the collective. 2005 if not found: 2006 het_data_coll.append(het_data[index][i]) 2007 2008 # Increment the molecule index. 2009 index = index + 1 2010 2011 2012 # The HET records. 2013 ################## 2014 2015 # Print out. 2016 print("HET") 2017 2018 # Write the HET records. 2019 for het in het_data_coll: 2020 pdb_write.het(file, het_id=het[1], chain_id=het[2], seq_num=het[0], num_het_atoms=het[3]) 2021 2022 2023 # The HETNAM records. 2024 ##################### 2025 2026 # Print out. 2027 print("HETNAM") 2028 2029 # Loop over the non-standard residues. 2030 residues = [] 2031 for het in het_data_coll: 2032 # Test if the residue HETNAM record as already been written (otherwise store its name). 2033 if het[1] in residues: 2034 continue 2035 else: 2036 residues.append(het[1]) 2037 2038 # Get the chemical name. 2039 chemical_name = self._get_chemical_name(het[1]) 2040 if not chemical_name: 2041 chemical_name = 'Unknown' 2042 2043 # Write the HETNAM records. 2044 pdb_write.hetnam(file, het_id=het[1], text=chemical_name) 2045 2046 2047 # The FORMUL records. 2048 ##################### 2049 2050 # Print out. 2051 print("FORMUL") 2052 2053 # Loop over the non-standard residues and generate and write the chemical formula. 2054 residues = [] 2055 for i in range(len(het_data_coll)): 2056 # Alias. 2057 het = het_data_coll[i] 2058 2059 # Test if the residue HETNAM record as already been written (otherwise store its name). 2060 if het[1] in residues: 2061 continue 2062 else: 2063 residues.append(het[1]) 2064 2065 # Initialise the chemical formula. 2066 formula = '' 2067 2068 # Loop over the atoms. 2069 for atom_count in het[4]: 2070 formula = formula + atom_count[0] + repr(atom_count[1]) 2071 2072 # The FORMUL record (chemical formula). 2073 pdb_write.formul(file, comp_num=i+1, het_id=het[1], text=formula) 2074 2075 2076 ############################### 2077 # Secondary structure section # 2078 ############################### 2079 2080 # The HELIX records. 2081 #################### 2082 2083 if hasattr(self, 'helices') and len(self.helices): 2084 # Printout. 2085 print("HELIX") 2086 2087 # Loop over and unpack the helix data. 2088 index = 1 2089 for helix_id, init_chain_id, init_res_name, init_seq_num, end_chain_id, end_res_name, end_seq_num, helix_class, length in self.helices: 2090 pdb_write.helix(file, ser_num=index, helix_id=helix_id, init_chain_id=init_chain_id, init_res_name=init_res_name, init_seq_num=init_seq_num, end_chain_id=end_chain_id, end_res_name=end_res_name, end_seq_num=end_seq_num, helix_class=helix_class, length=length) 2091 index += 1 2092 2093 # The SHEET records. 2094 #################### 2095 2096 if hasattr(self, 'sheets') and len(self.sheets): 2097 # Printout. 2098 print("SHEET") 2099 2100 # Loop over and unpack the helix data. 2101 index = 1 2102 for strand, sheet_id, num_strands, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, sense, cur_atom, cur_res_name, cur_chain_id, cur_res_seq, cur_icode, prev_atom, prev_res_name, prev_chain_id, prev_res_seq, prev_icode in self.sheets: 2103 pdb_write.sheet(file, strand=strand, sheet_id=sheet_id, num_strands=num_strands, init_res_name=init_res_name, init_chain_id=init_chain_id, init_seq_num=init_seq_num, init_icode=init_icode, end_res_name=end_res_name, end_chain_id=end_chain_id, end_seq_num=end_seq_num, end_icode=end_icode, sense=sense, cur_atom=cur_atom, cur_res_name=cur_res_name, cur_chain_id=cur_chain_id, cur_res_seq=cur_res_seq, cur_icode=cur_icode, prev_atom=prev_atom, prev_res_name=prev_res_name, prev_chain_id=prev_chain_id, prev_res_seq=prev_res_seq, prev_icode=prev_icode) 2104 index += 1 2105 2106 2107 ###################### 2108 # Coordinate section # 2109 ###################### 2110 2111 # Loop over the models. 2112 for model in self.model_loop(model_num): 2113 # MODEL record, for multiple models. 2114 #################################### 2115 2116 if model_records: 2117 # Print out. 2118 print("\nMODEL %s" % model.num) 2119 2120 # Write the model record. 2121 pdb_write.model(file, serial=model.num) 2122 2123 2124 # Add the atomic coordinate records (ATOM, HETATM, and TER). 2125 ############################################################ 2126 2127 # Loop over the molecules. 2128 for mol in model.mol: 2129 # Print out. 2130 print("ATOM, HETATM, TER") 2131 2132 # Loop over the atomic data. 2133 atom_record = False 2134 for i in range(len(mol.atom_name)): 2135 # Write the ATOM record. 2136 if mol.pdb_record[i] in [None, 'ATOM']: 2137 atom_record = True 2138 2139 # The atom number, if missing. 2140 atom_num = mol.atom_num[i] 2141 if atom_num == None: 2142 atom_num = i + 1 2143 2144 # Handle the funky atom name alignment. From the PDB format documents: 2145 # "Alignment of one-letter atom name such as C starts at column 14, while two-letter atom name such as FE starts at column 13." 2146 if len(mol.atom_name[i]) == 1: 2147 atom_name = " %s" % mol.atom_name[i] 2148 else: 2149 atom_name = "%s" % mol.atom_name[i] 2150 2151 # Write out. 2152 pdb_write.atom(file, serial=atom_num, name=atom_name, res_name=mol.res_name[i], chain_id=self._translate(mol.chain_id[i]), res_seq=mol.res_num[i], x=mol.x[i], y=mol.y[i], z=mol.z[i], occupancy=1.0, temp_factor=0, element=mol.element[i]) 2153 num_atom = num_atom + 1 2154 2155 # Info for the TER record. 2156 ter_num = atom_num + 1 2157 ter_name = mol.res_name[i] 2158 ter_chain_id = mol.chain_id[i] 2159 ter_res_num = mol.res_num[i] 2160 2161 # Finish the ATOM section with the TER record. 2162 if atom_record: 2163 pdb_write.ter(file, serial=ter_num, res_name=ter_name, chain_id=self._translate(ter_chain_id), res_seq=ter_res_num) 2164 num_ter = num_ter + 1 2165 2166 # Loop over the atomic data. 2167 count_shift = False 2168 for i in range(len(mol.atom_name)): 2169 # Write the HETATM record. 2170 if mol.pdb_record[i] == 'HETATM': 2171 # The atom number, if missing. 2172 atom_num = mol.atom_num[i] 2173 if atom_num == None: 2174 atom_num = i + 1 2175 2176 # Increment the atom number if a TER record was created. 2177 if atom_record and atom_num == ter_num: 2178 count_shift = True 2179 if atom_record and count_shift: 2180 atom_num += 1 2181 2182 # Write out. 2183 pdb_write.hetatm(file, serial=atom_num, name=self._translate(mol.atom_name[i]), res_name=mol.res_name[i], chain_id=self._translate(mol.chain_id[i]), res_seq=mol.res_num[i], x=mol.x[i], y=mol.y[i], z=mol.z[i], occupancy=1.0, temp_factor=0.0, element=mol.element[i]) 2184 num_hetatm = num_hetatm + 1 2185 2186 2187 # ENDMDL record, for multiple structures. 2188 ######################################## 2189 2190 if model_records: 2191 print("ENDMDL") 2192 pdb_write.endmdl(file) 2193 2194 2195 # Create the CONECT records. 2196 ############################ 2197 2198 # Print out. 2199 print("CONECT") 2200 2201 # Loop over the molecules of the first model. 2202 for mol in self.structural_data[0].mol: 2203 # Loop over the atoms. 2204 for i in range(len(mol.atom_name)): 2205 # No bonded atoms, hence no CONECT record is required. 2206 if not len(mol.bonded[i]): 2207 continue 2208 2209 # Initialise some data structures. 2210 flush = 0 2211 bonded_index = 0 2212 bonded = ['', '', '', ''] 2213 2214 # Loop over the bonded atoms. 2215 for j in range(len(mol.bonded[i])): 2216 # End of the array, hence create the CONECT record in this iteration. 2217 if j == len(mol.bonded[i])-1: 2218 flush = True 2219 2220 # Only four covalently bonded atoms allowed in one CONECT record. 2221 if bonded_index == 3: 2222 flush = True 2223 2224 # Get the bonded atom index. 2225 bonded[bonded_index] = mol.bonded[i][j] 2226 2227 # Increment the bonded_index value. 2228 bonded_index = bonded_index + 1 2229 2230 # Generate the CONECT record and increment the counter. 2231 if flush: 2232 # Convert the atom indices to atom numbers. 2233 for k in range(4): 2234 if bonded[k] != '': 2235 if mol.atom_num[bonded[k]] != None: 2236 bonded[k] = mol.atom_num[bonded[k]] 2237 else: 2238 bonded[k] = bonded[k] + 1 2239 2240 # Write the CONECT record. 2241 pdb_write.conect(file, serial=i+1, bonded1=bonded[0], bonded2=bonded[1], bonded3=bonded[2], bonded4=bonded[3]) 2242 2243 # Reset the flush flag, the bonded atom count, and the bonded atom names. 2244 flush = False 2245 bonded_index = 0 2246 bonded = ['', '', '', ''] 2247 2248 # Increment the CONECT record count. 2249 num_conect = num_conect + 1 2250 2251 2252 2253 # MASTER record. 2254 ################ 2255 2256 print("\nMASTER") 2257 pdb_write.master(file, num_het=len(het_data_coll), num_coord=num_atom+num_hetatm, num_ter=num_ter, num_conect=num_conect) 2258 2259 2260 # END. 2261 ###### 2262 2263 print("END") 2264 pdb_write.end(file)
2265 2266
2267 -class MolContainer:
2268 """The container for the molecular information. 2269 2270 The structural data object for this class is a container possessing a number of different arrays 2271 corresponding to different structural information. These objects include: 2272 2273 - atom_num: The atom name. 2274 - atom_name: The atom name. 2275 - bonded: Each element an array of bonded atom indices. 2276 - chain_id: The chain ID. 2277 - element: The element symbol. 2278 - pdb_record: The optional PDB record name (one of ATOM, HETATM, or TER). 2279 - res_name: The residue name. 2280 - res_num: The residue number. 2281 - seg_id: The segment ID. 2282 - x: The x coordinate of the atom. 2283 - y: The y coordinate of the atom. 2284 - z: The z coordinate of the atom. 2285 2286 All arrays should be of equal length so that an atom index can retrieve all the corresponding 2287 data. Only the atom identification string is compulsory, all other arrays can contain None. 2288 """ 2289 2290
2291 - def __init__(self):
2292 """Initialise the molecular container.""" 2293 2294 # The atom num (array of int). 2295 self.atom_num = [] 2296 2297 # The atom name (array of str). 2298 self.atom_name = [] 2299 2300 # The bonded atom indices (array of arrays of int). 2301 self.bonded = [] 2302 2303 # The chain ID (array of str). 2304 self.chain_id = [] 2305 2306 # The element symbol (array of str). 2307 self.element = [] 2308 2309 # The optional PDB record name (array of str). 2310 self.pdb_record = [] 2311 2312 # The residue name (array of str). 2313 self.res_name = [] 2314 2315 # The residue number (array of int). 2316 self.res_num = [] 2317 2318 # The segment ID (array of int). 2319 self.seg_id = [] 2320 2321 # The x coordinate (array of float). 2322 self.x = [] 2323 2324 # The y coordinate (array of float). 2325 self.y = [] 2326 2327 # The z coordinate (array of float). 2328 self.z = []
2329 2330
2331 - def _atom_index(self, atom_num):
2332 """Find the atom index corresponding to the given atom number. 2333 2334 @param atom_num: The atom number to find the index of. 2335 @type atom_num: int 2336 @return: The atom index corresponding to the atom. 2337 @rtype: int 2338 """ 2339 2340 # Loop over the atoms. 2341 for j in range(len(self.atom_num)): 2342 # Return the index. 2343 if self.atom_num[j] == atom_num: 2344 return j 2345 2346 # Should not be here, the PDB connect records are incorrect. 2347 warn(RelaxWarning("The atom number " + repr(atom_num) + " from the CONECT record cannot be found within the ATOM and HETATM records."))
2348 2349
2350 - def _det_pdb_element(self, atom_name):
2351 """Try to determine the element from the PDB atom name. 2352 2353 @param atom_name: The PDB atom name. 2354 @type atom_name: str 2355 @return: The element name, or None if unsuccessful. 2356 @rtype: str or None 2357 """ 2358 2359 # Strip away the "'" character (for RNA, etc.). 2360 element = atom_name.strip("'") 2361 2362 # Strip away atom numbering, from the front and end. 2363 element = element.strip(digits) 2364 2365 # Amino acid atom translation table (note, numbers have been stripped already!). 2366 table = {'C': ['CA', 'CB', 'CG', 'CD', 'CE', 'CH', 'CZ'], 2367 'N': ['ND', 'NE', 'NH', 'NZ'], 2368 'H': ['HA', 'HB', 'HG', 'HD', 'HE', 'HH', 'HT', 'HZ'], 2369 'O': ['OG', 'OD', 'OE', 'OH', 'OT'], 2370 'S': ['SD', 'SG'] 2371 } 2372 2373 # Translate amino acids. 2374 for key in list(table.keys()): 2375 if element in table[key]: 2376 element = key 2377 break 2378 2379 # Allowed element list. 2380 elements = ['H', 'C', 'N', 'O', 'F', 'P', 'S'] 2381 2382 # Return the element, if in the list. 2383 if element in elements: 2384 return element 2385 2386 # Else, throw a warning. 2387 warn(RelaxWarning("Cannot determine the element associated with atom '%s'." % atom_name))
2388 2389
2390 - def _parse_xyz_record(self, record):
2391 """Parse the XYZ record string and return an array of the corresponding atomic information. 2392 2393 The format of the XYZ records is:: 2394 __________________________________________________________________________________________ 2395 | | | | | 2396 | Columns | Data type | Field | Definition | 2397 |_________|______________|______________|________________________________________________| 2398 | | | | | 2399 | 1 | String | element | | 2400 | 2 | Real | x | Orthogonal coordinates for X in Angstroms | 2401 | 3 | Real | y | Orthogonal coordinates for Y in Angstroms | 2402 | 4 | Real | z | Orthogonal coordinates for Z in Angstroms | 2403 |_________|______________|______________|________________________________________________| 2404 2405 2406 @param record: The single line PDB record. 2407 @type record: str 2408 @return: The list of atomic information 2409 @rtype: list of str 2410 """ 2411 2412 # Initialise. 2413 fields = [] 2414 word = record.split() 2415 2416 # ATOM and HETATM records. 2417 if len(word)==4: 2418 # Split up the record. 2419 fields.append(word[0]) 2420 fields.append(word[1]) 2421 fields.append(word[2]) 2422 fields.append(word[3]) 2423 2424 # Loop over the fields. 2425 for i in range(len(fields)): 2426 # Strip all whitespace. 2427 fields[i] = fields[i].strip() 2428 2429 # Replace nothingness with None. 2430 if fields[i] == '': 2431 fields[i] = None 2432 2433 # Convert strings to numbers. 2434 if fields[1]: 2435 fields[1] = float(fields[1]) 2436 if fields[2]: 2437 fields[2] = float(fields[2]) 2438 if fields[3]: 2439 fields[3] = float(fields[3]) 2440 2441 # Return the atomic info. 2442 return fields
2443 2444
2445 - def atom_add(self, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):
2446 """Method for adding an atom to the structural data object. 2447 2448 This method will create the key-value pair for the given atom. 2449 2450 2451 @keyword atom_name: The atom name, e.g. 'H1'. 2452 @type atom_name: str or None 2453 @keyword res_name: The residue name. 2454 @type res_name: str or None 2455 @keyword res_num: The residue number. 2456 @type res_num: int or None 2457 @keyword pos: The position vector of coordinates. 2458 @type pos: list (length = 3) 2459 @keyword element: The element symbol. 2460 @type element: str or None 2461 @keyword atom_num: The atom number. 2462 @type atom_num: int or None 2463 @keyword chain_id: The chain identifier. 2464 @type chain_id: str or None 2465 @keyword segment_id: The segment identifier. 2466 @type segment_id: str or None 2467 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'. 2468 @type pdb_record: str or None 2469 @return: The index of the added atom. 2470 @rtype: int 2471 """ 2472 2473 # Append to all the arrays. 2474 self.atom_num.append(atom_num) 2475 self.atom_name.append(atom_name) 2476 self.bonded.append([]) 2477 self.chain_id.append(chain_id) 2478 self.element.append(element) 2479 self.pdb_record.append(pdb_record) 2480 self.res_name.append(res_name) 2481 self.res_num.append(res_num) 2482 self.seg_id.append(segment_id) 2483 self.x.append(pos[0]) 2484 self.y.append(pos[1]) 2485 self.z.append(pos[2]) 2486 2487 # Return the index. 2488 return len(self.atom_num) - 1
2489 2490
2491 - def atom_connect(self, index1=None, index2=None):
2492 """Method for connecting two atoms within the data structure object. 2493 2494 This method will append index2 to the array at bonded[index1] and vice versa. 2495 2496 2497 @keyword index1: The index of the first atom. 2498 @type index1: int 2499 @keyword index2: The index of the second atom. 2500 @type index2: int 2501 """ 2502 2503 # Update the bonded array structure, if necessary. 2504 if index2 not in self.bonded[index1]: 2505 self.bonded[index1].append(index2) 2506 if index1 not in self.bonded[index2]: 2507 self.bonded[index2].append(index1)
2508 2509
2510 - def fill_object_from_pdb(self, records, alt_loc_select=None):
2511 """Method for generating a complete Structure_container object from the given PDB records. 2512 2513 @param records: A list of structural PDB records. 2514 @type records: list of str 2515 @keyword alt_loc_select: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use. 2516 @type alt_loc_select: str or None 2517 """ 2518 2519 # Loop over the records. 2520 for record in records: 2521 # Nothing to do. 2522 if not record or record == '\n': 2523 continue 2524 2525 # Add the atom. 2526 if record[:4] == 'ATOM' or record[:6] == 'HETATM': 2527 # Parse the record. 2528 if record[:4] == 'ATOM': 2529 record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.atom(record) 2530 if record[:6] == 'HETATM': 2531 record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.hetatm(record) 2532 2533 # Handle the alternate locations. 2534 if alt_loc != None: 2535 # Don't know what to do. 2536 if alt_loc_select == None: 2537 raise RelaxError("Multiple alternate location indicators are present in the PDB file, but the desired coordinate set has not been specified.") 2538 2539 # Skip non-matching locations. 2540 if alt_loc != alt_loc_select: 2541 continue 2542 2543 # Attempt at determining the element, if missing. 2544 if not element: 2545 element = self._det_pdb_element(name) 2546 2547 # Add. 2548 self.atom_add(pdb_record=record_type, atom_num=serial, atom_name=name, res_name=res_name, chain_id=chain_id, res_num=res_seq, pos=[x, y, z], element=element) 2549 2550 # Connect atoms. 2551 if record[:6] == 'CONECT': 2552 # Parse the record. 2553 record_type, serial, bonded1, bonded2, bonded3, bonded4 = pdb_read.conect(record) 2554 2555 # Loop over the atoms of the record. 2556 for bonded in [bonded1, bonded2, bonded3, bonded4]: 2557 # Skip if there is no record. 2558 if not bonded: 2559 continue 2560 2561 # Skip broken CONECT records (for when the record points to a non-existent atom). 2562 if self._atom_index(serial) == None or self._atom_index(bonded) == None: 2563 continue 2564 2565 # Make the connection. 2566 self.atom_connect(index1=self._atom_index(serial), index2=self._atom_index(bonded))
2567 2568
2569 - def fill_object_from_xyz(self, records):
2570 """Method for generating a complete Structure_container object from the given xyz records. 2571 2572 @param records: A list of structural xyz records. 2573 @type records: list of str 2574 """ 2575 2576 # initialisation for atom number 2577 atom_number = 1 2578 2579 # Loop over the records. 2580 for record in records: 2581 # Parse the record. 2582 record = self._parse_xyz_record(record) 2583 2584 # Nothing to do. 2585 if not record: 2586 continue 2587 2588 # Add the atom. 2589 if len(record) == 4: 2590 # Add. 2591 self.atom_add(atom_name=record[0], atom_num=atom_number, pos=[record[1], record[2], record[3]], element=record[0]) 2592 2593 # Increment of atom number 2594 atom_number = atom_number + 1
2595 2596
2597 - def from_xml(self, mol_node, file_version=1):
2598 """Recreate the MolContainer from the XML molecule node. 2599 2600 @param mol_node: The molecule XML node. 2601 @type mol_node: xml.dom.minicompat.NodeList instance 2602 @keyword file_version: The relax XML version of the XML file. 2603 @type file_version: int 2604 """ 2605 2606 # Recreate the current molecule container. 2607 xml_to_object(mol_node, self, file_version=file_version)
2608 2609
2610 - def is_empty(self):
2611 """Check if the container is empty.""" 2612 2613 # Set attributes. 2614 if hasattr(self, 'mol_name'): return False 2615 if hasattr(self, 'file_name'): return False 2616 if hasattr(self, 'file_path'): return False 2617 if hasattr(self, 'file_mol_num'): return False 2618 if hasattr(self, 'file_model'): return False 2619 2620 # Internal data structures. 2621 if not self.atom_num == []: return False 2622 if not self.atom_name == []: return False 2623 if not self.bonded == []: return False 2624 if not self.chain_id == []: return False 2625 if not self.element == []: return False 2626 if not self.pdb_record == []: return False 2627 if not self.res_name == []: return False 2628 if not self.res_num == []: return False 2629 if not self.seg_id == []: return False 2630 if not self.x == []: return False 2631 if not self.y == []: return False 2632 if not self.z == []: return False 2633 2634 # Ok, now this thing must be empty. 2635 return True
2636 2637
2638 - def last_residue(self):
2639 """Return the number of the last residue. 2640 2641 @return: The last residue number. 2642 @rtype: int 2643 """ 2644 2645 # Return the number. 2646 return self.res_num[-1]
2647 2648
2649 - def merge(self, mol_cont=None):
2650 """Merge the contents of the given molecule container into here. 2651 2652 @keyword mol_cont: The data structure for the molecule to merge. 2653 @type mol_cont: MolContainer instance 2654 """ 2655 2656 # The current index. 2657 curr_index = len(self.atom_num) 2658 2659 # Loop over all data. 2660 for i in range(len(mol_cont.atom_num)): 2661 # Add the atom. 2662 self.atom_add(atom_num=curr_index+i+1, atom_name=mol_cont.atom_name[i], res_name=mol_cont.res_name[i], res_num=mol_cont.res_num[i], pos=[mol_cont.x[i], mol_cont.y[i], mol_cont.z[i]], element=mol_cont.element[i], chain_id=mol_cont.chain_id[i], pdb_record=mol_cont.pdb_record[i]) 2663 2664 # Connect the atoms. 2665 for j in range(len(mol_cont.bonded[i])): 2666 self.atom_connect(index1=i+curr_index+1, index2=mol_cont.bonded[i][j]+curr_index+1)
2667 2668
2669 - def to_xml(self, doc, element):
2670 """Create XML elements for the contents of this molecule container. 2671 2672 @param doc: The XML document object. 2673 @type doc: xml.dom.minidom.Document instance 2674 @param element: The element to add the molecule XML elements to. 2675 @type element: XML element object 2676 """ 2677 2678 # Create an XML element for this molecule and add it to the higher level element. 2679 mol_element = doc.createElement('mol_cont') 2680 element.appendChild(mol_element) 2681 2682 # Set the molecule attributes. 2683 mol_element.setAttribute('desc', 'Molecule container') 2684 mol_element.setAttribute('name', str(self.mol_name)) 2685 2686 # Add all simple python objects within the MolContainer to the XML element. 2687 fill_object_contents(doc, mol_element, object=self, blacklist=list(self.__class__.__dict__.keys()))
2688