lib.structure.internal.object

53 """The internal relax structural data object. 54 55 The structural data object for this class is a container possessing a number of different arrays corresponding to different structural information. These objects are described in the structural container docstring. 56 """ 57

58 - def __init__(self):

59 """Initialise the structural object.""" 60 61 # Initialise the empty model list. 62 self.structural_data = ModelList()

63 64

65 - def _bonded_atom(self, attached_atom, index, mol):

66 """Find the atom named attached_atom directly bonded to the atom located at the index. 67 68 @param attached_atom: The name of the attached atom to return. 69 @type attached_atom: str 70 @param index: The index of the atom which the attached atom is attached to. 71 @type index: int 72 @param mol: The molecule container. 73 @type mol: MolContainer instance 74 @return: A tuple of information about the bonded atom. 75 @rtype: tuple consisting of the atom number (int), atom name (str), element name (str), and atomic position (Numeric array of len 3) 76 """ 77 78 # Init. 79 bonded_found = False 80 81 # No bonded atoms, so determine the connectivities. 82 if not mol.bonded[index]: 83 # Determine the molecule type if needed. 84 if not hasattr(mol, 'type'): 85 self._mol_type(mol) 86 87 # Protein. 88 if mol.type == 'protein': 89 self._protein_connect(mol) 90 91 # Find everything within 2 Angstroms and say they are bonded. 92 else: 93 self._find_bonded_atoms(index, mol, radius=2) 94 95 # Loop over the bonded atoms. 96 matching_list = [] 97 for bonded_index in mol.bonded[index]: 98 if regex.search(mol.atom_name[bonded_index], attached_atom): 99 matching_list.append(bonded_index) 100 num_attached = len(matching_list) 101 102 # Problem. 103 if num_attached > 1: 104 # Get the atom names. 105 matching_names = [] 106 for i in matching_list: 107 matching_names.append(mol.atom_name[i]) 108 109 # Return nothing but a warning. 110 return None, None, None, None, None, 'More than one attached atom found: ' + repr(matching_names) 111 112 # No attached atoms. 113 if num_attached == 0: 114 return None, None, None, None, None, "No attached atom could be found" 115 116 # The bonded atom info. 117 index = matching_list[0] 118 bonded_num = mol.atom_num[index] 119 bonded_name = mol.atom_name[index] 120 element = mol.element[index] 121 pos = [mol.x[index], mol.y[index], mol.z[index]] 122 attached_name = mol.atom_name[index] 123 124 # Return the information. 125 return bonded_num, bonded_name, element, pos, attached_name, None

126 127

128 - def _find_bonded_atoms(self, index, mol, radius=1.2):

129 """Find all atoms within a sphere and say that they are attached to the central atom. 130 131 The found atoms will be added to the 'bonded' data structure. 132 133 134 @param index: The index of the central atom. 135 @type index: int 136 @param mol: The molecule container. 137 @type mol: MolContainer instance 138 """ 139 140 # Central atom info. 141 centre = array([mol.x[index], mol.y[index], mol.z[index]], float64) 142 143 # Atom loop. 144 dist_list = [] 145 connect_list = {} 146 element_list = {} 147 for i in range(len(mol.atom_num)): 148 # Skip proton to proton bonds! 149 if mol.element[index] == 'H' and mol.element[i] == 'H': 150 continue 151 152 # The atom's position. 153 pos = array([mol.x[i], mol.y[i], mol.z[i]], float64) 154 155 # The distance from the centre. 156 dist = linalg.norm(centre-pos) 157 158 # The atom is within the radius. 159 if dist < radius: 160 # Store the distance. 161 dist_list.append(dist) 162 163 # Store the atom index. 164 connect_list[dist] = i 165 166 # Store the element type. 167 element_list[dist] = mol.element[i] 168 169 # The maximum number of allowed covalent bonds. 170 max_conn = 1000 # Ridiculous default! 171 if mol.element[index] == 'H': 172 max_conn = 1 173 elif mol.element[index] == 'O': 174 max_conn = 2 175 elif mol.element[index] == 'N': 176 max_conn = 3 177 elif mol.element[index] == 'C': 178 max_conn = 4 179 180 # Sort. 181 dist_list.sort() 182 183 # Loop over the max number of connections (or the number of connected atoms, if less). 184 for i in range(min(max_conn, len(dist_list))): 185 mol.atom_connect(index, connect_list[dist_list[i]])

186 187

188 - def _get_chemical_name(self, hetID):

189 """Return the chemical name corresponding to the given residue ID. 190 191 The following names are currently returned:: 192 ________________________________________________ 193 | | | 194 | hetID | Chemical name | 195 |________|_____________________________________| 196 | | | 197 | TNS | Tensor | 198 | COM | Centre of mass | 199 | AXS | Tensor axes | 200 | SIM | Monte Carlo simulation tensor axes | 201 | PIV | Pivot point | 202 | CON | Cone object | 203 | AVE | Average vector | 204 |________|_____________________________________| 205 206 For any other residues, no description is returned. 207 208 @param hetID: The residue ID. 209 @type hetID: str 210 @return: The chemical name. 211 @rtype: str or None 212 """ 213 214 # Tensor. 215 if hetID == 'TNS': 216 return 'Tensor' 217 218 # Centre of mass. 219 if hetID == 'COM': 220 return 'Centre of mass' 221 222 # Tensor axes. 223 if hetID == 'AXS': 224 return 'Tensor axes' 225 226 # Monte Carlo simulation tensor axes. 227 if hetID == 'SIM': 228 return 'Monte Carlo simulation tensor axes' 229 230 # Pivot point. 231 if hetID == 'PIV': 232 return 'Pivot point' 233 234 # Cone object. 235 if hetID == 'CON': 236 return 'Cone' 237 238 # Average vector. 239 if hetID == 'AVE': 240 return 'Average vector'

241 242

243 - def _parse_models_gaussian(self, file_path):

244 """Generator function for looping over the models in the Gaussian log file. 245 246 @param file_path: The full path of the Gaussian log file. 247 @type file_path: str 248 @return: The model number and all the records for that model. 249 @rtype: tuple of int and array of str 250 """ 251 252 # Open the file. 253 file = open_read_file(file_path) 254 lines = file.readlines() 255 file.close() 256 257 # Check for empty files. 258 if lines == []: 259 raise RelaxError("The Gaussian log file is empty.") 260 261 # Init. 262 found = False 263 str_index = 0 264 total_atom = 0 265 model = 0 266 records = [] 267 268 # Loop over the data. 269 for i in range(len(lines)): 270 # Found a structure. 271 if search("Standard orientation", lines[i]): 272 found = True 273 str_index = 0 274 continue 275 276 # End of the model. 277 if found and str_index > 4 and search("---------", lines[i]): 278 # Yield the info 279 yield records 280 281 # Reset. 282 records = [] 283 found = False 284 285 # Not a structure line. 286 if not found: 287 continue 288 289 # Append the line as a record of the model. 290 records.append(lines[i]) 291 292 # Increment the structure line index. 293 str_index += 1

294 295

296 - def _parse_pdb_connectivity_annotation(self, lines):

297 """Loop over and parse the PDB connectivity annotation records. 298 299 These are the records identified in the U{PDB version 3.30 documentation<http://www.wwpdb.org/documentation/format33/sect6.html>}. 300 301 302 @param lines: The lines of the PDB file excluding the sections prior to the connectivity annotation section. 303 @type lines: list of str 304 @return: The remaining PDB lines with the connectivity annotation records stripped. 305 @rtype: list of str 306 """ 307 308 # The ordered list of record names in the connectivity annotation section. 309 records = [ 310 'SSBOND', 311 'LINK ', 312 'CISPEP' 313 ] 314 315 # Loop over the lines. 316 for i in range(len(lines)): 317 # No match, therefore assume to be out of the connectivity annotation section. 318 if lines[i][:6] not in records: 319 break 320 321 # Return the remaining lines. 322 return lines[i:]

323 324

325 - def _parse_pdb_coord(self, lines):

326 """Generator function for looping over the models in the PDB file. 327 328 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect9.html}. 329 330 331 @param lines: The lines of the coordinate section. 332 @type lines: list of str 333 @return: The model number and all the records for that model. 334 @rtype: tuple of int and array of str 335 """ 336 337 # Init. 338 model = None 339 records = [] 340 341 # Loop over the data. 342 for i in range(len(lines)): 343 # A new model record. 344 if lines[i][:5] == 'MODEL': 345 try: 346 model = int(lines[i].split()[1]) 347 except: 348 raise RelaxError("The MODEL record " + repr(lines[i]) + " is corrupt, cannot read the PDB file.") 349 350 # Skip all records prior to the first ATOM or HETATM record. 351 if not (lines[i][:4] == 'ATOM' or lines[i][:6] == 'HETATM') and not len(records): 352 continue 353 354 # End of the model. 355 if lines[i][:6] == 'ENDMDL': 356 # Yield the info. 357 yield model, records 358 359 # Reset the records. 360 records = [] 361 362 # Skip the rest of this loop. 363 continue 364 365 # Append the line as a record of the model. 366 records.append(lines[i]) 367 368 # If records is not empty then there are no models, so yield the lot. 369 if len(records): 370 yield model, records

371 372

373 - def _parse_pdb_hetrogen(self, lines):

374 """Loop over and parse the PDB hetrogen records. 375 376 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect4.html}. 377 378 379 @param lines: The lines of the PDB file excluding the sections prior to the hetrogen section. 380 @type lines: list of str 381 @return: The remaining PDB lines with the hetrogen records stripped. 382 @rtype: list of str 383 """ 384 385 # The ordered list of record names in the hetrogen section. 386 records = [ 387 'HET ', 388 'FORMUL', 389 'HETNAM', 390 'HETSYN' 391 ] 392 393 # Loop over the lines. 394 for i in range(len(lines)): 395 # No match, therefore assume to be out of the hetrogen section. 396 if lines[i][:6] not in records: 397 break 398 399 # Return the remaining lines. 400 return lines[i:]

401 402

403 - def _parse_pdb_misc(self, lines):

404 """Loop over and parse the PDB miscellaneous records. 405 406 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect7.html}. 407 408 409 @param lines: The lines of the PDB file excluding the sections prior to the miscellaneous section. 410 @type lines: list of str 411 @return: The remaining PDB lines with the miscellaneous records stripped. 412 @rtype: list of str 413 """ 414 415 # The ordered list of record names in the miscellaneous section. 416 records = [ 417 'SITE ' 418 ] 419 420 # Loop over the lines. 421 for i in range(len(lines)): 422 # No match, therefore assume to be out of the miscellaneous section. 423 if lines[i][:6] not in records: 424 break 425 426 # Return the remaining lines. 427 return lines[i:]

428 429

430 - def _parse_pdb_prim_struct(self, lines):

431 """Loop over and parse the PDB primary structure records. 432 433 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect3.html}. 434 435 436 @param lines: The lines of the PDB file excluding the title section. 437 @type lines: list of str 438 @return: The remaining PDB lines with the primary structure records stripped. 439 @rtype: list of str 440 """ 441 442 # The ordered list of record names in the primary structure section. 443 records = [ 444 'DBREF ', 445 'DBREF1', 446 'DBREF2', 447 'SEQADV', 448 'SEQRES', 449 'MODRES' 450 ] 451 452 # Loop over the lines. 453 for i in range(len(lines)): 454 # No match, therefore assume to be out of the primary structure section. 455 if lines[i][:6] not in records: 456 break 457 458 # Return the remaining lines. 459 return lines[i:]

460 461

462 - def _parse_pdb_ss(self, lines):

463 """Loop over and parse the PDB secondary structure records. 464 465 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect5.html}. 466 467 468 @param lines: The lines of the PDB file excluding the sections prior to the secondary structure section. 469 @type lines: list of str 470 @return: The remaining PDB lines with the secondary structure records stripped. 471 @rtype: list of str 472 """ 473 474 # The ordered list of record names in the secondary structure section (the depreciated TURN record is also included to handle old PDB files). 475 records = [ 476 'HELIX ', 477 'SHEET ', 478 'TURN ' 479 ] 480 481 # Loop over the lines. 482 for i in range(len(lines)): 483 # No match, therefore assume to be out of the secondary structure section. 484 if lines[i][:6] not in records: 485 break 486 487 # A helix. 488 if lines[i][:5] == 'HELIX': 489 # Parse the record. 490 record_type, ser_num, helix_id, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, helix_class, comment, length = pdb_read.helix(lines[i]) 491 492 # Store the data. 493 if not hasattr(self, 'helices'): 494 self.helices = [] 495 self.helices.append([helix_id, init_chain_id, init_res_name, init_seq_num, end_chain_id, end_res_name, end_seq_num, helix_class, length]) 496 497 # A sheet. 498 if lines[i][:5] == 'SHEET': 499 # Parse the record. 500 record_type, strand, sheet_id, num_strands, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, sense, cur_atom, cur_res_name, cur_chain_id, cur_res_seq, cur_icode, prev_atom, prev_res_name, prev_chain_id, prev_res_seq, prev_icode = pdb_read.sheet(lines[i]) 501 502 # Store the data. 503 if not hasattr(self, 'sheets'): 504 self.sheets = [] 505 self.sheets.append([strand, sheet_id, num_strands, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, sense, cur_atom, cur_res_name, cur_chain_id, cur_res_seq, cur_icode, prev_atom, prev_res_name, prev_chain_id, prev_res_seq, prev_icode]) 506 507 # Return the remaining lines. 508 return lines[i:]

509 510

511 - def _parse_pdb_title(self, lines):

512 """Loop over and parse the PDB title records. 513 514 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect2.html}. 515 516 517 @param lines: All lines of the PDB file. 518 @type lines: list of str 519 @return: The remaining PDB lines with the title records stripped. 520 @rtype: list of str 521 """ 522 523 # The ordered list of (sometimes truncated) record names in the title section. 524 records = [ 525 'HEADER', 526 'OBSLTE', 527 'TITLE ', 528 'SPLT ', 529 'CAVEAT', 530 'COMPND', 531 'SOURCE', 532 'KEYWDS', 533 'EXPDTA', 534 'NUMMDL', 535 'MDLTYP', 536 'AUTHOR', 537 'REVDAT', 538 'SPRSDE', 539 'JRNL ', 540 'REMARK' 541 ] 542 543 # Loop over the lines. 544 for i in range(len(lines)): 545 # No match, therefore assume to be out of the title section. 546 if lines[i][:6] not in records: 547 break 548 549 # Return the remaining lines. 550 return lines[i:]

551 552

553 - def _parse_pdb_transform(self, lines):

554 """Loop over and parse the PDB transform records. 555 556 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect8.html}. 557 558 559 @param lines: The lines of the PDB file excluding the sections prior to the transform section. 560 @type lines: list of str 561 @return: The remaining PDB lines with the transform records stripped. 562 @rtype: list of str 563 """ 564 565 # The ordered list of record names in the transform section. 566 records = [ 567 'CRYST', 568 'MTRIX', 569 'ORIGX', 570 'SCALE', 571 ] 572 573 # Loop over the lines. 574 for i in range(len(lines)): 575 # No match, therefore assume to be out of the transform section. 576 if lines[i][0: 5] not in records: 577 break 578 579 # Return the remaining lines. 580 return lines[i:]

581 582

583 - def _parse_models_xyz(self, file_path):

584 """Generator function for looping over the models in the XYZ file. 585 586 @param file_path: The full path of the XYZ file. 587 @type file_path: str 588 @return: The model number and all the records for that model. 589 @rtype: tuple of int and array of str 590 """ 591 592 # Open the file. 593 file = open_read_file(file_path) 594 lines = file.readlines() 595 file.close() 596 597 # Check for empty files. 598 if lines == []: 599 raise RelaxError("The XYZ file is empty.") 600 601 # Init. 602 total_atom = 0 603 model = 0 604 records = [] 605 606 # Loop over the data. 607 for i in range(len(lines)): 608 num=0 609 word = lines[i].split() 610 # Find the total atom number and the first model. 611 if (i==0) and (len(word)==1): 612 try: 613 total_atom = int(word[0]) 614 num = 1 615 except: 616 raise RelaxError("The MODEL record " + repr(lines[i]) + " is corrupt, cannot read the XYZ file.") 617 618 # End of the model. 619 if (len(records) == total_atom): 620 # Yield the info 621 yield records 622 623 # Reset the records. 624 records = [] 625 626 # Skip all records prior to atom coordinates record. 627 if (len(word) != 4): 628 continue 629 630 # Append the line as a record of the model. 631 records.append(lines[i]) 632 633 # If records is not empty then there are no models, so yield the lot. 634 if len(records): 635 yield records

636 637

638 - def _parse_mols(self, records):

639 """Generator function for looping over the molecules in the PDB records of a model. 640 641 @param records: The list of PDB records for the model, or if no models exist the entire PDB file. 642 @type records: list of str 643 @return: The molecule number and all the records for that molecule. 644 @rtype: tuple of int and list of str 645 """ 646 647 # Check for empty records. 648 if records == []: 649 raise RelaxError("There are no PDB records for this model.") 650 651 # Init. 652 mol_num = 1 653 mol_records = [] 654 end = False 655 656 # Loop over the data. 657 for i in range(len(records)): 658 # A PDB termination record. 659 if records[i][:3] == 'END': 660 break 661 662 # A master record, so we are done. 663 if records[i][:6] == 'MASTER': 664 break 665 666 # A model termination record. 667 if records[i][:6] == 'ENDMDL': 668 end = True 669 670 # A molecule termination record with no trailing HETATM. 671 elif i < len(records)-1 and records[i][:3] == 'TER' and not records[i+1][:6] == 'HETATM': 672 end = True 673 674 # A HETATM followed by an ATOM record. 675 elif i < len(records)-1 and records[i][:6] == 'HETATM' and records[i+1][:4] == 'ATOM': 676 end = True 677 678 # End. 679 if end: 680 # Yield the info. 681 yield mol_num, mol_records 682 683 # Reset the records. 684 mol_records = [] 685 686 # Increment the molecule number. 687 mol_num = mol_num + 1 688 689 # Reset the flag. 690 end = False 691 692 # Skip the rest of this loop. 693 continue 694 695 # Append the line as a record of the molecule. 696 mol_records.append(records[i]) 697 698 # If records is not empty then there is only a single molecule, so yield the lot. 699 if len(mol_records): 700 yield mol_num, mol_records

701 702

703 - def _pdb_chain_id_to_mol_index(self, chain_id=None):

704 """Convert the PDB chain ID into the molecule index in a regular way. 705 706 @keyword chain_id: The PDB chain ID string. 707 @type chain_id: str 708 @return: The corresponding molecule index. 709 @rtype: int 710 """ 711 712 # Initialise. 713 mol_index = 0 714 715 # Convert to the molecule index. 716 if chain_id: 717 mol_index = CHAIN_ID_LIST.index(chain_id) 718 719 # Return the index. 720 return mol_index

721 722

723 - def _residue_data(self, res_nums=None, res_names=None):

724 """Convert the residue info into a dictionary of unique residues with numbers as keys. 725 726 @keyword res_nums: The list of residue numbers. 727 @type res_nums: list of int 728 @keyword res_names: The list of residue names matching the numbers. 729 @type res_names: list of str 730 @return: The dictionary of residue names with residue numbers as keys. 731 @rtype: dict of str 732 """ 733 734 # Initialise. 735 data = {} 736 737 # Loop over the data. 738 for i in range(len(res_nums)): 739 # The residue data already exists. 740 if res_nums[i] in data: 741 continue 742 743 # Add the data. 744 data[res_nums[i]] = res_names[i] 745 746 # Return the dictionary. 747 return data

748 749

750 - def _validate_data_arrays(self, struct):

751 """Check the validity of the data arrays in the given structure object. 752 753 @param struct: The structural object. 754 @type struct: Structure_container instance 755 """ 756 757 # The number of atoms. 758 num = len(struct.atom_name) 759 760 # Check the other lengths. 761 if len(struct.bonded) != num and len(struct.chain_id) != num and len(struct.element) != num and len(struct.pdb_record) != num and len(struct.res_name) != num and len(struct.res_num) != num and len(struct.seg_id) != num and len(struct.x) != num and len(struct.y) != num and len(struct.z) != num: 762 raise RelaxError("The structural data is invalid.")

763 764

765 - def _mol_type(self, mol):

766 """Determine the type of molecule. 767 768 @param mol: The molecule data container. 769 @type mol: MolContainer instance 770 """ 771 772 # Amino acids. 773 aa = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'] 774 775 # Set the molecule type to default to 'other'. 776 mol.type = 'other' 777 778 # Loop over the residues. 779 for res in mol.res_name: 780 # Protein. 781 if res in aa: 782 # Set the molecule type and return. 783 mol.type = 'protein' 784 return

785 786

787 - def _protein_connect(self, mol):

788 """Set up the connectivities for the protein. 789 790 @param mol: The molecule data container. 791 @type mol: MolContainer instance 792 """ 793 794 # Initialise some residue data. 795 curr_res_num = None 796 res_atoms = [] 797 798 # Loop over all atoms. 799 for i in range(len(mol.atom_num)): 800 # New residue. 801 if mol.res_num[i] != curr_res_num: 802 # Intra-residue connectivites. 803 if len(res_atoms): 804 self._protein_intra_connect(mol, res_atoms) 805 806 # Update the residue number. 807 curr_res_num = mol.res_num[i] 808 809 # Reset the residue atom index list. 810 res_atoms = [] 811 812 # Add the atom index to the list. 813 res_atoms.append(i) 814 815 # Last atom. 816 if i == len(mol.atom_num) - 1 and len(res_atoms): 817 self._protein_intra_connect(mol, res_atoms)

818 819

820 - def _protein_intra_connect(self, mol, res_atoms):

821 """Set up the connectivities for the protein. 822 823 @param mol: The molecule data container. 824 @type mol: MolContainer instance 825 @param res_atoms: The list of atom indices corresponding to the residue. 826 @type res_atoms: list of int 827 """ 828 829 # Back bond connectivity. 830 indices = { 831 'N': None, 832 'C': None, 833 'O': None, 834 'CA': None, 835 'HN': None, 836 'H': None, # Same as HN. 837 'HA': None 838 } 839 840 # Loop over all atoms to find the indices. 841 for index in res_atoms: 842 if mol.atom_name[index] in indices: 843 indices[mol.atom_name[index]] = index 844 845 # Connect the atom pairs. 846 pairs = [ 847 ['N', 'HN'], 848 ['N', 'H'], 849 ['N', 'CA'], 850 ['CA', 'HA'], 851 ['CA', 'C'], 852 ['C', 'O'] 853 ] 854 855 # Loop over the atoms pairs and connect them. 856 for pair in pairs: 857 if indices[pair[0]] != None and indices[pair[1]] != None: 858 mol.atom_connect(indices[pair[0]], indices[pair[1]])

859 860

861 - def _translate(self, data, format='str'):

862 """Convert the data into a format for writing to file. 863 864 @param data: The data to convert to the required format. 865 @type data: anything 866 @keyword format: The format to convert to. This can be 'str', 'float', or 'int'. 867 @type format: str 868 @return: The converted version of the data. 869 @rtype: str 870 """ 871 872 # Conversion to string. 873 if format == 'str': 874 # None values. 875 if data == None: 876 data = '' 877 878 # Force convert to string. 879 if not isinstance(data, str): 880 data = repr(data) 881 882 # Conversion to float. 883 if format == 'float': 884 # None values. 885 if data == None: 886 data = 0.0 887 888 # Force convert to float. 889 if not isinstance(data, float): 890 data = float(data) 891 892 # Return the converted data. 893 return data

894 895

896 - def _trim_helix(self, helix=None, trim_res_list=[], res_data=None):

897 """Trim the given helix based on the list of deleted residue numbers. 898 899 @keyword helix: The single helix metadata structure. 900 @type helix: list 901 @keyword trim_res_list: The list of residue numbers which no longer exist. 902 @type trim_res_list: list of int 903 @keyword res_data: The dictionary of residue names with residue numbers as keys. 904 @type res_data: dict of str 905 @return: The trimmed helix metadata structure, or None if the whole helix is to be deleted. 906 @rtype: list or None 907 """ 908 909 # Unpack the helix residue numbers. 910 start_res = helix[3] 911 end_res = helix[6] 912 913 # The reverse residue list. 914 trim_res_list_rev = deepcopy(trim_res_list) 915 trim_res_list_rev.reverse() 916 917 # The helix residues. 918 helix_res = list(range(start_res, end_res+1)) 919 920 # Trim forwards. 921 for res_num in trim_res_list: 922 if res_num == start_res: 923 # Remove the residue. 924 helix_res.pop(0) 925 926 # No helix left. 927 if len(helix_res) == 0: 928 break 929 930 # Realias the starting residue. 931 start_res = helix_res[0] 932 933 # No helix left. 934 if len(helix_res) == 0: 935 return None 936 937 # Trim backwards. 938 for res_num in trim_res_list_rev: 939 if res_num == end_res: 940 helix_res.pop(-1) 941 end_res = helix_res[-1] 942 943 # Replace the starting and ending residues. 944 if start_res != helix[3]: 945 helix[3] = start_res 946 helix[2] = res_data[start_res] 947 if end_res != helix[6]: 948 helix[6] = end_res 949 helix[5] = res_data[end_res] 950 951 # The helix length. 952 helix[-1] = len(helix_res) 953 954 # Return the modified helix. 955 return helix

956 957

958 - def _trim_sheet(self, sheet=None, trim_res_list=[], res_data=None):

959 """Trim the given sheet based on the list of deleted residue numbers. 960 961 @keyword sheet: The single sheet metadata structure. 962 @type sheet: list 963 @keyword trim_res_list: The list of residue numbers which no longer exist. 964 @type trim_res_list: list of int 965 @keyword res_data: The dictionary of residue names with residue numbers as keys. 966 @type res_data: dict of str 967 @return: The trimmed sheet metadata structure, or None if the whole sheet is to be deleted. 968 @rtype: list or None 969 """ 970 971 # Unpack the sheet residue numbers. 972 start_res = sheet[5] 973 end_res = sheet[9] 974 975 # The reverse residue list. 976 trim_res_list_rev = deepcopy(trim_res_list) 977 trim_res_list_rev.reverse() 978 979 # The sheet residues. 980 sheet_res = list(range(start_res, end_res+1)) 981 982 # Trim forwards. 983 for res_num in trim_res_list: 984 if res_num == start_res: 985 # Remove the residue. 986 sheet_res.pop(0) 987 988 # No sheet left. 989 if len(sheet_res) == 0: 990 break 991 992 # Realias the starting residue. 993 start_res = sheet_res[0] 994 995 # No sheet left. 996 if len(sheet_res) == 0: 997 return None 998 999 # Trim backwards. 1000 for res_num in trim_res_list_rev: 1001 if res_num == end_res: 1002 sheet_res.pop(-1) 1003 end_res = sheet_res[-1] 1004 1005 # Replace the starting and ending residues. 1006 if start_res != sheet[5]: 1007 sheet[5] = start_res 1008 sheet[3] = res_data[start_res] 1009 if end_res != sheet[9]: 1010 sheet[9] = end_res 1011 sheet[7] = res_data[end_res] 1012 1013 # Return the modified sheet. 1014 return sheet

1015 1016

1017 - def add_atom(self, mol_name=None, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):

1018 """Add a new atom to the structural data object. 1019 1020 @keyword mol_name: The name of the molecule. 1021 @type mol_name: str 1022 @keyword atom_name: The atom name, e.g. 'H1'. 1023 @type atom_name: str or None 1024 @keyword res_name: The residue name. 1025 @type res_name: str or None 1026 @keyword res_num: The residue number. 1027 @type res_num: int or None 1028 @keyword pos: The position vector of coordinates. If a rank-2 array is supplied, the length of the first dimension must match the number of models. 1029 @type pos: rank-1 or rank-2 array or list of float 1030 @keyword element: The element symbol. 1031 @type element: str or None 1032 @keyword atom_num: The atom number. 1033 @type atom_num: int or None 1034 @keyword chain_id: The chain identifier. 1035 @type chain_id: str or None 1036 @keyword segment_id: The segment identifier. 1037 @type segment_id: str or None 1038 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'. 1039 @type pdb_record: str or None 1040 """ 1041 1042 # Add a model if not present. 1043 if len(self.structural_data) == 0: 1044 self.add_model() 1045 1046 # Check the position. 1047 if is_float(pos[0]): 1048 if len(pos) != 3: 1049 raise RelaxError("The single atomic position %s must be a 3D list." % pos) 1050 else: 1051 if len(pos) != len(self.structural_data): 1052 raise RelaxError("The %s atomic positions does not match the %s models present." % (len(pos), len(self.structural_data))) 1053 1054 # Loop over each model. 1055 for i in range(len(self.structural_data)): 1056 # Alias the model. 1057 model = self.structural_data[i] 1058 1059 # Specific molecule. 1060 mol = self.get_molecule(mol_name, model=model.num) 1061 1062 # Add the molecule, if it does not exist. 1063 if mol == None: 1064 self.add_molecule(name=mol_name) 1065 mol = self.get_molecule(mol_name, model=model.num) 1066 1067 # Split up the position if needed. 1068 if is_float(pos[0]): 1069 model_pos = pos 1070 else: 1071 model_pos = pos[i] 1072 1073 # Add the atom. 1074 mol.atom_add(atom_name=atom_name, res_name=res_name, res_num=res_num, pos=model_pos, element=element, atom_num=atom_num, chain_id=chain_id, segment_id=segment_id, pdb_record=pdb_record)

1075 1076

1077 - def add_model(self, model=None, coords_from=None):

1078 """Add a new model to the store. 1079 1080 The new model will be constructured with the structural information from the other models currently present. The coords_from argument allows the atomic positions to be taken from a certain model. If this argument is not set, then the atomic positions from the first model will be used. 1081 1082 @keyword model: The number of the model to create. 1083 @type model: int or None 1084 @keyword coords_from: The model number to take the coordinates from. 1085 @type coords_from: int or None 1086 @return: The model container. 1087 @rtype: ModelContainer instance 1088 """ 1089 1090 # Check if the model currently exists. 1091 if model != None: 1092 for i in range(len(self.structural_data)): 1093 if model == self.structural_data[i].num: 1094 raise RelaxError("The model '%s' already exists." % model) 1095 1096 # Add a new model. 1097 self.structural_data.add_item(model_num=model) 1098 1099 # The model to duplicate. 1100 if coords_from == None: 1101 coords_from = self.structural_data[0].num 1102 1103 # Construct the structural data for the model from the other models. 1104 for mol_name, res_num, res_name, atom_num, atom_name, element, pos in self.atom_loop(model_num=coords_from, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True): 1105 # Add the atom. 1106 self.add_atom(mol_name=mol_name, atom_name=atom_name, res_name=res_name, res_num=res_num, pos=pos, element=element, atom_num=atom_num) 1107 1108 # Return the model. 1109 return self.structural_data[-1]

1110 1111

1112 - def add_molecule(self, name=None):

1113 """Add a new molecule to the store. 1114 1115 @keyword name: The molecule identifier string. 1116 @type name: str 1117 """ 1118 1119 # Add a model if necessary. 1120 if len(self.structural_data) == 0: 1121 self.add_model() 1122 1123 # Loop over the models. 1124 for i in range(len(self.structural_data)): 1125 # Add the molecule. 1126 self.structural_data[i].mol.add_item(mol_name=name, mol_cont=MolContainer())

1127 1128

1129 - def are_bonded(self, atom_id1=None, atom_id2=None):

1130 """Determine if two atoms are directly bonded to each other. 1131 1132 @keyword atom_id1: The molecule, residue, and atom identifier string of the first atom. 1133 @type atom_id1: str 1134 @keyword atom_id2: The molecule, residue, and atom identifier string of the second atom. 1135 @type atom_id2: str 1136 @return: True if the atoms are directly bonded. 1137 @rtype: bool 1138 """ 1139 1140 # Generate the selection objects. 1141 sel_obj1 = Selection(atom_id1) 1142 sel_obj2 = Selection(atom_id2) 1143 1144 # Build the connectivities if needed. 1145 for mol in self.structural_data[0].mol: 1146 for i in range(len(mol.atom_num)): 1147 if not len(mol.bonded[i]): 1148 self._find_bonded_atoms(i, mol, radius=2) 1149 1150 # Loop over the molecules. 1151 for mol in self.structural_data[0].mol: 1152 # Skip non-matching molecules. 1153 if not sel_obj1.contains_mol(mol.mol_name): 1154 continue 1155 if not sel_obj2.contains_mol(mol.mol_name): 1156 continue 1157 1158 # Find the first atom. 1159 index1 = None 1160 for i in range(len(mol.atom_num)): 1161 # Skip a non-matching first atom. 1162 if sel_obj1.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 1163 index1 = i 1164 break 1165 1166 # Find the second atom. 1167 index2 = None 1168 for i in range(len(mol.atom_num)): 1169 # Skip a non-matching first atom. 1170 if sel_obj2.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 1171 index2 = i 1172 break 1173 1174 # Connectivities exist. 1175 if index1 < len(mol.bonded): 1176 if index2 in mol.bonded[index1]: 1177 return True 1178 else: 1179 return False

1180 1181

1182 - def atom_loop(self, atom_id=None, str_id=None, model_num=None, mol_name_flag=False, res_num_flag=False, res_name_flag=False, atom_num_flag=False, atom_name_flag=False, element_flag=False, pos_flag=False, index_flag=False, ave=False):

1183 """Generator function for looping over all atoms in the internal relax structural object. 1184 1185 This method should be designed as a U{generator<http://www.python.org/dev/peps/pep-0255/>}. It should loop over all atoms of the system yielding the following atomic information, if the corresponding flag is True, in tuple form: 1186 1187 1. Model number. 1188 2. Molecule name. 1189 3. Residue number. 1190 4. Residue name. 1191 5. Atom number. 1192 6. Atom name. 1193 7. The element name (its atomic symbol and optionally the isotope, e.g. 'N', 'Mg', '17O', '13C', etc). 1194 8. The position of the atom in Euclidean space. 1195 1196 1197 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be yielded. 1198 @type atom_id: str 1199 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number. If None, then all structures will be looped over. 1200 @type str_id: str, int, or None 1201 @keyword model_num: Only loop over a specific model. 1202 @type model_num: int or None 1203 @keyword mol_name_flag: A flag which if True will cause the molecule name to be yielded. 1204 @type mol_name_flag: bool 1205 @keyword res_num_flag: A flag which if True will cause the residue number to be yielded. 1206 @type res_num_flag: bool 1207 @keyword res_name_flag: A flag which if True will cause the residue name to be yielded. 1208 @type res_name_flag: bool 1209 @keyword atom_num_flag: A flag which if True will cause the atom number to be yielded. 1210 @type atom_num_flag: bool 1211 @keyword atom_name_flag: A flag which if True will cause the atom name to be yielded. 1212 @type atom_name_flag: bool 1213 @keyword element_flag: A flag which if True will cause the element name to be yielded. 1214 @type element_flag: bool 1215 @keyword pos_flag: A flag which if True will cause the atomic position to be yielded. 1216 @type pos_flag: bool 1217 @keyword index_flag: A flag which if True will cause the atomic index to be yielded. 1218 @type index_flag: bool 1219 @keyword ave: A flag which if True will result in this method returning the average atom properties across all loaded structures. 1220 @type ave: bool 1221 @return: A tuple of atomic information, as described in the docstring. 1222 @rtype: tuple consisting of optional molecule name (str), residue number (int), residue name (str), atom number (int), atom name(str), element name (str), and atomic position (array of len 3). 1223 """ 1224 1225 # Check that the structure is loaded. 1226 if not len(self.structural_data): 1227 raise RelaxNoPdbError 1228 1229 # Generate the selection object. 1230 sel_obj = None 1231 if atom_id: 1232 sel_obj = Selection(atom_id) 1233 1234 # Obtain all data from the first model (except the position data). 1235 model = self.structural_data[0] 1236 1237 # Loop over the molecules. 1238 for mol_index in range(len(model.mol)): 1239 mol = model.mol[mol_index] 1240 1241 # Skip non-matching molecules. 1242 if sel_obj and not sel_obj.contains_mol(mol.mol_name): 1243 continue 1244 1245 # Loop over all atoms. 1246 for i in range(len(mol.atom_name)): 1247 # Skip non-matching atoms. 1248 if sel_obj and not sel_obj.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 1249 continue 1250 1251 # Initialise. 1252 res_num = mol.res_num[i] 1253 res_name = mol.res_name[i] 1254 atom_num = mol.atom_num[i] 1255 atom_name = mol.atom_name[i] 1256 element = mol.element[i] 1257 1258 # The atom position. 1259 if pos_flag: 1260 # Average the position. 1261 if ave: 1262 # Initialise. 1263 pos = zeros(3, float64) 1264 1265 # Loop over the models. 1266 for model in self.model_loop(model=model_num): 1267 # Alias. 1268 mol2 = model.mol[mol_index] 1269 1270 # Some sanity checks. 1271 if mol2.atom_num[i] != atom_num: 1272 raise RelaxError("The loaded structures do not contain the same atoms. The average structural properties can not be calculated.") 1273 1274 # Sum the atom positions. 1275 pos = pos + array([mol2.x[i], mol2.y[i], mol2.z[i]], float64) 1276 1277 # Average the position array (divide by the number of models). 1278 pos = pos / len(self.structural_data) 1279 1280 # All positions. 1281 else: 1282 # Initialise. 1283 pos = [] 1284 1285 # Loop over the models. 1286 for model in self.model_loop(model=model_num): 1287 # Alias. 1288 mol2 = model.mol[mol_index] 1289 1290 # Append the position. 1291 pos.append([mol2.x[i], mol2.y[i], mol2.z[i]]) 1292 1293 # Convert. 1294 pos = array(pos, float64) 1295 1296 # The molecule name. 1297 mol_name = mol.mol_name 1298 1299 # Build the tuple to be yielded. 1300 atomic_tuple = () 1301 if mol_name_flag: 1302 atomic_tuple = atomic_tuple + (mol_name,) 1303 if res_num_flag: 1304 atomic_tuple = atomic_tuple + (res_num,) 1305 if res_name_flag: 1306 atomic_tuple = atomic_tuple + (res_name,) 1307 if atom_num_flag: 1308 atomic_tuple = atomic_tuple + (atom_num,) 1309 if atom_name_flag: 1310 atomic_tuple = atomic_tuple + (atom_name,) 1311 if element_flag: 1312 atomic_tuple = atomic_tuple + (element,) 1313 if pos_flag: 1314 atomic_tuple = atomic_tuple + (pos,) 1315 if index_flag: 1316 atomic_tuple += (i,) 1317 1318 # Yield the information. 1319 if len(atomic_tuple) == 1: 1320 atomic_tuple = atomic_tuple[0] 1321 yield atomic_tuple

1322 1323

1324 - def bond_vectors(self, attached_atom=None, model_num=None, mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, return_name=False, return_warnings=False):

1325 """Find the bond vector between the atoms of 'attached_atom' and 'atom_id'. 1326 1327 @keyword attached_atom: The name of the bonded atom. 1328 @type attached_atom: str 1329 @keyword model_num: The model of which to return the vectors from. If not supplied and multiple models exist, then vectors from all models will be returned. 1330 @type model_num: None or int 1331 @keyword mol_name: The name of the molecule that attached_atom belongs to. 1332 @type mol_name: str 1333 @keyword res_num: The number of the residue that attached_atom belongs to. 1334 @type res_num: str 1335 @keyword res_name: The name of the residue that attached_atom belongs to. 1336 @type res_name: str 1337 @keyword spin_num: The number of the spin that attached_atom is attached to. 1338 @type spin_num: str 1339 @keyword spin_name: The name of the spin that attached_atom is attached to. 1340 @type spin_name: str 1341 @keyword return_name: A flag which if True will cause the name of the attached atom to be returned together with the bond vectors. 1342 @type return_name: bool 1343 @keyword return_warnings: A flag which if True will cause warning messages to be returned. 1344 @type return_warnings: bool 1345 @return: The list of bond vectors for each model. 1346 @rtype: list of numpy arrays (or a tuple if return_name or return_warnings are set) 1347 """ 1348 1349 # Initialise some objects. 1350 vectors = [] 1351 attached_name = None 1352 warnings = None 1353 1354 # Use the first model for the atom matching. 1355 model = self.structural_data[0] 1356 1357 # Loop over the molecules. 1358 for mol_index in range(len(model.mol)): 1359 # Alias. 1360 mol = model.mol[mol_index] 1361 1362 # Skip non-matching molecules. 1363 if mol_name and mol_name != mol.mol_name: 1364 continue 1365 1366 # Find the atomic index of the base atom. 1367 index = None 1368 for i in range(len(mol.atom_name)): 1369 # Residues don't match. 1370 if (res_num != None and mol.res_num[i] != res_num) or (res_name != None and mol.res_name[i] != res_name): 1371 continue 1372 1373 # Atoms don't match. 1374 if (spin_num != None and mol.atom_num[i] != spin_num) or (spin_name != None and mol.atom_name[i] != spin_name): 1375 continue 1376 1377 # Update the index and stop searching. 1378 index = i 1379 break 1380 1381 # Found the atom. 1382 if index != None: 1383 # Loop over the models. 1384 for model in self.model_loop(model=model_num): 1385 # Alias. 1386 mol = model.mol[mol_index] 1387 1388 # Get the atom bonded to this model/molecule/residue/atom. 1389 bonded_num, bonded_name, element, pos, attached_name, warnings = self._bonded_atom(attached_atom, index, mol) 1390 1391 # No bonded atom. 1392 if (bonded_num, bonded_name, element) == (None, None, None): 1393 continue 1394 1395 # The bond vector. 1396 vector = array(pos, float64) - array([mol.x[index], mol.y[index], mol.z[index]], float64) 1397 1398 # Append the vector to the vectors array. 1399 vectors.append(vector) 1400 1401 # Not found. 1402 else: 1403 warnings = "Cannot find the atom in the structure" 1404 1405 # Build the tuple to be yielded. 1406 data = (vectors,) 1407 if return_name: 1408 data = data + (attached_name,) 1409 if return_warnings: 1410 data = data + (warnings,) 1411 1412 # Return the data. 1413 return data

1414 1415

1416 - def connect_atom(self, mol_name=None, index1=None, index2=None):

1417 """Connect two atoms in the structural data object. 1418 1419 @keyword mol_name: The name of the molecule. 1420 @type mol_name: str 1421 @keyword index1: The global index of the first atom. 1422 @type index1: str 1423 @keyword index2: The global index of the first atom. 1424 @type index2: str 1425 """ 1426 1427 # Add the molecule, if it does not exist. 1428 if self.get_molecule(mol_name) == None: 1429 self.add_molecule(name=mol_name) 1430 1431 # Loop over each model. 1432 for model in self.structural_data: 1433 # Specific molecule. 1434 mol = self.get_molecule(mol_name) 1435 1436 # Add the atom. 1437 mol.atom_connect(index1=index1, index2=index2)

1438 1439

1440 - def delete(self, atom_id=None):

1441 """Deletion of structural information. 1442 1443 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. If not given, then all structural data will be deleted. 1444 @type atom_id: str or None 1445 """ 1446 1447 # All data. 1448 if atom_id == None: 1449 # Print out. 1450 print("Deleting the following structural data:\n") 1451 print(self.structural_data) 1452 1453 # Delete the structural data. 1454 del self.structural_data 1455 1456 # Initialise the empty model list. 1457 self.structural_data = ModelList() 1458 1459 # Atom subset deletion. 1460 else: 1461 # Generate the selection object. 1462 sel_obj = None 1463 if atom_id: 1464 sel_obj = Selection(atom_id) 1465 1466 # Loop over the models. 1467 del_res_nums = [] 1468 for model in self.model_loop(): 1469 # Loop over the molecules. 1470 for mol_index in range(len(model.mol)): 1471 mol = model.mol[mol_index] 1472 1473 # Skip non-matching molecules. 1474 if sel_obj and not sel_obj.contains_mol(mol.mol_name): 1475 continue 1476 1477 # Loop over the atoms. 1478 indices = [] 1479 for i in self.atom_loop(atom_id=atom_id, model_num=model.num, index_flag=True): 1480 indices.append(i) 1481 1482 # Generate a residue data dictionary for the metadata trimming (prior to atom deletion). 1483 res_data = self._residue_data(res_nums=mol.res_num, res_names=mol.res_name) 1484 1485 # Loop over the reverse indices and pop out the data. 1486 indices.reverse() 1487 for i in indices: 1488 mol.atom_num.pop(i) 1489 mol.atom_name.pop(i) 1490 mol.bonded.pop(i) 1491 mol.chain_id.pop(i) 1492 mol.element.pop(i) 1493 mol.pdb_record.pop(i) 1494 mol.res_name.pop(i) 1495 res_num = mol.res_num.pop(i) 1496 mol.seg_id.pop(i) 1497 mol.x.pop(i) 1498 mol.y.pop(i) 1499 mol.z.pop(i) 1500 1501 # The residue no longer exists. 1502 if res_num not in mol.res_num and res_num not in del_res_nums: 1503 del_res_nums.append(res_num) 1504 1505 # Nothing more to do. 1506 if not len(del_res_nums): 1507 return 1508 1509 # Fix the deleted residue number order. 1510 del_res_nums.reverse() 1511 1512 # Handle the helix metadata. 1513 del_helix_indices = [] 1514 for i in range(len(self.helices)): 1515 # Trim the helix. 1516 helix = self._trim_helix(helix=self.helices[i], trim_res_list=del_res_nums, res_data=res_data) 1517 1518 # Trimmed helix. 1519 if helix != None: 1520 self.helices[i] = helix 1521 1522 # No helix left. 1523 else: 1524 del_helix_indices.append(i) 1525 1526 # Loop over the reverse helix indices and pop out the data. 1527 del_helix_indices.reverse() 1528 for i in del_helix_indices: 1529 self.helices.pop(i) 1530 1531 # Handle the sheet metadata. 1532 del_sheet_indices = [] 1533 for i in range(len(self.sheets)): 1534 # Trim the sheet. 1535 sheet = self._trim_sheet(sheet=self.sheets[i], trim_res_list=del_res_nums, res_data=res_data) 1536 1537 # Trimmed sheet. 1538 if sheet != None: 1539 self.sheets[i] = sheet 1540 1541 # No sheet left. 1542 else: 1543 del_sheet_indices.append(i) 1544 1545 # Loop over the reverse sheet indices and pop out the data. 1546 del_sheet_indices.reverse() 1547 for i in del_sheet_indices: 1548 self.sheets.pop(i)

1549 1550

1551 - def empty(self):

1552 """Report if the structural data structure is empty or not. 1553 1554 @return: True if empty, False otherwise. 1555 @rtype: bool 1556 """ 1557 1558 # Check the ModelList structure. 1559 if len(self.structural_data) == 0: 1560 return True 1561 else: 1562 return False

1563 1564

1565 - def from_xml(self, str_node, dir=None, file_version=1):

1566 """Recreate the structural object from the XML structural object node. 1567 1568 @param str_node: The structural object XML node. 1569 @type str_node: xml.dom.minicompat.Element instance 1570 @keyword dir: The name of the directory containing the results file. 1571 @type dir: str 1572 @keyword file_version: The relax XML version of the XML file. 1573 @type file_version: int 1574 """ 1575 1576 # Recreate all base objects (i.e. metadata). 1577 xml_to_object(str_node, self, file_version=file_version, blacklist=['model', 'displacements']) 1578 1579 # Recreate the model / molecule data structure. 1580 model_nodes = str_node.getElementsByTagName('model') 1581 self.structural_data.from_xml(model_nodes, file_version=file_version) 1582 1583 # The displacement structure. 1584 disp_nodes = str_node.getElementsByTagName('displacements') 1585 if len(disp_nodes): 1586 # Initialise the object. 1587 self.displacements = Displacements() 1588 1589 # Recreate the molecule data structures for the current model. 1590 self.displacements.from_xml(disp_nodes[0], file_version=file_version)

1591 1592

1593 - def get_model(self, model):

1594 """Return or create the model. 1595 1596 @param model: The model number. 1597 @type model: int or None 1598 @return: The ModelContainer corresponding to the model number or that newly created. 1599 @rtype: ModelContainer instance 1600 """ 1601 1602 # Check if the target is a single model. 1603 if model == None and self.num_models() > 1: 1604 raise RelaxError("The target model cannot be determined as there are %s models already present." % self.num_modes()) 1605 1606 # No model specified. 1607 if model == None: 1608 # Create the first model, if necessary. 1609 if self.num_models(): 1610 self.structural_data.add_item() 1611 1612 # Alias the first model. 1613 model_cont = self.structural_data[0] 1614 1615 # The model has been specified. 1616 else: 1617 # Get the preexisting model. 1618 found = False 1619 for model_cont in self.structural_data: 1620 if model_cont.num == model: 1621 found = True 1622 break 1623 1624 # Add the model if it doesn't exist. 1625 if not found: 1626 self.structural_data.add_item(model) 1627 model_cont = self.structural_data[-1]

1628 1629

1630 - def get_molecule(self, molecule, model=None):

1631 """Return the molecule. 1632 1633 Only one model can be specified. 1634 1635 1636 @param molecule: The molecule name. 1637 @type molecule: int or None 1638 @keyword model: The model number. 1639 @type model: int or None 1640 @raises RelaxError: If the model is not specified and there is more than one model loaded. 1641 @return: The MolContainer corresponding to the molecule name and model number. 1642 @rtype: MolContainer instance or None 1643 """ 1644 1645 # Check if the target is a single molecule. 1646 if model == None and self.num_models() > 1: 1647 raise RelaxError("The target molecule cannot be determined as there are %s models already present." % self.num_models()) 1648 1649 # Check the model argument. 1650 if not isinstance(model, int) and not model == None: 1651 raise RelaxNoneIntError 1652 1653 # No models. 1654 if not len(self.structural_data): 1655 return 1656 1657 # Loop over the models. 1658 for model_cont in self.model_loop(model): 1659 # Loop over the molecules. 1660 for mol in model_cont.mol: 1661 # Return the matching molecule. 1662 if mol.mol_name == molecule: 1663 return mol

1664 1665

1666 - def has_molecule(self, name=None):

1667 """Check if the molecule name exists. 1668 1669 @param name: The molecule name. 1670 @type name: str 1671 @return: True if the molecule exists, False otherwise. 1672 @rtype: bool 1673 """ 1674 1675 # No models. 1676 if not len(self.structural_data): 1677 return False 1678 1679 # Loop over the models. 1680 for model_cont in self.model_loop(): 1681 # Loop over the molecules. 1682 for mol in model_cont.mol: 1683 # Matching molecule. 1684 if mol.mol_name == name: 1685 return True 1686 1687 # No match. 1688 return False

1689 1690

1691 - def load_gaussian(self, file_path, set_mol_name=None, set_model_num=None, verbosity=False):

1692 """Method for loading structures from a Gaussian log file. 1693 1694 @param file_path: The full path of the Gaussian log file. 1695 @type file_path: str 1696 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 1697 @type set_mol_name: None, str, or list of str 1698 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the Gaussian log model numbers will be preserved, if they exist. 1699 @type set_model_num: None, int, or list of int 1700 @keyword verbosity: A flag which if True will cause messages to be printed. 1701 @type verbosity: bool 1702 @return: The status of the loading of the Gaussian log file. 1703 @rtype: bool 1704 """ 1705 1706 # Initial printout. 1707 if verbosity: 1708 print("\nInternal relax Gaussian log parser.") 1709 1710 # Test if the file exists. 1711 if not access(file_path, F_OK): 1712 # Exit indicating failure. 1713 return False 1714 1715 # Separate the file name and path. 1716 path, file_name = os.path.split(file_path) 1717 1718 # The absolute path. 1719 path_abs = abspath(curdir) + sep + path 1720 1721 # Set up the molecule name data structure. 1722 if set_mol_name: 1723 if not isinstance(set_mol_name, list): 1724 set_mol_name = [set_mol_name] 1725 else: 1726 set_mol_name = [file_root(file_name) + '_mol1'] 1727 1728 # Set up the model number data structure. 1729 if set_model_num: 1730 if not isinstance(set_model_num, list): 1731 set_model_num = [set_model_num] 1732 else: 1733 set_model_num = [1] 1734 1735 # Loop over all models in the Gaussian log file, doing nothing so the last model records are stored. 1736 for model_records in self._parse_models_gaussian(file_path): 1737 pass 1738 1739 # Generate the molecule container. 1740 mol = MolContainer() 1741 1742 # Fill the molecular data object. 1743 mol.fill_object_from_gaussian(model_records) 1744 1745 # Create the structural data data structures. 1746 self.pack_structs([[mol]], orig_model_num=[1], set_model_num=set_model_num, orig_mol_num=[0], set_mol_name=set_mol_name, file_name=file_name, file_path=path, file_path_abs=path_abs) 1747 1748 # Loading worked. 1749 return True

1750 1751

1752 - def load_pdb(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, alt_loc=None, verbosity=False, merge=False):

1753 """Method for loading structures from a PDB file. 1754 1755 @param file_path: The full path of the PDB file. 1756 @type file_path: str 1757 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are determined differently by the different parsers, but are numbered consecutively from 1. If set to None, then all molecules will be loaded. 1758 @type read_mol: None, int, or list of int 1759 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 1760 @type set_mol_name: None, str, or list of str 1761 @keyword read_model: The PDB model to extract from the file. If set to None, then all models will be loaded. 1762 @type read_model: None, int, or list of int 1763 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the PDB model numbers will be preserved, if they exist. 1764 @type set_model_num: None, int, or list of int 1765 @keyword alt_loc: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use. 1766 @type alt_loc: str or None 1767 @keyword verbosity: A flag which if True will cause messages to be printed. 1768 @type verbosity: bool 1769 @keyword merge: A flag which if set to True will try to merge the PDB structure into the currently loaded structures. 1770 @type merge: bool 1771 @return: The status of the loading of the PDB file. 1772 @rtype: bool 1773 """ 1774 1775 # Initial printout. 1776 if verbosity: 1777 print("\nInternal relax PDB parser.") 1778 1779 # Test if the file exists. 1780 if not access(file_path, F_OK): 1781 # Exit indicating failure. 1782 return False 1783 1784 # Separate the file name and path. 1785 path, file = os.path.split(file_path) 1786 1787 # The absolute path. 1788 path_abs = abspath(curdir) + sep + path 1789 1790 # Convert the structure reading args into lists. 1791 if read_mol and not isinstance(read_mol, list): 1792 read_mol = [read_mol] 1793 if set_mol_name and not isinstance(set_mol_name, list): 1794 set_mol_name = [set_mol_name] 1795 if read_model and not isinstance(read_model, list): 1796 read_model = [read_model] 1797 if set_model_num and not isinstance(set_model_num, list): 1798 set_model_num = [set_model_num] 1799 1800 # Open the PDB file. 1801 pdb_file = open_read_file(file_path) 1802 pdb_lines = pdb_file.readlines() 1803 pdb_file.close() 1804 1805 # Check for empty files. 1806 if pdb_lines == []: 1807 raise RelaxError("The PDB file is empty.") 1808 1809 # Process the different sections. 1810 pdb_lines = self._parse_pdb_title(pdb_lines) 1811 pdb_lines = self._parse_pdb_prim_struct(pdb_lines) 1812 pdb_lines = self._parse_pdb_hetrogen(pdb_lines) 1813 pdb_lines = self._parse_pdb_ss(pdb_lines) 1814 pdb_lines = self._parse_pdb_connectivity_annotation(pdb_lines) 1815 pdb_lines = self._parse_pdb_misc(pdb_lines) 1816 pdb_lines = self._parse_pdb_transform(pdb_lines) 1817 1818 # Loop over all models in the PDB file. 1819 model_index = 0 1820 orig_model_num = [] 1821 mol_conts = [] 1822 for model_num, model_records in self._parse_pdb_coord(pdb_lines): 1823 # Only load the desired model. 1824 if read_model and model_num not in read_model: 1825 continue 1826 1827 # Store the original model number. 1828 orig_model_num.append(model_num) 1829 1830 # Loop over the molecules of the model. 1831 mol_conts.append([]) 1832 mol_index = 0 1833 orig_mol_num = [] 1834 new_mol_name = [] 1835 for mol_num, mol_records in self._parse_mols(model_records): 1836 # Only load the desired model. 1837 if read_mol and mol_num not in read_mol: 1838 continue 1839 1840 # Set the target molecule name. 1841 if set_mol_name: 1842 new_mol_name.append(set_mol_name[mol_index]) 1843 else: 1844 # Number of structures already present for the model. 1845 num_struct = 0 1846 if self.structural_data != None and len(self.structural_data) and (not set_model_num or (model_index <= len(set_model_num) and set_model_num[model_index] == self.structural_data[0].num)): 1847 num_struct = len(self.structural_data[0].mol) 1848 1849 # Set the name to the file name plus the structure number. 1850 new_mol_name.append(file_root(file) + '_mol' + repr(mol_num+num_struct)) 1851 1852 # Store the original mol number. 1853 orig_mol_num.append(mol_num) 1854 1855 # Generate the molecule container. 1856 mol = MolContainer() 1857 1858 # Fill the molecular data object. 1859 mol.fill_object_from_pdb(mol_records, alt_loc_select=alt_loc) 1860 1861 # Store the molecule container. 1862 mol_conts[model_index].append(mol) 1863 1864 # Increment the molecule index. 1865 mol_index = mol_index + 1 1866 1867 # Increment the model index. 1868 model_index = model_index + 1 1869 1870 # No data, so throw a warning and exit. 1871 if not len(mol_conts): 1872 warn(RelaxWarning("No structural data could be read from the file '%s'." % file_path)) 1873 return False 1874 1875 # Create the structural data data structures. 1876 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=orig_mol_num, set_mol_name=new_mol_name, file_name=file, file_path=path, file_path_abs=path_abs, merge=merge) 1877 1878 # Loading worked. 1879 return True

1880 1881

1882 - def load_xyz(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, verbosity=False):

1883 """Method for loading structures from a XYZ file. 1884 1885 @param file_path: The full path of the XYZ file. 1886 @type file_path: str 1887 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are determined differently by the different parsers, but are numbered consecutively from 1. If set to None, then all molecules will be loaded. 1888 @type read_mol: None, int, or list of int 1889 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 1890 @type set_mol_name: None, str, or list of str 1891 @keyword read_model: The XYZ model to extract from the file. If set to None, then all models will be loaded. 1892 @type read_model: None, int, or list of int 1893 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the XYZ model numbers will be preserved, if they exist. 1894 @type set_model_num: None, int, or list of int 1895 @keyword verbosity: A flag which if True will cause messages to be printed. 1896 @type verbosity: bool 1897 @return: The status of the loading of the XYZ file. 1898 @rtype: bool 1899 """ 1900 1901 # Initial printout. 1902 if verbosity: 1903 print("\nInternal relax XYZ parser.") 1904 1905 # Test if the file exists. 1906 if not access(file_path, F_OK): 1907 # Exit indicating failure. 1908 return False 1909 1910 # Separate the file name and path. 1911 path, file = os.path.split(file_path) 1912 1913 # The absolute path. 1914 path_abs = abspath(curdir) + sep + path 1915 1916 # Convert the structure reading args into lists. 1917 if read_mol and not isinstance(read_mol, list): 1918 read_mol = [read_mol] 1919 if set_mol_name and not isinstance(set_mol_name, list): 1920 set_mol_name = [set_mol_name] 1921 if read_model and not isinstance(read_model, list): 1922 read_model = [read_model] 1923 if set_model_num and not isinstance(set_model_num, list): 1924 set_model_num = [set_model_num] 1925 1926 # Loop over all models in the XYZ file. 1927 mol_index = 0 1928 model_index = 0 1929 xyz_model_increment = 0 1930 orig_model_num = [] 1931 mol_conts = [] 1932 orig_mol_num = [] 1933 new_mol_name = [] 1934 for model_records in self._parse_models_xyz(file_path): 1935 # Increment the xyz_model_increment 1936 xyz_model_increment = xyz_model_increment +1 1937 1938 # Only load the desired model. 1939 if read_model and xyz_model_increment not in read_model: 1940 continue 1941 1942 # Store the original model number. 1943 orig_model_num.append(model_index) 1944 1945 # Loop over the molecules of the model. 1946 if read_mol and mol_index not in read_mol: 1947 continue 1948 1949 # Set the target molecule name. 1950 if set_mol_name: 1951 new_mol_name.append(set_mol_name[mol_index]) 1952 else: 1953 if mol_index == 0: 1954 #Set the name to the file name plus the structure number. 1955 new_mol_name.append(file_root(file) + '_mol' + repr(mol_index+1)) 1956 1957 # Store the original mol number. 1958 orig_mol_num.append(mol_index) 1959 1960 # Generate the molecule container. 1961 mol = MolContainer() 1962 1963 # Fill the molecular data object. 1964 mol.fill_object_from_xyz(model_records) 1965 1966 # Store the molecule container. 1967 mol_conts.append([]) 1968 mol_conts[model_index].append(mol) 1969 1970 # Increment the molecule index. 1971 mol_index = mol_index + 1 1972 1973 # Increment the model index. 1974 model_index = model_index + 1 1975 1976 # Create the structural data data structures. 1977 orig_mol_num = [0] 1978 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=orig_mol_num, set_mol_name=new_mol_name, file_name=file, file_path=path, file_path_abs=path_abs) 1979 1980 # Loading worked. 1981 return True

1982 1983

1984 - def model_loop(self, model=None):

1985 """Generator method for looping over the models in numerical order. 1986 1987 @keyword model: Limit the loop to a single number. 1988 @type model: int 1989 @return: The model structural object. 1990 @rtype: ModelContainer container 1991 """ 1992 1993 # A single model. 1994 if model: 1995 for i in range(len(self.structural_data)): 1996 if self.structural_data[i].num == model: 1997 yield self.structural_data[i] 1998 1999 # All models. 2000 else: 2001 # The models. 2002 model_nums = [] 2003 for i in range(len(self.structural_data)): 2004 if self.structural_data[i].num != None: 2005 model_nums.append(self.structural_data[i].num) 2006 2007 # Sort. 2008 if model_nums: 2009 model_nums.sort() 2010 2011 # Loop over the models in order. 2012 for model_num in model_nums: 2013 # Find the model. 2014 for i in range(len(self.structural_data)): 2015 # Yield the model. 2016 if self.structural_data[i].num == model_num: 2017 yield self.structural_data[i] 2018 2019 # No models, so just yield the single container. 2020 if not model_nums: 2021 yield self.structural_data[0]

2022 2023

2024 - def num_models(self):

2025 """Method for returning the number of models. 2026 2027 @return: The number of models in the structural object. 2028 @rtype: int 2029 """ 2030 2031 return len(self.structural_data)

2032 2033

2034 - def num_molecules(self):

2035 """Method for returning the number of molecules. 2036 2037 @return: The number of molecules in the structural object. 2038 @rtype: int 2039 """ 2040 2041 # No data. 2042 if self.empty(): 2043 return 0 2044 2045 # Validate the structural object. 2046 self.validate() 2047 2048 # Return the number. 2049 return len(self.structural_data[0].mol)

2050 2051

2052 - def pack_structs(self, data_matrix, orig_model_num=None, set_model_num=None, orig_mol_num=None, set_mol_name=None, file_name=None, file_path=None, file_path_abs=None, merge=False):

2053 """From the given structural data, expand the structural data data structure. 2054 2055 @param data_matrix: A matrix of structural objects. 2056 @type data_matrix: list of lists of structural objects 2057 @keyword orig_model_num: The original model numbers (for storage). 2058 @type orig_model_num: list of int 2059 @keyword set_model_num: The new model numbers (for model renumbering). 2060 @type set_model_num: list of int 2061 @keyword orig_mol_num: The original molecule numbers (for storage). 2062 @type orig_mol_num: list of int 2063 @keyword set_mol_name: The new molecule names. 2064 @type set_mol_name: list of str 2065 @keyword file_name: The name of the file from which the molecular data has been extracted. 2066 @type file_name: None or str 2067 @keyword file_path: The full path to the file specified by 'file_name'. 2068 @type file_path: None or str 2069 @keyword file_path_abs: The absolute path to the file specified by 'file_name'. This is a fallback mechanism in case results or save files are located somewhere other than the working directory. 2070 @type file_path_abs: None or str 2071 @keyword merge: A flag which if set to True will try to merge the structure into the currently loaded structures. 2072 @type merge: bool 2073 """ 2074 2075 # Test the number of models. 2076 if len(orig_model_num) != len(data_matrix): 2077 raise RelaxError("Structural data mismatch, %s original models verses %s in the structural data." % (len(orig_model_num), len(data_matrix))) 2078 2079 # Test the number of molecules. 2080 if len(orig_mol_num) != len(data_matrix[0]): 2081 raise RelaxError("Structural data mismatch, %s original molecules verses %s in the structural data." % (len(orig_mol_num), len(data_matrix[0]))) 2082 2083 # Model numbers do not change. 2084 if not set_model_num: 2085 set_model_num = orig_model_num 2086 2087 # Test the model mapping. 2088 if len(set_model_num) != len(data_matrix): 2089 raise RelaxError("Failure of the mapping of new model numbers, %s new model numbers verses %s models in the structural data." % (len(set_model_num), len(data_matrix))) 2090 2091 # Test the molecule mapping. 2092 if len(set_mol_name) != len(data_matrix[0]): 2093 raise RelaxError("Failure of the mapping of new molecule names, %s new molecule names verses %s molecules in the structural data." % (len(set_mol_name), len(data_matrix[0]))) 2094 2095 # Test that the target models and structures are absent, and get the already existing model numbers. 2096 for i in range(len(set_model_num)): 2097 # Merging flag is set, so skip the checks. 2098 if merge: 2099 continue 2100 2101 # A new model, so no need to check. 2102 if not set_model_num[i] in self.structural_data.current_models: 2103 continue 2104 2105 # Loop over the structures. 2106 index = self.structural_data.current_models.index(set_model_num[i]) 2107 for j in range(len(self.structural_data[index].mol)): 2108 if self.structural_data[index].mol[j].mol_name in set_mol_name: 2109 raise RelaxError("The molecule '%s' of model %s already exists." % (self.structural_data[i].mol[j].mol_name, self.structural_data[i].num)) 2110 2111 # Loop over the models. 2112 for i in range(len(set_model_num)): 2113 # The model doesn't currently exist. 2114 if set_model_num[i] not in self.structural_data.current_models: 2115 # Create the model. 2116 self.structural_data.add_item(set_model_num[i]) 2117 2118 # Get the model. 2119 model = self.structural_data[-1] 2120 2121 # Otherwise get the pre-existing model. 2122 else: 2123 model = self.structural_data[self.structural_data.current_models.index(set_model_num[i])] 2124 2125 # Loop over the molecules. 2126 for j in range(len(set_mol_name)): 2127 # Print out. 2128 if merge: 2129 print("Merging with model %s of molecule '%s' (from the original molecule number %s of model %s)" % (set_model_num[i], set_mol_name[j], orig_mol_num[j], orig_model_num[i])) 2130 else: 2131 print("Adding molecule '%s' to model %s (from the original molecule number %s of model %s)" % (set_mol_name[j], set_model_num[i], orig_mol_num[j], orig_model_num[i])) 2132 2133 # The index of the new molecule to add or merge. 2134 index = len(model.mol) 2135 if merge: 2136 index -= 1 2137 2138 # Consistency check. 2139 if model.num != self.structural_data[0].num and self.structural_data[0].mol[index].mol_name != set_mol_name[j]: 2140 raise RelaxError("The new molecule name of '%s' in model %s does not match the corresponding molecule's name of '%s' in model %s." % (set_mol_name[j], set_model_num[i], self.structural_data[0].mol[index].mol_name, self.structural_data[0].num)) 2141 2142 # Pack the structures. 2143 if merge: 2144 mol = model.mol.merge_item(mol_name=set_mol_name[j], mol_cont=data_matrix[i][j]) 2145 else: 2146 mol = model.mol.add_item(mol_name=set_mol_name[j], mol_cont=data_matrix[i][j]) 2147 2148 # Set the molecule name and store the structure file info. 2149 mol.mol_name = set_mol_name[j] 2150 mol.file_name = file_name 2151 mol.file_path = file_path 2152 mol.file_path_abs = file_path_abs 2153 mol.file_mol_num = orig_mol_num[j] 2154 mol.file_model = orig_model_num[i]

2155 2156

2157 - def rotate(self, R=None, origin=None, model=None, atom_id=None):

2158 """Rotate the structural information about the given origin. 2159 2160 @keyword R: The forwards rotation matrix. 2161 @type R: numpy 3D, rank-2 array 2162 @keyword origin: The origin of the rotation. 2163 @type origin: numpy 3D, rank-1 array 2164 @keyword model: The model to rotate. If None, all models will be rotated. 2165 @type model: int 2166 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 2167 @type atom_id: str or None 2168 """ 2169 2170 # Generate the selection object. 2171 sel_obj = None 2172 if atom_id: 2173 sel_obj = Selection(atom_id) 2174 2175 # Loop over the models. 2176 for model_cont in self.model_loop(model): 2177 # Loop over the molecules. 2178 for mol in model_cont.mol: 2179 # Skip non-matching molecules. 2180 if sel_obj and not sel_obj.contains_mol(mol.mol_name): 2181 continue 2182 2183 # Loop over the atoms. 2184 for i in range(len(mol.atom_num)): 2185 # Skip non-matching atoms. 2186 if sel_obj and not sel_obj.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 2187 continue 2188 2189 # The origin to atom vector. 2190 vect = array([mol.x[i], mol.y[i], mol.z[i]], float64) - origin 2191 2192 # Rotation. 2193 rot_vect = dot(R, vect) 2194 2195 # The new position. 2196 pos = rot_vect + origin 2197 mol.x[i] = pos[0] 2198 mol.y[i] = pos[1] 2199 mol.z[i] = pos[2]

2200 2201

2202 - def target_mol_name(self, set=None, target=None, index=None, mol_num=None, file=None):

2203 """Add the new molecule name to the target data structure. 2204 2205 @keyword set: The list of new molecule names. If not supplied, the names will be generated from the file name. 2206 @type set: None or list of str 2207 @keyword target: The target molecule name data structure to which the new name will be appended. 2208 @type target: list 2209 @keyword index: The molecule index, matching the set argument. 2210 @type index: int 2211 @keyword mol_num: The molecule number. 2212 @type mol_num: int 2213 @keyword file: The name of the file, excluding all directories. 2214 @type file: str 2215 """ 2216 2217 # Set the target molecule name. 2218 if set: 2219 target.append(set[index]) 2220 else: 2221 # Set the name to the file name plus the structure number. 2222 target.append(file_root(file) + '_mol' + repr(mol_num))

2223 2224

2225 - def translate(self, T=None, model=None, atom_id=None):

2226 """Displace the structural information by the given translation vector. 2227 2228 @keyword T: The translation vector. 2229 @type T: numpy 3D, rank-1 array 2230 @keyword model: The model to rotate. If None, all models will be rotated. 2231 @type model: int 2232 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 2233 @type atom_id: str or None 2234 """ 2235 2236 # Generate the selection object. 2237 sel_obj = None 2238 if atom_id: 2239 sel_obj = Selection(atom_id) 2240 2241 # Loop over the models. 2242 for model_cont in self.model_loop(model): 2243 # Loop over the molecules. 2244 for mol in model_cont.mol: 2245 # Skip non-matching molecules. 2246 if sel_obj and not sel_obj.contains_mol(mol.mol_name): 2247 continue 2248 2249 # Loop over the atoms. 2250 for i in range(len(mol.atom_num)): 2251 # Skip non-matching atoms. 2252 if sel_obj and not sel_obj.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 2253 continue 2254 2255 # Translate. 2256 mol.x[i] = mol.x[i] + T[0] 2257 mol.y[i] = mol.y[i] + T[1] 2258 mol.z[i] = mol.z[i] + T[2]

2259 2260

2261 - def to_xml(self, doc, element):

2262 """Prototype method for converting the structural object to an XML representation. 2263 2264 @param doc: The XML document object. 2265 @type doc: xml.dom.minidom.Document instance 2266 @param element: The element to add the alignment tensors XML element to. 2267 @type element: XML element object 2268 """ 2269 2270 # Create the structural element and add it to the higher level element. 2271 str_element = doc.createElement('structure') 2272 element.appendChild(str_element) 2273 2274 # Set the structural attributes. 2275 str_element.setAttribute('desc', 'Structural information') 2276 2277 # No contents to store, so pack up the structural containers. 2278 if not self.structural_data.is_empty(): 2279 self.structural_data.to_xml(doc, str_element) 2280 2281 # The structural metadata. 2282 metadata = ['helices', 'sheets'] 2283 for name in metadata: 2284 # The metadata does not exist. 2285 if not hasattr(self, name): 2286 continue 2287 2288 # Get the object. 2289 obj = getattr(self, name) 2290 2291 # Create a new element for this object, and add it to the main element. 2292 sub_elem = doc.createElement(name) 2293 str_element.appendChild(sub_elem) 2294 2295 # Add the value to the sub element. 2296 object_to_xml(doc, sub_elem, value=obj) 2297 2298 # The displacement structure. 2299 if hasattr(self, 'displacements'): 2300 # Create an XML element. 2301 disp_element = doc.createElement('displacements') 2302 str_element.appendChild(disp_element) 2303 2304 # Set the attributes. 2305 disp_element.setAttribute('desc', 'The rotational and translational displacements between models') 2306 2307 # Add the displacement data. 2308 self.displacements.to_xml(doc, disp_element)

2309 2310

2311 - def validate_models(self):

2312 """Check that the models are consistent with each other. 2313 2314 This checks that the primary structure is identical between the models. 2315 """ 2316 2317 # Print out. 2318 print("Validating models:") 2319 2320 # Loop over the models. 2321 for i in range(len(self.structural_data)): 2322 # Check the molecules. 2323 if len(self.structural_data[0].mol) != len(self.structural_data[i].mol): 2324 raise RelaxError("The number of molecules, %i, in model %i does not match the %i molecules of the first model." % (len(self.structural_data[i].mol), self.structural_data[i].num, len(self.structural_data[0].mol))) 2325 2326 # Loop over the molecules. 2327 for j in range(len(self.structural_data[i].mol)): 2328 # Alias the molecules. 2329 mol = self.structural_data[i].mol[j] 2330 mol_ref = self.structural_data[0].mol[j] 2331 2332 # Check the names. 2333 if mol.mol_name != mol_ref.mol_name: 2334 raise RelaxError("The molecule name '%s' of model %i does not match the name '%s' of the first model." % (mol.mol_name, self.structural_data[i].num, mol_ref.mol_name)) 2335 2336 # Loop over the atoms. 2337 for k in range(len(mol.atom_name)): 2338 # Create pseudo-pdb formatted records (with no atomic coordinates). 2339 atom = "%-6s%5s %4s%1s%3s %1s%4s%1s %8s%8s%8s%6.2f%6.2f %4s%2s%2s" % ('ATOM', mol.atom_num[k], self._translate(mol.atom_name[k]), '', self._translate(mol.res_name[k]), self._translate(mol.chain_id[k]), self._translate(mol.res_num[k]), '', '#', '#', '#', 1.0, 0, self._translate(mol.seg_id[k]), self._translate(mol.element[k]), '') 2340 atom_ref = "%-6s%5s %4s%1s%3s %1s%4s%1s %8s%8s%8s%6.2f%6.2f %4s%2s%2s" % ('ATOM', mol_ref.atom_num[k], self._translate(mol_ref.atom_name[k]), '', self._translate(mol_ref.res_name[k]), self._translate(mol_ref.chain_id[k]), self._translate(mol_ref.res_num[k]), '', '#', '#', '#', 1.0, 0, self._translate(mol_ref.seg_id[k]), self._translate(mol_ref.element[k]), '') 2341 2342 # Check the atom info. 2343 if atom != atom_ref: 2344 print(atom) 2345 print(atom_ref) 2346 raise RelaxError("The atoms of model %i do not match the first model." % self.structural_data[i].num) 2347 2348 # Final printout. 2349 print("\tAll models are consistent")

2350 2351

2352 - def write_pdb(self, file, model_num=None):

2353 """Method for the creation of a PDB file from the structural data. 2354 2355 A number of PDB records including HET, HETNAM, FORMUL, HELIX, SHEET, HETATM, TER, CONECT, MASTER, and END are created. To create the non-standard residue records HET, HETNAM, and FORMUL, the data structure 'het_data' is created. It is an array of arrays where the first dimension corresponds to a different residue and the second dimension has the elements: 2356 2357 0. Residue number. 2358 1. Residue name. 2359 2. Chain ID. 2360 3. Total number of atoms in the residue. 2361 4. Number of H atoms in the residue. 2362 5. Number of C atoms in the residue. 2363 2364 2365 @param file: The PDB file object. This object must be writable. 2366 @type file: file object 2367 @keyword model_num: The model to place into the PDB file. If not supplied, then all models will be placed into the file. 2368 @type model_num: None or int 2369 """ 2370 2371 # Validate the structural data. 2372 self.validate() 2373 2374 # Print out. 2375 print("\nCreating the PDB records\n") 2376 2377 # Write some initial remarks. 2378 print("REMARK") 2379 pdb_write.remark(file, num=4, remark="This file complies with format v. 3.30, Jul-2011.") 2380 pdb_write.remark(file, num=40, remark="Created by relax (http://www.nmr-relax.com).") 2381 num_remark = 2 2382 2383 # Determine if model records will be created. 2384 model_records = False 2385 for model in self.model_loop(): 2386 if hasattr(model, 'num') and model.num != None: 2387 model_records = True 2388 2389 2390 #################### 2391 # Hetrogen section # 2392 #################### 2393 2394 # Initialise the hetrogen info array. 2395 het_data = [] 2396 het_data_coll = [] 2397 2398 # Loop over the molecules of the first model. 2399 index = 0 2400 for mol in self.structural_data[0].mol: 2401 # Check the validity of the data. 2402 self._validate_data_arrays(mol) 2403 2404 # Append an empty array for this molecule. 2405 het_data.append([]) 2406 2407 # Collect the non-standard residue info. 2408 for i in range(len(mol.atom_name)): 2409 # Skip non-HETATM records and HETATM records with no residue info. 2410 if mol.pdb_record[i] != 'HETATM' or mol.res_name[i] == None: 2411 continue 2412 2413 # If the residue is not already stored initialise a new het_data element. 2414 # (residue number, residue name, chain ID, number of atoms, atom count array). 2415 if not het_data[index] or not mol.res_num[i] == het_data[index][-1][0]: 2416 het_data[index].append([mol.res_num[i], mol.res_name[i], CHAIN_ID_LIST[index], 0, []]) 2417 2418 # Catch missing chain_ids. 2419 if het_data[index][-1][2] == None: 2420 het_data[index][-1][2] = '' 2421 2422 # Total atom count. 2423 het_data[index][-1][3] = het_data[index][-1][3] + 1 2424 2425 # Find if the atom has already a count entry. 2426 entry = False 2427 for j in range(len(het_data[index][-1][4])): 2428 if mol.element[i] == het_data[index][-1][4][j][0]: 2429 entry = True 2430 2431 # Create a new specific atom count entry. 2432 if not entry: 2433 het_data[index][-1][4].append([mol.element[i], 0]) 2434 2435 # Increment the specific atom count. 2436 for j in range(len(het_data[index][-1][4])): 2437 if mol.element[i] == het_data[index][-1][4][j][0]: 2438 het_data[index][-1][4][j][1] = het_data[index][-1][4][j][1] + 1 2439 2440 # Create the collective hetrogen info data structure. 2441 for i in range(len(het_data[index])): 2442 # Find the entry in the collective structure. 2443 found = False 2444 for j in range(len(het_data_coll)): 2445 # Different residue numbers. 2446 if het_data[index][i][0] != het_data_coll[j][0]: 2447 continue 2448 2449 # Different chain IDs. 2450 if het_data_coll[j][2] != het_data[index][i][2]: 2451 continue 2452 2453 # Sanity checks. 2454 if het_data_coll[j][1] != het_data[index][i][1]: 2455 raise RelaxError("The " + repr(het_data[index][i][1]) + " residue name of hetrogen " + repr(het_data[index][i][0]) + " " + het_data[index][i][1] + " of structure " + repr(index) + " does not match the " + repr(het_data_coll[j][1]) + " name of the previous structures.") 2456 elif het_data_coll[j][3] != het_data[index][i][3]: 2457 raise RelaxError("The " + repr(het_data[index][i][3]) + " atoms of hetrogen " + repr(het_data_coll[j][0]) + " " + het_data_coll[j][1] + " of structure " + repr(index) + " does not match the " + repr(het_data_coll[j][3]) + " of the previous structures.") 2458 elif het_data_coll[j][4] != het_data[index][i][4]: 2459 raise RelaxError("The atom counts " + repr(het_data[index][i][4]) + " for the hetrogen residue " + repr(het_data_coll[j][0]) + " " + het_data_coll[j][1] + " of structure " + repr(index) + " do not match the counts " + repr(het_data_coll[j][4]) + " of the previous structures.") 2460 2461 # Change the flag. 2462 found = True 2463 2464 # If there is no match, add the new residue to the collective. 2465 if not found: 2466 het_data_coll.append(het_data[index][i]) 2467 2468 # Increment the molecule index. 2469 index = index + 1 2470 2471 2472 # The HET records. 2473 ################## 2474 2475 # Print out. 2476 print("HET") 2477 2478 # Write the HET records. 2479 for het in het_data_coll: 2480 pdb_write.het(file, het_id=het[1], chain_id=het[2], seq_num=het[0], num_het_atoms=het[3]) 2481 2482 2483 # The HETNAM records. 2484 ##################### 2485 2486 # Print out. 2487 print("HETNAM") 2488 2489 # Loop over the non-standard residues. 2490 residues = [] 2491 for het in het_data_coll: 2492 # Test if the residue HETNAM record as already been written (otherwise store its name). 2493 if het[1] in residues: 2494 continue 2495 else: 2496 residues.append(het[1]) 2497 2498 # Get the chemical name. 2499 chemical_name = self._get_chemical_name(het[1]) 2500 if not chemical_name: 2501 chemical_name = 'Unknown' 2502 2503 # Write the HETNAM records. 2504 pdb_write.hetnam(file, het_id=het[1], text=chemical_name) 2505 2506 2507 # The FORMUL records. 2508 ##################### 2509 2510 # Print out. 2511 print("FORMUL") 2512 2513 # Loop over the non-standard residues and generate and write the chemical formula. 2514 residues = [] 2515 for i in range(len(het_data_coll)): 2516 # Alias. 2517 het = het_data_coll[i] 2518 2519 # Test if the residue HETNAM record as already been written (otherwise store its name). 2520 if het[1] in residues: 2521 continue 2522 else: 2523 residues.append(het[1]) 2524 2525 # Initialise the chemical formula. 2526 formula = '' 2527 2528 # Loop over the atoms. 2529 for atom_count in het[4]: 2530 formula = formula + atom_count[0] + repr(atom_count[1]) 2531 2532 # The FORMUL record (chemical formula). 2533 pdb_write.formul(file, comp_num=i+1, het_id=het[1], text=formula) 2534 2535 2536 ############################### 2537 # Secondary structure section # 2538 ############################### 2539 2540 # The HELIX records. 2541 #################### 2542 2543 if hasattr(self, 'helices') and len(self.helices): 2544 # Printout. 2545 print("HELIX") 2546 2547 # Loop over and unpack the helix data. 2548 index = 1 2549 for helix_id, init_chain_id, init_res_name, init_seq_num, end_chain_id, end_res_name, end_seq_num, helix_class, length in self.helices: 2550 pdb_write.helix(file, ser_num=index, helix_id=helix_id, init_chain_id=init_chain_id, init_res_name=init_res_name, init_seq_num=init_seq_num, end_chain_id=end_chain_id, end_res_name=end_res_name, end_seq_num=end_seq_num, helix_class=helix_class, length=length) 2551 index += 1 2552 2553 # The SHEET records. 2554 #################### 2555 2556 if hasattr(self, 'sheets') and len(self.sheets): 2557 # Printout. 2558 print("SHEET") 2559 2560 # Loop over and unpack the helix data. 2561 index = 1 2562 for strand, sheet_id, num_strands, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, sense, cur_atom, cur_res_name, cur_chain_id, cur_res_seq, cur_icode, prev_atom, prev_res_name, prev_chain_id, prev_res_seq, prev_icode in self.sheets: 2563 pdb_write.sheet(file, strand=strand, sheet_id=sheet_id, num_strands=num_strands, init_res_name=init_res_name, init_chain_id=init_chain_id, init_seq_num=init_seq_num, init_icode=init_icode, end_res_name=end_res_name, end_chain_id=end_chain_id, end_seq_num=end_seq_num, end_icode=end_icode, sense=sense, cur_atom=cur_atom, cur_res_name=cur_res_name, cur_chain_id=cur_chain_id, cur_res_seq=cur_res_seq, cur_icode=cur_icode, prev_atom=prev_atom, prev_res_name=prev_res_name, prev_chain_id=prev_chain_id, prev_res_seq=prev_res_seq, prev_icode=prev_icode) 2564 index += 1 2565 2566 2567 ###################### 2568 # Coordinate section # 2569 ###################### 2570 2571 # Loop over the models. 2572 for model in self.model_loop(model_num): 2573 # Initialise record counts. 2574 num_hetatm = 0 2575 num_atom = 0 2576 num_ter = 0 2577 ser_num = 1 2578 2579 # MODEL record, for multiple models. 2580 #################################### 2581 2582 if model_records: 2583 # Print out. 2584 print("\nMODEL %s" % model.num) 2585 2586 # Write the model record. 2587 pdb_write.model(file, serial=model.num) 2588 2589 2590 # Add the atomic coordinate records (ATOM, HETATM, and TER). 2591 ############################################################ 2592 2593 # Loop over the molecules. 2594 index = 0 2595 for mol in model.mol: 2596 # Print out. 2597 print("ATOM, HETATM, TER") 2598 2599 # Loop over the atomic data. 2600 atom_record = False 2601 for i in range(len(mol.atom_name)): 2602 # Write the ATOM record. 2603 if mol.pdb_record[i] in [None, 'ATOM']: 2604 atom_record = True 2605 2606 # The atom number, if missing. 2607 atom_num = mol.atom_num[i] 2608 if atom_num == None: 2609 atom_num = i + 1 2610 2611 # Handle the funky atom name alignment. From the PDB format documents: 2612 # "Alignment of one-letter atom name such as C starts at column 14, while two-letter atom name such as FE starts at column 13." 2613 if len(mol.atom_name[i]) == 1: 2614 atom_name = " %s" % mol.atom_name[i] 2615 else: 2616 atom_name = "%s" % mol.atom_name[i] 2617 2618 # Write out. 2619 pdb_write.atom(file, serial=atom_num, name=atom_name, res_name=mol.res_name[i], chain_id=CHAIN_ID_LIST[index], res_seq=mol.res_num[i], x=mol.x[i], y=mol.y[i], z=mol.z[i], occupancy=1.0, temp_factor=0, element=mol.element[i]) 2620 num_atom += 1 2621 ser_num += 1 2622 2623 # Info for the TER record. 2624 ter_num = atom_num + 1 2625 ter_name = mol.res_name[i] 2626 ter_chain_id = CHAIN_ID_LIST[index] 2627 ter_res_num = mol.res_num[i] 2628 2629 # Finish the ATOM section with the TER record. 2630 if atom_record: 2631 pdb_write.ter(file, serial=ser_num, res_name=ter_name, chain_id=CHAIN_ID_LIST[index], res_seq=ter_res_num) 2632 num_ter += 1 2633 ser_num += 1 2634 2635 # Loop over the atomic data. 2636 count_shift = False 2637 for i in range(len(mol.atom_name)): 2638 # Write the HETATM record. 2639 if mol.pdb_record[i] == 'HETATM': 2640 # The atom number, if missing. 2641 atom_num = mol.atom_num[i] 2642 if atom_num == None: 2643 atom_num = i + 1 2644 2645 # Increment the atom number if a TER record was created. 2646 if atom_record and atom_num == ter_num: 2647 count_shift = True 2648 if atom_record and count_shift: 2649 atom_num += 1 2650 2651 # Write out. 2652 pdb_write.hetatm(file, serial=ser_num, name=self._translate(mol.atom_name[i]), res_name=mol.res_name[i], chain_id=CHAIN_ID_LIST[index], res_seq=mol.res_num[i], x=mol.x[i], y=mol.y[i], z=mol.z[i], occupancy=1.0, temp_factor=0.0, element=mol.element[i]) 2653 num_hetatm += 1 2654 ser_num += 1 2655 2656 # Increment the molecule index. 2657 index += 1 2658 2659 2660 # ENDMDL record, for multiple structures. 2661 ######################################## 2662 2663 if model_records: 2664 print("ENDMDL") 2665 pdb_write.endmdl(file) 2666 2667 2668 # Create the CONECT records. 2669 ############################ 2670 2671 # Print out. 2672 print("CONECT") 2673 2674 # Initialise record counts. 2675 num_conect = 0 2676 2677 # The per molecule incremented atom counts. 2678 atom_counts = [0] 2679 index = 0 2680 for mol in self.structural_data[0].mol: 2681 if index == 0: 2682 atom_counts.append(len(mol.atom_name)) 2683 else: 2684 atom_counts.append(atom_counts[index] + len(mol.atom_name)) 2685 index += 1 2686 2687 # Loop over the molecules of the first model. 2688 index = 0 2689 for mol in self.structural_data[0].mol: 2690 # Loop over the atoms. 2691 for i in range(len(mol.atom_name)): 2692 # No bonded atoms, hence no CONECT record is required. 2693 if not len(mol.bonded[i]): 2694 continue 2695 2696 # Initialise some data structures. 2697 flush = 0 2698 bonded_index = 0 2699 bonded = ['', '', '', ''] 2700 bonded_shifted = ['', '', '', ''] 2701 2702 # Loop over the bonded atoms. 2703 for j in range(len(mol.bonded[i])): 2704 # End of the array, hence create the CONECT record in this iteration. 2705 if j == len(mol.bonded[i])-1: 2706 flush = True 2707 2708 # Only four covalently bonded atoms allowed in one CONECT record. 2709 if bonded_index == 3: 2710 flush = True 2711 2712 # Get the bonded atom index. 2713 bonded[bonded_index] = mol.bonded[i][j] 2714 2715 # Increment the bonded_index value. 2716 bonded_index = bonded_index + 1 2717 2718 # Generate the CONECT record and increment the counter. 2719 if flush: 2720 # Convert the atom indices to atom numbers. 2721 for k in range(4): 2722 if bonded[k] != '': 2723 if mol.atom_num[bonded[k]] != None: 2724 bonded[k] = mol.atom_num[bonded[k]] 2725 else: 2726 bonded[k] = bonded[k] + 1 2727 bonded_shifted[k] = bonded[k] + atom_counts[index] 2728 2729 # Write the CONECT record. 2730 pdb_write.conect(file, serial=i+1+atom_counts[index], bonded1=bonded_shifted[0], bonded2=bonded_shifted[1], bonded3=bonded_shifted[2], bonded4=bonded_shifted[3]) 2731 2732 # Reset the flush flag, the bonded atom count, and the bonded atom names. 2733 flush = False 2734 bonded_index = 0 2735 bonded = ['', '', '', ''] 2736 2737 # Increment the CONECT record count. 2738 num_conect = num_conect + 1 2739 2740 # Increment the molecule index. 2741 index += 1 2742 2743 2744 # MASTER record. 2745 ################ 2746 2747 print("\nMASTER") 2748 pdb_write.master(file, num_het=len(het_data_coll), num_coord=num_atom+num_hetatm, num_ter=num_ter, num_conect=num_conect) 2749 2750 2751 # END. 2752 ###### 2753 2754 print("END") 2755 pdb_write.end(file)

2756 2757

2758 - def validate(self):

2759 """Check the integrity of the structural data. 2760 2761 The number of molecules must be the same in all models. 2762 """ 2763 2764 # Reference number of molecules. 2765 num_mols = len(self.structural_data[0].mol) 2766 2767 # Loop over all other models. 2768 for i in range(1, len(self.structural_data)): 2769 # Model alias. 2770 model_i = self.structural_data[i] 2771 2772 # Size check. 2773 if num_mols != len(model_i.mol): 2774 raise RelaxError("The structural object is not valid - the number of molecules is not the same for all models.") 2775 2776 # Molecule name check. 2777 for j in range(len(model_i.mol)): 2778 if model_i.mol[j].mol_name != self.structural_data[0].mol[j].mol_name: 2779 raise RelaxError("The molecule name '%s' of model %s does not match the corresponding molecule '%s' of model %s." % (model_i.mol[j].mol_name, model_i.num, self.structural_data[0].mol[j].mol_name, self.structural_data[0].num))

Source Code for Module lib.structure.internal.object