lib.structure.internal.object

50 """The internal relax structural data object. 51 52 The structural data object for this class is a container possessing a number of different arrays corresponding to different structural information. These objects are described in the structural container docstring. 53 """ 54

55 - def __init__(self):

56 """Initialise the structural object.""" 57 58 # Initialise the empty model list. 59 self.structural_data = ModelList()

60 61

62 - def _bonded_atom(self, attached_atom, index, mol):

63 """Find the atom named attached_atom directly bonded to the atom located at the index. 64 65 @param attached_atom: The name of the attached atom to return. 66 @type attached_atom: str 67 @param index: The index of the atom which the attached atom is attached to. 68 @type index: int 69 @param mol: The molecule container. 70 @type mol: MolContainer instance 71 @return: A tuple of information about the bonded atom. 72 @rtype: tuple consisting of the atom number (int), atom name (str), element name (str), and atomic position (Numeric array of len 3) 73 """ 74 75 # Init. 76 bonded_found = False 77 78 # No bonded atoms, so determine the connectivities. 79 if not mol.bonded[index]: 80 # Determine the molecule type if needed. 81 if not hasattr(mol, 'type'): 82 self._mol_type(mol) 83 84 # Protein. 85 if mol.type == 'protein': 86 self._protein_connect(mol) 87 88 # Find everything within 2 Angstroms and say they are bonded. 89 else: 90 self._find_bonded_atoms(index, mol, radius=2) 91 92 # Loop over the bonded atoms. 93 matching_list = [] 94 for bonded_index in mol.bonded[index]: 95 if regex.search(mol.atom_name[bonded_index], attached_atom): 96 matching_list.append(bonded_index) 97 num_attached = len(matching_list) 98 99 # Problem. 100 if num_attached > 1: 101 # Get the atom names. 102 matching_names = [] 103 for i in matching_list: 104 matching_names.append(mol.atom_name[i]) 105 106 # Return nothing but a warning. 107 return None, None, None, None, None, 'More than one attached atom found: ' + repr(matching_names) 108 109 # No attached atoms. 110 if num_attached == 0: 111 return None, None, None, None, None, "No attached atom could be found" 112 113 # The bonded atom info. 114 index = matching_list[0] 115 bonded_num = mol.atom_num[index] 116 bonded_name = mol.atom_name[index] 117 element = mol.element[index] 118 pos = [mol.x[index], mol.y[index], mol.z[index]] 119 attached_name = mol.atom_name[index] 120 121 # Return the information. 122 return bonded_num, bonded_name, element, pos, attached_name, None

123 124

125 - def _find_bonded_atoms(self, index, mol, radius=1.2):

126 """Find all atoms within a sphere and say that they are attached to the central atom. 127 128 The found atoms will be added to the 'bonded' data structure. 129 130 131 @param index: The index of the central atom. 132 @type index: int 133 @param mol: The molecule container. 134 @type mol: MolContainer instance 135 """ 136 137 # Central atom info. 138 centre = array([mol.x[index], mol.y[index], mol.z[index]], float64) 139 140 # Atom loop. 141 dist_list = [] 142 connect_list = {} 143 element_list = {} 144 for i in range(len(mol.atom_num)): 145 # Skip proton to proton bonds! 146 if mol.element[index] == 'H' and mol.element[i] == 'H': 147 continue 148 149 # The atom's position. 150 pos = array([mol.x[i], mol.y[i], mol.z[i]], float64) 151 152 # The distance from the centre. 153 dist = linalg.norm(centre-pos) 154 155 # The atom is within the radius. 156 if dist < radius: 157 # Store the distance. 158 dist_list.append(dist) 159 160 # Store the atom index. 161 connect_list[dist] = i 162 163 # Store the element type. 164 element_list[dist] = mol.element[i] 165 166 # The maximum number of allowed covalent bonds. 167 max_conn = 1000 # Ridiculous default! 168 if mol.element[index] == 'H': 169 max_conn = 1 170 elif mol.element[index] == 'O': 171 max_conn = 2 172 elif mol.element[index] == 'N': 173 max_conn = 3 174 elif mol.element[index] == 'C': 175 max_conn = 4 176 177 # Sort. 178 dist_list.sort() 179 180 # Loop over the max number of connections (or the number of connected atoms, if less). 181 for i in range(min(max_conn, len(dist_list))): 182 mol.atom_connect(index, connect_list[dist_list[i]])

183 184

185 - def _get_chemical_name(self, hetID):

186 """Return the chemical name corresponding to the given residue ID. 187 188 The following names are currently returned:: 189 ________________________________________________ 190 | | | 191 | hetID | Chemical name | 192 |________|_____________________________________| 193 | | | 194 | TNS | Tensor | 195 | COM | Centre of mass | 196 | AXS | Tensor axes | 197 | SIM | Monte Carlo simulation tensor axes | 198 | PIV | Pivot point | 199 | CON | Cone object | 200 | AVE | Average vector | 201 |________|_____________________________________| 202 203 For any other residues, no description is returned. 204 205 @param hetID: The residue ID. 206 @type hetID: str 207 @return: The chemical name. 208 @rtype: str or None 209 """ 210 211 # Tensor. 212 if hetID == 'TNS': 213 return 'Tensor' 214 215 # Centre of mass. 216 if hetID == 'COM': 217 return 'Centre of mass' 218 219 # Tensor axes. 220 if hetID == 'AXS': 221 return 'Tensor axes' 222 223 # Monte Carlo simulation tensor axes. 224 if hetID == 'SIM': 225 return 'Monte Carlo simulation tensor axes' 226 227 # Pivot point. 228 if hetID == 'PIV': 229 return 'Pivot point' 230 231 # Cone object. 232 if hetID == 'CON': 233 return 'Cone' 234 235 # Average vector. 236 if hetID == 'AVE': 237 return 'Average vector'

238 239

240 - def _parse_pdb_connectivity_annotation(self, lines):

241 """Loop over and parse the PDB connectivity annotation records. 242 243 These are the records identified in the U{PDB version 3.30 documentation<http://www.wwpdb.org/documentation/format33/sect6.html>}. 244 245 246 @param lines: The lines of the PDB file excluding the sections prior to the connectivity annotation section. 247 @type lines: list of str 248 @return: The remaining PDB lines with the connectivity annotation records stripped. 249 @rtype: list of str 250 """ 251 252 # The ordered list of record names in the connectivity annotation section. 253 records = [ 254 'SSBOND', 255 'LINK ', 256 'CISPEP' 257 ] 258 259 # Loop over the lines. 260 for i in range(len(lines)): 261 # No match, therefore assume to be out of the connectivity annotation section. 262 if lines[i][:6] not in records: 263 break 264 265 # Return the remaining lines. 266 return lines[i:]

267 268

269 - def _parse_pdb_coord(self, lines):

270 """Generator function for looping over the models in the PDB file. 271 272 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect9.html}. 273 274 275 @param lines: The lines of the coordinate section. 276 @type lines: list of str 277 @return: The model number and all the records for that model. 278 @rtype: tuple of int and array of str 279 """ 280 281 # Init. 282 model = None 283 records = [] 284 285 # Loop over the data. 286 for i in range(len(lines)): 287 # A new model record. 288 if lines[i][:5] == 'MODEL': 289 try: 290 model = int(lines[i].split()[1]) 291 except: 292 raise RelaxError("The MODEL record " + repr(lines[i]) + " is corrupt, cannot read the PDB file.") 293 294 # Skip all records prior to the first ATOM or HETATM record. 295 if not (lines[i][:4] == 'ATOM' or lines[i][:6] == 'HETATM') and not len(records): 296 continue 297 298 # End of the model. 299 if lines[i][:6] == 'ENDMDL': 300 # Yield the info. 301 yield model, records 302 303 # Reset the records. 304 records = [] 305 306 # Skip the rest of this loop. 307 continue 308 309 # Append the line as a record of the model. 310 records.append(lines[i]) 311 312 # If records is not empty then there are no models, so yield the lot. 313 if len(records): 314 yield model, records

315 316

317 - def _parse_pdb_hetrogen(self, lines):

318 """Loop over and parse the PDB hetrogen records. 319 320 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect4.html}. 321 322 323 @param lines: The lines of the PDB file excluding the sections prior to the hetrogen section. 324 @type lines: list of str 325 @return: The remaining PDB lines with the hetrogen records stripped. 326 @rtype: list of str 327 """ 328 329 # The ordered list of record names in the hetrogen section. 330 records = [ 331 'HET ', 332 'FORMUL', 333 'HETNAM', 334 'HETSYN' 335 ] 336 337 # Loop over the lines. 338 for i in range(len(lines)): 339 # No match, therefore assume to be out of the hetrogen section. 340 if lines[i][:6] not in records: 341 break 342 343 # Return the remaining lines. 344 return lines[i:]

345 346

347 - def _parse_pdb_misc(self, lines):

348 """Loop over and parse the PDB miscellaneous records. 349 350 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect7.html}. 351 352 353 @param lines: The lines of the PDB file excluding the sections prior to the miscellaneous section. 354 @type lines: list of str 355 @return: The remaining PDB lines with the miscellaneous records stripped. 356 @rtype: list of str 357 """ 358 359 # The ordered list of record names in the miscellaneous section. 360 records = [ 361 'SITE ' 362 ] 363 364 # Loop over the lines. 365 for i in range(len(lines)): 366 # No match, therefore assume to be out of the miscellaneous section. 367 if lines[i][:6] not in records: 368 break 369 370 # Return the remaining lines. 371 return lines[i:]

372 373

374 - def _parse_pdb_prim_struct(self, lines):

375 """Loop over and parse the PDB primary structure records. 376 377 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect3.html}. 378 379 380 @param lines: The lines of the PDB file excluding the title section. 381 @type lines: list of str 382 @return: The remaining PDB lines with the primary structure records stripped. 383 @rtype: list of str 384 """ 385 386 # The ordered list of record names in the primary structure section. 387 records = [ 388 'DBREF ', 389 'DBREF1', 390 'DBREF2', 391 'SEQADV', 392 'SEQRES', 393 'MODRES' 394 ] 395 396 # Loop over the lines. 397 for i in range(len(lines)): 398 # No match, therefore assume to be out of the primary structure section. 399 if lines[i][:6] not in records: 400 break 401 402 # Return the remaining lines. 403 return lines[i:]

404 405

406 - def _parse_pdb_ss(self, lines):

407 """Loop over and parse the PDB secondary structure records. 408 409 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect5.html}. 410 411 412 @param lines: The lines of the PDB file excluding the sections prior to the secondary structure section. 413 @type lines: list of str 414 @return: The remaining PDB lines with the secondary structure records stripped. 415 @rtype: list of str 416 """ 417 418 # The ordered list of record names in the secondary structure section (the depreciated TURN record is also included to handle old PDB files). 419 records = [ 420 'HELIX ', 421 'SHEET ', 422 'TURN ' 423 ] 424 425 # Loop over the lines. 426 for i in range(len(lines)): 427 # No match, therefore assume to be out of the secondary structure section. 428 if lines[i][:6] not in records: 429 break 430 431 # A helix. 432 if lines[i][:5] == 'HELIX': 433 # Parse the record. 434 record_type, ser_num, helix_id, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, helix_class, comment, length = pdb_read.helix(lines[i]) 435 436 # Store the data. 437 if not hasattr(self, 'helices'): 438 self.helices = [] 439 self.helices.append([helix_id, init_chain_id, init_res_name, init_seq_num, end_chain_id, end_res_name, end_seq_num, helix_class, length]) 440 441 # A sheet. 442 if lines[i][:5] == 'SHEET': 443 # Parse the record. 444 record_type, strand, sheet_id, num_strands, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, sense, cur_atom, cur_res_name, cur_chain_id, cur_res_seq, cur_icode, prev_atom, prev_res_name, prev_chain_id, prev_res_seq, prev_icode = pdb_read.sheet(lines[i]) 445 446 # Store the data. 447 if not hasattr(self, 'sheets'): 448 self.sheets = [] 449 self.sheets.append([strand, sheet_id, num_strands, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, sense, cur_atom, cur_res_name, cur_chain_id, cur_res_seq, cur_icode, prev_atom, prev_res_name, prev_chain_id, prev_res_seq, prev_icode]) 450 451 # Return the remaining lines. 452 return lines[i:]

453 454

455 - def _parse_pdb_title(self, lines):

456 """Loop over and parse the PDB title records. 457 458 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect2.html}. 459 460 461 @param lines: All lines of the PDB file. 462 @type lines: list of str 463 @return: The remaining PDB lines with the title records stripped. 464 @rtype: list of str 465 """ 466 467 # The ordered list of (sometimes truncated) record names in the title section. 468 records = [ 469 'HEADER', 470 'OBSLTE', 471 'TITLE ', 472 'SPLT ', 473 'CAVEAT', 474 'COMPND', 475 'SOURCE', 476 'KEYWDS', 477 'EXPDTA', 478 'NUMMDL', 479 'MDLTYP', 480 'AUTHOR', 481 'REVDAT', 482 'SPRSDE', 483 'JRNL ', 484 'REMARK' 485 ] 486 487 # Loop over the lines. 488 for i in range(len(lines)): 489 # No match, therefore assume to be out of the title section. 490 if lines[i][:6] not in records: 491 break 492 493 # Return the remaining lines. 494 return lines[i:]

495 496

497 - def _parse_pdb_transform(self, lines):

498 """Loop over and parse the PDB transform records. 499 500 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/format33/sect8.html}. 501 502 503 @param lines: The lines of the PDB file excluding the sections prior to the transform section. 504 @type lines: list of str 505 @return: The remaining PDB lines with the transform records stripped. 506 @rtype: list of str 507 """ 508 509 # The ordered list of record names in the transform section. 510 records = [ 511 'CRYST', 512 'MTRIX', 513 'ORIGX', 514 'SCALE', 515 ] 516 517 # Loop over the lines. 518 for i in range(len(lines)): 519 # No match, therefore assume to be out of the transform section. 520 if lines[i][0: 5] not in records: 521 break 522 523 # Return the remaining lines. 524 return lines[i:]

525 526

527 - def _parse_models_xyz(self, file_path):

528 """Generator function for looping over the models in the XYZ file. 529 530 @param file_path: The full path of the XYZ file. 531 @type file_path: str 532 @return: The model number and all the records for that model. 533 @rtype: tuple of int and array of str 534 """ 535 536 # Open the file. 537 file = open_read_file(file_path) 538 lines = file.readlines() 539 file.close() 540 541 # Check for empty files. 542 if lines == []: 543 raise RelaxError("The XYZ file is empty.") 544 545 # Init. 546 total_atom = 0 547 model = 0 548 records = [] 549 550 # Loop over the data. 551 for i in range(len(lines)): 552 num=0 553 word = lines[i].split() 554 # Find the total atom number and the first model. 555 if (i==0) and (len(word)==1): 556 try: 557 total_atom = int(word[0]) 558 num = 1 559 except: 560 raise RelaxError("The MODEL record " + repr(lines[i]) + " is corrupt, cannot read the XYZ file.") 561 562 # End of the model. 563 if (len(records) == total_atom): 564 # Yield the info 565 yield records 566 567 # Reset the records. 568 records = [] 569 570 # Skip all records prior to atom coordinates record. 571 if (len(word) != 4): 572 continue 573 574 # Append the line as a record of the model. 575 records.append(lines[i]) 576 577 # If records is not empty then there are no models, so yield the lot. 578 if len(records): 579 yield records

580 581

582 - def _parse_mols(self, records):

583 """Generator function for looping over the molecules in the PDB records of a model. 584 585 @param records: The list of PDB records for the model, or if no models exist the entire PDB file. 586 @type records: list of str 587 @return: The molecule number and all the records for that molecule. 588 @rtype: tuple of int and list of str 589 """ 590 591 # Check for empty records. 592 if records == []: 593 raise RelaxError("There are no PDB records for this model.") 594 595 # Init. 596 mol_num = 1 597 mol_records = [] 598 end = False 599 600 # Loop over the data. 601 for i in range(len(records)): 602 # A PDB termination record. 603 if records[i][:3] == 'END': 604 break 605 606 # A master record, so we are done. 607 if records[i][:6] == 'MASTER': 608 break 609 610 # A model termination record. 611 if records[i][:6] == 'ENDMDL': 612 end = True 613 614 # A molecule termination record with no trailing HETATM. 615 elif i < len(records)-1 and records[i][:3] == 'TER' and not records[i+1][:6] == 'HETATM': 616 end = True 617 618 # A HETATM followed by an ATOM record. 619 elif i < len(records)-1 and records[i][:6] == 'HETATM' and records[i+1][:4] == 'ATOM': 620 end = True 621 622 # End. 623 if end: 624 # Yield the info. 625 yield mol_num, mol_records 626 627 # Reset the records. 628 mol_records = [] 629 630 # Increment the molecule number. 631 mol_num = mol_num + 1 632 633 # Reset the flag. 634 end = False 635 636 # Skip the rest of this loop. 637 continue 638 639 # Append the line as a record of the molecule. 640 mol_records.append(records[i]) 641 642 # If records is not empty then there is only a single molecule, so yield the lot. 643 if len(mol_records): 644 yield mol_num, mol_records

645 646

647 - def _pdb_chain_id_to_mol_index(self, chain_id=None):

648 """Convert the PDB chain ID into the molecule index in a regular way. 649 650 @keyword chain_id: The PDB chain ID string. 651 @type chain_id: str 652 @return: The corresponding molecule index. 653 @rtype: int 654 """ 655 656 # Initialise. 657 mol_index = 0 658 659 # Convert to the molecule index. 660 if chain_id: 661 mol_index = ascii_uppercase.index(chain_id) 662 663 # Return the index. 664 return mol_index

665 666

667 - def _residue_data(self, res_nums=None, res_names=None):

668 """Convert the residue info into a dictionary of unique residues with numbers as keys. 669 670 @keyword res_nums: The list of residue numbers. 671 @type res_nums: list of int 672 @keyword res_names: The list of residue names matching the numbers. 673 @type res_names: list of str 674 @return: The dictionary of residue names with residue numbers as keys. 675 @rtype: dict of str 676 """ 677 678 # Initialise. 679 data = {} 680 681 # Loop over the data. 682 for i in range(len(res_nums)): 683 # The residue data already exists. 684 if res_nums[i] in data: 685 continue 686 687 # Add the data. 688 data[res_nums[i]] = res_names[i] 689 690 # Return the dictionary. 691 return data

692 693

694 - def _validate_data_arrays(self, struct):

695 """Check the validity of the data arrays in the given structure object. 696 697 @param struct: The structural object. 698 @type struct: Structure_container instance 699 """ 700 701 # The number of atoms. 702 num = len(struct.atom_name) 703 704 # Check the other lengths. 705 if len(struct.bonded) != num and len(struct.chain_id) != num and len(struct.element) != num and len(struct.pdb_record) != num and len(struct.res_name) != num and len(struct.res_num) != num and len(struct.seg_id) != num and len(struct.x) != num and len(struct.y) != num and len(struct.z) != num: 706 raise RelaxError("The structural data is invalid.")

707 708

709 - def _mol_type(self, mol):

710 """Determine the type of molecule. 711 712 @param mol: The molecule data container. 713 @type mol: MolContainer instance 714 """ 715 716 # Amino acids. 717 aa = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'] 718 719 # Set the molecule type to default to 'other'. 720 mol.type = 'other' 721 722 # Loop over the residues. 723 for res in mol.res_name: 724 # Protein. 725 if res in aa: 726 # Set the molecule type and return. 727 mol.type = 'protein' 728 return

729 730

731 - def _protein_connect(self, mol):

732 """Set up the connectivities for the protein. 733 734 @param mol: The molecule data container. 735 @type mol: MolContainer instance 736 """ 737 738 # Initialise some residue data. 739 curr_res_num = None 740 res_atoms = [] 741 742 # Loop over all atoms. 743 for i in range(len(mol.atom_num)): 744 # New residue. 745 if mol.res_num[i] != curr_res_num: 746 # Intra-residue connectivites. 747 if len(res_atoms): 748 self._protein_intra_connect(mol, res_atoms) 749 750 # Update the residue number. 751 curr_res_num = mol.res_num[i] 752 753 # Reset the residue atom index list. 754 res_atoms = [] 755 756 # Add the atom index to the list. 757 res_atoms.append(i) 758 759 # Last atom. 760 if i == len(mol.atom_num) - 1 and len(res_atoms): 761 self._protein_intra_connect(mol, res_atoms)

762 763

764 - def _protein_intra_connect(self, mol, res_atoms):

765 """Set up the connectivities for the protein. 766 767 @param mol: The molecule data container. 768 @type mol: MolContainer instance 769 @param res_atoms: The list of atom indices corresponding to the residue. 770 @type res_atoms: list of int 771 """ 772 773 # Back bond connectivity. 774 indices = { 775 'N': None, 776 'C': None, 777 'O': None, 778 'CA': None, 779 'HN': None, 780 'H': None, # Same as HN. 781 'HA': None 782 } 783 784 # Loop over all atoms to find the indices. 785 for index in res_atoms: 786 if mol.atom_name[index] in indices: 787 indices[mol.atom_name[index]] = index 788 789 # Connect the atom pairs. 790 pairs = [ 791 ['N', 'HN'], 792 ['N', 'H'], 793 ['N', 'CA'], 794 ['CA', 'HA'], 795 ['CA', 'C'], 796 ['C', 'O'] 797 ] 798 799 # Loop over the atoms pairs and connect them. 800 for pair in pairs: 801 if indices[pair[0]] != None and indices[pair[1]] != None: 802 mol.atom_connect(indices[pair[0]], indices[pair[1]])

803 804

805 - def _translate(self, data, format='str'):

806 """Convert the data into a format for writing to file. 807 808 @param data: The data to convert to the required format. 809 @type data: anything 810 @keyword format: The format to convert to. This can be 'str', 'float', or 'int'. 811 @type format: str 812 @return: The converted version of the data. 813 @rtype: str 814 """ 815 816 # Conversion to string. 817 if format == 'str': 818 # None values. 819 if data == None: 820 data = '' 821 822 # Force convert to string. 823 if not isinstance(data, str): 824 data = repr(data) 825 826 # Conversion to float. 827 if format == 'float': 828 # None values. 829 if data == None: 830 data = 0.0 831 832 # Force convert to float. 833 if not isinstance(data, float): 834 data = float(data) 835 836 # Return the converted data. 837 return data

838 839

840 - def _trim_helix(self, helix=None, trim_res_list=[], res_data=None):

841 """Trim the given helix based on the list of deleted residue numbers. 842 843 @keyword helix: The single helix metadata structure. 844 @type helix: list 845 @keyword trim_res_list: The list of residue numbers which no longer exist. 846 @type trim_res_list: list of int 847 @keyword res_data: The dictionary of residue names with residue numbers as keys. 848 @type res_data: dict of str 849 @return: The trimmed helix metadata structure, or None if the whole helix is to be deleted. 850 @rtype: list or None 851 """ 852 853 # Unpack the helix residue numbers. 854 start_res = helix[3] 855 end_res = helix[6] 856 857 # The reverse residue list. 858 trim_res_list_rev = deepcopy(trim_res_list) 859 trim_res_list_rev.reverse() 860 861 # The helix residues. 862 helix_res = list(range(start_res, end_res+1)) 863 864 # Trim forwards. 865 for res_num in trim_res_list: 866 if res_num == start_res: 867 # Remove the residue. 868 helix_res.pop(0) 869 870 # No helix left. 871 if len(helix_res) == 0: 872 break 873 874 # Realias the starting residue. 875 start_res = helix_res[0] 876 877 # No helix left. 878 if len(helix_res) == 0: 879 return None 880 881 # Trim backwards. 882 for res_num in trim_res_list_rev: 883 if res_num == end_res: 884 helix_res.pop(-1) 885 end_res = helix_res[-1] 886 887 # Replace the starting and ending residues. 888 if start_res != helix[3]: 889 helix[3] = start_res 890 helix[2] = res_data[start_res] 891 if end_res != helix[6]: 892 helix[6] = end_res 893 helix[5] = res_data[end_res] 894 895 # The helix length. 896 helix[-1] = len(helix_res) 897 898 # Return the modified helix. 899 return helix

900 901

902 - def _trim_sheet(self, sheet=None, trim_res_list=[], res_data=None):

903 """Trim the given sheet based on the list of deleted residue numbers. 904 905 @keyword sheet: The single sheet metadata structure. 906 @type sheet: list 907 @keyword trim_res_list: The list of residue numbers which no longer exist. 908 @type trim_res_list: list of int 909 @keyword res_data: The dictionary of residue names with residue numbers as keys. 910 @type res_data: dict of str 911 @return: The trimmed sheet metadata structure, or None if the whole sheet is to be deleted. 912 @rtype: list or None 913 """ 914 915 # Unpack the sheet residue numbers. 916 start_res = sheet[5] 917 end_res = sheet[9] 918 919 # The reverse residue list. 920 trim_res_list_rev = deepcopy(trim_res_list) 921 trim_res_list_rev.reverse() 922 923 # The sheet residues. 924 sheet_res = list(range(start_res, end_res+1)) 925 926 # Trim forwards. 927 for res_num in trim_res_list: 928 if res_num == start_res: 929 # Remove the residue. 930 sheet_res.pop(0) 931 932 # No sheet left. 933 if len(sheet_res) == 0: 934 break 935 936 # Realias the starting residue. 937 start_res = sheet_res[0] 938 939 # No sheet left. 940 if len(sheet_res) == 0: 941 return None 942 943 # Trim backwards. 944 for res_num in trim_res_list_rev: 945 if res_num == end_res: 946 sheet_res.pop(-1) 947 end_res = sheet_res[-1] 948 949 # Replace the starting and ending residues. 950 if start_res != sheet[5]: 951 sheet[5] = start_res 952 sheet[3] = res_data[start_res] 953 if end_res != sheet[9]: 954 sheet[9] = end_res 955 sheet[7] = res_data[end_res] 956 957 # Return the modified sheet. 958 return sheet

959 960

961 - def add_atom(self, mol_name=None, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):

962 """Add a new atom to the structural data object. 963 964 @keyword mol_name: The name of the molecule. 965 @type mol_name: str 966 @keyword atom_name: The atom name, e.g. 'H1'. 967 @type atom_name: str or None 968 @keyword res_name: The residue name. 969 @type res_name: str or None 970 @keyword res_num: The residue number. 971 @type res_num: int or None 972 @keyword pos: The position vector of coordinates. If a rank-2 array is supplied, the length of the first dimension must match the number of models. 973 @type pos: rank-1 or rank-2 array or list of float 974 @keyword element: The element symbol. 975 @type element: str or None 976 @keyword atom_num: The atom number. 977 @type atom_num: int or None 978 @keyword chain_id: The chain identifier. 979 @type chain_id: str or None 980 @keyword segment_id: The segment identifier. 981 @type segment_id: str or None 982 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'. 983 @type pdb_record: str or None 984 """ 985 986 # Add a model if not present. 987 if len(self.structural_data) == 0: 988 self.add_model() 989 990 # Check the position. 991 if is_float(pos[0]): 992 if len(pos) != 3: 993 raise RelaxError("The single atomic position %s must be a 3D list." % pos) 994 else: 995 if len(pos) != len(self.structural_data): 996 raise RelaxError("The %s atomic positions does not match the %s models present." % (len(pos), len(self.structural_data))) 997 998 # Loop over each model. 999 for i in range(len(self.structural_data)): 1000 # Alias the model. 1001 model = self.structural_data[i] 1002 1003 # Specific molecule. 1004 mol = self.get_molecule(mol_name, model=model.num) 1005 1006 # Add the molecule, if it does not exist. 1007 if mol == None: 1008 self.add_molecule(name=mol_name) 1009 mol = self.get_molecule(mol_name, model=model.num) 1010 1011 # Split up the position if needed. 1012 if is_float(pos[0]): 1013 model_pos = pos 1014 else: 1015 model_pos = pos[i] 1016 1017 # Add the atom. 1018 mol.atom_add(atom_name=atom_name, res_name=res_name, res_num=res_num, pos=model_pos, element=element, atom_num=atom_num, chain_id=chain_id, segment_id=segment_id, pdb_record=pdb_record)

1019 1020

1021 - def add_model(self, model=None, coords_from=None):

1022 """Add a new model to the store. 1023 1024 The new model will be constructured with the structural information from the other models currently present. The coords_from argument allows the atomic positions to be taken from a certain model. If this argument is not set, then the atomic positions from the first model will be used. 1025 1026 @keyword model: The number of the model to create. 1027 @type model: int or None 1028 @keyword coords_from: The model number to take the coordinates from. 1029 @type coords_from: int or None 1030 @return: The model container. 1031 @rtype: ModelContainer instance 1032 """ 1033 1034 # Check if the model currently exists. 1035 if model != None: 1036 for i in range(len(self.structural_data)): 1037 if model == self.structural_data[i].num: 1038 raise RelaxError("The model '%s' already exists." % model) 1039 1040 # Add a new model. 1041 self.structural_data.add_item(model_num=model) 1042 1043 # The model to duplicate. 1044 if coords_from == None: 1045 coords_from = self.structural_data[0].num 1046 1047 # Construct the structural data for the model from the other models. 1048 for mol_name, res_num, res_name, atom_num, atom_name, element, pos in self.atom_loop(model_num=coords_from, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True): 1049 # Add the atom. 1050 self.add_atom(self, mol_name=mol_name, atom_name=atom_name, res_name=res_name, res_num=res_num, pos=pos, element=element, atom_num=atom_num) 1051 1052 # Return the model. 1053 return self.structural_data[-1]

1054 1055

1056 - def add_molecule(self, name=None):

1057 """Add a new molecule to the store. 1058 1059 @keyword name: The molecule identifier string. 1060 @type name: str 1061 """ 1062 1063 # Add a model if necessary. 1064 if len(self.structural_data) == 0: 1065 self.add_model() 1066 1067 # Loop over the models. 1068 for i in range(len(self.structural_data)): 1069 # Add the molecule. 1070 self.structural_data[i].mol.add_item(mol_name=name, mol_cont=MolContainer())

1071 1072

1073 - def are_bonded(self, atom_id1=None, atom_id2=None):

1074 """Determine if two atoms are directly bonded to each other. 1075 1076 @keyword atom_id1: The molecule, residue, and atom identifier string of the first atom. 1077 @type atom_id1: str 1078 @keyword atom_id2: The molecule, residue, and atom identifier string of the second atom. 1079 @type atom_id2: str 1080 @return: True if the atoms are directly bonded. 1081 @rtype: bool 1082 """ 1083 1084 # Generate the selection objects. 1085 sel_obj1 = Selection(atom_id1) 1086 sel_obj2 = Selection(atom_id2) 1087 1088 # Build the connectivities if needed. 1089 for mol in self.structural_data[0].mol: 1090 for i in range(len(mol.atom_num)): 1091 if not len(mol.bonded[i]): 1092 self._find_bonded_atoms(i, mol, radius=2) 1093 1094 # Loop over the molecules. 1095 for mol in self.structural_data[0].mol: 1096 # Skip non-matching molecules. 1097 if not sel_obj1.contains_mol(mol.mol_name): 1098 continue 1099 if not sel_obj2.contains_mol(mol.mol_name): 1100 continue 1101 1102 # Find the first atom. 1103 index1 = None 1104 for i in range(len(mol.atom_num)): 1105 # Skip a non-matching first atom. 1106 if sel_obj1.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 1107 index1 = i 1108 break 1109 1110 # Find the second atom. 1111 index2 = None 1112 for i in range(len(mol.atom_num)): 1113 # Skip a non-matching first atom. 1114 if sel_obj2.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 1115 index2 = i 1116 break 1117 1118 # Connectivities exist. 1119 if index1 < len(mol.bonded): 1120 if index2 in mol.bonded[index1]: 1121 return True 1122 else: 1123 return False

1124 1125

1126 - def atom_loop(self, atom_id=None, str_id=None, model_num=None, mol_name_flag=False, res_num_flag=False, res_name_flag=False, atom_num_flag=False, atom_name_flag=False, element_flag=False, pos_flag=False, index_flag=False, ave=False):

1127 """Generator function for looping over all atoms in the internal relax structural object. 1128 1129 This method should be designed as a U{generator<http://www.python.org/dev/peps/pep-0255/>}. It should loop over all atoms of the system yielding the following atomic information, if the corresponding flag is True, in tuple form: 1130 1131 1. Model number. 1132 2. Molecule name. 1133 3. Residue number. 1134 4. Residue name. 1135 5. Atom number. 1136 6. Atom name. 1137 7. The element name (its atomic symbol and optionally the isotope, e.g. 'N', 'Mg', '17O', '13C', etc). 1138 8. The position of the atom in Euclidean space. 1139 1140 1141 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be yielded. 1142 @type atom_id: str 1143 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number. If None, then all structures will be looped over. 1144 @type str_id: str, int, or None 1145 @keyword model_num: Only loop over a specific model. 1146 @type model_num: int or None 1147 @keyword mol_name_flag: A flag which if True will cause the molecule name to be yielded. 1148 @type mol_name_flag: bool 1149 @keyword res_num_flag: A flag which if True will cause the residue number to be yielded. 1150 @type res_num_flag: bool 1151 @keyword res_name_flag: A flag which if True will cause the residue name to be yielded. 1152 @type res_name_flag: bool 1153 @keyword atom_num_flag: A flag which if True will cause the atom number to be yielded. 1154 @type atom_num_flag: bool 1155 @keyword atom_name_flag: A flag which if True will cause the atom name to be yielded. 1156 @type atom_name_flag: bool 1157 @keyword element_flag: A flag which if True will cause the element name to be yielded. 1158 @type element_flag: bool 1159 @keyword pos_flag: A flag which if True will cause the atomic position to be yielded. 1160 @type pos_flag: bool 1161 @keyword index_flag: A flag which if True will cause the atomic index to be yielded. 1162 @type index_flag: bool 1163 @keyword ave: A flag which if True will result in this method returning the average atom properties across all loaded structures. 1164 @type ave: bool 1165 @return: A tuple of atomic information, as described in the docstring. 1166 @rtype: tuple consisting of optional molecule name (str), residue number (int), residue name (str), atom number (int), atom name(str), element name (str), and atomic position (array of len 3). 1167 """ 1168 1169 # Check that the structure is loaded. 1170 if not len(self.structural_data): 1171 raise RelaxNoPdbError 1172 1173 # Generate the selection object. 1174 sel_obj = None 1175 if atom_id: 1176 sel_obj = Selection(atom_id) 1177 1178 # Obtain all data from the first model (except the position data). 1179 model = self.structural_data[0] 1180 1181 # Loop over the molecules. 1182 for mol_index in range(len(model.mol)): 1183 mol = model.mol[mol_index] 1184 1185 # Skip non-matching molecules. 1186 if sel_obj and not sel_obj.contains_mol(mol.mol_name): 1187 continue 1188 1189 # Loop over all atoms. 1190 for i in range(len(mol.atom_name)): 1191 # Skip non-matching atoms. 1192 if sel_obj and not sel_obj.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 1193 continue 1194 1195 # Initialise. 1196 res_num = mol.res_num[i] 1197 res_name = mol.res_name[i] 1198 atom_num = mol.atom_num[i] 1199 atom_name = mol.atom_name[i] 1200 element = mol.element[i] 1201 1202 # The atom position. 1203 if pos_flag: 1204 # Average the position. 1205 if ave: 1206 # Initialise. 1207 pos = zeros(3, float64) 1208 1209 # Loop over the models. 1210 for j in range(len(self.structural_data)): 1211 # A single model. 1212 if model_num != None and self.structural_data[j].num != model_num: 1213 continue 1214 1215 # Alias. 1216 model_index = self.structural_data.model_indices[self.structural_data.model_list[j]] 1217 mol2 = self.structural_data[model_index].mol[mol_index] 1218 1219 # Some sanity checks. 1220 if mol2.atom_num[i] != atom_num: 1221 raise RelaxError("The loaded structures do not contain the same atoms. The average structural properties can not be calculated.") 1222 1223 # Sum the atom positions. 1224 pos = pos + array([mol2.x[i], mol2.y[i], mol2.z[i]], float64) 1225 1226 # Average the position array (divide by the number of models). 1227 pos = pos / len(self.structural_data) 1228 1229 # All positions. 1230 else: 1231 # Initialise. 1232 pos = [] 1233 1234 # Loop over the models. 1235 for j in range(len(self.structural_data)): 1236 # A single model. 1237 if model_num != None and self.structural_data[j].num != model_num: 1238 continue 1239 1240 # Alias. 1241 model_index = self.structural_data.model_indices[self.structural_data.model_list[j]] 1242 mol2 = self.structural_data[model_index].mol[mol_index] 1243 1244 # Append the position. 1245 pos.append([mol2.x[i], mol2.y[i], mol2.z[i]]) 1246 1247 # Convert. 1248 pos = array(pos, float64) 1249 1250 # The molecule name. 1251 mol_name = mol.mol_name 1252 1253 # Build the tuple to be yielded. 1254 atomic_tuple = () 1255 if mol_name_flag: 1256 atomic_tuple = atomic_tuple + (mol_name,) 1257 if res_num_flag: 1258 atomic_tuple = atomic_tuple + (res_num,) 1259 if res_name_flag: 1260 atomic_tuple = atomic_tuple + (res_name,) 1261 if atom_num_flag: 1262 atomic_tuple = atomic_tuple + (atom_num,) 1263 if atom_name_flag: 1264 atomic_tuple = atomic_tuple + (atom_name,) 1265 if element_flag: 1266 atomic_tuple = atomic_tuple + (element,) 1267 if pos_flag: 1268 atomic_tuple = atomic_tuple + (pos,) 1269 if index_flag: 1270 atomic_tuple += (i,) 1271 1272 # Yield the information. 1273 if len(atomic_tuple) == 1: 1274 atomic_tuple = atomic_tuple[0] 1275 yield atomic_tuple

1276 1277

1278 - def bond_vectors(self, attached_atom=None, model_num=None, mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, return_name=False, return_warnings=False):

1279 """Find the bond vector between the atoms of 'attached_atom' and 'atom_id'. 1280 1281 @keyword attached_atom: The name of the bonded atom. 1282 @type attached_atom: str 1283 @keyword model_num: The model of which to return the vectors from. If not supplied and multiple models exist, then vectors from all models will be returned. 1284 @type model_num: None or int 1285 @keyword mol_name: The name of the molecule that attached_atom belongs to. 1286 @type mol_name: str 1287 @keyword res_num: The number of the residue that attached_atom belongs to. 1288 @type res_num: str 1289 @keyword res_name: The name of the residue that attached_atom belongs to. 1290 @type res_name: str 1291 @keyword spin_num: The number of the spin that attached_atom is attached to. 1292 @type spin_num: str 1293 @keyword spin_name: The name of the spin that attached_atom is attached to. 1294 @type spin_name: str 1295 @keyword return_name: A flag which if True will cause the name of the attached atom to be returned together with the bond vectors. 1296 @type return_name: bool 1297 @keyword return_warnings: A flag which if True will cause warning messages to be returned. 1298 @type return_warnings: bool 1299 @return: The list of bond vectors for each model. 1300 @rtype: list of numpy arrays (or a tuple if return_name or return_warnings are set) 1301 """ 1302 1303 # Initialise some objects. 1304 vectors = [] 1305 attached_name = None 1306 warnings = None 1307 1308 # Use the first model for the atom matching. 1309 model = self.structural_data[0] 1310 1311 # Loop over the molecules. 1312 for mol_index in range(len(model.mol)): 1313 # Alias. 1314 mol = model.mol[mol_index] 1315 1316 # Skip non-matching molecules. 1317 if mol_name and mol_name != mol.mol_name: 1318 continue 1319 1320 # Find the atomic index of the base atom. 1321 index = None 1322 for i in range(len(mol.atom_name)): 1323 # Residues don't match. 1324 if (res_num != None and mol.res_num[i] != res_num) or (res_name != None and mol.res_name[i] != res_name): 1325 continue 1326 1327 # Atoms don't match. 1328 if (spin_num != None and mol.atom_num[i] != spin_num) or (spin_name != None and mol.atom_name[i] != spin_name): 1329 continue 1330 1331 # Update the index and stop searching. 1332 index = i 1333 break 1334 1335 # Found the atom. 1336 if index != None: 1337 # Loop over the models. 1338 for j in range(len(self.structural_data)): 1339 # A single model. 1340 if model_num != None and self.structural_data[j].num != model_num: 1341 continue 1342 1343 # Alias the molecule. 1344 model_index = self.structural_data.model_indices[self.structural_data.model_list[j]] 1345 mol = self.structural_data[model_index].mol[mol_index] 1346 1347 # Get the atom bonded to this model/molecule/residue/atom. 1348 bonded_num, bonded_name, element, pos, attached_name, warnings = self._bonded_atom(attached_atom, index, mol) 1349 1350 # No bonded atom. 1351 if (bonded_num, bonded_name, element) == (None, None, None): 1352 continue 1353 1354 # The bond vector. 1355 vector = array(pos, float64) - array([mol.x[index], mol.y[index], mol.z[index]], float64) 1356 1357 # Append the vector to the vectors array. 1358 vectors.append(vector) 1359 1360 # Not found. 1361 else: 1362 warnings = "Cannot find the atom in the structure" 1363 1364 # Build the tuple to be yielded. 1365 data = (vectors,) 1366 if return_name: 1367 data = data + (attached_name,) 1368 if return_warnings: 1369 data = data + (warnings,) 1370 1371 # Return the data. 1372 return data

1373 1374

1375 - def connect_atom(self, mol_name=None, index1=None, index2=None):

1376 """Connect two atoms in the structural data object. 1377 1378 @keyword mol_name: The name of the molecule. 1379 @type mol_name: str 1380 @keyword index1: The global index of the first atom. 1381 @type index1: str 1382 @keyword index2: The global index of the first atom. 1383 @type index2: str 1384 """ 1385 1386 # Add the molecule, if it does not exist. 1387 if self.get_molecule(mol_name) == None: 1388 self.add_molecule(name=mol_name) 1389 1390 # Loop over each model. 1391 for model in self.structural_data: 1392 # Specific molecule. 1393 mol = self.get_molecule(mol_name) 1394 1395 # Add the atom. 1396 mol.atom_connect(index1=index1, index2=index2)

1397 1398

1399 - def delete(self, atom_id=None):

1400 """Deletion of structural information. 1401 1402 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. If not given, then all structural data will be deleted. 1403 @type atom_id: str or None 1404 """ 1405 1406 # All data. 1407 if atom_id == None: 1408 # Print out. 1409 print("Deleting the following structural data:\n") 1410 print(self.structural_data) 1411 1412 # Delete the structural data. 1413 del self.structural_data 1414 1415 # Initialise the empty model list. 1416 self.structural_data = ModelList() 1417 1418 # Atom subset deletion. 1419 else: 1420 # Generate the selection object. 1421 sel_obj = None 1422 if atom_id: 1423 sel_obj = Selection(atom_id) 1424 1425 # Loop over the models. 1426 del_res_nums = [] 1427 for model in self.model_loop(): 1428 # Loop over the molecules. 1429 for mol_index in range(len(model.mol)): 1430 mol = model.mol[mol_index] 1431 1432 # Skip non-matching molecules. 1433 if sel_obj and not sel_obj.contains_mol(mol.mol_name): 1434 continue 1435 1436 # Loop over the atoms. 1437 indices = [] 1438 for i in self.atom_loop(atom_id=atom_id, model_num=model.num, index_flag=True): 1439 indices.append(i) 1440 1441 # Generate a residue data dictionary for the metadata trimming (prior to atom deletion). 1442 res_data = self._residue_data(res_nums=mol.res_num, res_names=mol.res_name) 1443 1444 # Loop over the reverse indices and pop out the data. 1445 indices.reverse() 1446 for i in indices: 1447 mol.atom_num.pop(i) 1448 mol.atom_name.pop(i) 1449 mol.bonded.pop(i) 1450 mol.chain_id.pop(i) 1451 mol.element.pop(i) 1452 mol.pdb_record.pop(i) 1453 mol.res_name.pop(i) 1454 res_num = mol.res_num.pop(i) 1455 mol.seg_id.pop(i) 1456 mol.x.pop(i) 1457 mol.y.pop(i) 1458 mol.z.pop(i) 1459 1460 # The residue no longer exists. 1461 if res_num not in mol.res_num and res_num not in del_res_nums: 1462 del_res_nums.append(res_num) 1463 1464 # Nothing more to do. 1465 if not len(del_res_nums): 1466 return 1467 1468 # Fix the deleted residue number order. 1469 del_res_nums.reverse() 1470 1471 # Handle the helix metadata. 1472 del_helix_indices = [] 1473 for i in range(len(self.helices)): 1474 # Trim the helix. 1475 helix = self._trim_helix(helix=self.helices[i], trim_res_list=del_res_nums, res_data=res_data) 1476 1477 # Trimmed helix. 1478 if helix != None: 1479 self.helices[i] = helix 1480 1481 # No helix left. 1482 else: 1483 del_helix_indices.append(i) 1484 1485 # Loop over the reverse helix indices and pop out the data. 1486 del_helix_indices.reverse() 1487 for i in del_helix_indices: 1488 self.helices.pop(i) 1489 1490 # Handle the sheet metadata. 1491 del_sheet_indices = [] 1492 for i in range(len(self.sheets)): 1493 # Trim the sheet. 1494 sheet = self._trim_sheet(sheet=self.sheets[i], trim_res_list=del_res_nums, res_data=res_data) 1495 1496 # Trimmed sheet. 1497 if sheet != None: 1498 self.sheets[i] = sheet 1499 1500 # No sheet left. 1501 else: 1502 del_sheet_indices.append(i) 1503 1504 # Loop over the reverse sheet indices and pop out the data. 1505 del_sheet_indices.reverse() 1506 for i in del_sheet_indices: 1507 self.sheets.pop(i)

1508 1509

1510 - def empty(self):

1511 """Report if the structural data structure is empty or not. 1512 1513 @return: True if empty, False otherwise. 1514 @rtype: bool 1515 """ 1516 1517 # Check the ModelList structure. 1518 if len(self.structural_data) == 0: 1519 return True 1520 else: 1521 return False

1522 1523

1524 - def from_xml(self, str_node, dir=None, file_version=1):

1525 """Recreate the structural object from the XML structural object node. 1526 1527 @param str_node: The structural object XML node. 1528 @type str_node: xml.dom.minicompat.Element instance 1529 @keyword dir: The name of the directory containing the results file. 1530 @type dir: str 1531 @keyword file_version: The relax XML version of the XML file. 1532 @type file_version: int 1533 """ 1534 1535 # Recreate all base objects (i.e. metadata). 1536 xml_to_object(str_node, self, file_version=file_version, blacklist=['model', 'displacements']) 1537 1538 # Recreate the model / molecule data structure. 1539 model_nodes = str_node.getElementsByTagName('model') 1540 self.structural_data.from_xml(model_nodes, file_version=file_version) 1541 1542 # The displacement structure. 1543 disp_nodes = str_node.getElementsByTagName('displacements') 1544 if len(disp_nodes): 1545 # Initialise the object. 1546 self.displacements = Displacements() 1547 1548 # Recreate the molecule data structures for the current model. 1549 self.displacements.from_xml(disp_nodes[0], file_version=file_version)

1550 1551

1552 - def get_model(self, model):

1553 """Return or create the model. 1554 1555 @param model: The model number. 1556 @type model: int or None 1557 @return: The ModelContainer corresponding to the model number or that newly created. 1558 @rtype: ModelContainer instance 1559 """ 1560 1561 # Check if the target is a single model. 1562 if model == None and self.num_models() > 1: 1563 raise RelaxError("The target model cannot be determined as there are %s models already present." % self.num_modes()) 1564 1565 # No model specified. 1566 if model == None: 1567 # Create the first model, if necessary. 1568 if self.num_models(): 1569 self.structural_data.add_item() 1570 1571 # Alias the first model. 1572 model_cont = self.structural_data[0] 1573 1574 # The model has been specified. 1575 else: 1576 # Get the preexisting model. 1577 found = False 1578 for model_cont in self.structural_data: 1579 if model_cont.num == model: 1580 found = True 1581 break 1582 1583 # Add the model if it doesn't exist. 1584 if not found: 1585 self.structural_data.add_item(model) 1586 model_cont = self.structural_data[-1]

1587 1588

1589 - def get_molecule(self, molecule, model=None):

1590 """Return the molecule. 1591 1592 Only one model can be specified. 1593 1594 1595 @param molecule: The molecule name. 1596 @type molecule: int or None 1597 @keyword model: The model number. 1598 @type model: int or None 1599 @raises RelaxError: If the model is not specified and there is more than one model loaded. 1600 @return: The MolContainer corresponding to the molecule name and model number. 1601 @rtype: MolContainer instance or None 1602 """ 1603 1604 # Check if the target is a single molecule. 1605 if model == None and self.num_models() > 1: 1606 raise RelaxError("The target molecule cannot be determined as there are %s models already present." % self.num_models()) 1607 1608 # Check the model argument. 1609 if not isinstance(model, int) and not model == None: 1610 raise RelaxNoneIntError 1611 1612 # No models. 1613 if not len(self.structural_data): 1614 return 1615 1616 # Loop over the models. 1617 for model_cont in self.model_loop(model): 1618 # Loop over the molecules. 1619 for mol in model_cont.mol: 1620 # Return the matching molecule. 1621 if mol.mol_name == molecule: 1622 return mol

1623 1624

1625 - def load_pdb(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, alt_loc=None, verbosity=False, merge=False):

1626 """Method for loading structures from a PDB file. 1627 1628 @param file_path: The full path of the PDB file. 1629 @type file_path: str 1630 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are determined differently by the different parsers, but are numbered consecutively from 1. If set to None, then all molecules will be loaded. 1631 @type read_mol: None, int, or list of int 1632 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 1633 @type set_mol_name: None, str, or list of str 1634 @keyword read_model: The PDB model to extract from the file. If set to None, then all models will be loaded. 1635 @type read_model: None, int, or list of int 1636 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the PDB model numbers will be preserved, if they exist. 1637 @type set_model_num: None, int, or list of int 1638 @keyword alt_loc: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use. 1639 @type alt_loc: str or None 1640 @keyword verbosity: A flag which if True will cause messages to be printed. 1641 @type verbosity: bool 1642 @keyword merge: A flag which if set to True will try to merge the PDB structure into the currently loaded structures. 1643 @type merge: bool 1644 @return: The status of the loading of the PDB file. 1645 @rtype: bool 1646 """ 1647 1648 # Initial printout. 1649 if verbosity: 1650 print("\nInternal relax PDB parser.") 1651 1652 # Test if the file exists. 1653 if not access(file_path, F_OK): 1654 # Exit indicating failure. 1655 return False 1656 1657 # Separate the file name and path. 1658 path, file = os.path.split(file_path) 1659 1660 # The absolute path. 1661 path_abs = abspath(curdir) + sep + path 1662 1663 # Convert the structure reading args into lists. 1664 if read_mol and not isinstance(read_mol, list): 1665 read_mol = [read_mol] 1666 if set_mol_name and not isinstance(set_mol_name, list): 1667 set_mol_name = [set_mol_name] 1668 if read_model and not isinstance(read_model, list): 1669 read_model = [read_model] 1670 if set_model_num and not isinstance(set_model_num, list): 1671 set_model_num = [set_model_num] 1672 1673 # Open the PDB file. 1674 pdb_file = open_read_file(file_path) 1675 pdb_lines = pdb_file.readlines() 1676 pdb_file.close() 1677 1678 # Check for empty files. 1679 if pdb_lines == []: 1680 raise RelaxError("The PDB file is empty.") 1681 1682 # Process the different sections. 1683 pdb_lines = self._parse_pdb_title(pdb_lines) 1684 pdb_lines = self._parse_pdb_prim_struct(pdb_lines) 1685 pdb_lines = self._parse_pdb_hetrogen(pdb_lines) 1686 pdb_lines = self._parse_pdb_ss(pdb_lines) 1687 pdb_lines = self._parse_pdb_connectivity_annotation(pdb_lines) 1688 pdb_lines = self._parse_pdb_misc(pdb_lines) 1689 pdb_lines = self._parse_pdb_transform(pdb_lines) 1690 1691 # Loop over all models in the PDB file. 1692 model_index = 0 1693 orig_model_num = [] 1694 mol_conts = [] 1695 for model_num, model_records in self._parse_pdb_coord(pdb_lines): 1696 # Only load the desired model. 1697 if read_model and model_num not in read_model: 1698 continue 1699 1700 # Store the original model number. 1701 orig_model_num.append(model_num) 1702 1703 # Loop over the molecules of the model. 1704 mol_conts.append([]) 1705 mol_index = 0 1706 orig_mol_num = [] 1707 new_mol_name = [] 1708 for mol_num, mol_records in self._parse_mols(model_records): 1709 # Only load the desired model. 1710 if read_mol and mol_num not in read_mol: 1711 continue 1712 1713 # Set the target molecule name. 1714 if set_mol_name: 1715 new_mol_name.append(set_mol_name[mol_index]) 1716 else: 1717 # Number of structures already present for the model. 1718 num_struct = 0 1719 for model in self.structural_data: 1720 if not set_model_num or (model_index <= len(set_model_num) and set_model_num[model_index] == model.num): 1721 num_struct = len(model.mol) 1722 1723 # Set the name to the file name plus the structure number. 1724 new_mol_name.append(file_root(file) + '_mol' + repr(mol_num+num_struct)) 1725 1726 # Store the original mol number. 1727 orig_mol_num.append(mol_num) 1728 1729 # Generate the molecule container. 1730 mol = MolContainer() 1731 1732 # Fill the molecular data object. 1733 mol.fill_object_from_pdb(mol_records, alt_loc_select=alt_loc) 1734 1735 # Store the molecule container. 1736 mol_conts[model_index].append(mol) 1737 1738 # Increment the molecule index. 1739 mol_index = mol_index + 1 1740 1741 # Increment the model index. 1742 model_index = model_index + 1 1743 1744 # No data, so throw a warning and exit. 1745 if not len(mol_conts): 1746 warn(RelaxWarning("No structural data could be read from the file '%s'." % file_path)) 1747 return False 1748 1749 # Create the structural data data structures. 1750 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=orig_mol_num, set_mol_name=new_mol_name, file_name=file, file_path=path, file_path_abs=path_abs, merge=merge) 1751 1752 # Loading worked. 1753 return True

1754 1755

1756 - def load_xyz(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, verbosity=False):

1757 """Method for loading structures from a XYZ file. 1758 1759 @param file_path: The full path of the XYZ file. 1760 @type file_path: str 1761 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are determined differently by the different parsers, but are numbered consecutively from 1. If set to None, then all molecules will be loaded. 1762 @type read_mol: None, int, or list of int 1763 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 1764 @type set_mol_name: None, str, or list of str 1765 @keyword read_model: The XYZ model to extract from the file. If set to None, then all models will be loaded. 1766 @type read_model: None, int, or list of int 1767 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the XYZ model numbers will be preserved, if they exist. 1768 @type set_model_num: None, int, or list of int 1769 @keyword verbosity: A flag which if True will cause messages to be printed. 1770 @type verbosity: bool 1771 @return: The status of the loading of the XYZ file. 1772 @rtype: bool 1773 """ 1774 1775 # Initial printout. 1776 if verbosity: 1777 print("\nInternal relax XYZ parser.") 1778 1779 # Test if the file exists. 1780 if not access(file_path, F_OK): 1781 # Exit indicating failure. 1782 return False 1783 1784 # Separate the file name and path. 1785 path, file = os.path.split(file_path) 1786 1787 # The absolute path. 1788 path_abs = abspath(curdir) + sep + path 1789 1790 # Convert the structure reading args into lists. 1791 if read_mol and not isinstance(read_mol, list): 1792 read_mol = [read_mol] 1793 if set_mol_name and not isinstance(set_mol_name, list): 1794 set_mol_name = [set_mol_name] 1795 if read_model and not isinstance(read_model, list): 1796 read_model = [read_model] 1797 if set_model_num and not isinstance(set_model_num, list): 1798 set_model_num = [set_model_num] 1799 1800 # Loop over all models in the XYZ file. 1801 mol_index = 0 1802 model_index = 0 1803 xyz_model_increment = 0 1804 orig_model_num = [] 1805 mol_conts = [] 1806 orig_mol_num = [] 1807 new_mol_name = [] 1808 for model_records in self._parse_models_xyz(file_path): 1809 # Increment the xyz_model_increment 1810 xyz_model_increment = xyz_model_increment +1 1811 1812 # Only load the desired model. 1813 if read_model and xyz_model_increment not in read_model: 1814 continue 1815 1816 # Store the original model number. 1817 orig_model_num.append(model_index) 1818 1819 # Loop over the molecules of the model. 1820 if read_mol and mol_index not in read_mol: 1821 continue 1822 1823 # Set the target molecule name. 1824 if set_mol_name: 1825 new_mol_name.append(set_mol_name[mol_index]) 1826 else: 1827 if mol_index == 0: 1828 #Set the name to the file name plus the structure number. 1829 new_mol_name.append(file_root(file) + '_mol' + repr(mol_index+1)) 1830 1831 # Store the original mol number. 1832 orig_mol_num.append(mol_index) 1833 1834 # Generate the molecule container. 1835 mol = MolContainer() 1836 1837 # Fill the molecular data object. 1838 mol.fill_object_from_xyz(model_records) 1839 1840 # Store the molecule container. 1841 mol_conts.append([]) 1842 mol_conts[model_index].append(mol) 1843 1844 # Increment the molecule index. 1845 mol_index = mol_index + 1 1846 1847 # Increment the model index. 1848 model_index = model_index + 1 1849 1850 # Create the structural data data structures. 1851 orig_mol_num = [0] 1852 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=orig_mol_num, set_mol_name=new_mol_name, file_name=file, file_path=path, file_path_abs=path_abs) 1853 1854 # Loading worked. 1855 return True

1856 1857

1858 - def model_loop(self, model=None):

1859 """Generator method for looping over the models in numerical order. 1860 1861 @keyword model: Limit the loop to a single number. 1862 @type model: int 1863 @return: The model structural object. 1864 @rtype: ModelContainer container 1865 """ 1866 1867 # A single model. 1868 if model: 1869 for i in range(len(self.structural_data)): 1870 if self.structural_data[i].num == model: 1871 yield self.structural_data[i] 1872 1873 # All models. 1874 else: 1875 # The models. 1876 model_nums = [] 1877 for i in range(len(self.structural_data)): 1878 if self.structural_data[i].num != None: 1879 model_nums.append(self.structural_data[i].num) 1880 1881 # Sort. 1882 if model_nums: 1883 model_nums.sort() 1884 1885 # Loop over the models in order. 1886 for model_num in model_nums: 1887 # Find the model. 1888 for i in range(len(self.structural_data)): 1889 # Yield the model. 1890 if self.structural_data[i].num == model_num: 1891 yield self.structural_data[i] 1892 1893 # No models, so just yield the single container. 1894 if not model_nums: 1895 yield self.structural_data[0]

1896 1897

1898 - def num_models(self):

1899 """Method for returning the number of models. 1900 1901 @return: The number of models in the structural object. 1902 @rtype: int 1903 """ 1904 1905 return len(self.structural_data)

1906 1907

1908 - def num_molecules(self):

1909 """Method for returning the number of molecules. 1910 1911 @return: The number of molecules in the structural object. 1912 @rtype: int 1913 """ 1914 1915 # No data. 1916 if self.empty(): 1917 return 0 1918 1919 # Validate the structural object. 1920 self.validate() 1921 1922 # Return the number. 1923 return len(self.structural_data[0].mol)

1924 1925

1926 - def pack_structs(self, data_matrix, orig_model_num=None, set_model_num=None, orig_mol_num=None, set_mol_name=None, file_name=None, file_path=None, file_path_abs=None, merge=False):

1927 """From the given structural data, expand the structural data data structure. 1928 1929 @param data_matrix: A matrix of structural objects. 1930 @type data_matrix: list of lists of structural objects 1931 @keyword orig_model_num: The original model numbers (for storage). 1932 @type orig_model_num: list of int 1933 @keyword set_model_num: The new model numbers (for model renumbering). 1934 @type set_model_num: list of int 1935 @keyword orig_mol_num: The original molecule numbers (for storage). 1936 @type orig_mol_num: list of int 1937 @keyword set_mol_name: The new molecule names. 1938 @type set_mol_name: list of str 1939 @keyword file_name: The name of the file from which the molecular data has been extracted. 1940 @type file_name: None or str 1941 @keyword file_path: The full path to the file specified by 'file_name'. 1942 @type file_path: None or str 1943 @keyword file_path_abs: The absolute path to the file specified by 'file_name'. This is a fallback mechanism in case results or save files are located somewhere other than the working directory. 1944 @type file_path_abs: None or str 1945 @keyword merge: A flag which if set to True will try to merge the structure into the currently loaded structures. 1946 @type merge: bool 1947 """ 1948 1949 # Test the number of models. 1950 if len(orig_model_num) != len(data_matrix): 1951 raise RelaxError("Structural data mismatch, %s original models verses %s in the structural data." % (len(orig_model_num), len(data_matrix))) 1952 1953 # Test the number of molecules. 1954 if len(orig_mol_num) != len(data_matrix[0]): 1955 raise RelaxError("Structural data mismatch, %s original molecules verses %s in the structural data." % (len(orig_mol_num), len(data_matrix[0]))) 1956 1957 # Model numbers do not change. 1958 if not set_model_num: 1959 set_model_num = orig_model_num 1960 1961 # Test the model mapping. 1962 if len(set_model_num) != len(data_matrix): 1963 raise RelaxError("Failure of the mapping of new model numbers, %s new model numbers verses %s models in the structural data." % (len(set_model_num), len(data_matrix))) 1964 1965 # Test the molecule mapping. 1966 if len(set_mol_name) != len(data_matrix[0]): 1967 raise RelaxError("Failure of the mapping of new molecule names, %s new molecule names verses %s molecules in the structural data." % (len(set_mol_name), len(data_matrix[0]))) 1968 1969 # Test that the target models and structures are absent, and get the already existing model numbers. 1970 current_models = [] 1971 for i in range(len(self.structural_data)): 1972 # Create a list of current models. 1973 current_models.append(self.structural_data[i].num) 1974 1975 # Loop over the structures. 1976 for j in range(len(self.structural_data[i].mol)): 1977 if not merge and self.structural_data[i].num in set_model_num and self.structural_data[i].mol[j].mol_name in set_mol_name: 1978 raise RelaxError("The molecule '%s' of model %s already exists." % (self.structural_data[i].mol[j].mol_name, self.structural_data[i].num)) 1979 1980 # Loop over the models. 1981 for i in range(len(set_model_num)): 1982 # The model doesn't currently exist. 1983 if set_model_num[i] not in current_models: 1984 # Create the model. 1985 self.structural_data.add_item(set_model_num[i]) 1986 1987 # Add the model number to the current_models list. 1988 current_models.append(set_model_num[i]) 1989 1990 # Get the model. 1991 model = self.structural_data[-1] 1992 1993 # Otherwise get the pre-existing model. 1994 else: 1995 model = self.structural_data[current_models.index(set_model_num[i])] 1996 1997 # Loop over the molecules. 1998 for j in range(len(set_mol_name)): 1999 # Print out. 2000 if merge: 2001 print("Merging with model %s of molecule '%s' (from the original molecule number %s of model %s)" % (set_model_num[i], set_mol_name[j], orig_mol_num[j], orig_model_num[i])) 2002 else: 2003 print("Adding molecule '%s' to model %s (from the original molecule number %s of model %s)" % (set_mol_name[j], set_model_num[i], orig_mol_num[j], orig_model_num[i])) 2004 2005 # The index of the new molecule to add or merge. 2006 index = len(model.mol) 2007 if merge: 2008 index -= 1 2009 2010 # Consistency check. 2011 if model.num != self.structural_data[0].num and self.structural_data[0].mol[index].mol_name != set_mol_name[j]: 2012 raise RelaxError("The new molecule name of '%s' in model %s does not match the corresponding molecule's name of '%s' in model %s." % (set_mol_name[j], set_model_num[i], self.structural_data[0].mol[index].mol_name, self.structural_data[0].num)) 2013 2014 # Pack the structures. 2015 if merge: 2016 mol = model.mol.merge_item(mol_name=set_mol_name[j], mol_cont=data_matrix[i][j]) 2017 else: 2018 mol = model.mol.add_item(mol_name=set_mol_name[j], mol_cont=data_matrix[i][j]) 2019 2020 # Set the molecule name and store the structure file info. 2021 mol.mol_name = set_mol_name[j] 2022 mol.file_name = file_name 2023 mol.file_path = file_path 2024 mol.file_path_abs = file_path_abs 2025 mol.file_mol_num = orig_mol_num[j] 2026 mol.file_model = orig_model_num[i]

2027 2028

2029 - def rotate(self, R=None, origin=None, model=None, atom_id=None):

2030 """Rotate the structural information about the given origin. 2031 2032 @keyword R: The forwards rotation matrix. 2033 @type R: numpy 3D, rank-2 array 2034 @keyword origin: The origin of the rotation. 2035 @type origin: numpy 3D, rank-1 array 2036 @keyword model: The model to rotate. If None, all models will be rotated. 2037 @type model: int 2038 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 2039 @type atom_id: str or None 2040 """ 2041 2042 # Generate the selection object. 2043 sel_obj = None 2044 if atom_id: 2045 sel_obj = Selection(atom_id) 2046 2047 # Loop over the models. 2048 for model_cont in self.model_loop(model): 2049 # Loop over the molecules. 2050 for mol in model_cont.mol: 2051 # Skip non-matching molecules. 2052 if sel_obj and not sel_obj.contains_mol(mol.mol_name): 2053 continue 2054 2055 # Loop over the atoms. 2056 for i in range(len(mol.atom_num)): 2057 # Skip non-matching atoms. 2058 if sel_obj and not sel_obj.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 2059 continue 2060 2061 # The origin to atom vector. 2062 vect = array([mol.x[i], mol.y[i], mol.z[i]], float64) - origin 2063 2064 # Rotation. 2065 rot_vect = dot(R, vect) 2066 2067 # The new position. 2068 pos = rot_vect + origin 2069 mol.x[i] = pos[0] 2070 mol.y[i] = pos[1] 2071 mol.z[i] = pos[2]

2072 2073

2074 - def target_mol_name(self, set=None, target=None, index=None, mol_num=None, file=None):

2075 """Add the new molecule name to the target data structure. 2076 2077 @keyword set: The list of new molecule names. If not supplied, the names will be generated from the file name. 2078 @type set: None or list of str 2079 @keyword target: The target molecule name data structure to which the new name will be appended. 2080 @type target: list 2081 @keyword index: The molecule index, matching the set argument. 2082 @type index: int 2083 @keyword mol_num: The molecule number. 2084 @type mol_num: int 2085 @keyword file: The name of the file, excluding all directories. 2086 @type file: str 2087 """ 2088 2089 # Set the target molecule name. 2090 if set: 2091 target.append(set[index]) 2092 else: 2093 # Set the name to the file name plus the structure number. 2094 target.append(file_root(file) + '_mol' + repr(mol_num))

2095 2096

2097 - def translate(self, T=None, model=None, atom_id=None):

2098 """Displace the structural information by the given translation vector. 2099 2100 @keyword T: The translation vector. 2101 @type T: numpy 3D, rank-1 array 2102 @keyword model: The model to rotate. If None, all models will be rotated. 2103 @type model: int 2104 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 2105 @type atom_id: str or None 2106 """ 2107 2108 # Generate the selection object. 2109 sel_obj = None 2110 if atom_id: 2111 sel_obj = Selection(atom_id) 2112 2113 # Loop over the models. 2114 for model_cont in self.model_loop(model): 2115 # Loop over the molecules. 2116 for mol in model_cont.mol: 2117 # Skip non-matching molecules. 2118 if sel_obj and not sel_obj.contains_mol(mol.mol_name): 2119 continue 2120 2121 # Loop over the atoms. 2122 for i in range(len(mol.atom_num)): 2123 # Skip non-matching atoms. 2124 if sel_obj and not sel_obj.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 2125 continue 2126 2127 # Translate. 2128 mol.x[i] = mol.x[i] + T[0] 2129 mol.y[i] = mol.y[i] + T[1] 2130 mol.z[i] = mol.z[i] + T[2]

2131 2132

2133 - def to_xml(self, doc, element):

2134 """Prototype method for converting the structural object to an XML representation. 2135 2136 @param doc: The XML document object. 2137 @type doc: xml.dom.minidom.Document instance 2138 @param element: The element to add the alignment tensors XML element to. 2139 @type element: XML element object 2140 """ 2141 2142 # Create the structural element and add it to the higher level element. 2143 str_element = doc.createElement('structure') 2144 element.appendChild(str_element) 2145 2146 # Set the structural attributes. 2147 str_element.setAttribute('desc', 'Structural information') 2148 2149 # No contents to store, so pack up the structural containers. 2150 if not self.structural_data.is_empty(): 2151 self.structural_data.to_xml(doc, str_element) 2152 2153 # The structural metadata. 2154 metadata = ['helices', 'sheets'] 2155 for name in metadata: 2156 # The metadata does not exist. 2157 if not hasattr(self, name): 2158 continue 2159 2160 # Get the object. 2161 obj = getattr(self, name) 2162 2163 # Create a new element for this object, and add it to the main element. 2164 sub_elem = doc.createElement(name) 2165 str_element.appendChild(sub_elem) 2166 2167 # Add the value to the sub element. 2168 object_to_xml(doc, sub_elem, value=obj) 2169 2170 # The displacement structure. 2171 if hasattr(self, 'displacements'): 2172 # Create an XML element. 2173 disp_element = doc.createElement('displacements') 2174 str_element.appendChild(disp_element) 2175 2176 # Set the attributes. 2177 disp_element.setAttribute('desc', 'The rotational and translational displacements between models') 2178 2179 # Add the displacement data. 2180 self.displacements.to_xml(doc, disp_element)

2181 2182

2183 - def validate_models(self):

2184 """Check that the models are consistent with each other. 2185 2186 This checks that the primary structure is identical between the models. 2187 """ 2188 2189 # Print out. 2190 print("Validating models:") 2191 2192 # Loop over the models. 2193 for i in range(len(self.structural_data)): 2194 # Check the molecules. 2195 if len(self.structural_data[0].mol) != len(self.structural_data[i].mol): 2196 raise RelaxError("The number of molecules, %i, in model %i does not match the %i molecules of the first model." % (len(self.structural_data[i].mol), self.structural_data[i].num, len(self.structural_data[0].mol))) 2197 2198 # Loop over the molecules. 2199 for j in range(len(self.structural_data[i].mol)): 2200 # Alias the molecules. 2201 mol = self.structural_data[i].mol[j] 2202 mol_ref = self.structural_data[0].mol[j] 2203 2204 # Check the names. 2205 if mol.mol_name != mol_ref.mol_name: 2206 raise RelaxError("The molecule name '%s' of model %i does not match the name '%s' of the first model." % (mol.mol_name, self.structural_data[i].num, mol_ref.mol_name)) 2207 2208 # Loop over the atoms. 2209 for k in range(len(mol.atom_name)): 2210 # Create pseudo-pdb formatted records (with no atomic coordinates). 2211 atom = "%-6s%5s %4s%1s%3s %1s%4s%1s %8s%8s%8s%6.2f%6.2f %4s%2s%2s" % ('ATOM', mol.atom_num[k], self._translate(mol.atom_name[k]), '', self._translate(mol.res_name[k]), self._translate(mol.chain_id[k]), self._translate(mol.res_num[k]), '', '#', '#', '#', 1.0, 0, self._translate(mol.seg_id[k]), self._translate(mol.element[k]), '') 2212 atom_ref = "%-6s%5s %4s%1s%3s %1s%4s%1s %8s%8s%8s%6.2f%6.2f %4s%2s%2s" % ('ATOM', mol_ref.atom_num[k], self._translate(mol_ref.atom_name[k]), '', self._translate(mol_ref.res_name[k]), self._translate(mol_ref.chain_id[k]), self._translate(mol_ref.res_num[k]), '', '#', '#', '#', 1.0, 0, self._translate(mol_ref.seg_id[k]), self._translate(mol_ref.element[k]), '') 2213 2214 # Check the atom info. 2215 if atom != atom_ref: 2216 print(atom) 2217 print(atom_ref) 2218 raise RelaxError("The atoms of model %i do not match the first model." % self.structural_data[i].num) 2219 2220 # Final printout. 2221 print("\tAll models are consistent")

2222 2223

2224 - def write_pdb(self, file, model_num=None):

2225 """Method for the creation of a PDB file from the structural data. 2226 2227 A number of PDB records including HET, HETNAM, FORMUL, HELIX, SHEET, HETATM, TER, CONECT, MASTER, and END are created. To create the non-standard residue records HET, HETNAM, and FORMUL, the data structure 'het_data' is created. It is an array of arrays where the first dimension corresponds to a different residue and the second dimension has the elements: 2228 2229 0. Residue number. 2230 1. Residue name. 2231 2. Chain ID. 2232 3. Total number of atoms in the residue. 2233 4. Number of H atoms in the residue. 2234 5. Number of C atoms in the residue. 2235 2236 2237 @param file: The PDB file object. This object must be writable. 2238 @type file: file object 2239 @keyword model_num: The model to place into the PDB file. If not supplied, then all models will be placed into the file. 2240 @type model_num: None or int 2241 """ 2242 2243 # Validate the structural data. 2244 self.validate() 2245 2246 # Initialise record counts. 2247 num_hetatm = 0 2248 num_atom = 0 2249 num_ter = 0 2250 num_conect = 0 2251 2252 # Print out. 2253 print("\nCreating the PDB records\n") 2254 2255 # Write some initial remarks. 2256 print("REMARK") 2257 pdb_write.remark(file, num=4, remark="This file complies with format v. 3.30, Jul-2011.") 2258 pdb_write.remark(file, num=40, remark="Created by relax (http://www.nmr-relax.com).") 2259 num_remark = 2 2260 2261 # Determine if model records will be created. 2262 model_records = False 2263 for model in self.model_loop(): 2264 if hasattr(model, 'num') and model.num != None: 2265 model_records = True 2266 2267 2268 #################### 2269 # Hetrogen section # 2270 #################### 2271 2272 # Initialise the hetrogen info array. 2273 het_data = [] 2274 het_data_coll = [] 2275 2276 # Loop over the molecules of the first model. 2277 index = 0 2278 for mol in self.structural_data[0].mol: 2279 # Check the validity of the data. 2280 self._validate_data_arrays(mol) 2281 2282 # Append an empty array for this molecule. 2283 het_data.append([]) 2284 2285 # Collect the non-standard residue info. 2286 for i in range(len(mol.atom_name)): 2287 # Skip non-HETATM records and HETATM records with no residue info. 2288 if mol.pdb_record[i] != 'HETATM' or mol.res_name[i] == None: 2289 continue 2290 2291 # If the residue is not already stored initialise a new het_data element. 2292 # (residue number, residue name, chain ID, number of atoms, atom count array). 2293 if not het_data[index] or not mol.res_num[i] == het_data[index][-1][0]: 2294 het_data[index].append([mol.res_num[i], mol.res_name[i], mol.chain_id[i], 0, []]) 2295 2296 # Catch missing chain_ids. 2297 if het_data[index][-1][2] == None: 2298 het_data[index][-1][2] = '' 2299 2300 # Total atom count. 2301 het_data[index][-1][3] = het_data[index][-1][3] + 1 2302 2303 # Find if the atom has already a count entry. 2304 entry = False 2305 for j in range(len(het_data[index][-1][4])): 2306 if mol.element[i] == het_data[index][-1][4][j][0]: 2307 entry = True 2308 2309 # Create a new specific atom count entry. 2310 if not entry: 2311 het_data[index][-1][4].append([mol.element[i], 0]) 2312 2313 # Increment the specific atom count. 2314 for j in range(len(het_data[index][-1][4])): 2315 if mol.element[i] == het_data[index][-1][4][j][0]: 2316 het_data[index][-1][4][j][1] = het_data[index][-1][4][j][1] + 1 2317 2318 # Create the collective hetrogen info data structure. 2319 for i in range(len(het_data[index])): 2320 # Find the entry in the collective structure. 2321 found = False 2322 for j in range(len(het_data_coll)): 2323 # Matching residue numbers. 2324 if het_data[index][i][0] == het_data_coll[j][0]: 2325 # Change the flag. 2326 found = True 2327 2328 # The checks. 2329 if het_data_coll[j][1] != het_data[index][i][1]: 2330 raise RelaxError("The " + repr(het_data[index][i][1]) + " residue name of hetrogen " + repr(het_data[index][i][0]) + " " + het_data[index][i][1] + " of structure " + repr(index) + " does not match the " + repr(het_data_coll[j][1]) + " name of the previous structures.") 2331 2332 elif het_data_coll[j][2] != het_data[index][i][2]: 2333 raise RelaxError("The hetrogen chain id " + repr(het_data[index][i][2]) + " does not match " + repr(het_data_coll[j][2]) + " of residue " + repr(het_data_coll[j][0]) + " " + het_data_coll[j][1] + " of the previous structures.") 2334 2335 elif het_data_coll[j][3] != het_data[index][i][3]: 2336 raise RelaxError("The " + repr(het_data[index][i][3]) + " atoms of hetrogen " + repr(het_data_coll[j][0]) + " " + het_data_coll[j][1] + " of structure " + repr(index) + " does not match the " + repr(het_data_coll[j][3]) + " of the previous structures.") 2337 2338 elif het_data_coll[j][4] != het_data[index][i][4]: 2339 raise RelaxError("The atom counts " + repr(het_data[index][i][4]) + " for the hetrogen residue " + repr(het_data_coll[j][0]) + " " + het_data_coll[j][1] + " of structure " + repr(index) + " do not match the counts " + repr(het_data_coll[j][4]) + " of the previous structures.") 2340 2341 # If there is no match, add the new residue to the collective. 2342 if not found: 2343 het_data_coll.append(het_data[index][i]) 2344 2345 # Increment the molecule index. 2346 index = index + 1 2347 2348 2349 # The HET records. 2350 ################## 2351 2352 # Print out. 2353 print("HET") 2354 2355 # Write the HET records. 2356 for het in het_data_coll: 2357 pdb_write.het(file, het_id=het[1], chain_id=het[2], seq_num=het[0], num_het_atoms=het[3]) 2358 2359 2360 # The HETNAM records. 2361 ##################### 2362 2363 # Print out. 2364 print("HETNAM") 2365 2366 # Loop over the non-standard residues. 2367 residues = [] 2368 for het in het_data_coll: 2369 # Test if the residue HETNAM record as already been written (otherwise store its name). 2370 if het[1] in residues: 2371 continue 2372 else: 2373 residues.append(het[1]) 2374 2375 # Get the chemical name. 2376 chemical_name = self._get_chemical_name(het[1]) 2377 if not chemical_name: 2378 chemical_name = 'Unknown' 2379 2380 # Write the HETNAM records. 2381 pdb_write.hetnam(file, het_id=het[1], text=chemical_name) 2382 2383 2384 # The FORMUL records. 2385 ##################### 2386 2387 # Print out. 2388 print("FORMUL") 2389 2390 # Loop over the non-standard residues and generate and write the chemical formula. 2391 residues = [] 2392 for i in range(len(het_data_coll)): 2393 # Alias. 2394 het = het_data_coll[i] 2395 2396 # Test if the residue HETNAM record as already been written (otherwise store its name). 2397 if het[1] in residues: 2398 continue 2399 else: 2400 residues.append(het[1]) 2401 2402 # Initialise the chemical formula. 2403 formula = '' 2404 2405 # Loop over the atoms. 2406 for atom_count in het[4]: 2407 formula = formula + atom_count[0] + repr(atom_count[1]) 2408 2409 # The FORMUL record (chemical formula). 2410 pdb_write.formul(file, comp_num=i+1, het_id=het[1], text=formula) 2411 2412 2413 ############################### 2414 # Secondary structure section # 2415 ############################### 2416 2417 # The HELIX records. 2418 #################### 2419 2420 if hasattr(self, 'helices') and len(self.helices): 2421 # Printout. 2422 print("HELIX") 2423 2424 # Loop over and unpack the helix data. 2425 index = 1 2426 for helix_id, init_chain_id, init_res_name, init_seq_num, end_chain_id, end_res_name, end_seq_num, helix_class, length in self.helices: 2427 pdb_write.helix(file, ser_num=index, helix_id=helix_id, init_chain_id=init_chain_id, init_res_name=init_res_name, init_seq_num=init_seq_num, end_chain_id=end_chain_id, end_res_name=end_res_name, end_seq_num=end_seq_num, helix_class=helix_class, length=length) 2428 index += 1 2429 2430 # The SHEET records. 2431 #################### 2432 2433 if hasattr(self, 'sheets') and len(self.sheets): 2434 # Printout. 2435 print("SHEET") 2436 2437 # Loop over and unpack the helix data. 2438 index = 1 2439 for strand, sheet_id, num_strands, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, sense, cur_atom, cur_res_name, cur_chain_id, cur_res_seq, cur_icode, prev_atom, prev_res_name, prev_chain_id, prev_res_seq, prev_icode in self.sheets: 2440 pdb_write.sheet(file, strand=strand, sheet_id=sheet_id, num_strands=num_strands, init_res_name=init_res_name, init_chain_id=init_chain_id, init_seq_num=init_seq_num, init_icode=init_icode, end_res_name=end_res_name, end_chain_id=end_chain_id, end_seq_num=end_seq_num, end_icode=end_icode, sense=sense, cur_atom=cur_atom, cur_res_name=cur_res_name, cur_chain_id=cur_chain_id, cur_res_seq=cur_res_seq, cur_icode=cur_icode, prev_atom=prev_atom, prev_res_name=prev_res_name, prev_chain_id=prev_chain_id, prev_res_seq=prev_res_seq, prev_icode=prev_icode) 2441 index += 1 2442 2443 2444 ###################### 2445 # Coordinate section # 2446 ###################### 2447 2448 # Loop over the models. 2449 for model in self.model_loop(model_num): 2450 # MODEL record, for multiple models. 2451 #################################### 2452 2453 if model_records: 2454 # Print out. 2455 print("\nMODEL %s" % model.num) 2456 2457 # Write the model record. 2458 pdb_write.model(file, serial=model.num) 2459 2460 2461 # Add the atomic coordinate records (ATOM, HETATM, and TER). 2462 ############################################################ 2463 2464 # Loop over the molecules. 2465 for mol in model.mol: 2466 # Print out. 2467 print("ATOM, HETATM, TER") 2468 2469 # Loop over the atomic data. 2470 atom_record = False 2471 for i in range(len(mol.atom_name)): 2472 # Write the ATOM record. 2473 if mol.pdb_record[i] in [None, 'ATOM']: 2474 atom_record = True 2475 2476 # The atom number, if missing. 2477 atom_num = mol.atom_num[i] 2478 if atom_num == None: 2479 atom_num = i + 1 2480 2481 # Handle the funky atom name alignment. From the PDB format documents: 2482 # "Alignment of one-letter atom name such as C starts at column 14, while two-letter atom name such as FE starts at column 13." 2483 if len(mol.atom_name[i]) == 1: 2484 atom_name = " %s" % mol.atom_name[i] 2485 else: 2486 atom_name = "%s" % mol.atom_name[i] 2487 2488 # Write out. 2489 pdb_write.atom(file, serial=atom_num, name=atom_name, res_name=mol.res_name[i], chain_id=self._translate(mol.chain_id[i]), res_seq=mol.res_num[i], x=mol.x[i], y=mol.y[i], z=mol.z[i], occupancy=1.0, temp_factor=0, element=mol.element[i]) 2490 num_atom = num_atom + 1 2491 2492 # Info for the TER record. 2493 ter_num = atom_num + 1 2494 ter_name = mol.res_name[i] 2495 ter_chain_id = mol.chain_id[i] 2496 ter_res_num = mol.res_num[i] 2497 2498 # Finish the ATOM section with the TER record. 2499 if atom_record: 2500 pdb_write.ter(file, serial=ter_num, res_name=ter_name, chain_id=self._translate(ter_chain_id), res_seq=ter_res_num) 2501 num_ter = num_ter + 1 2502 2503 # Loop over the atomic data. 2504 count_shift = False 2505 for i in range(len(mol.atom_name)): 2506 # Write the HETATM record. 2507 if mol.pdb_record[i] == 'HETATM': 2508 # The atom number, if missing. 2509 atom_num = mol.atom_num[i] 2510 if atom_num == None: 2511 atom_num = i + 1 2512 2513 # Increment the atom number if a TER record was created. 2514 if atom_record and atom_num == ter_num: 2515 count_shift = True 2516 if atom_record and count_shift: 2517 atom_num += 1 2518 2519 # Write out. 2520 pdb_write.hetatm(file, serial=atom_num, name=self._translate(mol.atom_name[i]), res_name=mol.res_name[i], chain_id=self._translate(mol.chain_id[i]), res_seq=mol.res_num[i], x=mol.x[i], y=mol.y[i], z=mol.z[i], occupancy=1.0, temp_factor=0.0, element=mol.element[i]) 2521 num_hetatm = num_hetatm + 1 2522 2523 2524 # ENDMDL record, for multiple structures. 2525 ######################################## 2526 2527 if model_records: 2528 print("ENDMDL") 2529 pdb_write.endmdl(file) 2530 2531 2532 # Create the CONECT records. 2533 ############################ 2534 2535 # Print out. 2536 print("CONECT") 2537 2538 # Loop over the molecules of the first model. 2539 for mol in self.structural_data[0].mol: 2540 # Loop over the atoms. 2541 for i in range(len(mol.atom_name)): 2542 # No bonded atoms, hence no CONECT record is required. 2543 if not len(mol.bonded[i]): 2544 continue 2545 2546 # Initialise some data structures. 2547 flush = 0 2548 bonded_index = 0 2549 bonded = ['', '', '', ''] 2550 2551 # Loop over the bonded atoms. 2552 for j in range(len(mol.bonded[i])): 2553 # End of the array, hence create the CONECT record in this iteration. 2554 if j == len(mol.bonded[i])-1: 2555 flush = True 2556 2557 # Only four covalently bonded atoms allowed in one CONECT record. 2558 if bonded_index == 3: 2559 flush = True 2560 2561 # Get the bonded atom index. 2562 bonded[bonded_index] = mol.bonded[i][j] 2563 2564 # Increment the bonded_index value. 2565 bonded_index = bonded_index + 1 2566 2567 # Generate the CONECT record and increment the counter. 2568 if flush: 2569 # Convert the atom indices to atom numbers. 2570 for k in range(4): 2571 if bonded[k] != '': 2572 if mol.atom_num[bonded[k]] != None: 2573 bonded[k] = mol.atom_num[bonded[k]] 2574 else: 2575 bonded[k] = bonded[k] + 1 2576 2577 # Write the CONECT record. 2578 pdb_write.conect(file, serial=i+1, bonded1=bonded[0], bonded2=bonded[1], bonded3=bonded[2], bonded4=bonded[3]) 2579 2580 # Reset the flush flag, the bonded atom count, and the bonded atom names. 2581 flush = False 2582 bonded_index = 0 2583 bonded = ['', '', '', ''] 2584 2585 # Increment the CONECT record count. 2586 num_conect = num_conect + 1 2587 2588 2589 2590 # MASTER record. 2591 ################ 2592 2593 print("\nMASTER") 2594 pdb_write.master(file, num_het=len(het_data_coll), num_coord=num_atom+num_hetatm, num_ter=num_ter, num_conect=num_conect) 2595 2596 2597 # END. 2598 ###### 2599 2600 print("END") 2601 pdb_write.end(file)

2602 2603

2604 - def validate(self):

2605 """Check the integrity of the structural data. 2606 2607 The number of molecules must be the same in all models. 2608 """ 2609 2610 # Reference number of molecules. 2611 num_mols = len(self.structural_data[0].mol) 2612 2613 # Loop over all other models. 2614 for i in range(1, len(self.structural_data)): 2615 # Model alias. 2616 model_i = self.structural_data[i] 2617 2618 # Size check. 2619 if num_mols != len(model_i.mol): 2620 raise RelaxError("The structural object is not valid - the number of molecules is not the same for all models.") 2621 2622 # Molecule name check. 2623 for j in range(len(model_i.mol)): 2624 if model_i.mol[j].mol_name != self.structural_data[0].mol[j].mol_name: 2625 raise RelaxError("The molecule name '%s' of model %s does not match the corresponding molecule '%s' of model %s." % (model_i.mol[j].mol_name, model_i.num, self.structural_data[0].mol[j].mol_name, self.structural_data[0].num))

Source Code for Module lib.structure.internal.object