lib.structure.internal.object

58 """The internal relax structural data object. 59 60 The structural data object for this class is a container possessing a number of different arrays corresponding to different structural information. These objects are described in the structural container docstring. 61 """ 62

63 - def __init__(self):

64 """Initialise the structural object.""" 65 66 # Initialise the empty model list. 67 self.structural_data = ModelList()

68 69

70 - def _bonded_atom(self, attached_atom, index, mol):

71 """Find the atom named attached_atom directly bonded to the atom located at the index. 72 73 @param attached_atom: The name of the attached atom to return. 74 @type attached_atom: str 75 @param index: The index of the atom which the attached atom is attached to. 76 @type index: int 77 @param mol: The molecule container. 78 @type mol: MolContainer instance 79 @return: A tuple of information about the bonded atom. 80 @rtype: tuple consisting of the atom number (int), atom name (str), element name (str), and atomic position (Numeric array of len 3) 81 """ 82 83 # Init. 84 bonded_found = False 85 86 # No bonded atoms, so determine the connectivities. 87 if not mol.bonded[index]: 88 # Determine the molecule type if needed. 89 if not hasattr(mol, 'type'): 90 self._mol_type(mol) 91 92 # Protein. 93 if mol.type == 'protein': 94 self._protein_connect(mol) 95 96 # Find everything within 2 Angstroms and say they are bonded. 97 else: 98 self._find_bonded_atoms(index, mol, radius=2) 99 100 # Loop over the bonded atoms. 101 matching_list = [] 102 for bonded_index in mol.bonded[index]: 103 if regex.search(mol.atom_name[bonded_index], attached_atom): 104 matching_list.append(bonded_index) 105 num_attached = len(matching_list) 106 107 # Problem. 108 if num_attached > 1: 109 # Get the atom names. 110 matching_names = [] 111 for i in matching_list: 112 matching_names.append(mol.atom_name[i]) 113 114 # Return nothing but a warning. 115 return None, None, None, None, None, 'More than one attached atom found: ' + repr(matching_names) 116 117 # No attached atoms. 118 if num_attached == 0: 119 return None, None, None, None, None, "No attached atom could be found" 120 121 # The bonded atom info. 122 index = matching_list[0] 123 bonded_num = mol.atom_num[index] 124 bonded_name = mol.atom_name[index] 125 element = mol.element[index] 126 pos = [mol.x[index], mol.y[index], mol.z[index]] 127 attached_name = mol.atom_name[index] 128 129 # Return the information. 130 return bonded_num, bonded_name, element, pos, attached_name, None

131 132

133 - def _find_bonded_atoms(self, index, mol, radius=1.2):

134 """Find all atoms within a sphere and say that they are attached to the central atom. 135 136 The found atoms will be added to the 'bonded' data structure. 137 138 139 @param index: The index of the central atom. 140 @type index: int 141 @param mol: The molecule container. 142 @type mol: MolContainer instance 143 """ 144 145 # Central atom info. 146 centre = array([mol.x[index], mol.y[index], mol.z[index]], float64) 147 148 # Atom loop. 149 dist_list = [] 150 connect_list = {} 151 element_list = {} 152 for i in range(len(mol.atom_num)): 153 # Skip proton to proton bonds! 154 if mol.element[index] == 'H' and mol.element[i] == 'H': 155 continue 156 157 # The atom's position. 158 pos = array([mol.x[i], mol.y[i], mol.z[i]], float64) 159 160 # The distance from the centre. 161 dist = linalg.norm(centre-pos) 162 163 # The atom is within the radius. 164 if dist < radius: 165 # Store the distance. 166 dist_list.append(dist) 167 168 # Store the atom index. 169 connect_list[dist] = i 170 171 # Store the element type. 172 element_list[dist] = mol.element[i] 173 174 # The maximum number of allowed covalent bonds. 175 max_conn = 1000 # Ridiculous default! 176 if mol.element[index] == 'H': 177 max_conn = 1 178 elif mol.element[index] == 'O': 179 max_conn = 2 180 elif mol.element[index] == 'N': 181 max_conn = 3 182 elif mol.element[index] == 'C': 183 max_conn = 4 184 185 # Sort. 186 dist_list.sort() 187 188 # Loop over the max number of connections (or the number of connected atoms, if less). 189 for i in range(min(max_conn, len(dist_list))): 190 mol.atom_connect(index, connect_list[dist_list[i]])

191 192

193 - def _get_chemical_name(self, hetID):

194 """Return the chemical name corresponding to the given residue ID. 195 196 The following names are currently returned:: 197 ________________________________________________ 198 | | | 199 | hetID | Chemical name | 200 |________|_____________________________________| 201 | | | 202 | TNS | Tensor | 203 | COM | Centre of mass | 204 | AXS | Tensor axes | 205 | SIM | Monte Carlo simulation tensor axes | 206 | PIV | Pivot point | 207 | CON | Cone object | 208 | AVE | Average vector | 209 |________|_____________________________________| 210 211 For any other residues, no description is returned. 212 213 @param hetID: The residue ID. 214 @type hetID: str 215 @return: The chemical name. 216 @rtype: str or None 217 """ 218 219 # Tensor. 220 if hetID == 'TNS': 221 return 'Tensor' 222 223 # Centre of mass. 224 if hetID == 'COM': 225 return 'Centre of mass' 226 227 # Tensor axes. 228 if hetID == 'AXS': 229 return 'Tensor axes' 230 231 # Monte Carlo simulation tensor axes. 232 if hetID == 'SIM': 233 return 'Monte Carlo simulation tensor axes' 234 235 # Pivot point. 236 if hetID == 'PIV': 237 return 'Pivot point' 238 239 # Cone object. 240 if hetID == 'CON': 241 return 'Cone' 242 243 # Average vector. 244 if hetID == 'AVE': 245 return 'Average vector'

246 247

248 - def _parse_models_gaussian(self, file_path, verbosity=1):

249 """Generator function for looping over the models in the Gaussian log file. 250 251 @param file_path: The full path of the Gaussian log file. 252 @type file_path: str 253 @return: The model number and all the records for that model. 254 @rtype: tuple of int and array of str 255 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 256 @type verbosity: int 257 """ 258 259 # Open the file. 260 file = open_read_file(file_path, verbosity=verbosity) 261 lines = file.readlines() 262 file.close() 263 264 # Check for empty files. 265 if lines == []: 266 raise RelaxError("The Gaussian log file is empty.") 267 268 # Init. 269 found = False 270 str_index = 0 271 total_atom = 0 272 model = 0 273 records = [] 274 275 # Loop over the data. 276 for i in range(len(lines)): 277 # Found a structure. 278 if search("Standard orientation", lines[i]): 279 found = True 280 str_index = 0 281 continue 282 283 # End of the model. 284 if found and str_index > 4 and search("---------", lines[i]): 285 # Yield the info 286 yield records 287 288 # Reset. 289 records = [] 290 found = False 291 292 # Not a structure line. 293 if not found: 294 continue 295 296 # Append the line as a record of the model. 297 records.append(lines[i]) 298 299 # Increment the structure line index. 300 str_index += 1

301 302

303 - def _parse_pdb_connectivity_annotation(self, lines):

304 """Loop over and parse the PDB connectivity annotation records. 305 306 These are the records identified in the U{PDB version 3.30 documentation<http://www.wwpdb.org/documentation/file-format/format33/sect6.html>}. 307 308 309 @param lines: The lines of the PDB file excluding the sections prior to the connectivity annotation section. 310 @type lines: list of str 311 @return: The remaining PDB lines with the connectivity annotation records stripped. 312 @rtype: list of str 313 """ 314 315 # The ordered list of record names in the connectivity annotation section. 316 records = [ 317 'SSBOND', 318 'LINK ', 319 'CISPEP' 320 ] 321 322 # Loop over the lines. 323 for i in range(len(lines)): 324 # No match, therefore assume to be out of the connectivity annotation section. 325 if lines[i][:6] not in records: 326 break 327 328 # Return the remaining lines. 329 return lines[i:]

330 331

332 - def _parse_pdb_coord(self, lines):

333 """Generator function for looping over the models in the PDB file. 334 335 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/file-format/format33/sect9.html}. 336 337 338 @param lines: The lines of the coordinate section. 339 @type lines: list of str 340 @return: The model number and all the records for that model. 341 @rtype: tuple of int and array of str 342 """ 343 344 # Init. 345 model = None 346 records = [] 347 348 # Loop over the data. 349 for i in range(len(lines)): 350 # A new model record. 351 if lines[i][:5] == 'MODEL': 352 try: 353 model = int(lines[i].split()[1]) 354 except: 355 raise RelaxError("The MODEL record " + repr(lines[i]) + " is corrupt, cannot read the PDB file.") 356 357 # Skip all records prior to the first ATOM or HETATM record. 358 if not (lines[i][:4] == 'ATOM' or lines[i][:6] == 'HETATM') and not len(records): 359 continue 360 361 # End of the model. 362 if lines[i][:6] == 'ENDMDL': 363 # Yield the info. 364 yield model, records 365 366 # Reset the records. 367 records = [] 368 369 # Skip the rest of this loop. 370 continue 371 372 # Append the line as a record of the model. 373 records.append(lines[i]) 374 375 # If records is not empty then there are no models, so yield the lot. 376 if len(records): 377 yield model, records

378 379

380 - def _parse_pdb_hetrogen(self, lines):

381 """Loop over and parse the PDB hetrogen records. 382 383 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/file-format/format33/sect4.html}. 384 385 386 @param lines: The lines of the PDB file excluding the sections prior to the hetrogen section. 387 @type lines: list of str 388 @return: The remaining PDB lines with the hetrogen records stripped. 389 @rtype: list of str 390 """ 391 392 # The ordered list of record names in the hetrogen section. 393 records = [ 394 'HET ', 395 'FORMUL', 396 'HETNAM', 397 'HETSYN' 398 ] 399 400 # Loop over the lines. 401 for i in range(len(lines)): 402 # No match, therefore assume to be out of the hetrogen section. 403 if lines[i][:6] not in records: 404 break 405 406 # Return the remaining lines. 407 return lines[i:]

408 409

410 - def _parse_pdb_misc(self, lines):

411 """Loop over and parse the PDB miscellaneous records. 412 413 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/file-format/format33/sect7.html}. 414 415 416 @param lines: The lines of the PDB file excluding the sections prior to the miscellaneous section. 417 @type lines: list of str 418 @return: The remaining PDB lines with the miscellaneous records stripped. 419 @rtype: list of str 420 """ 421 422 # The ordered list of record names in the miscellaneous section. 423 records = [ 424 'SITE ' 425 ] 426 427 # Loop over the lines. 428 for i in range(len(lines)): 429 # No match, therefore assume to be out of the miscellaneous section. 430 if lines[i][:6] not in records: 431 break 432 433 # Return the remaining lines. 434 return lines[i:]

435 436

437 - def _parse_pdb_prim_struct(self, lines):

438 """Loop over and parse the PDB primary structure records. 439 440 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/file-format/format33/sect3.html}. 441 442 443 @param lines: The lines of the PDB file excluding the title section. 444 @type lines: list of str 445 @return: The remaining PDB lines with the primary structure records stripped. 446 @rtype: list of str 447 """ 448 449 # The ordered list of record names in the primary structure section. 450 records = [ 451 'DBREF ', 452 'DBREF1', 453 'DBREF2', 454 'SEQADV', 455 'SEQRES', 456 'MODRES' 457 ] 458 459 # Loop over the lines. 460 for i in range(len(lines)): 461 # No match, therefore assume to be out of the primary structure section. 462 if lines[i][:6] not in records: 463 break 464 465 # Return the remaining lines. 466 return lines[i:]

467 468

469 - def _parse_pdb_ss(self, lines, read_mol=None, helices=None, sheets=None):

470 """Loop over and parse the PDB secondary structure records. 471 472 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/file-format/format33/sect5.html}. 473 474 475 @param lines: The lines of the PDB file excluding the sections prior to the secondary structure section. 476 @type lines: list of str 477 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are determined differently by the different parsers, but are numbered consecutively from 1. If set to None, then all molecules will be loaded. 478 @type read_mol: None, int, or list of int 479 @keyword helices: The list of helix data to append to. 480 @type helices: list 481 @keyword sheets: The list of sheet data to append to. 482 @type sheets: list 483 @return: The remaining PDB lines with the secondary structure records stripped. 484 @rtype: list of str 485 """ 486 487 # The ordered list of record names in the secondary structure section (the depreciated TURN record is also included to handle old PDB files). 488 records = [ 489 'HELIX ', 490 'SHEET ', 491 'TURN ' 492 ] 493 494 # The number of pre-existing molecules. 495 if not len(self.structural_data): 496 mol_num = 0 497 else: 498 mol_num = len(self.structural_data[0].mol) 499 500 # Loop over the lines. 501 for i in range(len(lines)): 502 # No match, therefore assume to be out of the secondary structure section. 503 if lines[i][:6] not in records: 504 break 505 506 # A helix. 507 if lines[i][:5] == 'HELIX': 508 # Parse the record. 509 record_type, ser_num, helix_id, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, helix_class, comment, length = pdb_read.helix(lines[i]) 510 511 # The molecule indices. 512 mol_init_index = self._pdb_chain_id_to_mol_index(init_chain_id) 513 mol_end_index = self._pdb_chain_id_to_mol_index(end_chain_id) 514 515 # Only load the desired molecule. 516 if read_mol != None: 517 if mol_init_index + 1 not in read_mol: 518 continue 519 if mol_end_index + 1 not in read_mol: 520 continue 521 522 # Store the data. 523 helices.append([helix_id, mol_init_index, init_res_name, init_seq_num, mol_end_index, end_res_name, end_seq_num, helix_class, length]) 524 525 # A sheet. 526 if lines[i][:5] == 'SHEET': 527 # Parse the record. 528 record_type, strand, sheet_id, num_strands, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, sense, cur_atom, cur_res_name, cur_chain_id, cur_res_seq, cur_icode, prev_atom, prev_res_name, prev_chain_id, prev_res_seq, prev_icode = pdb_read.sheet(lines[i]) 529 530 # The molecule indices. 531 mol_init_index = self._pdb_chain_id_to_mol_index(init_chain_id) 532 mol_end_index = self._pdb_chain_id_to_mol_index(end_chain_id) 533 534 # Only load the desired molecule. 535 if read_mol != None: 536 if mol_init_index + 1 not in read_mol: 537 continue 538 if mol_end_index + 1 not in read_mol: 539 continue 540 541 # New molecule indices based on currently loaded data. 542 mol_cur_index = None 543 if cur_chain_id: 544 mol_cur_index = self._pdb_chain_id_to_mol_index(cur_chain_id) 545 mol_prev_index = None 546 if prev_chain_id: 547 mol_prev_index = self._pdb_chain_id_to_mol_index(prev_chain_id) 548 549 # Store the data. 550 sheets.append([strand, sheet_id, num_strands, init_res_name, mol_init_index, init_seq_num, init_icode, end_res_name, mol_end_index, end_seq_num, end_icode, sense, cur_atom, cur_res_name, mol_cur_index, cur_res_seq, cur_icode, prev_atom, prev_res_name, mol_prev_index, prev_res_seq, prev_icode]) 551 552 # Return the remaining lines. 553 return lines[i:]

554 555

556 - def _parse_pdb_title(self, lines):

557 """Loop over and parse the PDB title records. 558 559 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/file-format/format33/sect2.html}. 560 561 562 @param lines: All lines of the PDB file. 563 @type lines: list of str 564 @return: The remaining PDB lines with the title records stripped. 565 @rtype: list of str 566 """ 567 568 # The ordered list of (sometimes truncated) record names in the title section. 569 records = [ 570 'HEADER', 571 'OBSLTE', 572 'TITLE ', 573 'SPLT ', 574 'CAVEAT', 575 'COMPND', 576 'SOURCE', 577 'KEYWDS', 578 'EXPDTA', 579 'NUMMDL', 580 'MDLTYP', 581 'AUTHOR', 582 'REVDAT', 583 'SPRSDE', 584 'JRNL ', 585 'REMARK' 586 ] 587 588 # Loop over the lines. 589 for i in range(len(lines)): 590 # No match, therefore assume to be out of the title section. 591 if lines[i][:6] not in records: 592 break 593 594 # Return the remaining lines. 595 return lines[i:]

596 597

598 - def _parse_pdb_transform(self, lines):

599 """Loop over and parse the PDB transform records. 600 601 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/file-format/format33/sect8.html}. 602 603 604 @param lines: The lines of the PDB file excluding the sections prior to the transform section. 605 @type lines: list of str 606 @return: The remaining PDB lines with the transform records stripped. 607 @rtype: list of str 608 """ 609 610 # The ordered list of record names in the transform section. 611 records = [ 612 'CRYST', 613 'MTRIX', 614 'ORIGX', 615 'SCALE', 616 ] 617 618 # Loop over the lines. 619 for i in range(len(lines)): 620 # No match, therefore assume to be out of the transform section. 621 if lines[i][0: 5] not in records: 622 break 623 624 # Return the remaining lines. 625 return lines[i:]

626 627

628 - def _parse_models_xyz(self, file_path, verbosity=1):

629 """Generator function for looping over the models in the XYZ file. 630 631 @param file_path: The full path of the XYZ file. 632 @type file_path: str 633 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 634 @type verbosity: int 635 @return: The model number and all the records for that model. 636 @rtype: tuple of int and array of str 637 """ 638 639 # Open the file. 640 file = open_read_file(file_path, verbosity=verbosity) 641 lines = file.readlines() 642 file.close() 643 644 # Check for empty files. 645 if lines == []: 646 raise RelaxError("The XYZ file is empty.") 647 648 # Init. 649 total_atom = 0 650 model = 0 651 records = [] 652 653 # Loop over the data. 654 for i in range(len(lines)): 655 num=0 656 word = lines[i].split() 657 # Find the total atom number and the first model. 658 if (i==0) and (len(word)==1): 659 try: 660 total_atom = int(word[0]) 661 num = 1 662 except: 663 raise RelaxError("The MODEL record " + repr(lines[i]) + " is corrupt, cannot read the XYZ file.") 664 665 # End of the model. 666 if (len(records) == total_atom): 667 # Yield the info 668 yield records 669 670 # Reset the records. 671 records = [] 672 673 # Skip all records prior to atom coordinates record. 674 if (len(word) != 4): 675 continue 676 677 # Append the line as a record of the model. 678 records.append(lines[i]) 679 680 # If records is not empty then there are no models, so yield the lot. 681 if len(records): 682 yield records

683 684

685 - def _parse_mols_pdb(self, records):

686 """Generator function for looping over the molecules in the PDB records of a model. 687 688 @param records: The list of PDB records for the model, or if no models exist the entire PDB file. 689 @type records: list of str 690 @return: The molecule number and all the records for that molecule. 691 @rtype: tuple of int and list of str 692 """ 693 694 # Check for empty records. 695 if records == []: 696 raise RelaxError("There are no PDB records for this model.") 697 698 # Init. 699 mol_count = 1 700 mol_records = [[]] 701 end = False 702 703 # Loop over the data. 704 for i in range(len(records)): 705 # A PDB termination record. 706 if records[i][:3] == 'END': 707 break 708 709 # A master record, so we are done. 710 if records[i][:6] == 'MASTER': 711 break 712 713 # A model termination record. 714 if records[i][:6] == 'ENDMDL': 715 end = True 716 717 # A molecule termination record with no trailing HETATM or CONECT. 718 elif i < len(records)-1 and records[i][:3] == 'TER' and not records[i+1][:6] == 'HETATM' and not records[i+1][:6] == 'CONECT': 719 end = True 720 721 # A HETATM followed by an ATOM record. 722 elif i < len(records)-1 and records[i][:6] == 'HETATM' and records[i+1][:4] == 'ATOM': 723 end = True 724 725 # End. 726 if end: 727 # Increment the molecule counter. 728 mol_count = mol_count + 1 729 730 # Reset the flag. 731 end = False 732 733 # The molecule number. 734 chain_id = records[i][21] 735 if chain_id == ' ': 736 mol_index = mol_count - 1 737 else: 738 mol_index = self._pdb_chain_id_to_mol_index(chain_id) 739 740 # Add a new records list as required. 741 while True: 742 if len(mol_records) <= mol_index: 743 mol_records.append([]) 744 else: 745 break 746 747 # Append the line as a record of the molecule. 748 mol_records[mol_index].append(records[i]) 749 750 # Loop over the molecules and yield the molecule number and records. 751 for i in range(len(mol_records)): 752 if mol_records[i] != []: 753 yield i+1, mol_records[i]

754 755

756 - def _pdb_chain_id_to_mol_index(self, chain_id=None):

757 """Convert the PDB chain ID into the molecule index in a regular way. 758 759 @keyword chain_id: The PDB chain ID string. 760 @type chain_id: str 761 @return: The corresponding molecule index. 762 @rtype: int 763 """ 764 765 # Initialise. 766 mol_index = 0 767 768 # Convert to the molecule index. 769 if chain_id: 770 mol_index = CHAIN_ID_LIST.index(chain_id) 771 772 # Return the index. 773 return mol_index

774 775

776 - def _residue_data(self, res_nums=None, res_names=None):

777 """Convert the residue info into a dictionary of unique residues with numbers as keys. 778 779 @keyword res_nums: The list of residue numbers. 780 @type res_nums: list of int 781 @keyword res_names: The list of residue names matching the numbers. 782 @type res_names: list of str 783 @return: The dictionary of residue names with residue numbers as keys. 784 @rtype: dict of str 785 """ 786 787 # Initialise. 788 data = {} 789 790 # Loop over the data. 791 for i in range(len(res_nums)): 792 # The residue data already exists. 793 if res_nums[i] in data: 794 continue 795 796 # Add the data. 797 data[res_nums[i]] = res_names[i] 798 799 # Return the dictionary. 800 return data

801 802

803 - def _validate_data_arrays(self, struct):

804 """Check the validity of the data arrays in the given structure object. 805 806 @param struct: The structural object. 807 @type struct: Structure_container instance 808 """ 809 810 # The number of atoms. 811 num = len(struct.atom_name) 812 813 # Check the other lengths. 814 if len(struct.bonded) != num and len(struct.chain_id) != num and len(struct.element) != num and len(struct.pdb_record) != num and len(struct.res_name) != num and len(struct.res_num) != num and len(struct.seg_id) != num and len(struct.x) != num and len(struct.y) != num and len(struct.z) != num: 815 raise RelaxError("The structural data is invalid.")

816 817

818 - def _validate_records(self, lines):

819 """Make sure all PDB records are 80 char in length, padding with whitespace when needed. 820 821 All newline characters are stripped from the records as well. 822 823 824 @param lines: All lines of the PDB file. 825 @type lines: list of str 826 @return: The padded PDB lines. 827 @rtype: list of str 828 """ 829 830 # Loop over the lines. 831 for i in range(len(lines)): 832 # Strip the newline character. 833 lines[i] = lines[i].rstrip('\r\n') 834 835 # Pad if needed. 836 if len(lines[i]) != 80: 837 lines[i] = "%-80s" % lines[i] 838 839 # Return the fixed lines. 840 return lines

841 842

843 - def _mol_type(self, mol):

844 """Determine the type of molecule. 845 846 @param mol: The molecule data container. 847 @type mol: MolContainer instance 848 """ 849 850 # Amino acids. 851 aa = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'] 852 853 # Set the molecule type to default to 'other'. 854 mol.type = 'other' 855 856 # Loop over the residues. 857 for res in mol.res_name: 858 # Protein. 859 if res in aa: 860 # Set the molecule type and return. 861 mol.type = 'protein' 862 return

863 864

865 - def _protein_connect(self, mol):

866 """Set up the connectivities for the protein. 867 868 @param mol: The molecule data container. 869 @type mol: MolContainer instance 870 """ 871 872 # Initialise some residue data. 873 curr_res_num = None 874 res_atoms = [] 875 876 # Loop over all atoms. 877 for i in range(len(mol.atom_num)): 878 # New residue. 879 if mol.res_num[i] != curr_res_num: 880 # Intra-residue connectivites. 881 if len(res_atoms): 882 self._protein_intra_connect(mol, res_atoms) 883 884 # Update the residue number. 885 curr_res_num = mol.res_num[i] 886 887 # Reset the residue atom index list. 888 res_atoms = [] 889 890 # Add the atom index to the list. 891 res_atoms.append(i) 892 893 # Last atom. 894 if i == len(mol.atom_num) - 1 and len(res_atoms): 895 self._protein_intra_connect(mol, res_atoms)

896 897

898 - def _protein_intra_connect(self, mol, res_atoms):

899 """Set up the connectivities for the protein. 900 901 @param mol: The molecule data container. 902 @type mol: MolContainer instance 903 @param res_atoms: The list of atom indices corresponding to the residue. 904 @type res_atoms: list of int 905 """ 906 907 # Back bond connectivity. 908 indices = { 909 'N': None, 910 'C': None, 911 'O': None, 912 'CA': None, 913 'HN': None, 914 'H': None, # Same as HN. 915 'HA': None 916 } 917 918 # Loop over all atoms to find the indices. 919 for index in res_atoms: 920 if mol.atom_name[index] in indices: 921 indices[mol.atom_name[index]] = index 922 923 # Connect the atom pairs. 924 pairs = [ 925 ['N', 'HN'], 926 ['N', 'H'], 927 ['N', 'CA'], 928 ['CA', 'HA'], 929 ['CA', 'C'], 930 ['C', 'O'] 931 ] 932 933 # Loop over the atoms pairs and connect them. 934 for pair in pairs: 935 if indices[pair[0]] != None and indices[pair[1]] != None: 936 mol.atom_connect(indices[pair[0]], indices[pair[1]])

937 938

939 - def _translate(self, data, format='str'):

940 """Convert the data into a format for writing to file. 941 942 @param data: The data to convert to the required format. 943 @type data: anything 944 @keyword format: The format to convert to. This can be 'str', 'float', or 'int'. 945 @type format: str 946 @return: The converted version of the data. 947 @rtype: str 948 """ 949 950 # Conversion to string. 951 if format == 'str': 952 # None values. 953 if data == None: 954 data = '' 955 956 # Force convert to string. 957 if not isinstance(data, str): 958 data = repr(data) 959 960 # Conversion to float. 961 if format == 'float': 962 # None values. 963 if data == None: 964 data = 0.0 965 966 # Force convert to float. 967 if not isinstance(data, float): 968 data = float(data) 969 970 # Return the converted data. 971 return data

972 973

974 - def _trim_helix(self, helix=None, trim_res_list=[], res_data=None):

975 """Trim the given helix based on the list of deleted residue numbers. 976 977 @keyword helix: The single helix metadata structure. 978 @type helix: list 979 @keyword trim_res_list: The list of residue numbers which no longer exist. 980 @type trim_res_list: list of int 981 @keyword res_data: The dictionary of residue names with residue numbers as keys. 982 @type res_data: dict of str 983 @return: The trimmed helix metadata structure, or None if the whole helix is to be deleted. 984 @rtype: list or None 985 """ 986 987 # Unpack the helix residue numbers. 988 start_res = helix[3] 989 end_res = helix[6] 990 991 # The reverse residue list. 992 trim_res_list_rev = deepcopy(trim_res_list) 993 trim_res_list_rev.reverse() 994 995 # The helix residues. 996 helix_res = list(range(start_res, end_res+1)) 997 998 # Trim forwards. 999 for res_num in trim_res_list: 1000 if res_num == start_res: 1001 # Remove the residue. 1002 helix_res.pop(0) 1003 1004 # No helix left. 1005 if len(helix_res) == 0: 1006 break 1007 1008 # Realias the starting residue. 1009 start_res = helix_res[0] 1010 1011 # No helix left. 1012 if len(helix_res) == 0: 1013 return None 1014 1015 # Trim backwards. 1016 for res_num in trim_res_list_rev: 1017 if res_num == end_res: 1018 helix_res.pop(-1) 1019 end_res = helix_res[-1] 1020 1021 # Replace the starting and ending residues. 1022 if start_res != helix[3]: 1023 helix[3] = start_res 1024 helix[2] = res_data[start_res] 1025 if end_res != helix[6]: 1026 helix[6] = end_res 1027 helix[5] = res_data[end_res] 1028 1029 # The helix length. 1030 helix[-1] = len(helix_res) 1031 1032 # Return the modified helix. 1033 return helix

1034 1035

1036 - def _trim_sheet(self, sheet=None, trim_res_list=[], res_data=None):

1037 """Trim the given sheet based on the list of deleted residue numbers. 1038 1039 @keyword sheet: The single sheet metadata structure. 1040 @type sheet: list 1041 @keyword trim_res_list: The list of residue numbers which no longer exist. 1042 @type trim_res_list: list of int 1043 @keyword res_data: The dictionary of residue names with residue numbers as keys. 1044 @type res_data: dict of str 1045 @return: The trimmed sheet metadata structure, or None if the whole sheet is to be deleted. 1046 @rtype: list or None 1047 """ 1048 1049 # Unpack the sheet residue numbers. 1050 start_res = sheet[5] 1051 end_res = sheet[9] 1052 1053 # The reverse residue list. 1054 trim_res_list_rev = deepcopy(trim_res_list) 1055 trim_res_list_rev.reverse() 1056 1057 # The sheet residues. 1058 sheet_res = list(range(start_res, end_res+1)) 1059 1060 # Trim forwards. 1061 for res_num in trim_res_list: 1062 if res_num == start_res: 1063 # Remove the residue. 1064 sheet_res.pop(0) 1065 1066 # No sheet left. 1067 if len(sheet_res) == 0: 1068 break 1069 1070 # Realias the starting residue. 1071 start_res = sheet_res[0] 1072 1073 # No sheet left. 1074 if len(sheet_res) == 0: 1075 return None 1076 1077 # Trim backwards. 1078 for res_num in trim_res_list_rev: 1079 if res_num == end_res: 1080 sheet_res.pop(-1) 1081 end_res = sheet_res[-1] 1082 1083 # Replace the starting and ending residues. 1084 if start_res != sheet[5]: 1085 sheet[5] = start_res 1086 sheet[3] = res_data[start_res] 1087 if end_res != sheet[9]: 1088 sheet[9] = end_res 1089 sheet[7] = res_data[end_res] 1090 1091 # Return the modified sheet. 1092 return sheet

1093 1094

1095 - def add_atom(self, mol_name=None, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None, sort=False):

1096 """Add a new atom to the structural data object. 1097 1098 @keyword mol_name: The name of the molecule. 1099 @type mol_name: str 1100 @keyword atom_name: The atom name, e.g. 'H1'. 1101 @type atom_name: str or None 1102 @keyword res_name: The residue name. 1103 @type res_name: str or None 1104 @keyword res_num: The residue number. 1105 @type res_num: int or None 1106 @keyword pos: The position vector of coordinates. If a rank-2 array is supplied, the length of the first dimension must match the number of models. 1107 @type pos: rank-1 or rank-2 array or list of float 1108 @keyword element: The element symbol. 1109 @type element: str or None 1110 @keyword atom_num: The atom number. 1111 @type atom_num: int or None 1112 @keyword chain_id: The chain identifier. 1113 @type chain_id: str or None 1114 @keyword segment_id: The segment identifier. 1115 @type segment_id: str or None 1116 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'. 1117 @type pdb_record: str or None 1118 @keyword sort: A flag which if True will cause the structural data to be sorted after adding the atom. 1119 @type sort: bool 1120 """ 1121 1122 # Add a model if not present. 1123 if len(self.structural_data) == 0: 1124 self.add_model() 1125 1126 # Check the position. 1127 if is_float(pos[0]): 1128 if len(pos) != 3: 1129 raise RelaxError("The single atomic position %s must be a 3D list." % pos) 1130 else: 1131 if len(pos) != len(self.structural_data): 1132 raise RelaxError("The %s atomic positions does not match the %s models present." % (len(pos), len(self.structural_data))) 1133 1134 # Loop over each model. 1135 for i in range(len(self.structural_data)): 1136 # Alias the model. 1137 model = self.structural_data[i] 1138 1139 # Specific molecule. 1140 mol = self.get_molecule(mol_name, model=model.num) 1141 1142 # Add the molecule, if it does not exist. 1143 if mol == None: 1144 self.add_molecule(name=mol_name) 1145 mol = self.get_molecule(mol_name, model=model.num) 1146 1147 # Split up the position if needed. 1148 if is_float(pos[0]): 1149 model_pos = pos 1150 else: 1151 model_pos = pos[i] 1152 1153 # Add the atom. 1154 mol.atom_add(atom_name=atom_name, res_name=res_name, res_num=res_num, pos=model_pos, element=element, atom_num=atom_num, chain_id=chain_id, segment_id=segment_id, pdb_record=pdb_record) 1155 1156 # Sort. 1157 if sort: 1158 mol._sort()

1159 1160

1161 - def add_coordinates(self, coord=None, mol_names=None, res_names=None, res_nums=None, atom_names=None, elements=None, set_mol_name=None, set_model_num=None):

1162 """Add a set of coordinates to the structural object. 1163 1164 @keyword coord: The array of atomic coordinates (first dimension is the atoms and the second are the coordinates). 1165 @type coord: numpy rank-2 float64 array 1166 @keyword mol_names: The list of molecule names corresponding to the first dimension of the coordinate array. 1167 @type mol_names: list of str 1168 @keyword res_names: The list of residue names corresponding to the first dimension of the coordinate array. 1169 @type res_names: list of str 1170 @keyword res_nums: The list of residue numbers corresponding to the first dimension of the coordinate array. 1171 @type res_nums: list of str 1172 @keyword atom_names: The list of atom names corresponding to the first dimension of the coordinate array. 1173 @type atom_names: list of str 1174 @keyword elements: The list of elements corresponding to the first dimension of the coordinate array. 1175 @type elements: list of str 1176 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then all molecule names must be identical and the new molecule will have the same name. 1177 @type set_mol_name: None or str 1178 @keyword set_model_num: Set the model number for the coordinates. 1179 @type set_model_num: None or int 1180 """ 1181 1182 # The new molecule name. 1183 if not set_mol_name: 1184 set_mol_name = mol_names[0] 1185 for name in mol_names: 1186 if set_mol_name != name: 1187 raise RelaxError("No unique molecule name can be found in the list %s for storing the new molecule structure.") 1188 1189 # No model set, so delete all current data. 1190 if set_mol_name == None and set_model_num == None: 1191 print("Deleting all structural data so that only the new molecule will be present.") 1192 self.delete(verbosity=0) 1193 1194 # Generate a molecule container for the new data. 1195 mol = MolContainer() 1196 1197 # Add the data. 1198 for i in range(len(coord)): 1199 mol.atom_add(atom_name=atom_names[i], res_name=res_names[i], res_num=res_nums[i], pos=coord[i], element=elements[i]) 1200 1201 # Create the structural data data structures. 1202 self.pack_structs([[mol]], orig_model_num=[None], set_model_num=[set_model_num], orig_mol_num=[[None]], set_mol_name=[set_mol_name])

1203 1204

1205 - def add_model(self, model=None, coords_from=None):

1206 """Add a new model to the store. 1207 1208 The new model will be constructed with the structural information from the other models currently present. The coords_from argument allows the atomic positions to be taken from a certain model. If this argument is not set, then the atomic positions from the first model will be used. 1209 1210 @keyword model: The number of the model to create. 1211 @type model: int or None 1212 @keyword coords_from: The model number to take the coordinates from. 1213 @type coords_from: int or None 1214 @return: The model container. 1215 @rtype: ModelContainer instance 1216 """ 1217 1218 # Check if the model currently exists. 1219 if model != None: 1220 for i in range(len(self.structural_data)): 1221 if model == self.structural_data[i].num: 1222 raise RelaxError("The model '%s' already exists." % model) 1223 1224 # Add a new model. 1225 model = self.structural_data.add_item(model_num=model) 1226 1227 # The model to duplicate. 1228 if coords_from == None: 1229 model_from = self.structural_data[0] 1230 else: 1231 for i in range(len(self.structural_data)): 1232 if self.structural_data[i].num == coords_from: 1233 model_from = self.structural_data[i] 1234 break 1235 1236 # Duplicate all data from the MolList object down. 1237 for mol_index in range(len(model_from.mol)): 1238 # Create a new molecule container. 1239 model.mol.add_item(mol_name=model_from.mol[mol_index].mol_name, mol_cont=MolContainer()) 1240 mol = model.mol[mol_index] 1241 mol_from = model_from.mol[mol_index] 1242 1243 # Loop over the atomic data. 1244 for i in range(len(mol_from.atom_num)): 1245 mol.atom_num.append(mol_from.atom_num[i]) 1246 mol.atom_name.append(mol_from.atom_name[i]) 1247 mol.bonded.append(mol_from.bonded[i]) 1248 mol.chain_id.append(mol_from.chain_id[i]) 1249 mol.element.append(mol_from.element[i]) 1250 mol.pdb_record.append(mol_from.pdb_record[i]) 1251 mol.res_name.append(mol_from.res_name[i]) 1252 mol.res_num.append(mol_from.res_num[i]) 1253 mol.seg_id.append(mol_from.seg_id[i]) 1254 mol.x.append(mol_from.x[i]) 1255 mol.y.append(mol_from.y[i]) 1256 mol.z.append(mol_from.z[i]) 1257 1258 # Return the model. 1259 return self.structural_data[-1]

1260 1261

1262 - def add_molecule(self, model_num=None, name=None):

1263 """Add a new molecule to the store. 1264 1265 @keyword model_num: The optional model to add the molecule to. If not supplied, the molecule will be added to all models. 1266 @type model_num: None or int 1267 @keyword name: The molecule identifier string. 1268 @type name: str 1269 """ 1270 1271 # Add a model if necessary. 1272 if len(self.structural_data) == 0: 1273 self.add_model() 1274 1275 # Add the molecule to each model. 1276 for model in self.model_loop(model=model_num): 1277 model.mol.add_item(mol_name=name, mol_cont=MolContainer())

1278 1279

1280 - def are_bonded(self, atom_id1=None, atom_id2=None):

1281 """Determine if two atoms are directly bonded to each other. 1282 1283 @keyword atom_id1: The molecule, residue, and atom identifier string of the first atom. 1284 @type atom_id1: str 1285 @keyword atom_id2: The molecule, residue, and atom identifier string of the second atom. 1286 @type atom_id2: str 1287 @return: True if the atoms are directly bonded. 1288 @rtype: bool 1289 """ 1290 1291 # Generate the selection objects. 1292 sel_obj1 = Selection(atom_id1) 1293 sel_obj2 = Selection(atom_id2) 1294 1295 # Build the connectivities if needed. 1296 for mol in self.structural_data[0].mol: 1297 for i in range(len(mol.atom_num)): 1298 if not len(mol.bonded[i]): 1299 self._find_bonded_atoms(i, mol, radius=2) 1300 1301 # Loop over the molecules. 1302 for mol in self.structural_data[0].mol: 1303 # Skip non-matching molecules. 1304 if not sel_obj1.contains_mol(mol.mol_name): 1305 continue 1306 if not sel_obj2.contains_mol(mol.mol_name): 1307 continue 1308 1309 # Find the first atom. 1310 index1 = None 1311 index2 = None 1312 for i in range(len(mol.atom_num)): 1313 # Matching first atom. 1314 if index1 == None and sel_obj1.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 1315 index1 = i 1316 1317 # Matching second atom. 1318 if index2 == None and sel_obj2.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 1319 index2 = i 1320 1321 # Nothing left to do. 1322 if index1 != None and index2 != None: 1323 break 1324 1325 # Connectivities exist. 1326 if index1 < len(mol.bonded): 1327 if index2 in mol.bonded[index1]: 1328 return True 1329 else: 1330 return False

1331 1332

1333 - def are_bonded_index(self, mol_index1=None, atom_index1=None, mol_index2=None, atom_index2=None):

1334 """Determine if two atoms, given as indices, are directly bonded to each other. 1335 1336 @keyword mol_index1: The molecule index of the first atom. 1337 @type mol_index1: int 1338 @keyword atom_index1: The index of the first atom. 1339 @type atom_index1: int 1340 @keyword mol_index2: The molecule index of the second atom. 1341 @type mol_index2: int 1342 @keyword atom_index2: The index of the second atom. 1343 @type atom_index2: int 1344 @return: True if the atoms are directly bonded. 1345 @rtype: bool 1346 """ 1347 1348 # Alias the molecule. 1349 mol1 = self.structural_data[0].mol[mol_index1] 1350 mol2 = self.structural_data[0].mol[mol_index2] 1351 1352 # Build the connectivities if needed. 1353 if not len(mol1.bonded[atom_index1]): 1354 self._find_bonded_atoms(atom_index1, mol1, radius=2) 1355 if not len(mol2.bonded[atom_index2]): 1356 self._find_bonded_atoms(atom_index2, mol2, radius=2) 1357 1358 # Is the second atom in the bonded list of the first? 1359 if atom_index2 in mol1.bonded[atom_index1]: 1360 return True 1361 1362 # No! 1363 return False

1364 1365

1366 - def atom_loop(self, selection=None, str_id=None, model_num=None, mol_name_flag=False, res_num_flag=False, res_name_flag=False, atom_num_flag=False, atom_name_flag=False, element_flag=False, pos_flag=False, mol_index_flag=False, index_flag=False, ave=False):

1367 """Generator function for looping over all atoms in the internal relax structural object. 1368 1369 This method should be designed as a U{generator<http://www.python.org/dev/peps/pep-0255/>}. It should loop over all atoms of the system yielding the following atomic information, if the corresponding flag is True, in tuple form: 1370 1371 1. Model number. 1372 2. Molecule name. 1373 3. Residue number. 1374 4. Residue name. 1375 5. Atom number. 1376 6. Atom name. 1377 7. The element name (its atomic symbol and optionally the isotope, e.g. 'N', 'Mg', '17O', '13C', etc). 1378 8. The position of the atom in Euclidean space. 1379 1380 1381 @keyword selection: The internal structural selection object. This is obtained by calling the selection() method with the atom ID string. 1382 @type selection: lib.structure.internal.Internal_selection instance 1383 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number. If None, then all structures will be looped over. 1384 @type str_id: str, int, or None 1385 @keyword model_num: Only loop over a specific model. 1386 @type model_num: int or None 1387 @keyword mol_name_flag: A flag which if True will cause the molecule name to be yielded. 1388 @type mol_name_flag: bool 1389 @keyword res_num_flag: A flag which if True will cause the residue number to be yielded. 1390 @type res_num_flag: bool 1391 @keyword res_name_flag: A flag which if True will cause the residue name to be yielded. 1392 @type res_name_flag: bool 1393 @keyword atom_num_flag: A flag which if True will cause the atom number to be yielded. 1394 @type atom_num_flag: bool 1395 @keyword atom_name_flag: A flag which if True will cause the atom name to be yielded. 1396 @type atom_name_flag: bool 1397 @keyword element_flag: A flag which if True will cause the element name to be yielded. 1398 @type element_flag: bool 1399 @keyword pos_flag: A flag which if True will cause the atomic position to be yielded. 1400 @type pos_flag: bool 1401 @keyword mol_index_flag: A flag which if True will cause the molecule index to be yielded. 1402 @type mol_index_flag: bool 1403 @keyword index_flag: A flag which if True will cause the atomic index to be yielded. 1404 @type index_flag: bool 1405 @keyword ave: A flag which if True will result in this method returning the average atom properties across all loaded structures. 1406 @type ave: bool 1407 @return: A tuple of atomic information, as described in the docstring. 1408 @rtype: tuple consisting of optional molecule name (str), residue number (int), residue name (str), atom number (int), atom name(str), element name (str), and atomic position (array of len 3). 1409 """ 1410 1411 # Check that the structure is loaded. 1412 if not len(self.structural_data): 1413 raise RelaxNoPdbError 1414 1415 # Obtain all data from the first model (except the position data). 1416 model = self.structural_data[0] 1417 1418 # Loop over all molecules and atoms in the selection. 1419 for mol_index, i in selection.loop(): 1420 mol = model.mol[mol_index] 1421 1422 # Initialise. 1423 res_num = mol.res_num[i] 1424 res_name = mol.res_name[i] 1425 atom_num = mol.atom_num[i] 1426 atom_name = mol.atom_name[i] 1427 element = mol.element[i] 1428 1429 # The atom position. 1430 if pos_flag: 1431 # Average the position. 1432 if ave: 1433 # Initialise. 1434 pos = zeros(3, float64) 1435 1436 # Loop over the models. 1437 for model in self.model_loop(model=model_num): 1438 # Alias. 1439 mol2 = model.mol[mol_index] 1440 1441 # Some sanity checks. 1442 if mol2.atom_num[i] != atom_num: 1443 raise RelaxError("The loaded structures do not contain the same atoms. The average structural properties can not be calculated.") 1444 1445 # Sum the atom positions. 1446 pos = pos + array([mol2.x[i], mol2.y[i], mol2.z[i]], float64) 1447 1448 # Average the position array (divide by the number of models). 1449 pos = pos / len(self.structural_data) 1450 1451 # All positions. 1452 else: 1453 # Initialise. 1454 pos = [] 1455 1456 # Loop over the models. 1457 for model in self.model_loop(model=model_num): 1458 # Alias. 1459 mol2 = model.mol[mol_index] 1460 1461 # Append the position. 1462 pos.append([mol2.x[i], mol2.y[i], mol2.z[i]]) 1463 1464 # Convert. 1465 pos = array(pos, float64) 1466 1467 # The molecule name. 1468 mol_name = mol.mol_name 1469 1470 # Build the tuple to be yielded. 1471 atomic_tuple = () 1472 if mol_name_flag: 1473 atomic_tuple = atomic_tuple + (mol_name,) 1474 if res_num_flag: 1475 atomic_tuple = atomic_tuple + (res_num,) 1476 if res_name_flag: 1477 atomic_tuple = atomic_tuple + (res_name,) 1478 if atom_num_flag: 1479 atomic_tuple = atomic_tuple + (atom_num,) 1480 if atom_name_flag: 1481 atomic_tuple = atomic_tuple + (atom_name,) 1482 if element_flag: 1483 atomic_tuple = atomic_tuple + (element,) 1484 if pos_flag: 1485 atomic_tuple = atomic_tuple + (pos,) 1486 if mol_index_flag: 1487 atomic_tuple += (mol_index,) 1488 if index_flag: 1489 atomic_tuple += (i,) 1490 1491 # Yield the information. 1492 if len(atomic_tuple) == 1: 1493 atomic_tuple = atomic_tuple[0] 1494 yield atomic_tuple

1495 1496

1497 - def bond_vectors(self, attached_atom=None, model_num=None, mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, return_name=False, return_warnings=False):

1498 """Find the bond vector between the atoms of 'attached_atom' and 'atom_id'. 1499 1500 @keyword attached_atom: The name of the bonded atom. 1501 @type attached_atom: str 1502 @keyword model_num: The model of which to return the vectors from. If not supplied and multiple models exist, then vectors from all models will be returned. 1503 @type model_num: None or int 1504 @keyword mol_name: The name of the molecule that attached_atom belongs to. 1505 @type mol_name: str 1506 @keyword res_num: The number of the residue that attached_atom belongs to. 1507 @type res_num: str 1508 @keyword res_name: The name of the residue that attached_atom belongs to. 1509 @type res_name: str 1510 @keyword spin_num: The number of the spin that attached_atom is attached to. 1511 @type spin_num: str 1512 @keyword spin_name: The name of the spin that attached_atom is attached to. 1513 @type spin_name: str 1514 @keyword return_name: A flag which if True will cause the name of the attached atom to be returned together with the bond vectors. 1515 @type return_name: bool 1516 @keyword return_warnings: A flag which if True will cause warning messages to be returned. 1517 @type return_warnings: bool 1518 @return: The list of bond vectors for each model. 1519 @rtype: list of numpy arrays (or a tuple if return_name or return_warnings are set) 1520 """ 1521 1522 # Initialise some objects. 1523 vectors = [] 1524 attached_name = None 1525 warnings = None 1526 1527 # Use the first model for the atom matching. 1528 model = self.structural_data[0] 1529 1530 # Loop over the molecules. 1531 for mol_index in range(len(model.mol)): 1532 # Alias. 1533 mol = model.mol[mol_index] 1534 1535 # Skip non-matching molecules. 1536 if mol_name and mol_name != mol.mol_name: 1537 continue 1538 1539 # Find the atomic index of the base atom. 1540 index = None 1541 for i in range(len(mol.atom_name)): 1542 # Residues don't match. 1543 if (res_num != None and mol.res_num[i] != res_num) or (res_name != None and mol.res_name[i] != res_name): 1544 continue 1545 1546 # Atoms don't match. 1547 if (spin_num != None and mol.atom_num[i] != spin_num) or (spin_name != None and mol.atom_name[i] != spin_name): 1548 continue 1549 1550 # Update the index and stop searching. 1551 index = i 1552 break 1553 1554 # Found the atom. 1555 if index != None: 1556 # Loop over the models. 1557 for model in self.model_loop(model=model_num): 1558 # Alias. 1559 mol = model.mol[mol_index] 1560 1561 # Get the atom bonded to this model/molecule/residue/atom. 1562 bonded_num, bonded_name, element, pos, attached_name, warnings = self._bonded_atom(attached_atom, index, mol) 1563 1564 # No bonded atom. 1565 if (bonded_num, bonded_name, element) == (None, None, None): 1566 continue 1567 1568 # The bond vector. 1569 vector = array(pos, float64) - array([mol.x[index], mol.y[index], mol.z[index]], float64) 1570 1571 # Append the vector to the vectors array. 1572 vectors.append(vector) 1573 1574 # Not found. 1575 else: 1576 warnings = "Cannot find the atom in the structure" 1577 1578 # Build the tuple to be yielded. 1579 data = (vectors,) 1580 if return_name: 1581 data = data + (attached_name,) 1582 if return_warnings: 1583 data = data + (warnings,) 1584 1585 # Return the data. 1586 return data

1587 1588

1589 - def collapse_ensemble(self, model_num=None, model_to=1):

1590 """Collapse the ensemble into a single model. 1591 1592 @keyword model_num: The number of the model to keep. All other models will be removed. 1593 @type model_num: int 1594 @keyword model_to: The model number for the sole remaining model. 1595 @type model_to: int 1596 """ 1597 1598 # Store all the model numbers. 1599 models = [] 1600 for model_cont in self.model_loop(): 1601 if model_cont.num != model_num: 1602 models.append(model_cont.num) 1603 1604 # Delete all models. 1605 for model in models: 1606 self.delete(model) 1607 1608 # Renumber the remaining model. 1609 self.set_model(model_orig=model_num, model_new=model_to)

1610 1611

1612 - def connect_atom(self, mol_name=None, index1=None, index2=None):

1613 """Connect two atoms in the structural data object. 1614 1615 @keyword mol_name: The name of the molecule. 1616 @type mol_name: str 1617 @keyword index1: The global index of the first atom. 1618 @type index1: str 1619 @keyword index2: The global index of the first atom. 1620 @type index2: str 1621 """ 1622 1623 # Add the molecule, if it does not exist. 1624 if self.get_molecule(mol_name) == None: 1625 self.add_molecule(name=mol_name) 1626 1627 # Loop over each model. 1628 for model in self.structural_data: 1629 # Specific molecule. 1630 mol = self.get_molecule(mol_name) 1631 1632 # Add the atom. 1633 mol.atom_connect(index1=index1, index2=index2)

1634 1635

1636 - def delete(self, model=None, selection=None, verbosity=1):

1637 """Deletion of structural information. 1638 1639 @keyword model: Individual structural models from a loaded ensemble can be deleted by specifying the model number. 1640 @type model: None or int 1641 @keyword selection: The internal structural selection object. This is obtained by calling the selection() method with the atom ID string. 1642 @type selection: lib.structure.internal.Internal_selection instance 1643 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 1644 @type verbosity: int 1645 """ 1646 1647 # All data. 1648 if model == None and selection == None: 1649 # Printout. 1650 if verbosity: 1651 print("Deleting the following structural data:\n") 1652 print(self.structural_data) 1653 1654 # Delete the structural data. 1655 del self.structural_data 1656 1657 # Initialise the empty model list. 1658 self.structural_data = ModelList() 1659 1660 # Delete a whole model. 1661 elif selection == None: 1662 self.structural_data.delete_model(model_num=model) 1663 1664 # Atom subset deletion. 1665 else: 1666 # Loop over the atoms and find the indices of the atoms to delete. 1667 indices = {} 1668 for mol_index, i in selection.loop(): 1669 if mol_index not in indices: 1670 indices[mol_index] = [] 1671 indices[mol_index].append(i) 1672 for mol_index in indices: 1673 indices[mol_index].reverse() 1674 1675 # Loop over the models. 1676 del_res_nums = [] 1677 for model_cont in self.model_loop(): 1678 # Skip models. 1679 if model != None and model_cont.num == model: 1680 continue 1681 1682 # Loop over the molecules. 1683 for mol_index in indices: 1684 mol = model_cont.mol[mol_index] 1685 1686 # Generate a residue data dictionary for the metadata trimming (prior to atom deletion). 1687 res_data = self._residue_data(res_nums=mol.res_num, res_names=mol.res_name) 1688 1689 # Loop over the reverse indices and pop out the data. 1690 for i in indices[mol_index]: 1691 mol.atom_num.pop(i) 1692 mol.atom_name.pop(i) 1693 mol.bonded.pop(i) 1694 mol.chain_id.pop(i) 1695 mol.element.pop(i) 1696 mol.pdb_record.pop(i) 1697 mol.res_name.pop(i) 1698 res_num = mol.res_num.pop(i) 1699 mol.seg_id.pop(i) 1700 mol.x.pop(i) 1701 mol.y.pop(i) 1702 mol.z.pop(i) 1703 1704 # The residue no longer exists. 1705 if res_num not in mol.res_num and res_num not in del_res_nums: 1706 del_res_nums.append(res_num) 1707 1708 # Second atom of the bonded pair. 1709 for j in range(len(mol.bonded)): 1710 if i in mol.bonded[j]: 1711 mol.bonded[j].pop(mol.bonded[j].index(i)) 1712 1713 # Update the bonded lists, as the indices need to be shifted. 1714 for j in range(i, len(mol.bonded)): 1715 for k in range(len(mol.bonded[j])): 1716 mol.bonded[j][k] -= 1 1717 1718 # Reset the metadata if nothing remains. 1719 if mol.atom_num == []: 1720 if hasattr(mol, 'file_name'): 1721 del mol.file_name 1722 if hasattr(mol, 'file_path'): 1723 del mol.file_path 1724 if hasattr(mol, 'file_mol_num'): 1725 del mol.file_mol_num 1726 if hasattr(mol, 'file_model'): 1727 del mol.file_model 1728 1729 # Nothing more to do. 1730 if not len(del_res_nums): 1731 return 1732 if model != None and len(self.structural_data) > 1: 1733 return 1734 1735 # Fix the deleted residue number order. 1736 del_res_nums.reverse() 1737 1738 # Handle the helix metadata. 1739 if hasattr(self, 'helices'): 1740 del_helix_indices = [] 1741 for i in range(len(self.helices)): 1742 # Trim the helix. 1743 helix = self._trim_helix(helix=self.helices[i], trim_res_list=del_res_nums, res_data=res_data) 1744 1745 # Trimmed helix. 1746 if helix != None: 1747 self.helices[i] = helix 1748 1749 # No helix left. 1750 else: 1751 del_helix_indices.append(i) 1752 1753 # Loop over the reverse helix indices and pop out the data. 1754 del_helix_indices.reverse() 1755 for i in del_helix_indices: 1756 self.helices.pop(i) 1757 1758 # Handle the sheet metadata. 1759 if hasattr(self, 'sheets'): 1760 del_sheet_indices = [] 1761 for i in range(len(self.sheets)): 1762 # Trim the sheet. 1763 sheet = self._trim_sheet(sheet=self.sheets[i], trim_res_list=del_res_nums, res_data=res_data) 1764 1765 # Trimmed sheet. 1766 if sheet != None: 1767 self.sheets[i] = sheet 1768 1769 # No sheet left. 1770 else: 1771 del_sheet_indices.append(i) 1772 1773 # Loop over the reverse sheet indices and pop out the data. 1774 del_sheet_indices.reverse() 1775 for i in del_sheet_indices: 1776 self.sheets.pop(i)

1777 1778

1779 - def empty(self):

1780 """Report if the structural data structure is empty or not. 1781 1782 @return: True if empty, False otherwise. 1783 @rtype: bool 1784 """ 1785 1786 # Check the ModelList structure. 1787 if len(self.structural_data) == 0: 1788 return True 1789 else: 1790 return False

1791 1792

1793 - def from_xml(self, str_node, dir=None, file_version=1):

1794 """Recreate the structural object from the XML structural object node. 1795 1796 @param str_node: The structural object XML node. 1797 @type str_node: xml.dom.minicompat.Element instance 1798 @keyword dir: The name of the directory containing the results file. 1799 @type dir: str 1800 @keyword file_version: The relax XML version of the XML file. 1801 @type file_version: int 1802 """ 1803 1804 # Recreate all base objects (i.e. metadata). 1805 xml_to_object(str_node, self, file_version=file_version, blacklist=['model', 'displacements']) 1806 1807 # Recreate the model / molecule data structure. 1808 model_nodes = str_node.getElementsByTagName('model') 1809 self.structural_data.from_xml(model_nodes, file_version=file_version) 1810 1811 # The displacement structure. 1812 disp_nodes = str_node.getElementsByTagName('displacements') 1813 if len(disp_nodes): 1814 # Initialise the object. 1815 self.displacements = Displacements() 1816 1817 # Recreate the molecule data structures for the current model. 1818 self.displacements.from_xml(disp_nodes[0], file_version=file_version)

1819 1820

1821 - def get_model(self, model):

1822 """Return or create the model. 1823 1824 @param model: The model number. 1825 @type model: int or None 1826 @return: The ModelContainer corresponding to the model number or that newly created. 1827 @rtype: ModelContainer instance 1828 """ 1829 1830 # Check if the target is a single model. 1831 if model == None and self.num_models() > 1: 1832 raise RelaxError("The target model cannot be determined as there are %s models already present." % self.num_modes()) 1833 1834 # No model specified. 1835 if model == None: 1836 # Create the first model, if necessary. 1837 if self.num_models(): 1838 self.structural_data.add_item() 1839 1840 # Alias the first model. 1841 model_cont = self.structural_data[0] 1842 1843 # The model has been specified. 1844 else: 1845 # Get the preexisting model. 1846 found = False 1847 for model_cont in self.structural_data: 1848 if model_cont.num == model: 1849 found = True 1850 break 1851 1852 # Add the model if it doesn't exist. 1853 if not found: 1854 self.structural_data.add_item(model) 1855 model_cont = self.structural_data[-1] 1856 1857 # Return the container. 1858 return model_cont

1859 1860

1861 - def get_molecule(self, molecule, model=None):

1862 """Return the molecule. 1863 1864 Only one model can be specified. 1865 1866 1867 @param molecule: The molecule name. 1868 @type molecule: int or None 1869 @keyword model: The model number. 1870 @type model: int or None 1871 @raises RelaxError: If the model is not specified and there is more than one model loaded. 1872 @return: The MolContainer corresponding to the molecule name and model number. 1873 @rtype: MolContainer instance or None 1874 """ 1875 1876 # Check if the target is a single molecule. 1877 if model == None and self.num_models() > 1: 1878 raise RelaxError("The target molecule cannot be determined as there are %s models already present." % self.num_models()) 1879 1880 # Check the model argument. 1881 if not isinstance(model, int) and not model == None: 1882 raise RelaxNoneIntError 1883 1884 # No models. 1885 if not len(self.structural_data): 1886 return 1887 1888 # Loop over the models. 1889 for model_cont in self.model_loop(model): 1890 # Loop over the molecules. 1891 for mol in model_cont.mol: 1892 # Return the matching molecule. 1893 if mol.mol_name == molecule: 1894 return mol

1895 1896

1897 - def has_molecule(self, model_num=None, name=None):

1898 """Check if the molecule name exists. 1899 1900 @keyword model_num: The optional model to check. If not supplied, the molecule will be searched for across all models. 1901 @type model_num: None or int 1902 @param name: The molecule name. 1903 @type name: str 1904 @return: True if the molecule exists, False otherwise. 1905 @rtype: bool 1906 """ 1907 1908 # No models. 1909 if not len(self.structural_data): 1910 return False 1911 1912 # Loop over the models. 1913 for model_cont in self.model_loop(model=model_num): 1914 # Loop over the molecules. 1915 for mol in model_cont.mol: 1916 # Matching molecule. 1917 if mol.mol_name == name: 1918 return True 1919 1920 # No match. 1921 return False

1922 1923

1924 - def load_gaussian(self, file_path, set_mol_name=None, set_model_num=None, verbosity=False):

1925 """Method for loading structures from a Gaussian log file. 1926 1927 @param file_path: The full path of the Gaussian log file. 1928 @type file_path: str 1929 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 1930 @type set_mol_name: None, str, or list of str 1931 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the Gaussian log model numbers will be preserved, if they exist. 1932 @type set_model_num: None, int, or list of int 1933 @keyword verbosity: A flag which if True will cause messages to be printed. 1934 @type verbosity: bool 1935 @return: The status of the loading of the Gaussian log file. 1936 @rtype: bool 1937 """ 1938 1939 # Initial printout. 1940 if verbosity: 1941 print("\nInternal relax Gaussian log parser.") 1942 1943 # Test if the file exists. 1944 if not access(file_path, F_OK): 1945 # Exit indicating failure. 1946 return False 1947 1948 # Separate the file name and path. 1949 path, file_name = os.path.split(file_path) 1950 1951 # The absolute path. 1952 path_abs = abspath(curdir) + sep + path 1953 1954 # Set up the molecule name data structure. 1955 if set_mol_name: 1956 if not isinstance(set_mol_name, list): 1957 set_mol_name = [set_mol_name] 1958 else: 1959 set_mol_name = [file_root(file_name) + '_mol1'] 1960 1961 # Set up the model number data structure. 1962 if set_model_num: 1963 if not isinstance(set_model_num, list): 1964 set_model_num = [set_model_num] 1965 else: 1966 set_model_num = [1] 1967 1968 # Loop over all models in the Gaussian log file, doing nothing so the last model records are stored. 1969 for model_records in self._parse_models_gaussian(file_path, verbosity=verbosity): 1970 pass 1971 1972 # Generate the molecule container. 1973 mol = MolContainer() 1974 1975 # Fill the molecular data object. 1976 mol.fill_object_from_gaussian(model_records) 1977 1978 # Create the structural data data structures. 1979 self.pack_structs([[mol]], orig_model_num=[1], set_model_num=set_model_num, orig_mol_num=[[0]], set_mol_name=set_mol_name, file_name=file_name, file_path=path, file_path_abs=path_abs, verbosity=verbosity) 1980 1981 # Loading worked. 1982 return True

1983 1984

1985 - def load_pdb(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, alt_loc=None, verbosity=False, merge=False):

1986 """Method for loading structures from a PDB file. 1987 1988 @param file_path: The full path of the PDB file. 1989 @type file_path: str 1990 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are determined differently by the different parsers, but are numbered consecutively from 1. If set to None, then all molecules will be loaded. 1991 @type read_mol: None, int, or list of int 1992 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 1993 @type set_mol_name: None, str, or list of str 1994 @keyword read_model: The PDB model to extract from the file. If set to None, then all models will be loaded. 1995 @type read_model: None, int, or list of int 1996 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the PDB model numbers will be preserved, if they exist. 1997 @type set_model_num: None, int, or list of int 1998 @keyword alt_loc: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use. 1999 @type alt_loc: str or None 2000 @keyword verbosity: A flag which if True will cause messages to be printed. 2001 @type verbosity: bool 2002 @keyword merge: A flag which if set to True will try to merge the PDB structure into the currently loaded structures. 2003 @type merge: bool 2004 @return: The status of the loading of the PDB file. 2005 @rtype: bool 2006 """ 2007 2008 # Initial printout. 2009 if verbosity: 2010 print("\nInternal relax PDB parser.") 2011 2012 # Test if the file exists. 2013 if not access(file_path, F_OK): 2014 # Exit indicating failure. 2015 return False 2016 2017 # Separate the file name and path. 2018 path, file = os.path.split(file_path) 2019 2020 # The absolute path. 2021 path_abs = abspath(curdir) + sep + path 2022 2023 # Convert the structure reading args into lists. 2024 if read_mol and not isinstance(read_mol, list): 2025 read_mol = [read_mol] 2026 if set_mol_name and not isinstance(set_mol_name, list): 2027 set_mol_name = [set_mol_name] 2028 if read_mol: 2029 set_mol_name *= len(read_mol) 2030 if read_model and not isinstance(read_model, list): 2031 read_model = [read_model] 2032 if set_model_num and not isinstance(set_model_num, list): 2033 set_model_num = [set_model_num] 2034 if read_model: 2035 set_model_num *= len(read_model) 2036 2037 # Open the PDB file. 2038 pdb_file = open_read_file(file_path, verbosity=verbosity) 2039 pdb_lines = pdb_file.readlines() 2040 pdb_file.close() 2041 2042 # Check for empty files. 2043 if pdb_lines == []: 2044 raise RelaxError("The PDB file is empty.") 2045 2046 # Pre-process the lines, fixing PDB violations. 2047 pdb_lines = self._validate_records(pdb_lines) 2048 2049 # Secondary structure data. 2050 helices = [] 2051 sheets = [] 2052 2053 # Process the different sections. 2054 pdb_lines = self._parse_pdb_title(pdb_lines) 2055 pdb_lines = self._parse_pdb_prim_struct(pdb_lines) 2056 pdb_lines = self._parse_pdb_hetrogen(pdb_lines) 2057 pdb_lines = self._parse_pdb_ss(pdb_lines, read_mol=read_mol, helices=helices, sheets=sheets) 2058 pdb_lines = self._parse_pdb_connectivity_annotation(pdb_lines) 2059 pdb_lines = self._parse_pdb_misc(pdb_lines) 2060 pdb_lines = self._parse_pdb_transform(pdb_lines) 2061 2062 # Loop over all models in the PDB file. 2063 model_index = 0 2064 orig_model_num = [] 2065 mol_conts = [] 2066 orig_mol_num = [] 2067 for model_num, model_records in self._parse_pdb_coord(pdb_lines): 2068 # Only load the desired model. 2069 if read_model and model_num not in read_model: 2070 continue 2071 2072 # Store the original model number. 2073 orig_model_num.append(model_num) 2074 2075 # Loop over the molecules of the model. 2076 mol_conts.append([]) 2077 orig_mol_num.append([]) 2078 mol_index = 0 2079 new_mol_name = [] 2080 for mol_num, mol_records in self._parse_mols_pdb(model_records): 2081 # Only load the desired model. 2082 if read_mol and mol_num not in read_mol: 2083 continue 2084 2085 # Set the target molecule name. 2086 if set_mol_name: 2087 # Single name. 2088 if len(set_mol_name) == 1: 2089 new_mol_name.append(set_mol_name[0]) 2090 2091 # Multiple names. 2092 else: 2093 new_mol_name.append(set_mol_name[mol_index]) 2094 2095 # Auto-generated molecule name. 2096 else: 2097 # Number of structures already present for the model. 2098 num_struct = 0 2099 if self.structural_data != None and len(self.structural_data) and (not set_model_num or (model_index <= len(set_model_num) and set_model_num[model_index] == self.structural_data[0].num)): 2100 num_struct = len(self.structural_data[0].mol) 2101 2102 # Set the name to the file name plus the structure number. 2103 new_mol_name.append(file_root(file) + '_mol' + repr(mol_num+num_struct)) 2104 2105 # Generate the molecule container. 2106 mol = MolContainer() 2107 2108 # Fill the molecular data object. 2109 mol.fill_object_from_pdb(mol_records, alt_loc_select=alt_loc) 2110 2111 # Store the molecule container. 2112 mol_conts[model_index].append(mol) 2113 2114 # Store the original molecule number. 2115 orig_mol_num[model_index].append(mol_num) 2116 2117 # Increment the molecule index. 2118 mol_index = mol_index + 1 2119 2120 # Increment the model index. 2121 model_index = model_index + 1 2122 2123 # No data, so throw a warning and exit. 2124 if not len(mol_conts): 2125 warn(RelaxWarning("No structural data could be read from the file '%s'." % file_path)) 2126 return False 2127 2128 # Create the structural data data structures. 2129 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=orig_mol_num, set_mol_name=new_mol_name, file_name=file, file_path=path, file_path_abs=path_abs, merge=merge, helices=helices, sheets=sheets, verbosity=verbosity) 2130 2131 # Loading worked. 2132 return True

2133 2134

2135 - def load_xyz(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, verbosity=False):

2136 """Method for loading structures from a XYZ file. 2137 2138 @param file_path: The full path of the XYZ file. 2139 @type file_path: str 2140 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are determined differently by the different parsers, but are numbered consecutively from 1. If set to None, then all molecules will be loaded. 2141 @type read_mol: None, int, or list of int 2142 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 2143 @type set_mol_name: None, str, or list of str 2144 @keyword read_model: The XYZ model to extract from the file. If set to None, then all models will be loaded. 2145 @type read_model: None, int, or list of int 2146 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the XYZ model numbers will be preserved, if they exist. 2147 @type set_model_num: None, int, or list of int 2148 @keyword verbosity: A flag which if True will cause messages to be printed. 2149 @type verbosity: bool 2150 @return: The status of the loading of the XYZ file. 2151 @rtype: bool 2152 """ 2153 2154 # Initial printout. 2155 if verbosity: 2156 print("\nInternal relax XYZ parser.") 2157 2158 # Test if the file exists. 2159 if not access(file_path, F_OK): 2160 # Exit indicating failure. 2161 return False 2162 2163 # Separate the file name and path. 2164 path, file = os.path.split(file_path) 2165 2166 # The absolute path. 2167 path_abs = abspath(curdir) + sep + path 2168 2169 # Convert the structure reading args into lists. 2170 if read_mol and not isinstance(read_mol, list): 2171 read_mol = [read_mol] 2172 if set_mol_name and not isinstance(set_mol_name, list): 2173 set_mol_name = [set_mol_name] 2174 if read_model and not isinstance(read_model, list): 2175 read_model = [read_model] 2176 if set_model_num and not isinstance(set_model_num, list): 2177 set_model_num = [set_model_num] 2178 2179 # Loop over all models in the XYZ file. 2180 mol_index = 0 2181 model_index = 0 2182 xyz_model_increment = 0 2183 orig_model_num = [] 2184 mol_conts = [] 2185 orig_mol_num = [] 2186 new_mol_name = [] 2187 for model_records in self._parse_models_xyz(file_path, verbosity=verbosity): 2188 # Increment the xyz_model_increment 2189 xyz_model_increment = xyz_model_increment +1 2190 2191 # Only load the desired model. 2192 if read_model and xyz_model_increment not in read_model: 2193 continue 2194 2195 # Store the original model number. 2196 orig_model_num.append(model_index) 2197 2198 # Loop over the molecules of the model. 2199 if read_mol and mol_index not in read_mol: 2200 continue 2201 2202 # Set the target molecule name. 2203 if set_mol_name: 2204 new_mol_name.append(set_mol_name[mol_index]) 2205 else: 2206 if mol_index == 0: 2207 #Set the name to the file name plus the structure number. 2208 new_mol_name.append(file_root(file) + '_mol' + repr(mol_index+1)) 2209 2210 # Store the original mol number. 2211 orig_mol_num.append(mol_index) 2212 2213 # Generate the molecule container. 2214 mol = MolContainer() 2215 2216 # Fill the molecular data object. 2217 mol.fill_object_from_xyz(model_records) 2218 2219 # Store the molecule container. 2220 mol_conts.append([]) 2221 mol_conts[model_index].append(mol) 2222 2223 # Increment the molecule index. 2224 mol_index = mol_index + 1 2225 2226 # Increment the model index. 2227 model_index = model_index + 1 2228 2229 # Create the structural data data structures. 2230 orig_mol_num = [[0]] 2231 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=orig_mol_num, set_mol_name=new_mol_name, file_name=file, file_path=path, file_path_abs=path_abs, verbosity=verbosity) 2232 2233 # Loading worked. 2234 return True

2235 2236

2237 - def mean(self):

2238 """Calculate the mean structure from all models in the structural data object.""" 2239 2240 # Create a new model for the mean structure. 2241 num = self.num_models() 2242 self.add_model() 2243 mean_model = self.structural_data[-1] 2244 2245 # The selection object. 2246 selection = self.selection() 2247 2248 # Loop over the molecules and atoms. 2249 for mol_index, i in selection.loop(): 2250 # Set the mean structure coordinate to zero. 2251 mean_model.mol[mol_index].x[i] = 0.0 2252 mean_model.mol[mol_index].y[i] = 0.0 2253 mean_model.mol[mol_index].z[i] = 0.0 2254 2255 # Loop over the models and sum the coordinates. 2256 for model_index in range(num): 2257 model_cont = self.structural_data[model_index] 2258 mean_model.mol[mol_index].x[i] += model_cont.mol[mol_index].x[i] 2259 mean_model.mol[mol_index].y[i] += model_cont.mol[mol_index].y[i] 2260 mean_model.mol[mol_index].z[i] += model_cont.mol[mol_index].z[i] 2261 2262 # Averages. 2263 mean_model.mol[mol_index].x[i] /= num 2264 mean_model.mol[mol_index].y[i] /= num 2265 mean_model.mol[mol_index].z[i] /= num 2266 2267 # Delete all models but the mean. 2268 for model_index in reversed(list(range(num))): 2269 self.delete(model=self.structural_data[model_index].num)

2270 2271

2272 - def model_list(self):

2273 """Create a list of all models. 2274 2275 @return: The list of all models. 2276 @rtype: list of int 2277 """ 2278 2279 # Assemble the list. 2280 models = [] 2281 for model in self.model_loop(): 2282 models.append(model.num) 2283 2284 # Return the list. 2285 return models

2286 2287

2288 - def model_loop(self, model=None):

2289 """Generator method for looping over the models in numerical order. 2290 2291 @keyword model: Limit the loop to a single number. 2292 @type model: int 2293 @return: The model structural object. 2294 @rtype: ModelContainer container 2295 """ 2296 2297 # A single model. 2298 if model: 2299 for i in range(len(self.structural_data)): 2300 if self.structural_data[i].num == model: 2301 yield self.structural_data[i] 2302 2303 # All models. 2304 else: 2305 # The models. 2306 model_nums = [] 2307 for i in range(len(self.structural_data)): 2308 if self.structural_data[i].num != None: 2309 model_nums.append(self.structural_data[i].num) 2310 2311 # Sort. 2312 if model_nums: 2313 model_nums.sort() 2314 2315 # Loop over the models in order. 2316 for model_num in model_nums: 2317 # Find the model. 2318 for i in range(len(self.structural_data)): 2319 # Yield the model. 2320 if self.structural_data[i].num == model_num: 2321 yield self.structural_data[i] 2322 2323 # No models, so just yield the single container. 2324 if not model_nums: 2325 yield self.structural_data[0]

2326 2327

2328 - def num_models(self):

2329 """Method for returning the number of models. 2330 2331 @return: The number of models in the structural object. 2332 @rtype: int 2333 """ 2334 2335 return len(self.structural_data)

2336 2337

2338 - def num_molecules(self):

2339 """Method for returning the number of molecules. 2340 2341 @return: The number of molecules in the structural object. 2342 @rtype: int 2343 """ 2344 2345 # No data. 2346 if self.empty(): 2347 return 0 2348 2349 # Validate the structural object. 2350 self.validate() 2351 2352 # Return the number. 2353 return len(self.structural_data[0].mol)

2354 2355

2356 - def one_letter_codes(self, mol_name=None, selection=None):

2357 """Generate and return the one letter code sequence for the given molecule. 2358 2359 @keyword mol_name: The name of the molecule to return the one letter codes for. 2360 @type mol_name: str 2361 @keyword selection: The internal structural selection object. This is obtained by calling the selection() method with the atom ID string. 2362 @type selection: lib.structure.internal.Internal_selection instance 2363 @return: The one letter code sequence for the given molecule. 2364 @rtype: str 2365 """ 2366 2367 # Initialise. 2368 codes = '' 2369 2370 # Use the first model. 2371 model = self.structural_data[0] 2372 2373 # Residue numbers. 2374 res_nums = [] 2375 2376 # Loop over the molecules. 2377 for mol_index, i in selection.loop(): 2378 # Alias. 2379 mol = model.mol[mol_index] 2380 2381 # Skip non-matching molecules. 2382 if mol_name and mol_name != mol.mol_name: 2383 continue 2384 2385 # Not a new residue. 2386 if mol.res_num[i] in res_nums: 2387 continue 2388 2389 # Convert to the one letter code and store the residue number. 2390 codes += aa_codes_three_to_one(mol.res_name[i]) 2391 res_nums.append(mol.res_num[i]) 2392 2393 # Return the codes. 2394 return codes

2395 2396

2397 - def pack_structs(self, data_matrix, orig_model_num=None, set_model_num=None, orig_mol_num=None, set_mol_name=None, file_name=None, file_path=None, file_path_abs=None, helices=None, sheets=None, verbosity=1, merge=False):

2398 """From the given structural data, expand the structural data data structure. 2399 2400 @param data_matrix: A matrix of structural objects. The first dimension is the models, the second is the molecules. 2401 @type data_matrix: list of lists of structural objects 2402 @keyword orig_model_num: The original model numbers (for storage). 2403 @type orig_model_num: list of int 2404 @keyword set_model_num: The new model numbers (for model renumbering). 2405 @type set_model_num: list of int 2406 @keyword orig_mol_num: The original molecule numbers (for storage). The dimensions match the data matrix. 2407 @type orig_mol_num: list of list of int 2408 @keyword set_mol_name: The new molecule names. 2409 @type set_mol_name: list of str 2410 @keyword file_name: The name of the file from which the molecular data has been extracted. 2411 @type file_name: None or str 2412 @keyword file_path: The full path to the file specified by 'file_name'. 2413 @type file_path: None or str 2414 @keyword file_path_abs: The absolute path to the file specified by 'file_name'. This is a fallback mechanism in case results or save files are located somewhere other than the working directory. 2415 @type file_path_abs: None or str 2416 @keyword helices: The helix secondary structure information to store. The first dimension corresponds to each helix element. The second dimension consists of the helix_id, mol_init_index, init_res_name, init_seq_num, mol_end_index, end_res_name, end_seq_num, helix_class, and length. 2417 @type helices: list of lists 2418 @keyword sheets: The sheet secondary structure information to store. 2419 @type sheets: list of lists 2420 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 2421 @type verbosity: int 2422 @keyword merge: A flag which if set to True will try to merge the structure into the currently loaded structures. 2423 @type merge: bool 2424 """ 2425 2426 # Test the number of models. 2427 if len(orig_model_num) != len(data_matrix): 2428 raise RelaxError("Structural data mismatch, %s original models verses %s in the structural data." % (len(orig_model_num), len(data_matrix))) 2429 2430 # Test the number of molecules. 2431 for i in range(len(data_matrix)): 2432 if len(orig_mol_num[i]) != len(data_matrix[i]): 2433 raise RelaxError("Structural data mismatch, %s original molecules verses %s in the structural data." % (len(orig_mol_num[i]), len(data_matrix[i]))) 2434 2435 # Model numbers do not change. 2436 if not set_model_num: 2437 set_model_num = orig_model_num 2438 2439 # Test the model mapping. 2440 if len(set_model_num) != len(data_matrix): 2441 raise RelaxError("Failure of the mapping of new model numbers, %s new model numbers verses %s models in the structural data." % (len(set_model_num), len(data_matrix))) 2442 2443 # Test the molecule mapping. 2444 if len(set_mol_name) != len(data_matrix[0]): 2445 raise RelaxError("Failure of the mapping of new molecule names, %s new molecule names verses %s molecules in the structural data." % (len(set_mol_name), len(data_matrix[0]))) 2446 2447 # Test that the target models and structures are absent, and get the already existing model numbers. 2448 for i in range(len(set_model_num)): 2449 # Merging flag is set, so skip the checks. 2450 if merge: 2451 continue 2452 2453 # A new model, so no need to check. 2454 if not set_model_num[i] in self.structural_data.current_models: 2455 continue 2456 2457 # Loop over the structures. 2458 index = self.structural_data.current_models.index(set_model_num[i]) 2459 for j in range(len(self.structural_data[index].mol)): 2460 if self.structural_data[index].mol[j].mol_name in set_mol_name: 2461 raise RelaxError("The molecule '%s' of model %s already exists." % (self.structural_data[i].mol[j].mol_name, self.structural_data[i].num)) 2462 2463 # Loop over the models. 2464 store_mol_num_orig = [] 2465 store_mol_num_new = [] 2466 for i in range(len(set_model_num)): 2467 store_mol_num_orig.append([]) 2468 store_mol_num_new.append([]) 2469 2470 # The model doesn't currently exist. 2471 if set_model_num[i] not in self.structural_data.current_models: 2472 # Create the model. 2473 self.structural_data.add_item(set_model_num[i]) 2474 2475 # Get the model. 2476 model = self.structural_data[-1] 2477 2478 # Otherwise get the pre-existing model. 2479 else: 2480 model = self.structural_data[self.structural_data.current_models.index(set_model_num[i])] 2481 2482 # Loop over the molecules. 2483 for j in range(len(set_mol_name)): 2484 # Override the merge argument if the molecule does not exist. 2485 found = False 2486 merge_new = True 2487 for k in range(len(model.mol)): 2488 if model.mol[k].mol_name == set_mol_name[j]: 2489 found = True 2490 if not found: 2491 merge_new = False 2492 2493 # Printout. 2494 if verbosity: 2495 # Model text formatting. 2496 orig_model_text = '' 2497 if orig_model_num[i] != None: 2498 orig_model_text = " of model %s" % orig_model_num[i] 2499 new_model_text = '' 2500 if set_model_num[i] != None: 2501 if merge_new: 2502 new_model_text += ' of' 2503 else: 2504 new_model_text += ' to' 2505 new_model_text += ' model %s' % set_model_num[i] 2506 2507 # The full text. 2508 if merge_new: 2509 print("Merging with molecule '%s'%s (from the original molecule number %s%s)." % (set_mol_name[j], new_model_text, orig_mol_num[i][j], orig_model_text)) 2510 else: 2511 print("Adding molecule '%s'%s (from the original molecule number %s%s)." % (set_mol_name[j], new_model_text, orig_mol_num[i][j], orig_model_text)) 2512 2513 # The index of the new molecule to add or merge. 2514 index = len(model.mol) 2515 if merge_new: 2516 index -= 1 2517 2518 # Store the index+1 as the new molecule number, and store the original molecule number. 2519 store_mol_num_new[i].append(index+1) 2520 store_mol_num_orig[i].append(orig_mol_num[i][j]) 2521 2522 # Consistency check. 2523 if model.num != self.structural_data[0].num and self.structural_data[0].mol[index].mol_name != set_mol_name[j]: 2524 raise RelaxError("The new molecule name of '%s' in model %s does not match the corresponding molecule's name of '%s' in model %s." % (set_mol_name[j], set_model_num[i], self.structural_data[0].mol[index].mol_name, self.structural_data[0].num)) 2525 2526 # Pack the structures. 2527 if merge_new: 2528 mol = model.mol.merge_item(mol_name=set_mol_name[j], mol_cont=data_matrix[i][j]) 2529 else: 2530 mol = model.mol.add_item(mol_name=set_mol_name[j], mol_cont=data_matrix[i][j]) 2531 2532 # Set the molecule name and store the structure file info. 2533 mol.mol_name = set_mol_name[j] 2534 mol.file_name = file_name 2535 mol.file_path = file_path 2536 mol.file_path_abs = file_path_abs 2537 mol.file_mol_num = orig_mol_num[i][j] 2538 mol.file_model = orig_model_num[i] 2539 2540 # Sort the structural data if a merge occurred. 2541 mol._sort() 2542 2543 # Consistency checks. 2544 for model_index in range(len(store_mol_num_new)): 2545 if len(store_mol_num_new[model_index]) != len(store_mol_num_orig[model_index]): 2546 raise RelaxFault 2547 2548 # The helix secondary structure information. 2549 if helices != None: 2550 # Initialise the data structure if required. 2551 if not hasattr(self, 'helices'): 2552 self.helices = [] 2553 2554 # Loop over each helix. 2555 for i in range(len(helices)): 2556 # Append the data. 2557 self.helices.append(helices[i]) 2558 2559 # The initial molecule index remapping. 2560 mol_init_index = store_mol_num_new[0][store_mol_num_orig[0].index(helices[i][1]+1)] - 1 2561 self.helices[-1][1] = mol_init_index 2562 2563 # The end molecule index remapping. 2564 mol_end_index = store_mol_num_new[0][store_mol_num_orig[0].index(helices[i][4]+1)] - 1 2565 self.helices[-1][4] = mol_end_index 2566 2567 # The sheet secondary structure information. 2568 if sheets != None: 2569 # Initialise the data structure if required. 2570 if not hasattr(self, 'sheets'): 2571 self.sheets = [] 2572 2573 # Loop over each sheet. 2574 for i in range(len(sheets)): 2575 # Append the data. 2576 self.sheets.append(sheets[i]) 2577 2578 # The initial molecule index remappings. 2579 mol_init_index = store_mol_num_new[0][store_mol_num_orig[0].index(sheets[i][4]+1)] - 1 2580 self.sheets[-1][4] = mol_init_index 2581 2582 2583 # The end molecule index remapping. 2584 mol_end_index = store_mol_num_new[0][store_mol_num_orig[0].index(sheets[i][8]+1)] - 1 2585 self.sheets[-1][8] = mol_end_index 2586 2587 # The current molecule index remapping. 2588 if sheets[i][14] != None: 2589 mol_cur_index = store_mol_num_new[0][store_mol_num_orig[0].index(sheets[i][14]+1)] - 1 2590 self.sheets[-1][14] = mol_cur_index 2591 2592 # The previous molecule index remapping. 2593 if sheets[i][19] != None: 2594 mol_prev_index = store_mol_num_new[0][store_mol_num_orig[0].index(sheets[i][19]+1)] - 1 2595 self.sheets[-1][19] = mol_prev_index

2596 2597

2598 - def rotate(self, R=None, origin=None, model=None, selection=None):

2599 """Rotate the structural information about the given origin. 2600 2601 @keyword R: The forwards rotation matrix. 2602 @type R: numpy 3D, rank-2 array 2603 @keyword origin: The origin of the rotation. 2604 @type origin: numpy 3D, rank-1 array 2605 @keyword model: The model to rotate. If None, all models will be rotated. 2606 @type model: int 2607 @keyword selection: The internal structural selection object. This is obtained by calling the selection() method with the atom ID string. 2608 @type selection: lib.structure.internal.Internal_selection instance 2609 """ 2610 2611 # Loop over the models. 2612 for model_cont in self.model_loop(model): 2613 # Loop over all molecules and atoms in the selection. 2614 for mol_index, i in selection.loop(): 2615 mol = model_cont.mol[mol_index] 2616 2617 # The origin to atom vector. 2618 vect = array([mol.x[i], mol.y[i], mol.z[i]], float64) - origin 2619 2620 # Rotation. 2621 rot_vect = dot(R, vect) 2622 2623 # The new position. 2624 pos = rot_vect + origin 2625 mol.x[i] = pos[0] 2626 mol.y[i] = pos[1] 2627 mol.z[i] = pos[2]

2628 2629

2630 - def selection(self, atom_id=None, inv=False):

2631 """Convert the atom ID string into a special internal selection object for speed. 2632 2633 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 2634 @type atom_id: str or None 2635 @keyword inv: A flag which if True will cause the selection to be inverted. 2636 @type inv: bool 2637 @return: The internal structural selection object. 2638 @rtype: Internal_selection instance 2639 """ 2640 2641 # Initialise the internal structural selection object. 2642 selection = Internal_selection() 2643 2644 # Split up the atom ID, to see if a molecule has been specified. 2645 mol_token, res_token, spin_token = tokenise(atom_id) 2646 2647 # Generate the atom ID selection object. 2648 sel_obj = None 2649 if atom_id: 2650 sel_obj = Selection(atom_id) 2651 2652 # Validate the models. 2653 self.validate_models(verbosity=0) 2654 2655 # Obtain all data from the first model (except the position data). 2656 model = self.structural_data[0] 2657 2658 # Loop over the molecules. 2659 for mol_index in range(len(model.mol)): 2660 mol = model.mol[mol_index] 2661 2662 # Skip non-matching molecules. 2663 if not inv: 2664 if sel_obj and not sel_obj.contains_mol(mol.mol_name): 2665 continue 2666 2667 # Skip matching molecules. 2668 else: 2669 if (not sel_obj) or (mol_token != None and sel_obj.contains_mol(mol.mol_name)): 2670 continue 2671 2672 # Add the molecule index. 2673 selection.add_mol(mol_index=mol_index) 2674 2675 # Loop over the atoms. 2676 for i in range(len(mol.atom_num)): 2677 # Skip non-matching atoms. 2678 if not inv: 2679 if sel_obj and not sel_obj.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 2680 continue 2681 2682 # Skip matching atoms. 2683 else: 2684 if (not sel_obj) or sel_obj.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 2685 continue 2686 2687 # Add the atom index. 2688 selection.add_atom(mol_index=mol_index, atom_index=i) 2689 2690 # Return the object. 2691 return selection

2692 2693

2694 - def set_model(self, model_orig=None, model_new=None):

2695 """Set or reset the model number. 2696 @keyword model_orig: The original model number. Leave as None if no models are currently present. 2697 @type model_orig: None or int 2698 @keyword model_new: The new model number to set the model to. 2699 @type model_new: int 2700 """ 2701 2702 # Check. 2703 if model_orig == None and self.num_models() != 1: 2704 raise RelaxError("If the original model number is not supplied, only one model in the current structural object is allowed, but %s were found." % self.num_models()) 2705 2706 # Set the single model number. 2707 if model_orig == None: 2708 self.structural_data[0].num = model_new 2709 return 2710 2711 # Find the model and set the number. 2712 set = False 2713 for i in range(len(self.structural_data)): 2714 if model_orig == self.structural_data[i].num: 2715 self.structural_data[i].num = model_new 2716 set = True 2717 2718 # Sanity check. 2719 if not set: 2720 raise RelaxError("The original model number %s could not be found in the structural object." % model_orig)

2721 2722

2723 - def target_mol_name(self, set=None, target=None, index=None, mol_num=None, file=None):

2724 """Add the new molecule name to the target data structure. 2725 2726 @keyword set: The list of new molecule names. If not supplied, the names will be generated from the file name. 2727 @type set: None or list of str 2728 @keyword target: The target molecule name data structure to which the new name will be appended. 2729 @type target: list 2730 @keyword index: The molecule index, matching the set argument. 2731 @type index: int 2732 @keyword mol_num: The molecule number. 2733 @type mol_num: int 2734 @keyword file: The name of the file, excluding all directories. 2735 @type file: str 2736 """ 2737 2738 # Set the target molecule name. 2739 if set: 2740 target.append(set[index]) 2741 else: 2742 # Set the name to the file name plus the structure number. 2743 target.append(file_root(file) + '_mol' + repr(mol_num))

2744 2745

2746 - def translate(self, T=None, model=None, selection=None):

2747 """Displace the structural information by the given translation vector. 2748 2749 @keyword T: The translation vector. 2750 @type T: numpy 3D, rank-1 array 2751 @keyword model: The model to rotate. If None, all models will be rotated. 2752 @type model: int 2753 @keyword selection: The internal structural selection object. This is obtained by calling the selection() method with the atom ID string. 2754 @type selection: lib.structure.internal.Internal_selection instance 2755 """ 2756 2757 # Loop over the models. 2758 for model_cont in self.model_loop(model): 2759 # Loop over all molecules and atoms in the selection. 2760 for mol_index, i in selection.loop(): 2761 mol = model_cont.mol[mol_index] 2762 2763 # Translate. 2764 mol.x[i] = mol.x[i] + T[0] 2765 mol.y[i] = mol.y[i] + T[1] 2766 mol.z[i] = mol.z[i] + T[2]

2767 2768

2769 - def to_xml(self, doc, element):

2770 """Prototype method for converting the structural object to an XML representation. 2771 2772 @param doc: The XML document object. 2773 @type doc: xml.dom.minidom.Document instance 2774 @param element: The element to add the alignment tensors XML element to. 2775 @type element: XML element object 2776 """ 2777 2778 # Create the structural element and add it to the higher level element. 2779 str_element = doc.createElement('structure') 2780 element.appendChild(str_element) 2781 2782 # Set the structural attributes. 2783 str_element.setAttribute('desc', 'Structural information') 2784 2785 # No contents to store, so pack up the structural containers. 2786 if not self.structural_data.is_empty(): 2787 self.structural_data.to_xml(doc, str_element) 2788 2789 # The structural metadata. 2790 metadata = ['helices', 'sheets'] 2791 for name in metadata: 2792 # The metadata does not exist. 2793 if not hasattr(self, name): 2794 continue 2795 2796 # Get the object. 2797 obj = getattr(self, name) 2798 2799 # Create a new element for this object, and add it to the main element. 2800 sub_elem = doc.createElement(name) 2801 str_element.appendChild(sub_elem) 2802 2803 # Add the value to the sub element. 2804 object_to_xml(doc, sub_elem, value=obj) 2805 2806 # The displacement structure. 2807 if hasattr(self, 'displacements'): 2808 # Create an XML element. 2809 disp_element = doc.createElement('displacements') 2810 str_element.appendChild(disp_element) 2811 2812 # Set the attributes. 2813 disp_element.setAttribute('desc', 'The rotational and translational displacements between models') 2814 2815 # Add the displacement data. 2816 self.displacements.to_xml(doc, disp_element)

2817 2818

2819 - def validate_models(self, verbosity=1):

2820 """Check that the models are consistent with each other. 2821 2822 This checks that the primary structure is identical between the models. 2823 2824 2825 @keyword verbosity: If 0, then all printouts will be silenced. 2826 @type verbosity: int 2827 """ 2828 2829 # Print out. 2830 if verbosity: 2831 print("Validating models:") 2832 2833 # Loop over the models. 2834 for i in range(len(self.structural_data)): 2835 # Check the molecules. 2836 if len(self.structural_data[0].mol) != len(self.structural_data[i].mol): 2837 raise RelaxError("The number of molecules, %i, in model %i does not match the %i molecules of the first model." % (len(self.structural_data[i].mol), self.structural_data[i].num, len(self.structural_data[0].mol))) 2838 2839 # Loop over the molecules. 2840 for j in range(len(self.structural_data[i].mol)): 2841 # Alias the molecules. 2842 mol = self.structural_data[i].mol[j] 2843 mol_ref = self.structural_data[0].mol[j] 2844 2845 # Check the names. 2846 if mol.mol_name != mol_ref.mol_name: 2847 raise RelaxError("The molecule name '%s' of model %i does not match the name '%s' of the first model." % (mol.mol_name, self.structural_data[i].num, mol_ref.mol_name)) 2848 2849 # Loop over the atoms. 2850 for k in range(len(mol.atom_name)): 2851 # Check all data. 2852 same = True 2853 if mol.chain_id[k] != mol_ref.chain_id[k]: 2854 same = False 2855 elif mol.atom_num[k] != mol_ref.atom_num[k]: 2856 same = False 2857 elif mol.atom_name[k] != mol_ref.atom_name[k]: 2858 same = False 2859 elif mol.res_name[k] != mol_ref.res_name[k]: 2860 same = False 2861 elif mol.res_num[k] != mol_ref.res_num[k]: 2862 same = False 2863 elif mol.seg_id[k] != mol_ref.seg_id[k]: 2864 same = False 2865 elif mol.element[k] != mol_ref.element[k]: 2866 same = False 2867 2868 # Failure. 2869 if not same: 2870 print("%-6s%5s %4s%1s%3s %1s%4s%1s %8s%8s%8s%6.2f%6.2f %4s%2s%2s" % ('ATOM', mol.atom_num[k], self._translate(mol.atom_name[k]), '', self._translate(mol.res_name[k]), self._translate(mol.chain_id[k]), self._translate(mol.res_num[k]), '', '#', '#', '#', 1.0, 0, self._translate(mol.seg_id[k]), self._translate(mol.element[k]), '')) 2871 print("%-6s%5s %4s%1s%3s %1s%4s%1s %8s%8s%8s%6.2f%6.2f %4s%2s%2s" % ('ATOM', mol_ref.atom_num[k], self._translate(mol_ref.atom_name[k]), '', self._translate(mol_ref.res_name[k]), self._translate(mol_ref.chain_id[k]), self._translate(mol_ref.res_num[k]), '', '#', '#', '#', 1.0, 0, self._translate(mol_ref.seg_id[k]), self._translate(mol_ref.element[k]), '')) 2872 raise RelaxError("The atoms of model %s do not match the first model." % self.structural_data[i].num) 2873 2874 # Final printout. 2875 if verbosity: 2876 print("\tAll models are consistent")

2877 2878

2879 - def write_pdb(self, file, model_num=None):

2880 """Method for the creation of a PDB file from the structural data. 2881 2882 A number of PDB records including HET, HETNAM, FORMUL, HELIX, SHEET, HETATM, TER, CONECT, MASTER, and END are created. To create the non-standard residue records HET, HETNAM, and FORMUL, the data structure 'het_data' is created. It is an array of arrays where the first dimension corresponds to a different residue and the second dimension has the elements: 2883 2884 0. Residue number. 2885 1. Residue name. 2886 2. Chain ID. 2887 3. Total number of atoms in the residue. 2888 4. Number of H atoms in the residue. 2889 5. Number of C atoms in the residue. 2890 2891 2892 @param file: The PDB file object. This object must be writable. 2893 @type file: file object 2894 @keyword model_num: The model to place into the PDB file. If not supplied, then all models will be placed into the file. 2895 @type model_num: None or int 2896 """ 2897 2898 # Validate the structural data. 2899 self.validate() 2900 2901 # Print out. 2902 print("\nCreating the PDB records\n") 2903 2904 # Write some initial remarks. 2905 print("REMARK") 2906 pdb_write.remark(file, num=4, remark="This file complies with format v. 3.30, Jul-2011.") 2907 pdb_write.remark(file, num=40, remark=None) 2908 pdb_write.remark(file, num=40, remark="Created using relax (http://www.nmr-relax.com).") 2909 pdb_write.remark(file, num=40, remark=None) 2910 if RELAX_VERSION: 2911 pdb_write.remark(file, num=40, remark="relax version %s." % RELAX_VERSION) 2912 pdb_write.remark(file, num=40, remark="Created on %s." % asctime()) 2913 num_remark = 2 2914 2915 # Determine if model records will be created. 2916 model_records = False 2917 for model in self.model_loop(): 2918 if hasattr(model, 'num') and model.num != None: 2919 model_records = True 2920 2921 2922 #################### 2923 # Hetrogen section # 2924 #################### 2925 2926 # Initialise the hetrogen info array. 2927 het_data = [] 2928 het_data_coll = [] 2929 2930 # Loop over the molecules of the first model. 2931 index = 0 2932 for mol in self.structural_data[0].mol_loop(): 2933 # Check the validity of the data. 2934 self._validate_data_arrays(mol) 2935 2936 # Append an empty array for this molecule. 2937 het_data.append([]) 2938 2939 # Collect the non-standard residue info. 2940 for i in range(len(mol.atom_name)): 2941 # Skip non-HETATM records and HETATM records with no residue info. 2942 if mol.pdb_record[i] != 'HETATM' or mol.res_name[i] == None: 2943 continue 2944 2945 # Skip waters. 2946 if mol.res_name[i] == 'HOH': 2947 continue 2948 2949 # If the residue is not already stored initialise a new het_data element. 2950 # (residue number, residue name, chain ID, number of atoms, atom count array). 2951 if not het_data[index] or not mol.res_num[i] == het_data[index][-1][0]: 2952 het_data[index].append([mol.res_num[i], mol.res_name[i], CHAIN_ID_LIST[index], 0, []]) 2953 2954 # Catch missing chain_ids. 2955 if het_data[index][-1][2] == None: 2956 het_data[index][-1][2] = '' 2957 2958 # Total atom count. 2959 het_data[index][-1][3] = het_data[index][-1][3] + 1 2960 2961 # Find if the atom has already a count entry. 2962 entry = False 2963 for j in range(len(het_data[index][-1][4])): 2964 if mol.element[i] == het_data[index][-1][4][j][0]: 2965 entry = True 2966 2967 # Create a new specific atom count entry. 2968 if not entry: 2969 het_data[index][-1][4].append([mol.element[i], 0]) 2970 2971 # Increment the specific atom count. 2972 for j in range(len(het_data[index][-1][4])): 2973 if mol.element[i] == het_data[index][-1][4][j][0]: 2974 het_data[index][-1][4][j][1] = het_data[index][-1][4][j][1] + 1 2975 2976 # Create the collective hetrogen info data structure. 2977 for i in range(len(het_data[index])): 2978 # Find the entry in the collective structure. 2979 found = False 2980 for j in range(len(het_data_coll)): 2981 # Different residue numbers. 2982 if het_data[index][i][0] != het_data_coll[j][0]: 2983 continue 2984 2985 # Different chain IDs. 2986 if het_data_coll[j][2] != het_data[index][i][2]: 2987 continue 2988 2989 # Change the flag. 2990 found = True 2991 2992 # If there is no match, add the new residue to the collective. 2993 if not found: 2994 het_data_coll.append(het_data[index][i]) 2995 2996 # Increment the molecule index. 2997 index = index + 1 2998 2999 3000 # The HET records. 3001 ################## 3002 3003 # Print out. 3004 print("HET") 3005 3006 # Write the HET records. 3007 for het in het_data_coll: 3008 pdb_write.het(file, het_id=het[1], chain_id=het[2], seq_num=het[0], num_het_atoms=het[3]) 3009 3010 3011 # The HETNAM records. 3012 ##################### 3013 3014 # Print out. 3015 print("HETNAM") 3016 3017 # Loop over the non-standard residues. 3018 residues = [] 3019 for het in het_data_coll: 3020 # Test if the residue HETNAM record as already been written (otherwise store its name). 3021 if het[1] in residues: 3022 continue 3023 else: 3024 residues.append(het[1]) 3025 3026 # Get the chemical name. 3027 chemical_name = self._get_chemical_name(het[1]) 3028 if not chemical_name: 3029 chemical_name = 'Unknown' 3030 3031 # Write the HETNAM records. 3032 pdb_write.hetnam(file, het_id=het[1], text=chemical_name) 3033 3034 3035 # The FORMUL records. 3036 ##################### 3037 3038 # Print out. 3039 print("FORMUL") 3040 3041 # Loop over the non-standard residues and generate and write the chemical formula. 3042 residues = [] 3043 for i in range(len(het_data_coll)): 3044 # Alias. 3045 het = het_data_coll[i] 3046 3047 # Test if the residue HETNAM record as already been written (otherwise store its name). 3048 if het[1] in residues: 3049 continue 3050 else: 3051 residues.append(het[1]) 3052 3053 # Initialise the chemical formula. 3054 formula = '' 3055 3056 # Loop over the atoms. 3057 for atom_count in het[4]: 3058 formula = formula + atom_count[0] + repr(atom_count[1]) 3059 3060 # The FORMUL record (chemical formula). 3061 pdb_write.formul(file, comp_num=i+1, het_id=het[1], text=formula) 3062 3063 3064 ############################### 3065 # Secondary structure section # 3066 ############################### 3067 3068 # The HELIX records. 3069 #################### 3070 3071 if hasattr(self, 'helices') and len(self.helices): 3072 # Printout. 3073 print("HELIX") 3074 3075 # Loop over and unpack the helix data. 3076 index = 1 3077 for helix_id, mol_init_index, init_res_name, init_seq_num, mol_end_index, end_res_name, end_seq_num, helix_class, length in self.helices: 3078 # The chain IDs. 3079 if mol_init_index == None: 3080 mol_init_index = 0 3081 if mol_end_index == None: 3082 mol_end_index = 0 3083 3084 pdb_write.helix(file, ser_num=index, helix_id=helix_id, init_chain_id=CHAIN_ID_LIST[mol_init_index], init_res_name=init_res_name, init_seq_num=init_seq_num, end_chain_id=CHAIN_ID_LIST[mol_end_index], end_res_name=end_res_name, end_seq_num=end_seq_num, helix_class=helix_class, length=length) 3085 index += 1 3086 3087 # The SHEET records. 3088 #################### 3089 3090 if hasattr(self, 'sheets') and len(self.sheets): 3091 # Printout. 3092 print("SHEET") 3093 3094 # Loop over and unpack the helix data. 3095 index = 1 3096 for strand, sheet_id, num_strands, init_res_name, mol_init_index, init_seq_num, init_icode, end_res_name, mol_end_index, end_seq_num, end_icode, sense, cur_atom, cur_res_name, mol_cur_index, cur_res_seq, cur_icode, prev_atom, prev_res_name, mol_prev_index, prev_res_seq, prev_icode in self.sheets: 3097 # Translate molecule indices to chain IDs. 3098 if mol_init_index == None: 3099 mol_init_index = 0 3100 if mol_end_index == None: 3101 mol_end_index = 0 3102 init_chain_id = CHAIN_ID_LIST[mol_init_index] 3103 end_chain_id = CHAIN_ID_LIST[mol_end_index] 3104 cur_chain_id = None 3105 if mol_cur_index != None: 3106 cur_chain_id = CHAIN_ID_LIST[mol_cur_index] 3107 prev_chain_id = None 3108 if mol_prev_index != None: 3109 prev_chain_id = CHAIN_ID_LIST[mol_prev_index] 3110 3111 # Write out. 3112 pdb_write.sheet(file, strand=strand, sheet_id=sheet_id, num_strands=num_strands, init_res_name=init_res_name, init_chain_id=init_chain_id, init_seq_num=init_seq_num, init_icode=init_icode, end_res_name=end_res_name, end_chain_id=end_chain_id, end_seq_num=end_seq_num, end_icode=end_icode, sense=sense, cur_atom=cur_atom, cur_res_name=cur_res_name, cur_chain_id=cur_chain_id, cur_res_seq=cur_res_seq, cur_icode=cur_icode, prev_atom=prev_atom, prev_res_name=prev_res_name, prev_chain_id=prev_chain_id, prev_res_seq=prev_res_seq, prev_icode=prev_icode) 3113 index += 1 3114 3115 3116 ###################### 3117 # Coordinate section # 3118 ###################### 3119 3120 # Initial printout if models are present. 3121 if model_records: 3122 print("\nMODEL records:") 3123 3124 # Loop over the models. 3125 for model in self.model_loop(model_num): 3126 # Initialise record counts. 3127 num_hetatm = 0 3128 num_atom = 0 3129 num_ter = 0 3130 ser_num = 1 3131 3132 # MODEL record, for multiple models. 3133 #################################### 3134 3135 if model_records: 3136 # Printout. 3137 sys.stdout.write('.') 3138 3139 # Write the model record. 3140 pdb_write.model(file, serial=model.num) 3141 3142 3143 # Add the atomic coordinate records (ATOM, HETATM, and TER). 3144 ############################################################ 3145 3146 # Loop over the molecules. 3147 index = 0 3148 for mol in model.mol_loop(): 3149 # Printout. 3150 if not model_records: 3151 print("ATOM, HETATM, TER") 3152 3153 # Loop over the atomic data. 3154 atom_record = False 3155 for i in range(len(mol.atom_name)): 3156 # Write the ATOM record. 3157 if mol.pdb_record[i] in [None, 'ATOM']: 3158 atom_record = True 3159 3160 # Write out. 3161 pdb_write.atom(file, serial=ser_num, name=mol.atom_name[i], res_name=mol.res_name[i], chain_id=CHAIN_ID_LIST[index], res_seq=mol.res_num[i], x=mol.x[i], y=mol.y[i], z=mol.z[i], occupancy=1.0, temp_factor=0, element=mol.element[i]) 3162 num_atom += 1 3163 ser_num += 1 3164 3165 # Info for the TER record. 3166 ter_num = ser_num + 1 3167 ter_name = mol.res_name[i] 3168 ter_chain_id = CHAIN_ID_LIST[index] 3169 ter_res_num = mol.res_num[i] 3170 3171 # Finish the ATOM section with the TER record. 3172 if atom_record: 3173 pdb_write.ter(file, serial=ser_num, res_name=ter_name, chain_id=CHAIN_ID_LIST[index], res_seq=ter_res_num) 3174 num_ter += 1 3175 ser_num += 1 3176 3177 # Loop over the atomic data. 3178 count_shift = False 3179 for i in range(len(mol.atom_name)): 3180 # Write the HETATM record. 3181 if mol.pdb_record[i] == 'HETATM': 3182 # Increment the atom number if a TER record was created. 3183 if atom_record and ser_num == ter_num: 3184 count_shift = True 3185 3186 # Write out. 3187 pdb_write.hetatm(file, serial=ser_num, name=self._translate(mol.atom_name[i]), res_name=mol.res_name[i], chain_id=CHAIN_ID_LIST[index], res_seq=mol.res_num[i], x=mol.x[i], y=mol.y[i], z=mol.z[i], occupancy=1.0, temp_factor=0.0, element=mol.element[i]) 3188 num_hetatm += 1 3189 ser_num += 1 3190 3191 # Increment the molecule index. 3192 index += 1 3193 3194 3195 # ENDMDL record, for multiple structures. 3196 ######################################## 3197 3198 if model_records: 3199 if not model_records: 3200 print("ENDMDL") 3201 pdb_write.endmdl(file) 3202 3203 3204 # Create the CONECT records. 3205 ############################ 3206 3207 # Print out. 3208 if model_records: 3209 sys.stdout.write('\n') 3210 print("CONECT") 3211 3212 # Initialise record counts. 3213 num_conect = 0 3214 3215 # The per molecule incremented atom counts. 3216 atom_counts = [0] 3217 index = 0 3218 for mol in self.structural_data[0].mol_loop(): 3219 if index == 0: 3220 atom_counts.append(len(mol.atom_name)) 3221 else: 3222 atom_counts.append(atom_counts[index] + len(mol.atom_name)) 3223 index += 1 3224 3225 # Loop over the molecules of the first model. 3226 index = 0 3227 for mol in self.structural_data[0].mol_loop(): 3228 # Loop over the atoms. 3229 for i in range(len(mol.atom_name)): 3230 # No bonded atoms, hence no CONECT record is required. 3231 if not len(mol.bonded[i]): 3232 continue 3233 3234 # Initialise some data structures. 3235 flush = 0 3236 bonded_index = 0 3237 bonded = ['', '', '', ''] 3238 bonded_shifted = ['', '', '', ''] 3239 3240 # Loop over the bonded atoms. 3241 for j in range(len(mol.bonded[i])): 3242 # End of the array, hence create the CONECT record in this iteration. 3243 if j == len(mol.bonded[i])-1: 3244 flush = True 3245 3246 # Only four covalently bonded atoms allowed in one CONECT record. 3247 if bonded_index == 3: 3248 flush = True 3249 3250 # Get the bonded atom index. 3251 bonded[bonded_index] = mol.bonded[i][j] 3252 3253 # Increment the bonded_index value. 3254 bonded_index = bonded_index + 1 3255 3256 # Generate the CONECT record and increment the counter. 3257 if flush: 3258 # Convert the atom indices to atom numbers. 3259 for k in range(4): 3260 if bonded[k] != '': 3261 bonded_shifted[k] = bonded[k] + 1 + atom_counts[index] 3262 3263 # Write the CONECT record. 3264 pdb_write.conect(file, serial=i+1+atom_counts[index], bonded1=bonded_shifted[0], bonded2=bonded_shifted[1], bonded3=bonded_shifted[2], bonded4=bonded_shifted[3]) 3265 3266 # Reset the flush flag, the bonded atom count, and the bonded atom names. 3267 flush = False 3268 bonded_index = 0 3269 bonded = ['', '', '', ''] 3270 3271 # Increment the CONECT record count. 3272 num_conect = num_conect + 1 3273 3274 # Increment the molecule index. 3275 index += 1 3276 3277 3278 # MASTER record. 3279 ################ 3280 3281 print("\nMASTER") 3282 pdb_write.master(file, num_het=len(het_data_coll), num_coord=num_atom+num_hetatm, num_ter=num_ter, num_conect=num_conect) 3283 3284 3285 # END. 3286 ###### 3287 3288 print("END") 3289 pdb_write.end(file)

3290 3291

3292 - def validate(self):

3293 """Check the integrity of the structural data. 3294 3295 The number of molecules must be the same in all models. 3296 """ 3297 3298 # Reference number of molecules. 3299 num_mols = len(self.structural_data[0].mol) 3300 3301 # Loop over all other models. 3302 for i in range(1, len(self.structural_data)): 3303 # Model alias. 3304 model_i = self.structural_data[i] 3305 3306 # Size check. 3307 if num_mols != len(model_i.mol): 3308 raise RelaxError("The structural object is not valid - the number of molecules is not the same for all models.") 3309 3310 # Molecule name check. 3311 for j in range(len(model_i.mol)): 3312 if model_i.mol[j].mol_name != self.structural_data[0].mol[j].mol_name: 3313 raise RelaxError("The molecule name '%s' of model %s does not match the corresponding molecule '%s' of model %s." % (model_i.mol[j].mol_name, model_i.num, self.structural_data[0].mol[j].mol_name, self.structural_data[0].num))

Source Code for Module lib.structure.internal.object