lib.structure.internal.object

58 """The internal relax structural data object. 59 60 The structural data object for this class is a container possessing a number of different arrays corresponding to different structural information. These objects are described in the structural container docstring. 61 """ 62

63 - def __init__(self):

64 """Initialise the structural object.""" 65 66 # Initialise the empty model list. 67 self.structural_data = ModelList()

68 69

70 - def _bonded_atom(self, attached_atom, index, mol):

71 """Find the atom named attached_atom directly bonded to the atom located at the index. 72 73 @param attached_atom: The name of the attached atom to return. 74 @type attached_atom: str 75 @param index: The index of the atom which the attached atom is attached to. 76 @type index: int 77 @param mol: The molecule container. 78 @type mol: MolContainer instance 79 @return: A tuple of information about the bonded atom. 80 @rtype: tuple consisting of the atom number (int), atom name (str), element name (str), and atomic position (Numeric array of len 3) 81 """ 82 83 # Init. 84 bonded_found = False 85 86 # No bonded atoms, so determine the connectivities. 87 if not mol.bonded[index]: 88 # Determine the molecule type if needed. 89 if not hasattr(mol, 'type'): 90 self._mol_type(mol) 91 92 # Protein. 93 if mol.type == 'protein': 94 self._protein_connect(mol) 95 96 # Find everything within 2 Angstroms and say they are bonded. 97 else: 98 self._find_bonded_atoms(index, mol, radius=2) 99 100 # Loop over the bonded atoms. 101 matching_list = [] 102 for bonded_index in mol.bonded[index]: 103 if regex.search(mol.atom_name[bonded_index], attached_atom): 104 matching_list.append(bonded_index) 105 num_attached = len(matching_list) 106 107 # Problem. 108 if num_attached > 1: 109 # Get the atom names. 110 matching_names = [] 111 for i in matching_list: 112 matching_names.append(mol.atom_name[i]) 113 114 # Return nothing but a warning. 115 return None, None, None, None, None, 'More than one attached atom found: ' + repr(matching_names) 116 117 # No attached atoms. 118 if num_attached == 0: 119 return None, None, None, None, None, "No attached atom could be found" 120 121 # The bonded atom info. 122 index = matching_list[0] 123 bonded_num = mol.atom_num[index] 124 bonded_name = mol.atom_name[index] 125 element = mol.element[index] 126 pos = [mol.x[index], mol.y[index], mol.z[index]] 127 attached_name = mol.atom_name[index] 128 129 # Return the information. 130 return bonded_num, bonded_name, element, pos, attached_name, None

131 132

133 - def _find_bonded_atoms(self, index, mol, radius=1.2):

134 """Find all atoms within a sphere and say that they are attached to the central atom. 135 136 The found atoms will be added to the 'bonded' data structure. 137 138 139 @param index: The index of the central atom. 140 @type index: int 141 @param mol: The molecule container. 142 @type mol: MolContainer instance 143 """ 144 145 # Central atom info. 146 centre = array([mol.x[index], mol.y[index], mol.z[index]], float64) 147 148 # Atom loop. 149 dist_list = [] 150 connect_list = {} 151 element_list = {} 152 for i in range(len(mol.atom_num)): 153 # Skip proton to proton bonds! 154 if mol.element[index] == 'H' and mol.element[i] == 'H': 155 continue 156 157 # The atom's position. 158 pos = array([mol.x[i], mol.y[i], mol.z[i]], float64) 159 160 # The distance from the centre. 161 dist = linalg.norm(centre-pos) 162 163 # The atom is within the radius. 164 if dist < radius: 165 # Store the distance. 166 dist_list.append(dist) 167 168 # Store the atom index. 169 connect_list[dist] = i 170 171 # Store the element type. 172 element_list[dist] = mol.element[i] 173 174 # The maximum number of allowed covalent bonds. 175 max_conn = 1000 # Ridiculous default! 176 if mol.element[index] == 'H': 177 max_conn = 1 178 elif mol.element[index] == 'O': 179 max_conn = 2 180 elif mol.element[index] == 'N': 181 max_conn = 3 182 elif mol.element[index] == 'C': 183 max_conn = 4 184 185 # Sort. 186 dist_list.sort() 187 188 # Loop over the max number of connections (or the number of connected atoms, if less). 189 for i in range(min(max_conn, len(dist_list))): 190 mol.atom_connect(index, connect_list[dist_list[i]])

191 192

193 - def _get_chemical_name(self, hetID):

194 """Return the chemical name corresponding to the given residue ID. 195 196 The following names are currently returned:: 197 ___________________________________________________________ 198 | | | 199 | hetID | Chemical name | 200 |________|________________________________________________| 201 | | | 202 | TNS | Tensor | 203 | COM | Centre of mass | 204 | AXS | Tensor axes | 205 | SIM | Monte Carlo simulation tensor axes | 206 | PIV | Pivot point | 207 | AVE | Average vector | 208 | RTX | Axis of the rotor geometric object | 209 | RTB | Propeller blades of the rotor geometric object | 210 | RTL | Labels for the rotor geometric object | 211 | CON | Cone object | 212 | CNC | Apex or centre of the cone geometric object | 213 | CNX | Axis of the cone geometric object | 214 | CNE | Edge of the cone geometric object | 215 | AXE | The axis geometric object | 216 | TLE | The title for the object | 217 |________|________________________________________________| 218 219 For any other residues, no description is returned. 220 221 @param hetID: The residue ID. 222 @type hetID: str 223 @return: The chemical name. 224 @rtype: str or None 225 """ 226 227 # Translation table. 228 table = { 229 "TNS": "Tensor", 230 "COM": "Centre of mass", 231 "AXS": "Tensor axes", 232 "SIM": "Monte Carlo simulation tensor axes", 233 "PIV": "Pivot point", 234 "AVE": "Average vector", 235 "RTX": "Axis of the rotor geometric object", 236 "RTB": "Propeller blades of the rotor geometric object", 237 "RTL": "Labels for the rotor geometric object", 238 "CON": "Cone object", 239 "CNC": "Apex or centre of the cone geometric object", 240 "CNX": "Axis of the cone geometric object", 241 "CNE": "Edge of the cone geometric object", 242 "AXE": "The axis geometric object", 243 "TLE": "The title for the object", 244 } 245 246 # Return the description, if one exists. 247 if hetID in table: 248 return table[hetID]

249 250

251 - def _parse_models_gaussian(self, file_path, verbosity=1):

252 """Generator function for looping over the models in the Gaussian log file. 253 254 @param file_path: The full path of the Gaussian log file. 255 @type file_path: str 256 @return: The model number and all the records for that model. 257 @rtype: tuple of int and array of str 258 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 259 @type verbosity: int 260 """ 261 262 # Open the file. 263 file = open_read_file(file_path, verbosity=verbosity) 264 lines = file.readlines() 265 file.close() 266 267 # Check for empty files. 268 if lines == []: 269 raise RelaxError("The Gaussian log file is empty.") 270 271 # Init. 272 found = False 273 str_index = 0 274 total_atom = 0 275 model = 0 276 records = [] 277 278 # Loop over the data. 279 for i in range(len(lines)): 280 # Found a structure. 281 if search("Standard orientation", lines[i]): 282 found = True 283 str_index = 0 284 continue 285 286 # End of the model. 287 if found and str_index > 4 and search("---------", lines[i]): 288 # Yield the info 289 yield records 290 291 # Reset. 292 records = [] 293 found = False 294 295 # Not a structure line. 296 if not found: 297 continue 298 299 # Append the line as a record of the model. 300 records.append(lines[i]) 301 302 # Increment the structure line index. 303 str_index += 1

304 305

306 - def _parse_pdb_connectivity_annotation(self, lines):

307 """Loop over and parse the PDB connectivity annotation records. 308 309 These are the records identified in the U{PDB version 3.30 documentation<http://www.wwpdb.org/documentation/file-format/format33/sect6.html>}. 310 311 312 @param lines: The lines of the PDB file excluding the sections prior to the connectivity annotation section. 313 @type lines: list of str 314 @return: The remaining PDB lines with the connectivity annotation records stripped. 315 @rtype: list of str 316 """ 317 318 # The ordered list of record names in the connectivity annotation section. 319 records = [ 320 'SSBOND', 321 'LINK ', 322 'CISPEP' 323 ] 324 325 # Loop over the lines. 326 for i in range(len(lines)): 327 # No match, therefore assume to be out of the connectivity annotation section. 328 if lines[i][:6] not in records: 329 break 330 331 # Return the remaining lines. 332 return lines[i:]

333 334

335 - def _parse_pdb_coord(self, lines):

336 """Generator function for looping over the models in the PDB file. 337 338 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/file-format/format33/sect9.html}. 339 340 341 @param lines: The lines of the coordinate section. 342 @type lines: list of str 343 @return: The model number and all the records for that model. 344 @rtype: tuple of int and array of str 345 """ 346 347 # Init. 348 model = None 349 records = [] 350 351 # Loop over the data. 352 for i in range(len(lines)): 353 # A new model record. 354 if lines[i][:5] == 'MODEL': 355 try: 356 model = int(lines[i].split()[1]) 357 except: 358 raise RelaxError("The MODEL record " + repr(lines[i]) + " is corrupt, cannot read the PDB file.") 359 360 # Skip all records prior to the first ATOM or HETATM record. 361 if not (lines[i][:4] == 'ATOM' or lines[i][:6] == 'HETATM') and not len(records): 362 continue 363 364 # End of the model. 365 if lines[i][:6] == 'ENDMDL': 366 # Yield the info. 367 yield model, records 368 369 # Reset the records. 370 records = [] 371 372 # Skip the rest of this loop. 373 continue 374 375 # Append the line as a record of the model. 376 records.append(lines[i]) 377 378 # If records is not empty then there are no models, so yield the lot. 379 if len(records): 380 yield model, records

381 382

383 - def _parse_pdb_hetrogen(self, lines):

384 """Loop over and parse the PDB hetrogen records. 385 386 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/file-format/format33/sect4.html}. 387 388 389 @param lines: The lines of the PDB file excluding the sections prior to the hetrogen section. 390 @type lines: list of str 391 @return: The remaining PDB lines with the hetrogen records stripped. 392 @rtype: list of str 393 """ 394 395 # The ordered list of record names in the hetrogen section. 396 records = [ 397 'HET ', 398 'FORMUL', 399 'HETNAM', 400 'HETSYN' 401 ] 402 403 # Loop over the lines. 404 for i in range(len(lines)): 405 # No match, therefore assume to be out of the hetrogen section. 406 if lines[i][:6] not in records: 407 break 408 409 # Return the remaining lines. 410 return lines[i:]

411 412

413 - def _parse_pdb_misc(self, lines):

414 """Loop over and parse the PDB miscellaneous records. 415 416 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/file-format/format33/sect7.html}. 417 418 419 @param lines: The lines of the PDB file excluding the sections prior to the miscellaneous section. 420 @type lines: list of str 421 @return: The remaining PDB lines with the miscellaneous records stripped. 422 @rtype: list of str 423 """ 424 425 # The ordered list of record names in the miscellaneous section. 426 records = [ 427 'SITE ' 428 ] 429 430 # Loop over the lines. 431 for i in range(len(lines)): 432 # No match, therefore assume to be out of the miscellaneous section. 433 if lines[i][:6] not in records: 434 break 435 436 # Return the remaining lines. 437 return lines[i:]

438 439

440 - def _parse_pdb_prim_struct(self, lines):

441 """Loop over and parse the PDB primary structure records. 442 443 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/file-format/format33/sect3.html}. 444 445 446 @param lines: The lines of the PDB file excluding the title section. 447 @type lines: list of str 448 @return: The remaining PDB lines with the primary structure records stripped. 449 @rtype: list of str 450 """ 451 452 # The ordered list of record names in the primary structure section. 453 records = [ 454 'DBREF ', 455 'DBREF1', 456 'DBREF2', 457 'SEQADV', 458 'SEQRES', 459 'MODRES' 460 ] 461 462 # Loop over the lines. 463 for i in range(len(lines)): 464 # No match, therefore assume to be out of the primary structure section. 465 if lines[i][:6] not in records: 466 break 467 468 # Return the remaining lines. 469 return lines[i:]

470 471

472 - def _parse_pdb_ss(self, lines, read_mol=None, helices=None, sheets=None):

473 """Loop over and parse the PDB secondary structure records. 474 475 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/file-format/format33/sect5.html}. 476 477 478 @param lines: The lines of the PDB file excluding the sections prior to the secondary structure section. 479 @type lines: list of str 480 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are determined differently by the different parsers, but are numbered consecutively from 1. If set to None, then all molecules will be loaded. 481 @type read_mol: None, int, or list of int 482 @keyword helices: The list of helix data to append to. 483 @type helices: list 484 @keyword sheets: The list of sheet data to append to. 485 @type sheets: list 486 @return: The remaining PDB lines with the secondary structure records stripped. 487 @rtype: list of str 488 """ 489 490 # The ordered list of record names in the secondary structure section (the depreciated TURN record is also included to handle old PDB files). 491 records = [ 492 'HELIX ', 493 'SHEET ', 494 'TURN ' 495 ] 496 497 # The number of pre-existing molecules. 498 if not len(self.structural_data): 499 mol_num = 0 500 else: 501 mol_num = len(self.structural_data[0].mol) 502 503 # Loop over the lines. 504 for i in range(len(lines)): 505 # No match, therefore assume to be out of the secondary structure section. 506 if lines[i][:6] not in records: 507 break 508 509 # A helix. 510 if lines[i][:5] == 'HELIX': 511 # Parse the record. 512 record_type, ser_num, helix_id, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, helix_class, comment, length = pdb_read.helix(lines[i]) 513 514 # The molecule indices. 515 mol_init_index = self._pdb_chain_id_to_mol_index(init_chain_id) 516 mol_end_index = self._pdb_chain_id_to_mol_index(end_chain_id) 517 518 # Only load the desired molecule. 519 if read_mol != None: 520 if mol_init_index + 1 not in read_mol: 521 continue 522 if mol_end_index + 1 not in read_mol: 523 continue 524 525 # Store the data. 526 helices.append([helix_id, mol_init_index, init_res_name, init_seq_num, mol_end_index, end_res_name, end_seq_num, helix_class, length]) 527 528 # A sheet. 529 if lines[i][:5] == 'SHEET': 530 # Parse the record. 531 record_type, strand, sheet_id, num_strands, init_res_name, init_chain_id, init_seq_num, init_icode, end_res_name, end_chain_id, end_seq_num, end_icode, sense, cur_atom, cur_res_name, cur_chain_id, cur_res_seq, cur_icode, prev_atom, prev_res_name, prev_chain_id, prev_res_seq, prev_icode = pdb_read.sheet(lines[i]) 532 533 # The molecule indices. 534 mol_init_index = self._pdb_chain_id_to_mol_index(init_chain_id) 535 mol_end_index = self._pdb_chain_id_to_mol_index(end_chain_id) 536 537 # Only load the desired molecule. 538 if read_mol != None: 539 if mol_init_index + 1 not in read_mol: 540 continue 541 if mol_end_index + 1 not in read_mol: 542 continue 543 544 # New molecule indices based on currently loaded data. 545 mol_cur_index = None 546 if cur_chain_id: 547 mol_cur_index = self._pdb_chain_id_to_mol_index(cur_chain_id) 548 mol_prev_index = None 549 if prev_chain_id: 550 mol_prev_index = self._pdb_chain_id_to_mol_index(prev_chain_id) 551 552 # Store the data. 553 sheets.append([strand, sheet_id, num_strands, init_res_name, mol_init_index, init_seq_num, init_icode, end_res_name, mol_end_index, end_seq_num, end_icode, sense, cur_atom, cur_res_name, mol_cur_index, cur_res_seq, cur_icode, prev_atom, prev_res_name, mol_prev_index, prev_res_seq, prev_icode]) 554 555 # Return the remaining lines. 556 return lines[i:]

557 558

559 - def _parse_pdb_title(self, lines):

560 """Loop over and parse the PDB title records. 561 562 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/file-format/format33/sect2.html}. 563 564 565 @param lines: All lines of the PDB file. 566 @type lines: list of str 567 @return: The remaining PDB lines with the title records stripped. 568 @rtype: list of str 569 """ 570 571 # The ordered list of (sometimes truncated) record names in the title section. 572 records = [ 573 'HEADER', 574 'OBSLTE', 575 'TITLE ', 576 'SPLT ', 577 'CAVEAT', 578 'COMPND', 579 'SOURCE', 580 'KEYWDS', 581 'EXPDTA', 582 'NUMMDL', 583 'MDLTYP', 584 'AUTHOR', 585 'REVDAT', 586 'SPRSDE', 587 'JRNL ', 588 'REMARK' 589 ] 590 591 # Loop over the lines. 592 for i in range(len(lines)): 593 # No match, therefore assume to be out of the title section. 594 if lines[i][:6] not in records: 595 break 596 597 # Return the remaining lines. 598 return lines[i:]

599 600

601 - def _parse_pdb_transform(self, lines):

602 """Loop over and parse the PDB transform records. 603 604 These are the records identified in the PDB version 3.30 documentation at U{http://www.wwpdb.org/documentation/file-format/format33/sect8.html}. 605 606 607 @param lines: The lines of the PDB file excluding the sections prior to the transform section. 608 @type lines: list of str 609 @return: The remaining PDB lines with the transform records stripped. 610 @rtype: list of str 611 """ 612 613 # The ordered list of record names in the transform section. 614 records = [ 615 'CRYST', 616 'MTRIX', 617 'ORIGX', 618 'SCALE', 619 ] 620 621 # Loop over the lines. 622 for i in range(len(lines)): 623 # No match, therefore assume to be out of the transform section. 624 if lines[i][0: 5] not in records: 625 break 626 627 # Return the remaining lines. 628 return lines[i:]

629 630

631 - def _parse_models_xyz(self, file_path, verbosity=1):

632 """Generator function for looping over the models in the XYZ file. 633 634 @param file_path: The full path of the XYZ file. 635 @type file_path: str 636 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 637 @type verbosity: int 638 @return: The model number and all the records for that model. 639 @rtype: tuple of int and array of str 640 """ 641 642 # Open the file. 643 file = open_read_file(file_path, verbosity=verbosity) 644 lines = file.readlines() 645 file.close() 646 647 # Check for empty files. 648 if lines == []: 649 raise RelaxError("The XYZ file is empty.") 650 651 # Init. 652 total_atom = 0 653 model = 0 654 records = [] 655 656 # Loop over the data. 657 for i in range(len(lines)): 658 num=0 659 word = lines[i].split() 660 # Find the total atom number and the first model. 661 if (i==0) and (len(word)==1): 662 try: 663 total_atom = int(word[0]) 664 num = 1 665 except: 666 raise RelaxError("The MODEL record " + repr(lines[i]) + " is corrupt, cannot read the XYZ file.") 667 668 # End of the model. 669 if (len(records) == total_atom): 670 # Yield the info 671 yield records 672 673 # Reset the records. 674 records = [] 675 676 # Skip all records prior to atom coordinates record. 677 if (len(word) != 4): 678 continue 679 680 # Append the line as a record of the model. 681 records.append(lines[i]) 682 683 # If records is not empty then there are no models, so yield the lot. 684 if len(records): 685 yield records

686 687

688 - def _parse_mols_pdb(self, records):

689 """Generator function for looping over the molecules in the PDB records of a model. 690 691 @param records: The list of PDB records for the model, or if no models exist the entire PDB file. 692 @type records: list of str 693 @return: The molecule number and all the records for that molecule. 694 @rtype: tuple of int and list of str 695 """ 696 697 # Check for empty records. 698 if records == []: 699 raise RelaxError("There are no PDB records for this model.") 700 701 # Init. 702 mol_count = 1 703 mol_records = [[]] 704 end = False 705 706 # Loop over the data. 707 for i in range(len(records)): 708 # A PDB termination record. 709 if records[i][:3] == 'END': 710 break 711 712 # A master record, so we are done. 713 if records[i][:6] == 'MASTER': 714 break 715 716 # A model termination record. 717 if records[i][:6] == 'ENDMDL': 718 end = True 719 720 # A molecule termination record with no trailing HETATM or CONECT. 721 elif i < len(records)-1 and records[i][:3] == 'TER' and not records[i+1][:6] == 'HETATM' and not records[i+1][:6] == 'CONECT': 722 end = True 723 724 # A HETATM followed by an ATOM record. 725 elif i < len(records)-1 and records[i][:6] == 'HETATM' and records[i+1][:4] == 'ATOM': 726 end = True 727 728 # End. 729 if end: 730 # Increment the molecule counter. 731 mol_count = mol_count + 1 732 733 # Reset the flag. 734 end = False 735 736 # The molecule number. 737 chain_id = records[i][21] 738 if chain_id == ' ': 739 mol_index = mol_count - 1 740 else: 741 mol_index = self._pdb_chain_id_to_mol_index(chain_id) 742 743 # Add a new records list as required. 744 while True: 745 if len(mol_records) <= mol_index: 746 mol_records.append([]) 747 else: 748 break 749 750 # Append the line as a record of the molecule. 751 mol_records[mol_index].append(records[i]) 752 753 # Loop over the molecules and yield the molecule number and records. 754 for i in range(len(mol_records)): 755 if mol_records[i] != []: 756 yield i+1, mol_records[i]

757 758

759 - def _pdb_chain_id_to_mol_index(self, chain_id=None):

760 """Convert the PDB chain ID into the molecule index in a regular way. 761 762 @keyword chain_id: The PDB chain ID string. 763 @type chain_id: str 764 @return: The corresponding molecule index. 765 @rtype: int 766 """ 767 768 # Initialise. 769 mol_index = 0 770 771 # Convert to the molecule index. 772 if chain_id: 773 mol_index = CHAIN_ID_LIST.index(chain_id) 774 775 # Return the index. 776 return mol_index

777 778

779 - def _residue_data(self, res_nums=None, res_names=None):

780 """Convert the residue info into a dictionary of unique residues with numbers as keys. 781 782 @keyword res_nums: The list of residue numbers. 783 @type res_nums: list of int 784 @keyword res_names: The list of residue names matching the numbers. 785 @type res_names: list of str 786 @return: The dictionary of residue names with residue numbers as keys. 787 @rtype: dict of str 788 """ 789 790 # Initialise. 791 data = {} 792 793 # Loop over the data. 794 for i in range(len(res_nums)): 795 # The residue data already exists. 796 if res_nums[i] in data: 797 continue 798 799 # Add the data. 800 data[res_nums[i]] = res_names[i] 801 802 # Return the dictionary. 803 return data

804 805

806 - def _validate_data_arrays(self, struct):

807 """Check the validity of the data arrays in the given structure object. 808 809 @param struct: The structural object. 810 @type struct: Structure_container instance 811 """ 812 813 # The number of atoms. 814 num = len(struct.atom_name) 815 816 # Check the other lengths. 817 if len(struct.bonded) != num and len(struct.chain_id) != num and len(struct.element) != num and len(struct.pdb_record) != num and len(struct.res_name) != num and len(struct.res_num) != num and len(struct.seg_id) != num and len(struct.x) != num and len(struct.y) != num and len(struct.z) != num: 818 raise RelaxError("The structural data is invalid.")

819 820

821 - def _validate_records(self, lines):

822 """Make sure all PDB records are 80 char in length, padding with whitespace when needed. 823 824 All newline characters are stripped from the records as well. 825 826 827 @param lines: All lines of the PDB file. 828 @type lines: list of str 829 @return: The padded PDB lines. 830 @rtype: list of str 831 """ 832 833 # Loop over the lines. 834 for i in range(len(lines)): 835 # Strip the newline character. 836 lines[i] = lines[i].rstrip('\r\n') 837 838 # Pad if needed. 839 if len(lines[i]) != 80: 840 lines[i] = "%-80s" % lines[i] 841 842 # Return the fixed lines. 843 return lines

844 845

846 - def _mol_type(self, mol):

847 """Determine the type of molecule. 848 849 @param mol: The molecule data container. 850 @type mol: MolContainer instance 851 """ 852 853 # Amino acids. 854 aa = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'] 855 856 # Set the molecule type to default to 'other'. 857 mol.type = 'other' 858 859 # Loop over the residues. 860 for res in mol.res_name: 861 # Protein. 862 if res in aa: 863 # Set the molecule type and return. 864 mol.type = 'protein' 865 return

866 867

868 - def _protein_connect(self, mol):

869 """Set up the connectivities for the protein. 870 871 @param mol: The molecule data container. 872 @type mol: MolContainer instance 873 """ 874 875 # Initialise some residue data. 876 curr_res_num = None 877 res_atoms = [] 878 879 # Loop over all atoms. 880 for i in range(len(mol.atom_num)): 881 # New residue. 882 if mol.res_num[i] != curr_res_num: 883 # Intra-residue connectivites. 884 if len(res_atoms): 885 self._protein_intra_connect(mol, res_atoms) 886 887 # Update the residue number. 888 curr_res_num = mol.res_num[i] 889 890 # Reset the residue atom index list. 891 res_atoms = [] 892 893 # Add the atom index to the list. 894 res_atoms.append(i) 895 896 # Last atom. 897 if i == len(mol.atom_num) - 1 and len(res_atoms): 898 self._protein_intra_connect(mol, res_atoms)

899 900

901 - def _protein_intra_connect(self, mol, res_atoms):

902 """Set up the connectivities for the protein. 903 904 @param mol: The molecule data container. 905 @type mol: MolContainer instance 906 @param res_atoms: The list of atom indices corresponding to the residue. 907 @type res_atoms: list of int 908 """ 909 910 # Back bond connectivity. 911 indices = { 912 'N': None, 913 'C': None, 914 'O': None, 915 'CA': None, 916 'HN': None, 917 'H': None, # Same as HN. 918 'HA': None 919 } 920 921 # Loop over all atoms to find the indices. 922 for index in res_atoms: 923 if mol.atom_name[index] in indices: 924 indices[mol.atom_name[index]] = index 925 926 # Connect the atom pairs. 927 pairs = [ 928 ['N', 'HN'], 929 ['N', 'H'], 930 ['N', 'CA'], 931 ['CA', 'HA'], 932 ['CA', 'C'], 933 ['C', 'O'] 934 ] 935 936 # Loop over the atoms pairs and connect them. 937 for pair in pairs: 938 if indices[pair[0]] != None and indices[pair[1]] != None: 939 mol.atom_connect(indices[pair[0]], indices[pair[1]])

940 941

942 - def _translate(self, data, format='str'):

943 """Convert the data into a format for writing to file. 944 945 @param data: The data to convert to the required format. 946 @type data: anything 947 @keyword format: The format to convert to. This can be 'str', 'float', or 'int'. 948 @type format: str 949 @return: The converted version of the data. 950 @rtype: str 951 """ 952 953 # Conversion to string. 954 if format == 'str': 955 # None values. 956 if data == None: 957 data = '' 958 959 # Force convert to string. 960 if not isinstance(data, str): 961 data = repr(data) 962 963 # Conversion to float. 964 if format == 'float': 965 # None values. 966 if data == None: 967 data = 0.0 968 969 # Force convert to float. 970 if not isinstance(data, float): 971 data = float(data) 972 973 # Return the converted data. 974 return data

975 976

977 - def _trim_helix(self, helix=None, trim_res_list=[], res_data=None):

978 """Trim the given helix based on the list of deleted residue numbers. 979 980 @keyword helix: The single helix metadata structure. 981 @type helix: list 982 @keyword trim_res_list: The list of residue numbers which no longer exist. 983 @type trim_res_list: list of int 984 @keyword res_data: The dictionary of residue names with residue numbers as keys. 985 @type res_data: dict of str 986 @return: The trimmed helix metadata structure, or None if the whole helix is to be deleted. 987 @rtype: list or None 988 """ 989 990 # Unpack the helix residue numbers. 991 start_res = helix[3] 992 end_res = helix[6] 993 994 # The reverse residue list. 995 trim_res_list_rev = deepcopy(trim_res_list) 996 trim_res_list_rev.reverse() 997 998 # The helix residues. 999 helix_res = list(range(start_res, end_res+1)) 1000 1001 # Trim forwards. 1002 for res_num in trim_res_list: 1003 if res_num == start_res: 1004 # Remove the residue. 1005 helix_res.pop(0) 1006 1007 # No helix left. 1008 if len(helix_res) == 0: 1009 break 1010 1011 # Realias the starting residue. 1012 start_res = helix_res[0] 1013 1014 # No helix left. 1015 if len(helix_res) == 0: 1016 return None 1017 1018 # Trim backwards. 1019 for res_num in trim_res_list_rev: 1020 if res_num == end_res: 1021 helix_res.pop(-1) 1022 end_res = helix_res[-1] 1023 1024 # Replace the starting and ending residues. 1025 if start_res != helix[3]: 1026 helix[3] = start_res 1027 helix[2] = res_data[start_res] 1028 if end_res != helix[6]: 1029 helix[6] = end_res 1030 helix[5] = res_data[end_res] 1031 1032 # The helix length. 1033 helix[-1] = len(helix_res) 1034 1035 # Return the modified helix. 1036 return helix

1037 1038

1039 - def _trim_sheet(self, sheet=None, trim_res_list=[], res_data=None):

1040 """Trim the given sheet based on the list of deleted residue numbers. 1041 1042 @keyword sheet: The single sheet metadata structure. 1043 @type sheet: list 1044 @keyword trim_res_list: The list of residue numbers which no longer exist. 1045 @type trim_res_list: list of int 1046 @keyword res_data: The dictionary of residue names with residue numbers as keys. 1047 @type res_data: dict of str 1048 @return: The trimmed sheet metadata structure, or None if the whole sheet is to be deleted. 1049 @rtype: list or None 1050 """ 1051 1052 # Unpack the sheet residue numbers. 1053 start_res = sheet[5] 1054 end_res = sheet[9] 1055 1056 # The reverse residue list. 1057 trim_res_list_rev = deepcopy(trim_res_list) 1058 trim_res_list_rev.reverse() 1059 1060 # The sheet residues. 1061 sheet_res = list(range(start_res, end_res+1)) 1062 1063 # Trim forwards. 1064 for res_num in trim_res_list: 1065 if res_num == start_res: 1066 # Remove the residue. 1067 sheet_res.pop(0) 1068 1069 # No sheet left. 1070 if len(sheet_res) == 0: 1071 break 1072 1073 # Realias the starting residue. 1074 start_res = sheet_res[0] 1075 1076 # No sheet left. 1077 if len(sheet_res) == 0: 1078 return None 1079 1080 # Trim backwards. 1081 for res_num in trim_res_list_rev: 1082 if res_num == end_res: 1083 sheet_res.pop(-1) 1084 end_res = sheet_res[-1] 1085 1086 # Replace the starting and ending residues. 1087 if start_res != sheet[5]: 1088 sheet[5] = start_res 1089 sheet[3] = res_data[start_res] 1090 if end_res != sheet[9]: 1091 sheet[9] = end_res 1092 sheet[7] = res_data[end_res] 1093 1094 # Return the modified sheet. 1095 return sheet

1096 1097

1098 - def add_atom(self, mol_name=None, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None, sort=False):

1099 """Add a new atom to the structural data object. 1100 1101 @keyword mol_name: The name of the molecule. 1102 @type mol_name: str 1103 @keyword atom_name: The atom name, e.g. 'H1'. 1104 @type atom_name: str or None 1105 @keyword res_name: The residue name. 1106 @type res_name: str or None 1107 @keyword res_num: The residue number. 1108 @type res_num: int or None 1109 @keyword pos: The position vector of coordinates. If a rank-2 array is supplied, the length of the first dimension must match the number of models. 1110 @type pos: rank-1 or rank-2 array or list of float 1111 @keyword element: The element symbol. 1112 @type element: str or None 1113 @keyword atom_num: The atom number. 1114 @type atom_num: int or None 1115 @keyword chain_id: The chain identifier. 1116 @type chain_id: str or None 1117 @keyword segment_id: The segment identifier. 1118 @type segment_id: str or None 1119 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'. 1120 @type pdb_record: str or None 1121 @keyword sort: A flag which if True will cause the structural data to be sorted after adding the atom. 1122 @type sort: bool 1123 """ 1124 1125 # Add a model if not present. 1126 if len(self.structural_data) == 0: 1127 self.add_model() 1128 1129 # Check the position. 1130 if is_float(pos[0]): 1131 if len(pos) != 3: 1132 raise RelaxError("The single atomic position %s must be a 3D list." % pos) 1133 else: 1134 if len(pos) != len(self.structural_data): 1135 raise RelaxError("The %s atomic positions does not match the %s models present." % (len(pos), len(self.structural_data))) 1136 1137 # Loop over each model. 1138 for i in range(len(self.structural_data)): 1139 # Alias the model. 1140 model = self.structural_data[i] 1141 1142 # Specific molecule. 1143 mol = self.get_molecule(mol_name, model=model.num) 1144 1145 # Add the molecule, if it does not exist. 1146 if mol == None: 1147 self.add_molecule(name=mol_name) 1148 mol = self.get_molecule(mol_name, model=model.num) 1149 1150 # Split up the position if needed. 1151 if is_float(pos[0]): 1152 model_pos = pos 1153 else: 1154 model_pos = pos[i] 1155 1156 # Add the atom. 1157 mol.atom_add(atom_name=atom_name, res_name=res_name, res_num=res_num, pos=model_pos, element=element, atom_num=atom_num, chain_id=chain_id, segment_id=segment_id, pdb_record=pdb_record) 1158 1159 # Sort. 1160 if sort: 1161 mol._sort()

1162 1163

1164 - def add_coordinates(self, coord=None, mol_names=None, res_names=None, res_nums=None, atom_names=None, elements=None, set_mol_name=None, set_model_num=None):

1165 """Add a set of coordinates to the structural object. 1166 1167 @keyword coord: The array of atomic coordinates (first dimension is the atoms and the second are the coordinates). 1168 @type coord: numpy rank-2 float64 array 1169 @keyword mol_names: The list of molecule names corresponding to the first dimension of the coordinate array. 1170 @type mol_names: list of str 1171 @keyword res_names: The list of residue names corresponding to the first dimension of the coordinate array. 1172 @type res_names: list of str 1173 @keyword res_nums: The list of residue numbers corresponding to the first dimension of the coordinate array. 1174 @type res_nums: list of str 1175 @keyword atom_names: The list of atom names corresponding to the first dimension of the coordinate array. 1176 @type atom_names: list of str 1177 @keyword elements: The list of elements corresponding to the first dimension of the coordinate array. 1178 @type elements: list of str 1179 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then all molecule names must be identical and the new molecule will have the same name. 1180 @type set_mol_name: None or str 1181 @keyword set_model_num: Set the model number for the coordinates. 1182 @type set_model_num: None or int 1183 """ 1184 1185 # The new molecule name. 1186 if not set_mol_name: 1187 set_mol_name = mol_names[0] 1188 for name in mol_names: 1189 if set_mol_name != name: 1190 raise RelaxError("No unique molecule name can be found in the list %s for storing the new molecule structure.") 1191 1192 # No model set, so delete all current data. 1193 if set_mol_name == None and set_model_num == None: 1194 print("Deleting all structural data so that only the new molecule will be present.") 1195 self.delete(verbosity=0) 1196 1197 # Generate a molecule container for the new data. 1198 mol = MolContainer() 1199 1200 # Add the data. 1201 for i in range(len(coord)): 1202 mol.atom_add(atom_name=atom_names[i], res_name=res_names[i], res_num=res_nums[i], pos=coord[i], element=elements[i]) 1203 1204 # Create the structural data data structures. 1205 self.pack_structs([[mol]], orig_model_num=[None], set_model_num=[set_model_num], orig_mol_num=[[None]], set_mol_name=[set_mol_name])

1206 1207

1208 - def add_model(self, model=None, coords_from=None):

1209 """Add a new model to the store. 1210 1211 The new model will be constructed with the structural information from the other models currently present. The coords_from argument allows the atomic positions to be taken from a certain model. If this argument is not set, then the atomic positions from the first model will be used. 1212 1213 @keyword model: The number of the model to create. 1214 @type model: int or None 1215 @keyword coords_from: The model number to take the coordinates from. 1216 @type coords_from: int or None 1217 @return: The model container. 1218 @rtype: ModelContainer instance 1219 """ 1220 1221 # Check if the model currently exists. 1222 if model != None: 1223 for i in range(len(self.structural_data)): 1224 if model == self.structural_data[i].num: 1225 raise RelaxError("The model '%s' already exists." % model) 1226 1227 # Add a new model. 1228 model = self.structural_data.add_item(model_num=model) 1229 1230 # The model to duplicate. 1231 if coords_from == None: 1232 model_from = self.structural_data[0] 1233 else: 1234 for i in range(len(self.structural_data)): 1235 if self.structural_data[i].num == coords_from: 1236 model_from = self.structural_data[i] 1237 break 1238 1239 # Duplicate all data from the MolList object down. 1240 for mol_index in range(len(model_from.mol)): 1241 # Create a new molecule container. 1242 model.mol.add_item(mol_name=model_from.mol[mol_index].mol_name, mol_cont=MolContainer()) 1243 mol = model.mol[mol_index] 1244 mol_from = model_from.mol[mol_index] 1245 1246 # Loop over the atomic data. 1247 for i in range(len(mol_from.atom_num)): 1248 mol.atom_num.append(mol_from.atom_num[i]) 1249 mol.atom_name.append(mol_from.atom_name[i]) 1250 mol.bonded.append(mol_from.bonded[i]) 1251 mol.chain_id.append(mol_from.chain_id[i]) 1252 mol.element.append(mol_from.element[i]) 1253 mol.pdb_record.append(mol_from.pdb_record[i]) 1254 mol.res_name.append(mol_from.res_name[i]) 1255 mol.res_num.append(mol_from.res_num[i]) 1256 mol.seg_id.append(mol_from.seg_id[i]) 1257 mol.x.append(mol_from.x[i]) 1258 mol.y.append(mol_from.y[i]) 1259 mol.z.append(mol_from.z[i]) 1260 1261 # Return the model. 1262 return self.structural_data[-1]

1263 1264

1265 - def add_molecule(self, model_num=None, name=None):

1266 """Add a new molecule to the store. 1267 1268 @keyword model_num: The optional model to add the molecule to. If not supplied, the molecule will be added to all models. 1269 @type model_num: None or int 1270 @keyword name: The molecule identifier string. 1271 @type name: str 1272 """ 1273 1274 # Add a model if necessary. 1275 if len(self.structural_data) == 0: 1276 self.add_model() 1277 1278 # Add the molecule to each model. 1279 for model in self.model_loop(model=model_num): 1280 model.mol.add_item(mol_name=name, mol_cont=MolContainer())

1281 1282

1283 - def are_bonded(self, atom_id1=None, atom_id2=None):

1284 """Determine if two atoms are directly bonded to each other. 1285 1286 @keyword atom_id1: The molecule, residue, and atom identifier string of the first atom. 1287 @type atom_id1: str 1288 @keyword atom_id2: The molecule, residue, and atom identifier string of the second atom. 1289 @type atom_id2: str 1290 @return: True if the atoms are directly bonded. 1291 @rtype: bool 1292 """ 1293 1294 # Generate the selection objects. 1295 sel_obj1 = Selection(atom_id1) 1296 sel_obj2 = Selection(atom_id2) 1297 1298 # Build the connectivities if needed. 1299 for mol in self.structural_data[0].mol: 1300 for i in range(len(mol.atom_num)): 1301 if not len(mol.bonded[i]): 1302 self._find_bonded_atoms(i, mol, radius=2) 1303 1304 # Loop over the molecules. 1305 for mol in self.structural_data[0].mol: 1306 # Skip non-matching molecules. 1307 if not sel_obj1.contains_mol(mol.mol_name): 1308 continue 1309 if not sel_obj2.contains_mol(mol.mol_name): 1310 continue 1311 1312 # Find the first atom. 1313 index1 = None 1314 index2 = None 1315 for i in range(len(mol.atom_num)): 1316 # Matching first atom. 1317 if index1 == None and sel_obj1.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 1318 index1 = i 1319 1320 # Matching second atom. 1321 if index2 == None and sel_obj2.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 1322 index2 = i 1323 1324 # Nothing left to do. 1325 if index1 != None and index2 != None: 1326 break 1327 1328 # Connectivities exist. 1329 if index1 < len(mol.bonded): 1330 if index2 in mol.bonded[index1]: 1331 return True 1332 else: 1333 return False

1334 1335

1336 - def are_bonded_index(self, mol_index1=None, atom_index1=None, mol_index2=None, atom_index2=None):

1337 """Determine if two atoms, given as indices, are directly bonded to each other. 1338 1339 @keyword mol_index1: The molecule index of the first atom. 1340 @type mol_index1: int 1341 @keyword atom_index1: The index of the first atom. 1342 @type atom_index1: int 1343 @keyword mol_index2: The molecule index of the second atom. 1344 @type mol_index2: int 1345 @keyword atom_index2: The index of the second atom. 1346 @type atom_index2: int 1347 @return: True if the atoms are directly bonded. 1348 @rtype: bool 1349 """ 1350 1351 # Alias the molecule. 1352 mol1 = self.structural_data[0].mol[mol_index1] 1353 mol2 = self.structural_data[0].mol[mol_index2] 1354 1355 # Build the connectivities if needed. 1356 if not len(mol1.bonded[atom_index1]): 1357 self._find_bonded_atoms(atom_index1, mol1, radius=2) 1358 if not len(mol2.bonded[atom_index2]): 1359 self._find_bonded_atoms(atom_index2, mol2, radius=2) 1360 1361 # Is the second atom in the bonded list of the first? 1362 if atom_index2 in mol1.bonded[atom_index1]: 1363 return True 1364 1365 # No! 1366 return False

1367 1368

1369 - def atom_loop(self, selection=None, str_id=None, model_num=None, mol_name_flag=False, res_num_flag=False, res_name_flag=False, atom_num_flag=False, atom_name_flag=False, element_flag=False, pos_flag=False, mol_index_flag=False, index_flag=False, ave=False):

1370 """Generator function for looping over all atoms in the internal relax structural object. 1371 1372 This method should be designed as a U{generator<http://www.python.org/dev/peps/pep-0255/>}. It should loop over all atoms of the system yielding the following atomic information, if the corresponding flag is True, in tuple form: 1373 1374 1. Model number. 1375 2. Molecule name. 1376 3. Residue number. 1377 4. Residue name. 1378 5. Atom number. 1379 6. Atom name. 1380 7. The element name (its atomic symbol and optionally the isotope, e.g. 'N', 'Mg', '17O', '13C', etc). 1381 8. The position of the atom in Euclidean space. 1382 1383 1384 @keyword selection: The internal structural selection object. This is obtained by calling the selection() method with the atom ID string. 1385 @type selection: lib.structure.internal.Internal_selection instance 1386 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number. If None, then all structures will be looped over. 1387 @type str_id: str, int, or None 1388 @keyword model_num: Only loop over a specific model. 1389 @type model_num: int or None 1390 @keyword mol_name_flag: A flag which if True will cause the molecule name to be yielded. 1391 @type mol_name_flag: bool 1392 @keyword res_num_flag: A flag which if True will cause the residue number to be yielded. 1393 @type res_num_flag: bool 1394 @keyword res_name_flag: A flag which if True will cause the residue name to be yielded. 1395 @type res_name_flag: bool 1396 @keyword atom_num_flag: A flag which if True will cause the atom number to be yielded. 1397 @type atom_num_flag: bool 1398 @keyword atom_name_flag: A flag which if True will cause the atom name to be yielded. 1399 @type atom_name_flag: bool 1400 @keyword element_flag: A flag which if True will cause the element name to be yielded. 1401 @type element_flag: bool 1402 @keyword pos_flag: A flag which if True will cause the atomic position to be yielded. 1403 @type pos_flag: bool 1404 @keyword mol_index_flag: A flag which if True will cause the molecule index to be yielded. 1405 @type mol_index_flag: bool 1406 @keyword index_flag: A flag which if True will cause the atomic index to be yielded. 1407 @type index_flag: bool 1408 @keyword ave: A flag which if True will result in this method returning the average atom properties across all loaded structures. 1409 @type ave: bool 1410 @return: A tuple of atomic information, as described in the docstring. 1411 @rtype: tuple consisting of optional molecule name (str), residue number (int), residue name (str), atom number (int), atom name(str), element name (str), and atomic position (array of len 3). 1412 """ 1413 1414 # Check that the structure is loaded. 1415 if not len(self.structural_data): 1416 raise RelaxNoPdbError 1417 1418 # Obtain all data from the first model (except the position data). 1419 model = self.structural_data[0] 1420 1421 # Loop over all molecules and atoms in the selection. 1422 for mol_index, i in selection.loop(): 1423 mol = model.mol[mol_index] 1424 1425 # Initialise. 1426 res_num = mol.res_num[i] 1427 res_name = mol.res_name[i] 1428 atom_num = mol.atom_num[i] 1429 atom_name = mol.atom_name[i] 1430 element = mol.element[i] 1431 1432 # The atom position. 1433 if pos_flag: 1434 # Average the position. 1435 if ave: 1436 # Initialise. 1437 pos = zeros(3, float64) 1438 1439 # Loop over the models. 1440 for model in self.model_loop(model=model_num): 1441 # Alias. 1442 mol2 = model.mol[mol_index] 1443 1444 # Some sanity checks. 1445 if mol2.atom_num[i] != atom_num: 1446 raise RelaxError("The loaded structures do not contain the same atoms. The average structural properties can not be calculated.") 1447 1448 # Sum the atom positions. 1449 pos = pos + array([mol2.x[i], mol2.y[i], mol2.z[i]], float64) 1450 1451 # Average the position array (divide by the number of models). 1452 pos = pos / len(self.structural_data) 1453 1454 # All positions. 1455 else: 1456 # Initialise. 1457 pos = [] 1458 1459 # Loop over the models. 1460 for model in self.model_loop(model=model_num): 1461 # Alias. 1462 mol2 = model.mol[mol_index] 1463 1464 # Append the position. 1465 pos.append([mol2.x[i], mol2.y[i], mol2.z[i]]) 1466 1467 # Convert. 1468 pos = array(pos, float64) 1469 1470 # The molecule name. 1471 mol_name = mol.mol_name 1472 1473 # Build the tuple to be yielded. 1474 atomic_tuple = () 1475 if mol_name_flag: 1476 atomic_tuple = atomic_tuple + (mol_name,) 1477 if res_num_flag: 1478 atomic_tuple = atomic_tuple + (res_num,) 1479 if res_name_flag: 1480 atomic_tuple = atomic_tuple + (res_name,) 1481 if atom_num_flag: 1482 atomic_tuple = atomic_tuple + (atom_num,) 1483 if atom_name_flag: 1484 atomic_tuple = atomic_tuple + (atom_name,) 1485 if element_flag: 1486 atomic_tuple = atomic_tuple + (element,) 1487 if pos_flag: 1488 atomic_tuple = atomic_tuple + (pos,) 1489 if mol_index_flag: 1490 atomic_tuple += (mol_index,) 1491 if index_flag: 1492 atomic_tuple += (i,) 1493 1494 # Yield the information. 1495 if len(atomic_tuple) == 1: 1496 atomic_tuple = atomic_tuple[0] 1497 yield atomic_tuple

1498 1499

1500 - def bond_vectors(self, attached_atom=None, model_num=None, mol_name=None, res_num=None, res_name=None, spin_num=None, spin_name=None, return_name=False, return_warnings=False):

1501 """Find the bond vector between the atoms of 'attached_atom' and 'atom_id'. 1502 1503 @keyword attached_atom: The name of the bonded atom. 1504 @type attached_atom: str 1505 @keyword model_num: The model of which to return the vectors from. If not supplied and multiple models exist, then vectors from all models will be returned. 1506 @type model_num: None or int 1507 @keyword mol_name: The name of the molecule that attached_atom belongs to. 1508 @type mol_name: str 1509 @keyword res_num: The number of the residue that attached_atom belongs to. 1510 @type res_num: str 1511 @keyword res_name: The name of the residue that attached_atom belongs to. 1512 @type res_name: str 1513 @keyword spin_num: The number of the spin that attached_atom is attached to. 1514 @type spin_num: str 1515 @keyword spin_name: The name of the spin that attached_atom is attached to. 1516 @type spin_name: str 1517 @keyword return_name: A flag which if True will cause the name of the attached atom to be returned together with the bond vectors. 1518 @type return_name: bool 1519 @keyword return_warnings: A flag which if True will cause warning messages to be returned. 1520 @type return_warnings: bool 1521 @return: The list of bond vectors for each model. 1522 @rtype: list of numpy arrays (or a tuple if return_name or return_warnings are set) 1523 """ 1524 1525 # Initialise some objects. 1526 vectors = [] 1527 attached_name = None 1528 warnings = None 1529 1530 # Use the first model for the atom matching. 1531 model = self.structural_data[0] 1532 1533 # Loop over the molecules. 1534 for mol_index in range(len(model.mol)): 1535 # Alias. 1536 mol = model.mol[mol_index] 1537 1538 # Skip non-matching molecules. 1539 if mol_name and mol_name != mol.mol_name: 1540 continue 1541 1542 # Find the atomic index of the base atom. 1543 index = None 1544 for i in range(len(mol.atom_name)): 1545 # Residues don't match. 1546 if (res_num != None and mol.res_num[i] != res_num) or (res_name != None and mol.res_name[i] != res_name): 1547 continue 1548 1549 # Atoms don't match. 1550 if (spin_num != None and mol.atom_num[i] != spin_num) or (spin_name != None and mol.atom_name[i] != spin_name): 1551 continue 1552 1553 # Update the index and stop searching. 1554 index = i 1555 break 1556 1557 # Found the atom. 1558 if index != None: 1559 # Loop over the models. 1560 for model in self.model_loop(model=model_num): 1561 # Alias. 1562 mol = model.mol[mol_index] 1563 1564 # Get the atom bonded to this model/molecule/residue/atom. 1565 bonded_num, bonded_name, element, pos, attached_name, warnings = self._bonded_atom(attached_atom, index, mol) 1566 1567 # No bonded atom. 1568 if (bonded_num, bonded_name, element) == (None, None, None): 1569 continue 1570 1571 # The bond vector. 1572 vector = array(pos, float64) - array([mol.x[index], mol.y[index], mol.z[index]], float64) 1573 1574 # Append the vector to the vectors array. 1575 vectors.append(vector) 1576 1577 # Not found. 1578 else: 1579 warnings = "Cannot find the atom in the structure" 1580 1581 # Build the tuple to be yielded. 1582 data = (vectors,) 1583 if return_name: 1584 data = data + (attached_name,) 1585 if return_warnings: 1586 data = data + (warnings,) 1587 1588 # Return the data. 1589 return data

1590 1591

1592 - def collapse_ensemble(self, model_num=None, model_to=1):

1593 """Collapse the ensemble into a single model. 1594 1595 @keyword model_num: The number of the model to keep. All other models will be removed. 1596 @type model_num: int 1597 @keyword model_to: The model number for the sole remaining model. 1598 @type model_to: int 1599 """ 1600 1601 # Store all the model numbers. 1602 models = [] 1603 for model_cont in self.model_loop(): 1604 if model_cont.num != model_num: 1605 models.append(model_cont.num) 1606 1607 # Delete all models. 1608 for model in models: 1609 self.delete(model) 1610 1611 # Renumber the remaining model. 1612 self.set_model(model_orig=model_num, model_new=model_to)

1613 1614

1615 - def connect_atom(self, mol_name=None, index1=None, index2=None):

1616 """Connect two atoms in the structural data object. 1617 1618 @keyword mol_name: The name of the molecule. 1619 @type mol_name: str 1620 @keyword index1: The global index of the first atom. 1621 @type index1: str 1622 @keyword index2: The global index of the first atom. 1623 @type index2: str 1624 """ 1625 1626 # Add the molecule, if it does not exist. 1627 if self.get_molecule(mol_name) == None: 1628 self.add_molecule(name=mol_name) 1629 1630 # Loop over each model. 1631 for model in self.structural_data: 1632 # Specific molecule. 1633 mol = self.get_molecule(mol_name) 1634 1635 # Add the atom. 1636 mol.atom_connect(index1=index1, index2=index2)

1637 1638

1639 - def delete(self, model=None, selection=None, verbosity=1):

1640 """Deletion of structural information. 1641 1642 @keyword model: Individual structural models from a loaded ensemble can be deleted by specifying the model number. 1643 @type model: None or int 1644 @keyword selection: The internal structural selection object. This is obtained by calling the selection() method with the atom ID string. 1645 @type selection: lib.structure.internal.Internal_selection instance 1646 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 1647 @type verbosity: int 1648 """ 1649 1650 # All data. 1651 if model == None and selection == None: 1652 # Printout. 1653 if verbosity: 1654 print("Deleting the following structural data:\n") 1655 print(self.structural_data) 1656 1657 # Delete the structural data. 1658 del self.structural_data 1659 1660 # Initialise the empty model list. 1661 self.structural_data = ModelList() 1662 1663 # Delete a whole model. 1664 elif selection == None: 1665 self.structural_data.delete_model(model_num=model) 1666 1667 # Atom subset deletion. 1668 else: 1669 # Loop over the atoms and find the indices of the atoms to delete. 1670 indices = {} 1671 for mol_index, i in selection.loop(): 1672 if mol_index not in indices: 1673 indices[mol_index] = [] 1674 indices[mol_index].append(i) 1675 for mol_index in indices: 1676 indices[mol_index].reverse() 1677 1678 # Loop over the models. 1679 del_res_nums = [] 1680 for model_cont in self.model_loop(): 1681 # Skip models. 1682 if model != None and model_cont.num == model: 1683 continue 1684 1685 # Loop over the molecules. 1686 for mol_index in indices: 1687 mol = model_cont.mol[mol_index] 1688 1689 # Generate a residue data dictionary for the metadata trimming (prior to atom deletion). 1690 res_data = self._residue_data(res_nums=mol.res_num, res_names=mol.res_name) 1691 1692 # Loop over the reverse indices and pop out the data. 1693 for i in indices[mol_index]: 1694 mol.atom_num.pop(i) 1695 mol.atom_name.pop(i) 1696 mol.bonded.pop(i) 1697 mol.chain_id.pop(i) 1698 mol.element.pop(i) 1699 mol.pdb_record.pop(i) 1700 mol.res_name.pop(i) 1701 res_num = mol.res_num.pop(i) 1702 mol.seg_id.pop(i) 1703 mol.x.pop(i) 1704 mol.y.pop(i) 1705 mol.z.pop(i) 1706 1707 # The residue no longer exists. 1708 if res_num not in mol.res_num and res_num not in del_res_nums: 1709 del_res_nums.append(res_num) 1710 1711 # Second atom of the bonded pair. 1712 for j in range(len(mol.bonded)): 1713 if i in mol.bonded[j]: 1714 mol.bonded[j].pop(mol.bonded[j].index(i)) 1715 1716 # Update the bonded lists, as the indices need to be shifted. 1717 for j in range(i, len(mol.bonded)): 1718 for k in range(len(mol.bonded[j])): 1719 mol.bonded[j][k] -= 1 1720 1721 # Reset the metadata if nothing remains. 1722 if mol.atom_num == []: 1723 if hasattr(mol, 'file_name'): 1724 del mol.file_name 1725 if hasattr(mol, 'file_path'): 1726 del mol.file_path 1727 if hasattr(mol, 'file_mol_num'): 1728 del mol.file_mol_num 1729 if hasattr(mol, 'file_model'): 1730 del mol.file_model 1731 1732 # Nothing more to do. 1733 if not len(del_res_nums): 1734 return 1735 if model != None and len(self.structural_data) > 1: 1736 return 1737 1738 # Fix the deleted residue number order. 1739 del_res_nums.reverse() 1740 1741 # Handle the helix metadata. 1742 if hasattr(self, 'helices'): 1743 del_helix_indices = [] 1744 for i in range(len(self.helices)): 1745 # Trim the helix. 1746 helix = self._trim_helix(helix=self.helices[i], trim_res_list=del_res_nums, res_data=res_data) 1747 1748 # Trimmed helix. 1749 if helix != None: 1750 self.helices[i] = helix 1751 1752 # No helix left. 1753 else: 1754 del_helix_indices.append(i) 1755 1756 # Loop over the reverse helix indices and pop out the data. 1757 del_helix_indices.reverse() 1758 for i in del_helix_indices: 1759 self.helices.pop(i) 1760 1761 # Handle the sheet metadata. 1762 if hasattr(self, 'sheets'): 1763 del_sheet_indices = [] 1764 for i in range(len(self.sheets)): 1765 # Trim the sheet. 1766 sheet = self._trim_sheet(sheet=self.sheets[i], trim_res_list=del_res_nums, res_data=res_data) 1767 1768 # Trimmed sheet. 1769 if sheet != None: 1770 self.sheets[i] = sheet 1771 1772 # No sheet left. 1773 else: 1774 del_sheet_indices.append(i) 1775 1776 # Loop over the reverse sheet indices and pop out the data. 1777 del_sheet_indices.reverse() 1778 for i in del_sheet_indices: 1779 self.sheets.pop(i)

1780 1781

1782 - def delete_ss(self, verbosity=1):

1783 """Deletion of all secondary structure information. 1784 1785 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 1786 @type verbosity: int 1787 """ 1788 1789 # Reset the structures. 1790 self.helices = [] 1791 self.sheets = []

1792 1793

1794 - def empty(self):

1795 """Report if the structural data structure is empty or not. 1796 1797 @return: True if empty, False otherwise. 1798 @rtype: bool 1799 """ 1800 1801 # Check the ModelList structure. 1802 if len(self.structural_data) == 0: 1803 return True 1804 else: 1805 return False

1806 1807

1808 - def from_xml(self, str_node, dir=None, file_version=1):

1809 """Recreate the structural object from the XML structural object node. 1810 1811 @param str_node: The structural object XML node. 1812 @type str_node: xml.dom.minicompat.Element instance 1813 @keyword dir: The name of the directory containing the results file. 1814 @type dir: str 1815 @keyword file_version: The relax XML version of the XML file. 1816 @type file_version: int 1817 """ 1818 1819 # Recreate all base objects (i.e. metadata). 1820 xml_to_object(str_node, self, file_version=file_version, blacklist=['model', 'displacements']) 1821 1822 # Recreate the model / molecule data structure. 1823 model_nodes = str_node.getElementsByTagName('model') 1824 self.structural_data.from_xml(model_nodes, file_version=file_version) 1825 1826 # The displacement structure. 1827 disp_nodes = str_node.getElementsByTagName('displacements') 1828 if len(disp_nodes): 1829 # Initialise the object. 1830 self.displacements = Displacements() 1831 1832 # Recreate the molecule data structures for the current model. 1833 self.displacements.from_xml(disp_nodes[0], file_version=file_version)

1834 1835

1836 - def get_model(self, model):

1837 """Return or create the model. 1838 1839 @param model: The model number. 1840 @type model: int or None 1841 @return: The ModelContainer corresponding to the model number or that newly created. 1842 @rtype: ModelContainer instance 1843 """ 1844 1845 # Check if the target is a single model. 1846 if model == None and self.num_models() > 1: 1847 raise RelaxError("The target model cannot be determined as there are %s models already present." % self.num_modes()) 1848 1849 # No model specified. 1850 if model == None: 1851 # Create the first model, if necessary. 1852 if self.num_models(): 1853 self.structural_data.add_item() 1854 1855 # Alias the first model. 1856 model_cont = self.structural_data[0] 1857 1858 # The model has been specified. 1859 else: 1860 # Get the preexisting model. 1861 found = False 1862 for model_cont in self.structural_data: 1863 if model_cont.num == model: 1864 found = True 1865 break 1866 1867 # Add the model if it doesn't exist. 1868 if not found: 1869 self.structural_data.add_item(model) 1870 model_cont = self.structural_data[-1] 1871 1872 # Return the container. 1873 return model_cont

1874 1875

1876 - def get_molecule(self, molecule, model=None):

1877 """Return the molecule. 1878 1879 Only one model can be specified. 1880 1881 1882 @param molecule: The molecule name. 1883 @type molecule: int or None 1884 @keyword model: The model number. 1885 @type model: int or None 1886 @raises RelaxError: If the model is not specified and there is more than one model loaded. 1887 @return: The MolContainer corresponding to the molecule name and model number. 1888 @rtype: MolContainer instance or None 1889 """ 1890 1891 # Check if the target is a single molecule. 1892 if model == None and self.num_models() > 1: 1893 raise RelaxError("The target molecule cannot be determined as there are %s models already present." % self.num_models()) 1894 1895 # Check the model argument. 1896 if not isinstance(model, int) and not model == None: 1897 raise RelaxNoneIntError 1898 1899 # No models. 1900 if not len(self.structural_data): 1901 return 1902 1903 # Loop over the models. 1904 for model_cont in self.model_loop(model): 1905 # Loop over the molecules. 1906 for mol in model_cont.mol: 1907 # Return the matching molecule. 1908 if mol.mol_name == molecule: 1909 return mol

1910 1911

1912 - def has_molecule(self, model_num=None, name=None):

1913 """Check if the molecule name exists. 1914 1915 @keyword model_num: The optional model to check. If not supplied, the molecule will be searched for across all models. 1916 @type model_num: None or int 1917 @param name: The molecule name. 1918 @type name: str 1919 @return: True if the molecule exists, False otherwise. 1920 @rtype: bool 1921 """ 1922 1923 # No models. 1924 if not len(self.structural_data): 1925 return False 1926 1927 # Loop over the models. 1928 for model_cont in self.model_loop(model=model_num): 1929 # Loop over the molecules. 1930 for mol in model_cont.mol: 1931 # Matching molecule. 1932 if mol.mol_name == name: 1933 return True 1934 1935 # No match. 1936 return False

1937 1938

1939 - def load_gaussian(self, file_path, set_mol_name=None, set_model_num=None, verbosity=False):

1940 """Method for loading structures from a Gaussian log file. 1941 1942 @param file_path: The full path of the Gaussian log file. 1943 @type file_path: str 1944 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 1945 @type set_mol_name: None, str, or list of str 1946 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the Gaussian log model numbers will be preserved, if they exist. 1947 @type set_model_num: None, int, or list of int 1948 @keyword verbosity: A flag which if True will cause messages to be printed. 1949 @type verbosity: bool 1950 @return: The status of the loading of the Gaussian log file. 1951 @rtype: bool 1952 """ 1953 1954 # Initial printout. 1955 if verbosity: 1956 print("\nInternal relax Gaussian log parser.") 1957 1958 # Test if the file exists. 1959 if not access(file_path, F_OK): 1960 # Exit indicating failure. 1961 return False 1962 1963 # Separate the file name and path. 1964 path, file_name = os.path.split(file_path) 1965 1966 # The absolute path. 1967 path_abs = abspath(curdir) + sep + path 1968 1969 # Set up the molecule name data structure. 1970 if set_mol_name: 1971 if not isinstance(set_mol_name, list): 1972 set_mol_name = [set_mol_name] 1973 else: 1974 set_mol_name = [file_root(file_name) + '_mol1'] 1975 1976 # Set up the model number data structure. 1977 if set_model_num: 1978 if not isinstance(set_model_num, list): 1979 set_model_num = [set_model_num] 1980 else: 1981 set_model_num = [1] 1982 1983 # Loop over all models in the Gaussian log file, doing nothing so the last model records are stored. 1984 for model_records in self._parse_models_gaussian(file_path, verbosity=verbosity): 1985 pass 1986 1987 # Generate the molecule container. 1988 mol = MolContainer() 1989 1990 # Fill the molecular data object. 1991 mol.fill_object_from_gaussian(model_records) 1992 1993 # Create the structural data data structures. 1994 self.pack_structs([[mol]], orig_model_num=[1], set_model_num=set_model_num, orig_mol_num=[[0]], set_mol_name=set_mol_name, file_name=file_name, file_path=path, file_path_abs=path_abs, verbosity=verbosity) 1995 1996 # Loading worked. 1997 return True

1998 1999

2000 - def load_pdb(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, alt_loc=None, verbosity=False, merge=False):

2001 """Method for loading structures from a PDB file. 2002 2003 @param file_path: The full path of the PDB file. 2004 @type file_path: str 2005 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are determined differently by the different parsers, but are numbered consecutively from 1. If set to None, then all molecules will be loaded. 2006 @type read_mol: None, int, or list of int 2007 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 2008 @type set_mol_name: None, str, or list of str 2009 @keyword read_model: The PDB model to extract from the file. If set to None, then all models will be loaded. 2010 @type read_model: None, int, or list of int 2011 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the PDB model numbers will be preserved, if they exist. 2012 @type set_model_num: None, int, or list of int 2013 @keyword alt_loc: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use. 2014 @type alt_loc: str or None 2015 @keyword verbosity: A flag which if True will cause messages to be printed. 2016 @type verbosity: bool 2017 @keyword merge: A flag which if set to True will try to merge the PDB structure into the currently loaded structures. 2018 @type merge: bool 2019 @return: The status of the loading of the PDB file. 2020 @rtype: bool 2021 """ 2022 2023 # Initial printout. 2024 if verbosity: 2025 print("\nInternal relax PDB parser.") 2026 2027 # Test if the file exists. 2028 if not access(file_path, F_OK): 2029 # Exit indicating failure. 2030 return False 2031 2032 # Separate the file name and path. 2033 path, file = os.path.split(file_path) 2034 2035 # The absolute path. 2036 path_abs = abspath(curdir) + sep + path 2037 2038 # Convert the structure reading args into lists. 2039 if read_mol and not isinstance(read_mol, list): 2040 read_mol = [read_mol] 2041 if set_mol_name and not isinstance(set_mol_name, list): 2042 set_mol_name = [set_mol_name] 2043 if read_mol: 2044 set_mol_name *= len(read_mol) 2045 if read_model and not isinstance(read_model, list): 2046 read_model = [read_model] 2047 if set_model_num and not isinstance(set_model_num, list): 2048 set_model_num = [set_model_num] 2049 if read_model: 2050 set_model_num *= len(read_model) 2051 2052 # Open the PDB file. 2053 pdb_file = open_read_file(file_path, verbosity=verbosity) 2054 pdb_lines = pdb_file.readlines() 2055 pdb_file.close() 2056 2057 # Check for empty files. 2058 if pdb_lines == []: 2059 raise RelaxError("The PDB file is empty.") 2060 2061 # Pre-process the lines, fixing PDB violations. 2062 pdb_lines = self._validate_records(pdb_lines) 2063 2064 # Secondary structure data. 2065 helices = [] 2066 sheets = [] 2067 2068 # Process the different sections. 2069 pdb_lines = self._parse_pdb_title(pdb_lines) 2070 pdb_lines = self._parse_pdb_prim_struct(pdb_lines) 2071 pdb_lines = self._parse_pdb_hetrogen(pdb_lines) 2072 pdb_lines = self._parse_pdb_ss(pdb_lines, read_mol=read_mol, helices=helices, sheets=sheets) 2073 pdb_lines = self._parse_pdb_connectivity_annotation(pdb_lines) 2074 pdb_lines = self._parse_pdb_misc(pdb_lines) 2075 pdb_lines = self._parse_pdb_transform(pdb_lines) 2076 2077 # Loop over all models in the PDB file. 2078 model_index = 0 2079 orig_model_num = [] 2080 mol_conts = [] 2081 orig_mol_num = [] 2082 for model_num, model_records in self._parse_pdb_coord(pdb_lines): 2083 # Only load the desired model. 2084 if read_model and model_num not in read_model: 2085 continue 2086 2087 # Store the original model number. 2088 orig_model_num.append(model_num) 2089 2090 # Loop over the molecules of the model. 2091 mol_conts.append([]) 2092 orig_mol_num.append([]) 2093 mol_index = 0 2094 new_mol_name = [] 2095 reset_serial = True 2096 for mol_num, mol_records in self._parse_mols_pdb(model_records): 2097 # Only load the desired model. 2098 if read_mol and mol_num not in read_mol: 2099 continue 2100 2101 # Set the target molecule name. 2102 if set_mol_name: 2103 # Single name. 2104 if len(set_mol_name) == 1: 2105 new_mol_name.append(set_mol_name[0]) 2106 2107 # Multiple names. 2108 else: 2109 new_mol_name.append(set_mol_name[mol_index]) 2110 2111 # Auto-generated molecule name. 2112 else: 2113 # Number of structures already present for the model. 2114 num_struct = 0 2115 if self.structural_data != None and len(self.structural_data) and (not set_model_num or (model_index <= len(set_model_num) and set_model_num[model_index] == self.structural_data[0].num)): 2116 num_struct = len(self.structural_data[0].mol) 2117 2118 # Set the name to the file name plus the structure number. 2119 new_mol_name.append(file_root(file) + '_mol' + repr(mol_num+num_struct)) 2120 2121 # Generate the molecule container. 2122 mol = MolContainer() 2123 2124 # Fill the molecular data object. 2125 mol.fill_object_from_pdb(mol_records, alt_loc_select=alt_loc, reset_serial=reset_serial) 2126 2127 # Store the molecule container. 2128 mol_conts[model_index].append(mol) 2129 2130 # Store the original molecule number. 2131 orig_mol_num[model_index].append(mol_num) 2132 2133 # Increment the molecule index. 2134 mol_index = mol_index + 1 2135 2136 # No longer reset the serial number to 1. 2137 reset_serial = False 2138 2139 # Increment the model index. 2140 model_index = model_index + 1 2141 2142 # No data, so throw a warning and exit. 2143 if not len(mol_conts): 2144 warn(RelaxWarning("No structural data could be read from the file '%s'." % file_path)) 2145 return False 2146 2147 # Create the structural data data structures. 2148 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=orig_mol_num, set_mol_name=new_mol_name, file_name=file, file_path=path, file_path_abs=path_abs, merge=merge, helices=helices, sheets=sheets, verbosity=verbosity) 2149 2150 # Loading worked. 2151 return True

2152 2153

2154 - def load_xyz(self, file_path, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, verbosity=False):

2155 """Method for loading structures from a XYZ file. 2156 2157 @param file_path: The full path of the XYZ file. 2158 @type file_path: str 2159 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are determined differently by the different parsers, but are numbered consecutively from 1. If set to None, then all molecules will be loaded. 2160 @type read_mol: None, int, or list of int 2161 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 2162 @type set_mol_name: None, str, or list of str 2163 @keyword read_model: The XYZ model to extract from the file. If set to None, then all models will be loaded. 2164 @type read_model: None, int, or list of int 2165 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the XYZ model numbers will be preserved, if they exist. 2166 @type set_model_num: None, int, or list of int 2167 @keyword verbosity: A flag which if True will cause messages to be printed. 2168 @type verbosity: bool 2169 @return: The status of the loading of the XYZ file. 2170 @rtype: bool 2171 """ 2172 2173 # Initial printout. 2174 if verbosity: 2175 print("\nInternal relax XYZ parser.") 2176 2177 # Test if the file exists. 2178 if not access(file_path, F_OK): 2179 # Exit indicating failure. 2180 return False 2181 2182 # Separate the file name and path. 2183 path, file = os.path.split(file_path) 2184 2185 # The absolute path. 2186 path_abs = abspath(curdir) + sep + path 2187 2188 # Convert the structure reading args into lists. 2189 if read_mol and not isinstance(read_mol, list): 2190 read_mol = [read_mol] 2191 if set_mol_name and not isinstance(set_mol_name, list): 2192 set_mol_name = [set_mol_name] 2193 if read_model and not isinstance(read_model, list): 2194 read_model = [read_model] 2195 if set_model_num and not isinstance(set_model_num, list): 2196 set_model_num = [set_model_num] 2197 2198 # Loop over all models in the XYZ file. 2199 mol_index = 0 2200 model_index = 0 2201 xyz_model_increment = 0 2202 orig_model_num = [] 2203 mol_conts = [] 2204 orig_mol_num = [] 2205 new_mol_name = [] 2206 for model_records in self._parse_models_xyz(file_path, verbosity=verbosity): 2207 # Increment the xyz_model_increment 2208 xyz_model_increment = xyz_model_increment +1 2209 2210 # Only load the desired model. 2211 if read_model and xyz_model_increment not in read_model: 2212 continue 2213 2214 # Store the original model number. 2215 orig_model_num.append(model_index) 2216 2217 # Loop over the molecules of the model. 2218 if read_mol and mol_index not in read_mol: 2219 continue 2220 2221 # Set the target molecule name. 2222 if set_mol_name: 2223 new_mol_name.append(set_mol_name[mol_index]) 2224 else: 2225 if mol_index == 0: 2226 #Set the name to the file name plus the structure number. 2227 new_mol_name.append(file_root(file) + '_mol' + repr(mol_index+1)) 2228 2229 # Store the original mol number. 2230 orig_mol_num.append(mol_index) 2231 2232 # Generate the molecule container. 2233 mol = MolContainer() 2234 2235 # Fill the molecular data object. 2236 mol.fill_object_from_xyz(model_records) 2237 2238 # Store the molecule container. 2239 mol_conts.append([]) 2240 mol_conts[model_index].append(mol) 2241 2242 # Increment the molecule index. 2243 mol_index = mol_index + 1 2244 2245 # Increment the model index. 2246 model_index = model_index + 1 2247 2248 # Create the structural data data structures. 2249 orig_mol_num = [[0]] 2250 self.pack_structs(mol_conts, orig_model_num=orig_model_num, set_model_num=set_model_num, orig_mol_num=orig_mol_num, set_mol_name=new_mol_name, file_name=file, file_path=path, file_path_abs=path_abs, verbosity=verbosity) 2251 2252 # Loading worked. 2253 return True

2254 2255

2256 - def mean(self):

2257 """Calculate the mean structure from all models in the structural data object.""" 2258 2259 # Create a new model for the mean structure. 2260 num = self.num_models() 2261 self.add_model() 2262 mean_model = self.structural_data[-1] 2263 2264 # The selection object. 2265 selection = self.selection() 2266 2267 # Loop over the molecules and atoms. 2268 for mol_index, i in selection.loop(): 2269 # Set the mean structure coordinate to zero. 2270 mean_model.mol[mol_index].x[i] = 0.0 2271 mean_model.mol[mol_index].y[i] = 0.0 2272 mean_model.mol[mol_index].z[i] = 0.0 2273 2274 # Loop over the models and sum the coordinates. 2275 for model_index in range(num): 2276 model_cont = self.structural_data[model_index] 2277 mean_model.mol[mol_index].x[i] += model_cont.mol[mol_index].x[i] 2278 mean_model.mol[mol_index].y[i] += model_cont.mol[mol_index].y[i] 2279 mean_model.mol[mol_index].z[i] += model_cont.mol[mol_index].z[i] 2280 2281 # Averages. 2282 mean_model.mol[mol_index].x[i] /= num 2283 mean_model.mol[mol_index].y[i] /= num 2284 mean_model.mol[mol_index].z[i] /= num 2285 2286 # Delete all models but the mean. 2287 for model_index in reversed(list(range(num))): 2288 self.delete(model=self.structural_data[model_index].num)

2289 2290

2291 - def model_list(self):

2292 """Create a list of all models. 2293 2294 @return: The list of all models. 2295 @rtype: list of int 2296 """ 2297 2298 # Assemble the list. 2299 models = [] 2300 for model in self.model_loop(): 2301 models.append(model.num) 2302 2303 # Return the list. 2304 return models

2305 2306

2307 - def model_loop(self, model=None):

2308 """Generator method for looping over the models in numerical order. 2309 2310 @keyword model: Limit the loop to a single number. 2311 @type model: int 2312 @return: The model structural object. 2313 @rtype: ModelContainer container 2314 """ 2315 2316 # A single model. 2317 if model: 2318 for i in range(len(self.structural_data)): 2319 if self.structural_data[i].num == model: 2320 yield self.structural_data[i] 2321 2322 # All models. 2323 else: 2324 # The models. 2325 model_nums = [] 2326 for i in range(len(self.structural_data)): 2327 if self.structural_data[i].num != None: 2328 model_nums.append(self.structural_data[i].num) 2329 2330 # Sort. 2331 if model_nums: 2332 model_nums.sort() 2333 2334 # Loop over the models in order. 2335 for model_num in model_nums: 2336 # Find the model. 2337 for i in range(len(self.structural_data)): 2338 # Yield the model. 2339 if self.structural_data[i].num == model_num: 2340 yield self.structural_data[i] 2341 2342 # No models, so just yield the single container. 2343 if not model_nums: 2344 yield self.structural_data[0]

2345 2346

2347 - def num_models(self):

2348 """Method for returning the number of models. 2349 2350 @return: The number of models in the structural object. 2351 @rtype: int 2352 """ 2353 2354 return len(self.structural_data)

2355 2356

2357 - def num_molecules(self):

2358 """Method for returning the number of molecules. 2359 2360 @return: The number of molecules in the structural object. 2361 @rtype: int 2362 """ 2363 2364 # No data. 2365 if self.empty(): 2366 return 0 2367 2368 # Validate the structural object. 2369 self.validate() 2370 2371 # Return the number. 2372 return len(self.structural_data[0].mol)

2373 2374

2375 - def one_letter_codes(self, mol_name=None, selection=None):

2376 """Generate and return the one letter code sequence for the given molecule. 2377 2378 @keyword mol_name: The name of the molecule to return the one letter codes for. 2379 @type mol_name: str 2380 @keyword selection: The internal structural selection object. This is obtained by calling the selection() method with the atom ID string. 2381 @type selection: lib.structure.internal.Internal_selection instance 2382 @return: The one letter code sequence for the given molecule. 2383 @rtype: str 2384 """ 2385 2386 # Initialise. 2387 codes = '' 2388 2389 # Use the first model. 2390 model = self.structural_data[0] 2391 2392 # Residue numbers. 2393 res_nums = [] 2394 2395 # Loop over the molecules. 2396 for mol_index, i in selection.loop(): 2397 # Alias. 2398 mol = model.mol[mol_index] 2399 2400 # Skip non-matching molecules. 2401 if mol_name and mol_name != mol.mol_name: 2402 continue 2403 2404 # Not a new residue. 2405 if mol.res_num[i] in res_nums: 2406 continue 2407 2408 # Convert to the one letter code and store the residue number. 2409 codes += aa_codes_three_to_one(mol.res_name[i]) 2410 res_nums.append(mol.res_num[i]) 2411 2412 # Return the codes. 2413 return codes

2414 2415

2416 - def pack_structs(self, data_matrix, orig_model_num=None, set_model_num=None, orig_mol_num=None, set_mol_name=None, file_name=None, file_path=None, file_path_abs=None, helices=None, sheets=None, verbosity=1, merge=False):

2417 """From the given structural data, expand the structural data data structure. 2418 2419 @param data_matrix: A matrix of structural objects. The first dimension is the models, the second is the molecules. 2420 @type data_matrix: list of lists of structural objects 2421 @keyword orig_model_num: The original model numbers (for storage). 2422 @type orig_model_num: list of int 2423 @keyword set_model_num: The new model numbers (for model renumbering). 2424 @type set_model_num: list of int 2425 @keyword orig_mol_num: The original molecule numbers (for storage). The dimensions match the data matrix. 2426 @type orig_mol_num: list of list of int 2427 @keyword set_mol_name: The new molecule names. 2428 @type set_mol_name: list of str 2429 @keyword file_name: The name of the file from which the molecular data has been extracted. 2430 @type file_name: None or str 2431 @keyword file_path: The full path to the file specified by 'file_name'. 2432 @type file_path: None or str 2433 @keyword file_path_abs: The absolute path to the file specified by 'file_name'. This is a fallback mechanism in case results or save files are located somewhere other than the working directory. 2434 @type file_path_abs: None or str 2435 @keyword helices: The helix secondary structure information to store. The first dimension corresponds to each helix element. The second dimension consists of the helix_id, mol_init_index, init_res_name, init_seq_num, mol_end_index, end_res_name, end_seq_num, helix_class, and length. 2436 @type helices: list of lists 2437 @keyword sheets: The sheet secondary structure information to store. 2438 @type sheets: list of lists 2439 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 2440 @type verbosity: int 2441 @keyword merge: A flag which if set to True will try to merge the structure into the currently loaded structures. 2442 @type merge: bool 2443 """ 2444 2445 # Test the number of models. 2446 if len(orig_model_num) != len(data_matrix): 2447 raise RelaxError("Structural data mismatch, %s original models verses %s in the structural data." % (len(orig_model_num), len(data_matrix))) 2448 2449 # Test the number of molecules. 2450 for i in range(len(data_matrix)): 2451 if len(orig_mol_num[i]) != len(data_matrix[i]): 2452 raise RelaxError("Structural data mismatch, %s original molecules verses %s in the structural data." % (len(orig_mol_num[i]), len(data_matrix[i]))) 2453 2454 # Model numbers do not change. 2455 if not set_model_num: 2456 set_model_num = orig_model_num 2457 2458 # Test the model mapping. 2459 if len(set_model_num) != len(data_matrix): 2460 raise RelaxError("Failure of the mapping of new model numbers, %s new model numbers verses %s models in the structural data." % (len(set_model_num), len(data_matrix))) 2461 2462 # Test the molecule mapping. 2463 if len(set_mol_name) != len(data_matrix[0]): 2464 raise RelaxError("Failure of the mapping of new molecule names, %s new molecule names verses %s molecules in the structural data." % (len(set_mol_name), len(data_matrix[0]))) 2465 2466 # Test that the target models and structures are absent, and get the already existing model numbers. 2467 for i in range(len(set_model_num)): 2468 # Merging flag is set, so skip the checks. 2469 if merge: 2470 continue 2471 2472 # A new model, so no need to check. 2473 if not set_model_num[i] in self.structural_data.current_models: 2474 continue 2475 2476 # Loop over the structures. 2477 index = self.structural_data.current_models.index(set_model_num[i]) 2478 for j in range(len(self.structural_data[index].mol)): 2479 if self.structural_data[index].mol[j].mol_name in set_mol_name: 2480 raise RelaxError("The molecule '%s' of model %s already exists." % (self.structural_data[i].mol[j].mol_name, self.structural_data[i].num)) 2481 2482 # Loop over the models. 2483 store_mol_num_orig = [] 2484 store_mol_num_new = [] 2485 for i in range(len(set_model_num)): 2486 store_mol_num_orig.append([]) 2487 store_mol_num_new.append([]) 2488 2489 # The model doesn't currently exist. 2490 if set_model_num[i] not in self.structural_data.current_models: 2491 # Create the model. 2492 self.structural_data.add_item(set_model_num[i]) 2493 2494 # Get the model. 2495 model = self.structural_data[-1] 2496 2497 # Otherwise get the pre-existing model. 2498 else: 2499 model = self.structural_data[self.structural_data.current_models.index(set_model_num[i])] 2500 2501 # Loop over the molecules. 2502 for j in range(len(set_mol_name)): 2503 # Override the merge argument if the molecule does not exist. 2504 found = False 2505 merge_new = True 2506 for k in range(len(model.mol)): 2507 if model.mol[k].mol_name == set_mol_name[j]: 2508 found = True 2509 index = k 2510 if not found: 2511 merge_new = False 2512 2513 # Printout. 2514 if verbosity: 2515 # Model text formatting. 2516 orig_model_text = '' 2517 if orig_model_num[i] != None: 2518 orig_model_text = " of model %s" % orig_model_num[i] 2519 new_model_text = '' 2520 if set_model_num[i] != None: 2521 if merge_new: 2522 new_model_text += ' of' 2523 else: 2524 new_model_text += ' to' 2525 new_model_text += ' model %s' % set_model_num[i] 2526 2527 # The full text. 2528 if merge_new: 2529 print("Merging with molecule '%s'%s (from the original molecule number %s%s)." % (set_mol_name[j], new_model_text, orig_mol_num[i][j], orig_model_text)) 2530 else: 2531 print("Adding molecule '%s'%s (from the original molecule number %s%s)." % (set_mol_name[j], new_model_text, orig_mol_num[i][j], orig_model_text)) 2532 2533 # The index of the new molecule to add or merge. 2534 if not merge_new: 2535 index = len(model.mol) 2536 2537 # Store the index+1 as the new molecule number, and store the original molecule number. 2538 store_mol_num_new[i].append(index+1) 2539 store_mol_num_orig[i].append(orig_mol_num[i][j]) 2540 2541 # Consistency check. 2542 if model.num != self.structural_data[0].num and self.structural_data[0].mol[index].mol_name != set_mol_name[j]: 2543 raise RelaxError("The new molecule name of '%s' in model %s does not match the corresponding molecule's name of '%s' in model %s." % (set_mol_name[j], set_model_num[i], self.structural_data[0].mol[index].mol_name, self.structural_data[0].num)) 2544 2545 # Pack the structures. 2546 if merge_new: 2547 mol = model.mol.merge_item(mol_name=set_mol_name[j], mol_cont=data_matrix[i][j]) 2548 else: 2549 mol = model.mol.add_item(mol_name=set_mol_name[j], mol_cont=data_matrix[i][j]) 2550 2551 # Set the molecule name and store the structure file info. 2552 mol.mol_name = set_mol_name[j] 2553 mol.file_name = file_name 2554 mol.file_path = file_path 2555 mol.file_path_abs = file_path_abs 2556 mol.file_mol_num = orig_mol_num[i][j] 2557 mol.file_model = orig_model_num[i] 2558 2559 # Sort the structural data if a merge occurred. 2560 mol._sort() 2561 2562 # Consistency checks. 2563 for model_index in range(len(store_mol_num_new)): 2564 if len(store_mol_num_new[model_index]) != len(store_mol_num_orig[model_index]): 2565 raise RelaxFault 2566 2567 # The helix secondary structure information. 2568 if helices != None: 2569 # Initialise the data structure if required. 2570 if not hasattr(self, 'helices'): 2571 self.helices = [] 2572 2573 # Loop over each helix. 2574 for i in range(len(helices)): 2575 # Append the data. 2576 self.helices.append(helices[i]) 2577 2578 # The initial molecule index remapping. 2579 mol_init_index = store_mol_num_new[0][store_mol_num_orig[0].index(helices[i][1]+1)] - 1 2580 self.helices[-1][1] = mol_init_index 2581 2582 # The end molecule index remapping. 2583 mol_end_index = store_mol_num_new[0][store_mol_num_orig[0].index(helices[i][4]+1)] - 1 2584 self.helices[-1][4] = mol_end_index 2585 2586 # The sheet secondary structure information. 2587 if sheets != None: 2588 # Initialise the data structure if required. 2589 if not hasattr(self, 'sheets'): 2590 self.sheets = [] 2591 2592 # Loop over each sheet. 2593 for i in range(len(sheets)): 2594 # Append the data. 2595 self.sheets.append(sheets[i]) 2596 2597 # The initial molecule index remappings. 2598 mol_init_index = store_mol_num_new[0][store_mol_num_orig[0].index(sheets[i][4]+1)] - 1 2599 self.sheets[-1][4] = mol_init_index 2600 2601 2602 # The end molecule index remapping. 2603 mol_end_index = store_mol_num_new[0][store_mol_num_orig[0].index(sheets[i][8]+1)] - 1 2604 self.sheets[-1][8] = mol_end_index 2605 2606 # The current molecule index remapping. 2607 if sheets[i][14] != None: 2608 mol_cur_index = store_mol_num_new[0][store_mol_num_orig[0].index(sheets[i][14]+1)] - 1 2609 self.sheets[-1][14] = mol_cur_index 2610 2611 # The previous molecule index remapping. 2612 if sheets[i][19] != None: 2613 mol_prev_index = store_mol_num_new[0][store_mol_num_orig[0].index(sheets[i][19]+1)] - 1 2614 self.sheets[-1][19] = mol_prev_index

2615 2616

2617 - def rotate(self, R=None, origin=None, model=None, selection=None):

2618 """Rotate the structural information about the given origin. 2619 2620 @keyword R: The forwards rotation matrix. 2621 @type R: numpy 3D, rank-2 array 2622 @keyword origin: The origin of the rotation. 2623 @type origin: numpy 3D, rank-1 array 2624 @keyword model: The model to rotate. If None, all models will be rotated. 2625 @type model: int 2626 @keyword selection: The internal structural selection object. This is obtained by calling the selection() method with the atom ID string. 2627 @type selection: lib.structure.internal.Internal_selection instance 2628 """ 2629 2630 # Loop over the models. 2631 for model_cont in self.model_loop(model): 2632 # Loop over all molecules and atoms in the selection. 2633 for mol_index, i in selection.loop(): 2634 mol = model_cont.mol[mol_index] 2635 2636 # The origin to atom vector. 2637 vect = array([mol.x[i], mol.y[i], mol.z[i]], float64) - origin 2638 2639 # Rotation. 2640 rot_vect = dot(R, vect) 2641 2642 # The new position. 2643 pos = rot_vect + origin 2644 mol.x[i] = pos[0] 2645 mol.y[i] = pos[1] 2646 mol.z[i] = pos[2]

2647 2648

2649 - def selection(self, atom_id=None, inv=False):

2650 """Convert the atom ID string into a special internal selection object for speed. 2651 2652 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 2653 @type atom_id: str or None 2654 @keyword inv: A flag which if True will cause the selection to be inverted. 2655 @type inv: bool 2656 @return: The internal structural selection object. 2657 @rtype: Internal_selection instance 2658 """ 2659 2660 # Initialise the internal structural selection object. 2661 selection = Internal_selection() 2662 2663 # Split up the atom ID, to see if a molecule has been specified. 2664 mol_token, res_token, spin_token = tokenise(atom_id) 2665 2666 # Generate the atom ID selection object. 2667 sel_obj = None 2668 if atom_id: 2669 sel_obj = Selection(atom_id) 2670 2671 # Obtain all data from the first model (except the position data). 2672 model = self.structural_data[0] 2673 2674 # Loop over the molecules. 2675 for mol_index in range(len(model.mol)): 2676 mol = model.mol[mol_index] 2677 2678 # Skip non-matching molecules. 2679 if not inv: 2680 if sel_obj and not sel_obj.contains_mol(mol.mol_name): 2681 continue 2682 2683 # Skip matching molecules. 2684 else: 2685 if (not sel_obj) or (mol_token != None and sel_obj.contains_mol(mol.mol_name)): 2686 continue 2687 2688 # Add the molecule index. 2689 selection.add_mol(mol_index=mol_index) 2690 2691 # Loop over the atoms. 2692 for i in range(len(mol.atom_num)): 2693 # Skip non-matching atoms. 2694 if not inv: 2695 if sel_obj and not sel_obj.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 2696 continue 2697 2698 # Skip matching atoms. 2699 else: 2700 if (not sel_obj) or sel_obj.contains_spin(mol.atom_num[i], mol.atom_name[i], mol.res_num[i], mol.res_name[i], mol.mol_name): 2701 continue 2702 2703 # Add the atom index. 2704 selection.add_atom(mol_index=mol_index, atom_index=i) 2705 2706 # Return the object. 2707 return selection

2708 2709

2710 - def set_model(self, model_orig=None, model_new=None):

2711 """Set or reset the model number. 2712 @keyword model_orig: The original model number. Leave as None if no models are currently present. 2713 @type model_orig: None or int 2714 @keyword model_new: The new model number to set the model to. 2715 @type model_new: int 2716 """ 2717 2718 # Check. 2719 if model_orig == None and self.num_models() != 1: 2720 raise RelaxError("If the original model number is not supplied, only one model in the current structural object is allowed, but %s were found." % self.num_models()) 2721 2722 # Set the single model number. 2723 if model_orig == None: 2724 self.structural_data[0].num = model_new 2725 return 2726 2727 # Find the model and set the number. 2728 set = False 2729 for i in range(len(self.structural_data)): 2730 if model_orig == self.structural_data[i].num: 2731 self.structural_data[i].num = model_new 2732 set = True 2733 2734 # Sanity check. 2735 if not set: 2736 raise RelaxError("The original model number %s could not be found in the structural object." % model_orig)

2737 2738

2739 - def target_mol_name(self, set=None, target=None, index=None, mol_num=None, file=None):

2740 """Add the new molecule name to the target data structure. 2741 2742 @keyword set: The list of new molecule names. If not supplied, the names will be generated from the file name. 2743 @type set: None or list of str 2744 @keyword target: The target molecule name data structure to which the new name will be appended. 2745 @type target: list 2746 @keyword index: The molecule index, matching the set argument. 2747 @type index: int 2748 @keyword mol_num: The molecule number. 2749 @type mol_num: int 2750 @keyword file: The name of the file, excluding all directories. 2751 @type file: str 2752 """ 2753 2754 # Set the target molecule name. 2755 if set: 2756 target.append(set[index]) 2757 else: 2758 # Set the name to the file name plus the structure number. 2759 target.append(file_root(file) + '_mol' + repr(mol_num))

2760 2761

2762 - def translate(self, T=None, model=None, selection=None):

2763 """Displace the structural information by the given translation vector. 2764 2765 @keyword T: The translation vector. 2766 @type T: numpy 3D, rank-1 array 2767 @keyword model: The model to rotate. If None, all models will be rotated. 2768 @type model: int 2769 @keyword selection: The internal structural selection object. This is obtained by calling the selection() method with the atom ID string. 2770 @type selection: lib.structure.internal.Internal_selection instance 2771 """ 2772 2773 # Loop over the models. 2774 for model_cont in self.model_loop(model): 2775 # Loop over all molecules and atoms in the selection. 2776 for mol_index, i in selection.loop(): 2777 mol = model_cont.mol[mol_index] 2778 2779 # Translate. 2780 mol.x[i] = mol.x[i] + T[0] 2781 mol.y[i] = mol.y[i] + T[1] 2782 mol.z[i] = mol.z[i] + T[2]

2783 2784

2785 - def to_xml(self, doc, element):

2786 """Prototype method for converting the structural object to an XML representation. 2787 2788 @param doc: The XML document object. 2789 @type doc: xml.dom.minidom.Document instance 2790 @param element: The element to add the alignment tensors XML element to. 2791 @type element: XML element object 2792 """ 2793 2794 # Create the structural element and add it to the higher level element. 2795 str_element = doc.createElement('structure') 2796 element.appendChild(str_element) 2797 2798 # Set the structural attributes. 2799 str_element.setAttribute('desc', 'Structural information') 2800 2801 # No contents to store, so pack up the structural containers. 2802 if not self.structural_data.is_empty(): 2803 self.structural_data.to_xml(doc, str_element) 2804 2805 # The structural metadata. 2806 metadata = ['helices', 'sheets'] 2807 for name in metadata: 2808 # The metadata does not exist. 2809 if not hasattr(self, name): 2810 continue 2811 2812 # Get the object. 2813 obj = getattr(self, name) 2814 2815 # Create a new element for this object, and add it to the main element. 2816 sub_elem = doc.createElement(name) 2817 str_element.appendChild(sub_elem) 2818 2819 # Add the value to the sub element. 2820 object_to_xml(doc, sub_elem, value=obj) 2821 2822 # The displacement structure. 2823 if hasattr(self, 'displacements'): 2824 # Create an XML element. 2825 disp_element = doc.createElement('displacements') 2826 str_element.appendChild(disp_element) 2827 2828 # Set the attributes. 2829 disp_element.setAttribute('desc', 'The rotational and translational displacements between models') 2830 2831 # Add the displacement data. 2832 self.displacements.to_xml(doc, disp_element) 2833 2834 # Blacklisted objects. 2835 blacklist = ['structural_data', 'displacements'] + metadata + list(Internal.__dict__.keys()) + list(self.__class__.__dict__.keys()) + list(object.__dict__.keys()) 2836 2837 # Add all simple python objects within the container to the XML element. 2838 fill_object_contents(doc, str_element, object=self, blacklist=blacklist)

2839 2840

2841 - def validate_models(self, verbosity=1):

2842 """Check that the models are consistent with each other. 2843 2844 This checks that the primary structure is identical between the models. 2845 2846 2847 @keyword verbosity: If 0, then all printouts will be silenced. 2848 @type verbosity: int 2849 """ 2850 2851 # Print out. 2852 if verbosity: 2853 print("Validating models:") 2854 2855 # Loop over the models. 2856 for i in range(len(self.structural_data)): 2857 # Check the molecules. 2858 if len(self.structural_data[0].mol) != len(self.structural_data[i].mol): 2859 raise RelaxError("The number of molecules, %i, in model %i does not match the %i molecules of the first model." % (len(self.structural_data[i].mol), self.structural_data[i].num, len(self.structural_data[0].mol))) 2860 2861 # Loop over the molecules. 2862 for j in range(len(self.structural_data[i].mol)): 2863 # Alias the molecules. 2864 mol = self.structural_data[i].mol[j] 2865 mol_ref = self.structural_data[0].mol[j] 2866 2867 # Check the names. 2868 if mol.mol_name != mol_ref.mol_name: 2869 raise RelaxError("The molecule name '%s' of model %i does not match the name '%s' of the first model." % (mol.mol_name, self.structural_data[i].num, mol_ref.mol_name)) 2870 2871 # Loop over the atoms. 2872 for k in range(len(mol.atom_name)): 2873 # Check all data. 2874 same = True 2875 if mol.chain_id[k] != mol_ref.chain_id[k]: 2876 same = False 2877 elif mol.atom_num[k] != mol_ref.atom_num[k]: 2878 same = False 2879 elif mol.atom_name[k] != mol_ref.atom_name[k]: 2880 same = False 2881 elif mol.res_name[k] != mol_ref.res_name[k]: 2882 same = False 2883 elif mol.res_num[k] != mol_ref.res_num[k]: 2884 same = False 2885 elif mol.seg_id[k] != mol_ref.seg_id[k]: 2886 same = False 2887 elif mol.element[k] != mol_ref.element[k]: 2888 same = False 2889 2890 # Failure. 2891 if not same: 2892 print("%-6s%5s %4s%1s%3s %1s%4s%1s %8s%8s%8s%6.2f%6.2f %4s%2s%2s" % ('ATOM', mol.atom_num[k], self._translate(mol.atom_name[k]), '', self._translate(mol.res_name[k]), self._translate(mol.chain_id[k]), self._translate(mol.res_num[k]), '', '#', '#', '#', 1.0, 0, self._translate(mol.seg_id[k]), self._translate(mol.element[k]), '')) 2893 print("%-6s%5s %4s%1s%3s %1s%4s%1s %8s%8s%8s%6.2f%6.2f %4s%2s%2s" % ('ATOM', mol_ref.atom_num[k], self._translate(mol_ref.atom_name[k]), '', self._translate(mol_ref.res_name[k]), self._translate(mol_ref.chain_id[k]), self._translate(mol_ref.res_num[k]), '', '#', '#', '#', 1.0, 0, self._translate(mol_ref.seg_id[k]), self._translate(mol_ref.element[k]), '')) 2894 raise RelaxError("The atoms of model %s do not match the first model." % self.structural_data[i].num) 2895 2896 # Final printout. 2897 if verbosity: 2898 print("\tAll models are consistent")

2899 2900

2901 - def write_pdb(self, file, model_num=None):

2902 """Method for the creation of a PDB file from the structural data. 2903 2904 A number of PDB records including HET, HETNAM, FORMUL, HELIX, SHEET, HETATM, TER, CONECT, MASTER, and END are created. To create the non-standard residue records HET, HETNAM, and FORMUL, the data structure 'het_data' is created. It is an array of arrays where the first dimension corresponds to a different residue and the second dimension has the elements: 2905 2906 0. Residue number. 2907 1. Residue name. 2908 2. Chain ID. 2909 3. Total number of atoms in the residue. 2910 4. Number of H atoms in the residue. 2911 5. Number of C atoms in the residue. 2912 2913 2914 @param file: The PDB file object. This object must be writable. 2915 @type file: file object 2916 @keyword model_num: The model to place into the PDB file. If not supplied, then all models will be placed into the file. 2917 @type model_num: None or int 2918 """ 2919 2920 # Validate the structural data. 2921 self.validate() 2922 2923 # Print out. 2924 print("\nCreating the PDB records\n") 2925 2926 # Write some initial remarks. 2927 print("REMARK") 2928 pdb_write.remark(file, num=4, remark="This file complies with format v. 3.30, Jul-2011.") 2929 pdb_write.remark(file, num=40, remark=None) 2930 pdb_write.remark(file, num=40, remark="Created using relax (http://www.nmr-relax.com).") 2931 pdb_write.remark(file, num=40, remark=None) 2932 if RELAX_VERSION: 2933 pdb_write.remark(file, num=40, remark="relax version %s." % RELAX_VERSION) 2934 pdb_write.remark(file, num=40, remark="Created on %s." % asctime()) 2935 num_remark = 2 2936 2937 # Determine if model records will be created. 2938 model_records = False 2939 for model in self.model_loop(): 2940 if hasattr(model, 'num') and model.num != None: 2941 model_records = True 2942 2943 2944 #################### 2945 # Hetrogen section # 2946 #################### 2947 2948 # Initialise the hetrogen info array. 2949 het_data = [] 2950 het_data_coll = [] 2951 2952 # Loop over the molecules of the first model. 2953 index = 0 2954 for mol in self.structural_data[0].mol_loop(): 2955 # Check the validity of the data. 2956 self._validate_data_arrays(mol) 2957 2958 # Append an empty array for this molecule. 2959 het_data.append([]) 2960 2961 # Collect the non-standard residue info. 2962 for i in range(len(mol.atom_name)): 2963 # Skip non-HETATM records and HETATM records with no residue info. 2964 if mol.pdb_record[i] != 'HETATM' or mol.res_name[i] == None: 2965 continue 2966 2967 # Skip waters. 2968 if mol.res_name[i] == 'HOH': 2969 continue 2970 2971 # If the residue is not already stored initialise a new het_data element. 2972 # (residue number, residue name, chain ID, number of atoms, atom count array). 2973 if not het_data[index] or not mol.res_num[i] == het_data[index][-1][0]: 2974 het_data[index].append([mol.res_num[i], mol.res_name[i], CHAIN_ID_LIST[index], 0, []]) 2975 2976 # Catch missing chain_ids. 2977 if het_data[index][-1][2] == None: 2978 het_data[index][-1][2] = '' 2979 2980 # Total atom count. 2981 het_data[index][-1][3] = het_data[index][-1][3] + 1 2982 2983 # Find if the atom has already a count entry. 2984 entry = False 2985 for j in range(len(het_data[index][-1][4])): 2986 if mol.element[i] == het_data[index][-1][4][j][0]: 2987 entry = True 2988 2989 # Create a new specific atom count entry. 2990 if not entry: 2991 het_data[index][-1][4].append([mol.element[i], 0]) 2992 2993 # Increment the specific atom count. 2994 for j in range(len(het_data[index][-1][4])): 2995 if mol.element[i] == het_data[index][-1][4][j][0]: 2996 het_data[index][-1][4][j][1] = het_data[index][-1][4][j][1] + 1 2997 2998 # Create the collective hetrogen info data structure. 2999 for i in range(len(het_data[index])): 3000 # Find the entry in the collective structure. 3001 found = False 3002 for j in range(len(het_data_coll)): 3003 # Different residue numbers. 3004 if het_data[index][i][0] != het_data_coll[j][0]: 3005 continue 3006 3007 # Different chain IDs. 3008 if het_data_coll[j][2] != het_data[index][i][2]: 3009 continue 3010 3011 # Change the flag. 3012 found = True 3013 3014 # If there is no match, add the new residue to the collective. 3015 if not found: 3016 het_data_coll.append(het_data[index][i]) 3017 3018 # Increment the molecule index. 3019 index = index + 1 3020 3021 3022 # The HET records. 3023 ################## 3024 3025 # Print out. 3026 print("HET") 3027 3028 # Write the HET records. 3029 for het in het_data_coll: 3030 pdb_write.het(file, het_id=het[1], chain_id=het[2], seq_num=het[0], num_het_atoms=het[3]) 3031 3032 3033 # The HETNAM records. 3034 ##################### 3035 3036 # Print out. 3037 print("HETNAM") 3038 3039 # Loop over the non-standard residues. 3040 residues = [] 3041 for het in het_data_coll: 3042 # Test if the residue HETNAM record as already been written (otherwise store its name). 3043 if het[1] in residues: 3044 continue 3045 else: 3046 residues.append(het[1]) 3047 3048 # Get the chemical name. 3049 chemical_name = self._get_chemical_name(het[1]) 3050 if not chemical_name: 3051 chemical_name = 'Unknown' 3052 3053 # Write the HETNAM records. 3054 pdb_write.hetnam(file, het_id=het[1], text=chemical_name) 3055 3056 3057 # The FORMUL records. 3058 ##################### 3059 3060 # Print out. 3061 print("FORMUL") 3062 3063 # Loop over the non-standard residues and generate and write the chemical formula. 3064 residues = [] 3065 for i in range(len(het_data_coll)): 3066 # Alias. 3067 het = het_data_coll[i] 3068 3069 # Test if the residue HETNAM record as already been written (otherwise store its name). 3070 if het[1] in residues: 3071 continue 3072 else: 3073 residues.append(het[1]) 3074 3075 # Initialise the chemical formula. 3076 formula = '' 3077 3078 # Loop over the atoms. 3079 for atom_count in het[4]: 3080 formula = formula + atom_count[0] + repr(atom_count[1]) 3081 3082 # The FORMUL record (chemical formula). 3083 pdb_write.formul(file, comp_num=i+1, het_id=het[1], text=formula) 3084 3085 3086 ############################### 3087 # Secondary structure section # 3088 ############################### 3089 3090 # The HELIX records. 3091 #################### 3092 3093 if hasattr(self, 'helices') and len(self.helices): 3094 # Printout. 3095 print("HELIX") 3096 3097 # Loop over and unpack the helix data. 3098 index = 1 3099 for helix_id, mol_init_index, init_res_name, init_seq_num, mol_end_index, end_res_name, end_seq_num, helix_class, length in self.helices: 3100 # The chain IDs. 3101 if mol_init_index == None: 3102 mol_init_index = 0 3103 if mol_end_index == None: 3104 mol_end_index = 0 3105 3106 pdb_write.helix(file, ser_num=index, helix_id=helix_id, init_chain_id=CHAIN_ID_LIST[mol_init_index], init_res_name=init_res_name, init_seq_num=init_seq_num, end_chain_id=CHAIN_ID_LIST[mol_end_index], end_res_name=end_res_name, end_seq_num=end_seq_num, helix_class=helix_class, length=length) 3107 index += 1 3108 3109 # The SHEET records. 3110 #################### 3111 3112 if hasattr(self, 'sheets') and len(self.sheets): 3113 # Printout. 3114 print("SHEET") 3115 3116 # Loop over and unpack the helix data. 3117 index = 1 3118 for strand, sheet_id, num_strands, init_res_name, mol_init_index, init_seq_num, init_icode, end_res_name, mol_end_index, end_seq_num, end_icode, sense, cur_atom, cur_res_name, mol_cur_index, cur_res_seq, cur_icode, prev_atom, prev_res_name, mol_prev_index, prev_res_seq, prev_icode in self.sheets: 3119 # Translate molecule indices to chain IDs. 3120 if mol_init_index == None: 3121 mol_init_index = 0 3122 if mol_end_index == None: 3123 mol_end_index = 0 3124 init_chain_id = CHAIN_ID_LIST[mol_init_index] 3125 end_chain_id = CHAIN_ID_LIST[mol_end_index] 3126 cur_chain_id = None 3127 if mol_cur_index != None: 3128 cur_chain_id = CHAIN_ID_LIST[mol_cur_index] 3129 prev_chain_id = None 3130 if mol_prev_index != None: 3131 prev_chain_id = CHAIN_ID_LIST[mol_prev_index] 3132 3133 # Write out. 3134 pdb_write.sheet(file, strand=strand, sheet_id=sheet_id, num_strands=num_strands, init_res_name=init_res_name, init_chain_id=init_chain_id, init_seq_num=init_seq_num, init_icode=init_icode, end_res_name=end_res_name, end_chain_id=end_chain_id, end_seq_num=end_seq_num, end_icode=end_icode, sense=sense, cur_atom=cur_atom, cur_res_name=cur_res_name, cur_chain_id=cur_chain_id, cur_res_seq=cur_res_seq, cur_icode=cur_icode, prev_atom=prev_atom, prev_res_name=prev_res_name, prev_chain_id=prev_chain_id, prev_res_seq=prev_res_seq, prev_icode=prev_icode) 3135 index += 1 3136 3137 3138 ###################### 3139 # Coordinate section # 3140 ###################### 3141 3142 # Initial printout if models are present. 3143 if model_records: 3144 print("\nMODEL records:") 3145 3146 # Loop over the models. 3147 for model in self.model_loop(model_num): 3148 # Initialise record counts. 3149 num_hetatm = 0 3150 num_atom = 0 3151 num_ter = 0 3152 ser_num = 1 3153 3154 # MODEL record, for multiple models. 3155 #################################### 3156 3157 if model_records: 3158 # Printout. 3159 sys.stdout.write('.') 3160 3161 # Write the model record. 3162 pdb_write.model(file, serial=model.num) 3163 3164 3165 # Add the atomic coordinate records (ATOM, HETATM, and TER). 3166 ############################################################ 3167 3168 # Loop over the molecules. 3169 index = 0 3170 for mol in model.mol_loop(): 3171 # Printout. 3172 if not model_records: 3173 print("ATOM, HETATM, TER") 3174 3175 # Loop over the atomic data. 3176 atom_record = False 3177 for i in range(len(mol.atom_name)): 3178 # Write the ATOM record. 3179 if mol.pdb_record[i] in [None, 'ATOM']: 3180 atom_record = True 3181 3182 # Write out. 3183 pdb_write.atom(file, serial=ser_num, name=mol.atom_name[i], res_name=mol.res_name[i], chain_id=CHAIN_ID_LIST[index], res_seq=mol.res_num[i], x=mol.x[i], y=mol.y[i], z=mol.z[i], occupancy=1.0, temp_factor=0, element=mol.element[i]) 3184 num_atom += 1 3185 ser_num += 1 3186 3187 # Info for the TER record. 3188 ter_num = ser_num + 1 3189 ter_name = mol.res_name[i] 3190 ter_chain_id = CHAIN_ID_LIST[index] 3191 ter_res_num = mol.res_num[i] 3192 3193 # Finish the ATOM section with the TER record. 3194 if atom_record: 3195 pdb_write.ter(file, serial=ser_num, res_name=ter_name, chain_id=CHAIN_ID_LIST[index], res_seq=ter_res_num) 3196 num_ter += 1 3197 ser_num += 1 3198 3199 # Loop over the atomic data. 3200 count_shift = False 3201 for i in range(len(mol.atom_name)): 3202 # Write the HETATM record. 3203 if mol.pdb_record[i] == 'HETATM': 3204 # Increment the atom number if a TER record was created. 3205 if atom_record and ser_num == ter_num: 3206 count_shift = True 3207 3208 # Write out. 3209 pdb_write.hetatm(file, serial=ser_num, name=self._translate(mol.atom_name[i]), res_name=mol.res_name[i], chain_id=CHAIN_ID_LIST[index], res_seq=mol.res_num[i], x=mol.x[i], y=mol.y[i], z=mol.z[i], occupancy=1.0, temp_factor=0.0, element=mol.element[i]) 3210 num_hetatm += 1 3211 ser_num += 1 3212 3213 # Increment the molecule index. 3214 index += 1 3215 3216 3217 # ENDMDL record, for multiple structures. 3218 ######################################## 3219 3220 if model_records: 3221 if not model_records: 3222 print("ENDMDL") 3223 pdb_write.endmdl(file) 3224 3225 3226 # Create the CONECT records. 3227 ############################ 3228 3229 # Print out. 3230 if model_records: 3231 sys.stdout.write('\n') 3232 print("CONECT") 3233 3234 # Initialise record counts. 3235 num_conect = 0 3236 3237 # The per molecule incremented atom counts. 3238 atom_counts = [0] 3239 index = 0 3240 for mol in self.structural_data[0].mol_loop(): 3241 if index == 0: 3242 atom_counts.append(len(mol.atom_name)) 3243 else: 3244 atom_counts.append(atom_counts[index] + len(mol.atom_name)) 3245 index += 1 3246 3247 # Loop over the molecules of the first model. 3248 index = 0 3249 for mol in self.structural_data[0].mol_loop(): 3250 # Loop over the atoms. 3251 for i in range(len(mol.atom_name)): 3252 # No bonded atoms, hence no CONECT record is required. 3253 if not len(mol.bonded[i]): 3254 continue 3255 3256 # Initialise some data structures. 3257 flush = 0 3258 bonded_index = 0 3259 bonded = ['', '', '', ''] 3260 bonded_shifted = ['', '', '', ''] 3261 3262 # Loop over the bonded atoms. 3263 for j in range(len(mol.bonded[i])): 3264 # End of the array, hence create the CONECT record in this iteration. 3265 if j == len(mol.bonded[i])-1: 3266 flush = True 3267 3268 # Only four covalently bonded atoms allowed in one CONECT record. 3269 if bonded_index == 3: 3270 flush = True 3271 3272 # Get the bonded atom index. 3273 bonded[bonded_index] = mol.bonded[i][j] 3274 3275 # Increment the bonded_index value. 3276 bonded_index = bonded_index + 1 3277 3278 # Generate the CONECT record and increment the counter. 3279 if flush: 3280 # Convert the atom indices to atom numbers. 3281 for k in range(4): 3282 if bonded[k] != '': 3283 bonded_shifted[k] = bonded[k] + 1 + atom_counts[index] 3284 3285 # Write the CONECT record. 3286 pdb_write.conect(file, serial=i+1+atom_counts[index], bonded1=bonded_shifted[0], bonded2=bonded_shifted[1], bonded3=bonded_shifted[2], bonded4=bonded_shifted[3]) 3287 3288 # Reset the flush flag, the bonded atom count, and the bonded atom names. 3289 flush = False 3290 bonded_index = 0 3291 bonded = ['', '', '', ''] 3292 3293 # Increment the CONECT record count. 3294 num_conect = num_conect + 1 3295 3296 # Increment the molecule index. 3297 index += 1 3298 3299 3300 # MASTER record. 3301 ################ 3302 3303 print("\nMASTER") 3304 pdb_write.master(file, num_het=len(het_data_coll), num_coord=num_atom+num_hetatm, num_ter=num_ter, num_conect=num_conect) 3305 3306 3307 # END. 3308 ###### 3309 3310 print("END") 3311 pdb_write.end(file)

3312 3313

3314 - def validate(self):

3315 """Check the integrity of the structural data. 3316 3317 The number of molecules must be the same in all models. 3318 """ 3319 3320 # Reference number of molecules. 3321 num_mols = len(self.structural_data[0].mol) 3322 3323 # Loop over all other models. 3324 for i in range(1, len(self.structural_data)): 3325 # Model alias. 3326 model_i = self.structural_data[i] 3327 3328 # Size check. 3329 if num_mols != len(model_i.mol): 3330 raise RelaxError("The structural object is not valid - the number of molecules is not the same for all models.") 3331 3332 # Molecule name check. 3333 for j in range(len(model_i.mol)): 3334 if model_i.mol[j].mol_name != self.structural_data[0].mol[j].mol_name: 3335 raise RelaxError("The molecule name '%s' of model %s does not match the corresponding molecule '%s' of model %s." % (model_i.mol[j].mol_name, model_i.num, self.structural_data[0].mol[j].mol_name, self.structural_data[0].num))

Source Code for Module lib.structure.internal.object