lib.structure.internal.molecules

39 """The container for the molecular information. 40 41 The structural data object for this class is a container possessing a number of different arrays 42 corresponding to different structural information. These objects include: 43 44 - atom_num: The atom name. 45 - atom_name: The atom name. 46 - bonded: Each element an array of bonded atom indices. 47 - chain_id: The chain ID. 48 - element: The element symbol. 49 - pdb_record: The optional PDB record name (one of ATOM, HETATM, or TER). 50 - res_name: The residue name. 51 - res_num: The residue number. 52 - seg_id: The segment ID. 53 - x: The x coordinate of the atom. 54 - y: The y coordinate of the atom. 55 - z: The z coordinate of the atom. 56 57 All arrays should be of equal length so that an atom index can retrieve all the corresponding 58 data. Only the atom identification string is compulsory, all other arrays can contain None. 59 """ 60 61

62 - def __init__(self):

63 """Initialise the molecular container.""" 64 65 # The atom num (array of int). 66 self.atom_num = [] 67 68 # The atom name (array of str). 69 self.atom_name = [] 70 71 # The bonded atom indices (array of arrays of int). 72 self.bonded = [] 73 74 # The chain ID (array of str). 75 self.chain_id = [] 76 77 # The element symbol (array of str). 78 self.element = [] 79 80 # The optional PDB record name (array of str). 81 self.pdb_record = [] 82 83 # The residue name (array of str). 84 self.res_name = [] 85 86 # The residue number (array of int). 87 self.res_num = [] 88 89 # The segment ID (array of int). 90 self.seg_id = [] 91 92 # The x coordinate (array of float). 93 self.x = [] 94 95 # The y coordinate (array of float). 96 self.y = [] 97 98 # The z coordinate (array of float). 99 self.z = []

100 101

102 - def _atom_index(self, atom_num):

103 """Find the atom index corresponding to the given atom number. 104 105 @param atom_num: The atom number to find the index of. 106 @type atom_num: int 107 @return: The atom index corresponding to the atom. 108 @rtype: int 109 """ 110 111 # Loop over the atoms. 112 for j in range(len(self.atom_num)): 113 # Return the index. 114 if self.atom_num[j] == atom_num: 115 return j

116 117

118 - def _det_pdb_element(self, atom_name):

119 """Try to determine the element from the PDB atom name. 120 121 @param atom_name: The PDB atom name. 122 @type atom_name: str 123 @return: The element name, or None if unsuccessful. 124 @rtype: str or None 125 """ 126 127 # Strip away the "'" character (for RNA, etc.). 128 element = atom_name.strip("'") 129 130 # Strip away atom numbering, from the front and end. 131 element = element.strip(digits) 132 133 # Amino acid atom translation table (note, numbers have been stripped already!). 134 table = {'C': ['CA', 'CB', 'CG', 'CD', 'CE', 'CH', 'CZ'], 135 'N': ['ND', 'NE', 'NH', 'NZ'], 136 'H': ['HA', 'HB', 'HG', 'HD', 'HE', 'HH', 'HT', 'HZ'], 137 'O': ['OG', 'OD', 'OE', 'OH', 'OT'], 138 'S': ['SD', 'SG'] 139 } 140 141 # Translate amino acids. 142 for key in table: 143 if element in table[key]: 144 element = key 145 break 146 147 # Return the element if it is in the periodic table. 148 if periodic_table.has_element(symbol=element): 149 return element 150 151 # Else, throw a warning. 152 warn(RelaxWarning("Cannot determine the element associated with atom '%s'." % atom_name))

153 154

155 - def _parse_gaussian_record(self, record):

156 """Parse the Gaussian log record string and return an array of the corresponding atomic information. 157 158 The format of the Gaussian log records is:: 159 __________________________________________________________________________________________ 160 | | | | 161 | Columns | Data type | Description | 162 |_________|______________|_______________________________________________________________| 163 | | | | 164 | 1 | int | "Center Number" - the sequential atom number. | 165 | 2 | int | "Atomic Number" - the atomic number. | 166 | 3 | int | "Atomic Type" - the atomic type? | 167 | 4 | float | X coordinate in Angstrom | 168 | 5 | float | Y coordinate in Angstrom | 169 | 6 | float | Z coordinate in Angstrom | 170 |_________|______________|_______________________________________________________________| 171 172 173 @param record: The single line Gaussian record. 174 @type record: str 175 @return: The list of atomic information 176 @rtype: list of str 177 """ 178 179 # Skip the header. 180 if search("---------", record): 181 return None 182 if search("Center", record): 183 return None 184 if search("Number", record): 185 return None 186 187 # Initialise. 188 word = record.split() 189 190 # Proper records. 191 if len(word) == 6: 192 # Convert strings to numbers. 193 atom_number = int(word[0]) 194 atomic_num = int(word[1]) 195 x = float(word[3]) 196 y = float(word[4]) 197 z = float(word[5]) 198 199 # Return the atomic info. 200 return atom_number, atomic_num, x, y, z

201 202

203 - def _parse_xyz_record(self, record):

204 """Parse the XYZ record string and return an array of the corresponding atomic information. 205 206 The format of the XYZ records is:: 207 __________________________________________________________________________________________ 208 | | | | | 209 | Columns | Data type | Field | Definition | 210 |_________|______________|______________|________________________________________________| 211 | | | | | 212 | 1 | String | element | | 213 | 2 | Real | x | Orthogonal coordinates for X in Angstroms | 214 | 3 | Real | y | Orthogonal coordinates for Y in Angstroms | 215 | 4 | Real | z | Orthogonal coordinates for Z in Angstroms | 216 |_________|______________|______________|________________________________________________| 217 218 219 @param record: The single line PDB record. 220 @type record: str 221 @return: The list of atomic information 222 @rtype: list of str 223 """ 224 225 # Initialise. 226 fields = [] 227 word = record.split() 228 229 # ATOM and HETATM records. 230 if len(word)==4: 231 # Split up the record. 232 fields.append(word[0]) 233 fields.append(word[1]) 234 fields.append(word[2]) 235 fields.append(word[3]) 236 237 # Loop over the fields. 238 for i in range(len(fields)): 239 # Strip all whitespace. 240 fields[i] = fields[i].strip() 241 242 # Replace nothingness with None. 243 if fields[i] == '': 244 fields[i] = None 245 246 # Convert strings to numbers. 247 if fields[1]: 248 fields[1] = float(fields[1]) 249 if fields[2]: 250 fields[2] = float(fields[2]) 251 if fields[3]: 252 fields[3] = float(fields[3]) 253 254 # Return the atomic info. 255 return fields

256 257

258 - def _sort(self):

259 """Sort all structural data.""" 260 261 # Create an index list for sorting the structural data. 262 indices = list(range(len(self.atom_name))) 263 indices.sort(key=self._sort_key) 264 265 # Sort all lists. 266 self.atom_num = [self.atom_num[i] for i in indices] 267 self.atom_name = [self.atom_name[i] for i in indices] 268 self.bonded = [self.bonded[i] for i in indices] 269 self.chain_id = [self.chain_id[i] for i in indices] 270 self.element = [self.element[i] for i in indices] 271 self.pdb_record = [self.pdb_record[i] for i in indices] 272 self.res_name = [self.res_name[i] for i in indices] 273 self.res_num = [self.res_num[i] for i in indices] 274 self.seg_id = [self.seg_id[i] for i in indices] 275 self.x = [self.x[i] for i in indices] 276 self.y = [self.y[i] for i in indices] 277 self.z = [self.z[i] for i in indices] 278 279 # Change the bonded numbers, as the indices are now different. 280 for i in range(len(self.bonded)): 281 for j in range(len(self.bonded[i])): 282 self.bonded[i][j] = indices.index(self.bonded[i][j])

283 284

285 - def _sort_key(self, i):

286 """Return the information for sorting the sequence data.""" 287 288 # Python 3 - return 0 instead of None. 289 if self.res_num[i] == None: 290 return 0 291 292 # Sort based on residue number. 293 return self.res_num[i]

294 295

296 - def atom_add(self, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):

297 """Method for adding an atom to the structural data object. 298 299 This method will create the key-value pair for the given atom. 300 301 302 @keyword atom_name: The atom name, e.g. 'H1'. 303 @type atom_name: str or None 304 @keyword res_name: The residue name. 305 @type res_name: str or None 306 @keyword res_num: The residue number. 307 @type res_num: int or None 308 @keyword pos: The position vector of coordinates. 309 @type pos: list (length = 3) 310 @keyword element: The element symbol. 311 @type element: str or None 312 @keyword atom_num: The atom number. 313 @type atom_num: int or None 314 @keyword chain_id: The chain identifier. 315 @type chain_id: str or None 316 @keyword segment_id: The segment identifier. 317 @type segment_id: str or None 318 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'. 319 @type pdb_record: str or None 320 @return: The index of the added atom. 321 @rtype: int 322 """ 323 324 # Append to all the arrays. 325 self.atom_num.append(atom_num) 326 self.atom_name.append(atom_name) 327 self.bonded.append([]) 328 self.chain_id.append(chain_id) 329 self.element.append(element) 330 self.pdb_record.append(pdb_record) 331 self.res_name.append(res_name) 332 self.res_num.append(res_num) 333 self.seg_id.append(segment_id) 334 self.x.append(pos[0]) 335 self.y.append(pos[1]) 336 self.z.append(pos[2]) 337 338 # Return the index. 339 return len(self.atom_num) - 1

340 341

342 - def atom_connect(self, index1=None, index2=None):

343 """Method for connecting two atoms within the data structure object. 344 345 This method will append index2 to the array at bonded[index1] and vice versa. 346 347 348 @keyword index1: The index of the first atom. 349 @type index1: int 350 @keyword index2: The index of the second atom. 351 @type index2: int 352 """ 353 354 # Update the bonded array structure, if necessary. 355 if index2 not in self.bonded[index1]: 356 self.bonded[index1].append(index2) 357 if index1 not in self.bonded[index2]: 358 self.bonded[index2].append(index1)

359 360

361 - def fill_object_from_gaussian(self, records):

362 """Method for generating a complete Structure_container object from the given Gaussian log records. 363 364 @param records: A list of structural Gaussian log records. 365 @type records: list of str 366 """ 367 368 # Loop over the records. 369 for record in records: 370 # Parse the record. 371 data = self._parse_gaussian_record(record) 372 373 # Nothing to do. 374 if data == None: 375 continue 376 377 # Unpack. 378 atom_number, atomic_num, x, y, z = data 379 380 # Translate the atomic number to the atom name. 381 atom_name = periodic_table.lookup_symbol(atomic_num) 382 383 # Add. 384 self.atom_add(atom_name=atom_name, atom_num=atom_number, pos=[x, y, z], element=atom_name)

385 386

387 - def fill_object_from_pdb(self, records, alt_loc_select=None, reset_serial=True):

388 """Method for generating a complete Structure_container object from the given PDB records. 389 390 @param records: A list of structural PDB records. 391 @type records: list of str 392 @keyword alt_loc_select: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use. 393 @type alt_loc_select: str or None 394 @keyword reset_serial: A flag which if True will cause the first serial number (or atom number) to be reset to 1, and all other numbers shifted. 395 @type reset_serial: bool 396 """ 397 398 # Loop over the records. 399 water = [] 400 missing_connect = [] 401 number_offset = None 402 for record in records: 403 # Nothing to do. 404 if not record or record == '\n': 405 continue 406 407 # Add the atom. 408 if record[:4] == 'ATOM' or record[:6] == 'HETATM': 409 # Parse the record. 410 if record[:4] == 'ATOM': 411 record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.atom(record) 412 if record[:6] == 'HETATM': 413 record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.hetatm(record) 414 415 # The serial number. 416 if reset_serial: 417 # The first number. 418 if number_offset == None: 419 number_offset = serial - 1 420 421 # Reset. 422 serial -= number_offset 423 424 # Skip waters. 425 if res_name == 'HOH': 426 water.append(res_seq) 427 continue 428 429 # Handle the alternate locations. 430 if alt_loc != None: 431 # Don't know what to do. 432 if alt_loc_select == None: 433 raise RelaxError("Multiple alternate location indicators are present in the PDB file, but the desired coordinate set has not been specified.") 434 435 # Skip non-matching locations. 436 if alt_loc != alt_loc_select: 437 continue 438 439 # Attempt at determining the element, if missing. 440 if not element: 441 element = self._det_pdb_element(name) 442 443 # Add. 444 self.atom_add(pdb_record=record_type, atom_num=serial, atom_name=name, res_name=res_name, res_num=res_seq, pos=[x, y, z], element=element) 445 446 # Connect atoms. 447 if record[:6] == 'CONECT': 448 # Parse the record. 449 record_type, serial, bonded1, bonded2, bonded3, bonded4 = pdb_read.conect(record) 450 451 # Reset the serial numbers. 452 if reset_serial: 453 serial -= number_offset 454 bonded1 -= number_offset 455 if bonded2 != None: 456 bonded2 -= number_offset 457 if bonded3 != None: 458 bonded3 -= number_offset 459 if bonded4 != None: 460 bonded4 -= number_offset 461 462 # Loop over the atoms of the record. 463 for bonded in [bonded1, bonded2, bonded3, bonded4]: 464 # Skip if there is no record. 465 if not bonded: 466 continue 467 468 # The atom indices. 469 serial_index = self._atom_index(serial) 470 bonded_index = self._atom_index(bonded) 471 472 # Skip broken CONECT records (for when the record points to a non-existent atom). 473 if serial_index == None: 474 if serial not in missing_connect: 475 missing_connect.append(serial) 476 continue 477 if bonded_index == None: 478 if bonded not in missing_connect: 479 missing_connect.append(bonded) 480 continue 481 482 # Make the connection. 483 self.atom_connect(index1=serial_index, index2=bonded_index) 484 485 # Warnings. 486 if len(missing_connect): 487 missing_connect.sort() 488 warn(RelaxWarning("The following atom numbers from the CONECT records cannot be found within the ATOM and HETATM records: %s." % missing_connect)) 489 if len(water): 490 warn(RelaxWarning("Skipping the water molecules HOH %s." % water))

491 492

493 - def fill_object_from_xyz(self, records):

494 """Method for generating a complete Structure_container object from the given xyz records. 495 496 @param records: A list of structural xyz records. 497 @type records: list of str 498 """ 499 500 # initialisation for atom number 501 atom_number = 1 502 503 # Loop over the records. 504 for record in records: 505 # Parse the record. 506 record = self._parse_xyz_record(record) 507 508 # Nothing to do. 509 if not record: 510 continue 511 512 # Add the atom. 513 if len(record) == 4: 514 # Add. 515 self.atom_add(atom_name=record[0], atom_num=atom_number, pos=[record[1], record[2], record[3]], element=record[0]) 516 517 # Increment of atom number 518 atom_number = atom_number + 1

519 520

521 - def from_xml(self, mol_node, file_version=1):

522 """Recreate the MolContainer from the XML molecule node. 523 524 @param mol_node: The molecule XML node. 525 @type mol_node: xml.dom.minicompat.NodeList instance 526 @keyword file_version: The relax XML version of the XML file. 527 @type file_version: int 528 """ 529 530 # Recreate the current molecule container. 531 xml_to_object(mol_node, self, file_version=file_version)

532 533

534 - def is_empty(self):

535 """Check if the container is empty.""" 536 537 # Set attributes. 538 if hasattr(self, 'file_name'): return False 539 if hasattr(self, 'file_path'): return False 540 if hasattr(self, 'file_mol_num'): return False 541 if hasattr(self, 'file_model'): return False 542 543 # Internal data structures. 544 if not self.atom_num == []: return False 545 if not self.atom_name == []: return False 546 if not self.bonded == []: return False 547 if not self.chain_id == []: return False 548 if not self.element == []: return False 549 if not self.pdb_record == []: return False 550 if not self.res_name == []: return False 551 if not self.res_num == []: return False 552 if not self.seg_id == []: return False 553 if not self.x == []: return False 554 if not self.y == []: return False 555 if not self.z == []: return False 556 557 # Ok, now this thing must be empty. 558 return True

559 560

561 - def last_residue(self):

562 """Return the number of the last residue. 563 564 @return: The last residue number. 565 @rtype: int 566 """ 567 568 # No residues yet. 569 if not len(self.res_num): 570 return 0 571 572 # Return the number. 573 return self.res_num[-1]

574 575

576 - def loop_residues(self):

577 """Generator method for looping over the individual residues of the molecule. 578 579 @return: The residue name and number. 580 @rtype: str, int 581 """ 582 583 # Loop over the atoms. 584 last_res = None 585 for i in range(len(self.atom_num)): 586 # A new residue. 587 if self.res_num[i] != last_res: 588 last_res = self.res_num[i] 589 yield self.res_name[i], self.res_num[i]

590 591

592 - def merge(self, mol_cont=None):

593 """Merge the contents of the given molecule container into here. 594 595 @keyword mol_cont: The data structure for the molecule to merge. 596 @type mol_cont: MolContainer instance 597 """ 598 599 # The current index. 600 curr_index = len(self.atom_num) 601 602 # Loop over all data. 603 for i in range(len(mol_cont.atom_num)): 604 # Add the atom. 605 self.atom_add(atom_num=curr_index+i+1, atom_name=mol_cont.atom_name[i], res_name=mol_cont.res_name[i], res_num=mol_cont.res_num[i], pos=[mol_cont.x[i], mol_cont.y[i], mol_cont.z[i]], element=mol_cont.element[i], chain_id=mol_cont.chain_id[i], pdb_record=mol_cont.pdb_record[i]) 606 607 # Connect the atoms. 608 for j in range(len(mol_cont.bonded[i])): 609 self.atom_connect(index1=i+curr_index+1, index2=mol_cont.bonded[i][j]+curr_index+1)

610 611

612 - def to_xml(self, doc, element):

613 """Create XML elements for the contents of this molecule container. 614 615 @param doc: The XML document object. 616 @type doc: xml.dom.minidom.Document instance 617 @param element: The element to add the molecule XML elements to. 618 @type element: XML element object 619 """ 620 621 # Create an XML element for this molecule and add it to the higher level element. 622 mol_element = doc.createElement('mol_cont') 623 element.appendChild(mol_element) 624 625 # Set the molecule attributes. 626 mol_element.setAttribute('desc', 'Molecule container') 627 mol_element.setAttribute('name', str(self.mol_name)) 628 629 # Add all simple python objects within the MolContainer to the XML element. 630 fill_object_contents(doc, mol_element, object=self, blacklist=list(self.__class__.__dict__.keys()))

635 """List type data container for holding the different molecules of one model.""" 636

637 - def __repr__(self):

638 """The string representation of the object. 639 640 Rather than using the standard Python conventions (either the string representation of the 641 value or the "<...desc...>" notation), a rich-formatted description of the object is given. 642 """ 643 644 text = "Molecules.\n\n" 645 text = text + "%-8s%-8s" % ("Index", "Name") + "\n" 646 for i in range(len(self)): 647 text = text + "%-8i%-8s" % (i, self[i].mol_name) + "\n" 648 return text

649 650

651 - def add_item(self, mol_name=None, mol_cont=None):

652 """Append the given MolContainer instance to the MolList. 653 654 @keyword mol_name: The molecule number. 655 @type mol_name: int 656 @keyword mol_cont: The data structure for the molecule. 657 @type mol_cont: MolContainer instance 658 @return: The new molecule container. 659 @rtype: MolContainer instance 660 """ 661 662 # If no molecule data exists, replace the empty first molecule with this molecule (just a renaming). 663 if len(self) and self.is_empty(): 664 self[0].mol_name = mol_name 665 666 # Otherwise append an empty MolContainer. 667 else: 668 # Test if the molecule already exists. 669 for i in range(len(self)): 670 if self[i].mol_name == mol_name: 671 raise RelaxError("The molecule '%s' already exists." % mol_name) 672 673 # Append an empty MolContainer. 674 self.append(mol_cont) 675 676 # Set the name. 677 self[-1].mol_name = mol_name 678 679 # Return the container. 680 return self[-1]

681 682

683 - def is_empty(self):

684 """Method for testing if this MolList object is empty. 685 686 @return: True if this list only has one MolContainer and the molecule name has not 687 been set, False otherwise. 688 @rtype: bool 689 """ 690 691 # No MolContainers. 692 if len(self) == 0: 693 return True 694 695 # There is only one MolContainer and it is empty. 696 if len(self) == 1 and hasattr(self[0], 'is_empty') and self[0].is_empty(): 697 return True 698 699 # Otherwise. 700 return False

701 702

703 - def from_xml(self, mol_nodes, file_version=1):

704 """Recreate a molecule list data structure from the XML molecule nodes. 705 706 @param mol_nodes: The molecule XML nodes. 707 @type mol_nodes: xml.dom.minicompat.NodeList instance 708 @keyword file_version: The relax XML version of the XML file. 709 @type file_version: int 710 """ 711 712 # Test if empty. 713 if not self.is_empty(): 714 raise RelaxFromXMLNotEmptyError(self.__class__.__name__) 715 716 # Loop over the molecules. 717 for mol_node in mol_nodes: 718 # Initialise a MolContainer instance. 719 mol_cont = MolContainer() 720 721 # Get the molecule name. 722 name = mol_node.getAttribute('name') 723 if name == 'None': 724 name = None 725 726 # Add the molecule to the MolList structure. 727 self.add_item(mol_name=name, mol_cont=mol_cont) 728 729 # Execute the specific MolContainer from_xml() method. 730 self[-1].from_xml(mol_node, file_version=file_version)

731 732

733 - def merge_item(self, mol_name=None, mol_cont=None):

734 """Mege the given MolContainer instance into a pre-existing molecule container. 735 736 @keyword mol_name: The molecule number. 737 @type mol_name: int 738 @keyword mol_cont: The data structure for the molecule. 739 @type mol_cont: MolContainer instance 740 @return: The new molecule container. 741 @rtype: MolContainer instance 742 """ 743 744 # Find the molecule to merge. 745 index = None 746 for i in range(len(self)): 747 if self[i].mol_name == mol_name: 748 index = i 749 break 750 751 # No molecule found. 752 if index == None: 753 raise RelaxError("The molecule '%s' to merge with cannot be found." % mol_name) 754 755 # Merge the molecules. 756 self[index].merge(mol_cont) 757 758 # Return the container. 759 return self[index]

760 761

762 - def to_xml(self, doc, element):

763 """Create XML elements for each molecule. 764 765 @param doc: The XML document object. 766 @type doc: xml.dom.minidom.Document instance 767 @param element: The element to add the molecule XML elements to. 768 @type element: XML element object 769 """ 770 771 # Loop over the molecules. 772 for i in range(len(self)): 773 # Add the molecule data. 774 self[i].to_xml(doc, element)

Source Code for Module lib.structure.internal.molecules