lib.structure.internal.molecules

39 """The container for the molecular information. 40 41 The structural data object for this class is a container possessing a number of different arrays 42 corresponding to different structural information. These objects include: 43 44 - atom_num: The atom name. 45 - atom_name: The atom name. 46 - bonded: Each element an array of bonded atom indices. 47 - chain_id: The chain ID. 48 - element: The element symbol. 49 - pdb_record: The optional PDB record name (one of ATOM, HETATM, or TER). 50 - res_name: The residue name. 51 - res_num: The residue number. 52 - seg_id: The segment ID. 53 - x: The x coordinate of the atom. 54 - y: The y coordinate of the atom. 55 - z: The z coordinate of the atom. 56 57 All arrays should be of equal length so that an atom index can retrieve all the corresponding 58 data. Only the atom identification string is compulsory, all other arrays can contain None. 59 """ 60 61

62 - def __init__(self):

63 """Initialise the molecular container.""" 64 65 # The atom num (array of int). 66 self.atom_num = [] 67 68 # The atom name (array of str). 69 self.atom_name = [] 70 71 # The bonded atom indices (array of arrays of int). 72 self.bonded = [] 73 74 # The chain ID (array of str). 75 self.chain_id = [] 76 77 # The element symbol (array of str). 78 self.element = [] 79 80 # The optional PDB record name (array of str). 81 self.pdb_record = [] 82 83 # The residue name (array of str). 84 self.res_name = [] 85 86 # The residue number (array of int). 87 self.res_num = [] 88 89 # The segment ID (array of int). 90 self.seg_id = [] 91 92 # The x coordinate (array of float). 93 self.x = [] 94 95 # The y coordinate (array of float). 96 self.y = [] 97 98 # The z coordinate (array of float). 99 self.z = []

100 101

102 - def _atom_index(self, atom_num):

103 """Find the atom index corresponding to the given atom number. 104 105 @param atom_num: The atom number to find the index of. 106 @type atom_num: int 107 @return: The atom index corresponding to the atom. 108 @rtype: int 109 """ 110 111 # Loop over the atoms. 112 for j in range(len(self.atom_num)): 113 # Return the index. 114 if self.atom_num[j] == atom_num: 115 return j

116 117

118 - def _det_pdb_element(self, atom_name):

119 """Try to determine the element from the PDB atom name. 120 121 @param atom_name: The PDB atom name. 122 @type atom_name: str 123 @return: The element name, or None if unsuccessful. 124 @rtype: str or None 125 """ 126 127 # Strip away the "'" character (for RNA, etc.). 128 element = atom_name.strip("'") 129 130 # Strip away atom numbering, from the front and end. 131 element = element.strip(digits) 132 133 # Amino acid atom translation table (note, numbers have been stripped already!). 134 table = {'C': ['CA', 'CB', 'CG', 'CD', 'CE', 'CH', 'CZ'], 135 'N': ['ND', 'NE', 'NH', 'NZ'], 136 'H': ['HA', 'HB', 'HG', 'HD', 'HE', 'HH', 'HT', 'HZ'], 137 'O': ['OG', 'OD', 'OE', 'OH', 'OT'], 138 'S': ['SD', 'SG'] 139 } 140 141 # NMR Pseudo-atoms (with trailing numbers stripped). 142 table['H'] += ['QA', 'QB', 'QD', 'QE', 'QG', 'QH', 'QQD', 'QQG', 'QR', 'QZ'] 143 144 # Translate amino acids. 145 for key in table: 146 if element in table[key]: 147 element = key 148 break 149 150 # Return the element if it is in the periodic table. 151 if periodic_table.has_element(symbol=element): 152 return element 153 154 # Else, throw a warning. 155 warn(RelaxWarning("Cannot determine the element associated with atom '%s'." % atom_name))

156 157

158 - def _parse_gaussian_record(self, record):

159 """Parse the Gaussian log record string and return an array of the corresponding atomic information. 160 161 The format of the Gaussian log records is:: 162 __________________________________________________________________________________________ 163 | | | | 164 | Columns | Data type | Description | 165 |_________|______________|_______________________________________________________________| 166 | | | | 167 | 1 | int | "Center Number" - the sequential atom number. | 168 | 2 | int | "Atomic Number" - the atomic number. | 169 | 3 | int | "Atomic Type" - the atomic type? | 170 | 4 | float | X coordinate in Angstrom | 171 | 5 | float | Y coordinate in Angstrom | 172 | 6 | float | Z coordinate in Angstrom | 173 |_________|______________|_______________________________________________________________| 174 175 176 @param record: The single line Gaussian record. 177 @type record: str 178 @return: The list of atomic information 179 @rtype: list of str 180 """ 181 182 # Skip the header. 183 if search("---------", record): 184 return None 185 if search("Center", record): 186 return None 187 if search("Number", record): 188 return None 189 190 # Initialise. 191 word = record.split() 192 193 # Proper records. 194 if len(word) == 6: 195 # Convert strings to numbers. 196 atom_number = int(word[0]) 197 atomic_num = int(word[1]) 198 x = float(word[3]) 199 y = float(word[4]) 200 z = float(word[5]) 201 202 # Return the atomic info. 203 return atom_number, atomic_num, x, y, z

204 205

206 - def _parse_xyz_record(self, record):

207 """Parse the XYZ record string and return an array of the corresponding atomic information. 208 209 The format of the XYZ records is:: 210 __________________________________________________________________________________________ 211 | | | | | 212 | Columns | Data type | Field | Definition | 213 |_________|______________|______________|________________________________________________| 214 | | | | | 215 | 1 | String | element | | 216 | 2 | Real | x | Orthogonal coordinates for X in Angstroms | 217 | 3 | Real | y | Orthogonal coordinates for Y in Angstroms | 218 | 4 | Real | z | Orthogonal coordinates for Z in Angstroms | 219 |_________|______________|______________|________________________________________________| 220 221 222 @param record: The single line PDB record. 223 @type record: str 224 @return: The list of atomic information 225 @rtype: list of str 226 """ 227 228 # Initialise. 229 fields = [] 230 word = record.split() 231 232 # ATOM and HETATM records. 233 if len(word)==4: 234 # Split up the record. 235 fields.append(word[0]) 236 fields.append(word[1]) 237 fields.append(word[2]) 238 fields.append(word[3]) 239 240 # Loop over the fields. 241 for i in range(len(fields)): 242 # Strip all whitespace. 243 fields[i] = fields[i].strip() 244 245 # Replace nothingness with None. 246 if fields[i] == '': 247 fields[i] = None 248 249 # Convert strings to numbers. 250 if fields[1]: 251 fields[1] = float(fields[1]) 252 if fields[2]: 253 fields[2] = float(fields[2]) 254 if fields[3]: 255 fields[3] = float(fields[3]) 256 257 # Return the atomic info. 258 return fields

259 260

261 - def _sort(self):

262 """Sort all structural data.""" 263 264 # Create an index list for sorting the structural data. 265 indices = list(range(len(self.atom_name))) 266 indices.sort(key=self._sort_key) 267 268 # Sort all lists. 269 self.atom_num = [self.atom_num[i] for i in indices] 270 self.atom_name = [self.atom_name[i] for i in indices] 271 self.bonded = [self.bonded[i] for i in indices] 272 self.chain_id = [self.chain_id[i] for i in indices] 273 self.element = [self.element[i] for i in indices] 274 self.pdb_record = [self.pdb_record[i] for i in indices] 275 self.res_name = [self.res_name[i] for i in indices] 276 self.res_num = [self.res_num[i] for i in indices] 277 self.seg_id = [self.seg_id[i] for i in indices] 278 self.x = [self.x[i] for i in indices] 279 self.y = [self.y[i] for i in indices] 280 self.z = [self.z[i] for i in indices] 281 282 # Change the bonded numbers, as the indices are now different. 283 for i in range(len(self.bonded)): 284 for j in range(len(self.bonded[i])): 285 self.bonded[i][j] = indices.index(self.bonded[i][j])

286 287

288 - def _sort_key(self, i):

289 """Return the information for sorting the sequence data.""" 290 291 # Python 3 - return 0 instead of None. 292 if self.res_num[i] == None: 293 return 0 294 295 # Sort based on residue number. 296 return self.res_num[i]

297 298

299 - def atom_add(self, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):

300 """Method for adding an atom to the structural data object. 301 302 This method will create the key-value pair for the given atom. 303 304 305 @keyword atom_name: The atom name, e.g. 'H1'. 306 @type atom_name: str or None 307 @keyword res_name: The residue name. 308 @type res_name: str or None 309 @keyword res_num: The residue number. 310 @type res_num: int or None 311 @keyword pos: The position vector of coordinates. 312 @type pos: list (length = 3) 313 @keyword element: The element symbol. 314 @type element: str or None 315 @keyword atom_num: The atom number. 316 @type atom_num: int or None 317 @keyword chain_id: The chain identifier. 318 @type chain_id: str or None 319 @keyword segment_id: The segment identifier. 320 @type segment_id: str or None 321 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'. 322 @type pdb_record: str or None 323 @return: The index of the added atom. 324 @rtype: int 325 """ 326 327 # Append to all the arrays. 328 self.atom_num.append(atom_num) 329 self.atom_name.append(atom_name) 330 self.bonded.append([]) 331 self.chain_id.append(chain_id) 332 self.element.append(element) 333 self.pdb_record.append(pdb_record) 334 self.res_name.append(res_name) 335 self.res_num.append(res_num) 336 self.seg_id.append(segment_id) 337 self.x.append(pos[0]) 338 self.y.append(pos[1]) 339 self.z.append(pos[2]) 340 341 # Return the index. 342 return len(self.atom_num) - 1

343 344

345 - def atom_connect(self, index1=None, index2=None):

346 """Method for connecting two atoms within the data structure object. 347 348 This method will append index2 to the array at bonded[index1] and vice versa. 349 350 351 @keyword index1: The index of the first atom. 352 @type index1: int 353 @keyword index2: The index of the second atom. 354 @type index2: int 355 """ 356 357 # Update the bonded array structure, if necessary. 358 if index2 not in self.bonded[index1]: 359 self.bonded[index1].append(index2) 360 if index1 not in self.bonded[index2]: 361 self.bonded[index2].append(index1)

362 363

364 - def fill_object_from_gaussian(self, records):

365 """Method for generating a complete Structure_container object from the given Gaussian log records. 366 367 @param records: A list of structural Gaussian log records. 368 @type records: list of str 369 """ 370 371 # Loop over the records. 372 for record in records: 373 # Parse the record. 374 data = self._parse_gaussian_record(record) 375 376 # Nothing to do. 377 if data == None: 378 continue 379 380 # Unpack. 381 atom_number, atomic_num, x, y, z = data 382 383 # Translate the atomic number to the atom name. 384 atom_name = periodic_table.lookup_symbol(atomic_num) 385 386 # Add. 387 self.atom_add(atom_name=atom_name, atom_num=atom_number, pos=[x, y, z], element=atom_name)

388 389

390 - def fill_object_from_pdb(self, records, alt_loc_select=None, reset_serial=True):

391 """Method for generating a complete Structure_container object from the given PDB records. 392 393 @param records: A list of structural PDB records. 394 @type records: list of str 395 @keyword alt_loc_select: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use. 396 @type alt_loc_select: str or None 397 @keyword reset_serial: A flag which if True will cause the first serial number (or atom number) to be reset to 1, and all other numbers shifted. 398 @type reset_serial: bool 399 """ 400 401 # Loop over the records. 402 water = [] 403 missing_connect = [] 404 number_offset = None 405 for record in records: 406 # Nothing to do. 407 if not record or record == '\n': 408 continue 409 410 # Add the atom. 411 if record[:4] == 'ATOM' or record[:6] == 'HETATM': 412 # Parse the record. 413 if record[:4] == 'ATOM': 414 record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.atom(record) 415 if record[:6] == 'HETATM': 416 record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.hetatm(record) 417 418 # The serial number. 419 if reset_serial: 420 # The first number. 421 if number_offset == None: 422 number_offset = serial - 1 423 424 # Reset. 425 serial -= number_offset 426 427 # Skip waters. 428 if res_name == 'HOH': 429 water.append(res_seq) 430 continue 431 432 # Handle the alternate locations. 433 if alt_loc != None: 434 # Don't know what to do. 435 if alt_loc_select == None: 436 raise RelaxError("Multiple alternate location indicators are present in the PDB file, but the desired coordinate set has not been specified.") 437 438 # Skip non-matching locations. 439 if alt_loc != alt_loc_select: 440 continue 441 442 # Attempt at determining the element, if missing. 443 if not element: 444 element = self._det_pdb_element(name) 445 446 # Add. 447 self.atom_add(pdb_record=record_type, atom_num=serial, atom_name=name, res_name=res_name, res_num=res_seq, pos=[x, y, z], element=element) 448 449 # Connect atoms. 450 if record[:6] == 'CONECT': 451 # Parse the record. 452 record_type, serial, bonded1, bonded2, bonded3, bonded4 = pdb_read.conect(record) 453 454 # Reset the serial numbers. 455 if reset_serial: 456 serial -= number_offset 457 bonded1 -= number_offset 458 if bonded2 != None: 459 bonded2 -= number_offset 460 if bonded3 != None: 461 bonded3 -= number_offset 462 if bonded4 != None: 463 bonded4 -= number_offset 464 465 # Loop over the atoms of the record. 466 for bonded in [bonded1, bonded2, bonded3, bonded4]: 467 # Skip if there is no record. 468 if not bonded: 469 continue 470 471 # The atom indices. 472 serial_index = self._atom_index(serial) 473 bonded_index = self._atom_index(bonded) 474 475 # Skip broken CONECT records (for when the record points to a non-existent atom). 476 if serial_index == None: 477 if serial not in missing_connect: 478 missing_connect.append(serial) 479 continue 480 if bonded_index == None: 481 if bonded not in missing_connect: 482 missing_connect.append(bonded) 483 continue 484 485 # Make the connection. 486 self.atom_connect(index1=serial_index, index2=bonded_index) 487 488 # Warnings. 489 if len(missing_connect): 490 missing_connect.sort() 491 warn(RelaxWarning("The following atom numbers from the CONECT records cannot be found within the ATOM and HETATM records: %s." % missing_connect)) 492 if len(water): 493 warn(RelaxWarning("Skipping the water molecules HOH %s." % water))

494 495

496 - def fill_object_from_xyz(self, records):

497 """Method for generating a complete Structure_container object from the given xyz records. 498 499 @param records: A list of structural xyz records. 500 @type records: list of str 501 """ 502 503 # initialisation for atom number 504 atom_number = 1 505 506 # Loop over the records. 507 for record in records: 508 # Parse the record. 509 record = self._parse_xyz_record(record) 510 511 # Nothing to do. 512 if not record: 513 continue 514 515 # Add the atom. 516 if len(record) == 4: 517 # Add. 518 self.atom_add(atom_name=record[0], atom_num=atom_number, pos=[record[1], record[2], record[3]], element=record[0]) 519 520 # Increment of atom number 521 atom_number = atom_number + 1

522 523

524 - def from_xml(self, mol_node, file_version=1):

525 """Recreate the MolContainer from the XML molecule node. 526 527 @param mol_node: The molecule XML node. 528 @type mol_node: xml.dom.minicompat.NodeList instance 529 @keyword file_version: The relax XML version of the XML file. 530 @type file_version: int 531 """ 532 533 # Recreate the current molecule container. 534 xml_to_object(mol_node, self, file_version=file_version)

535 536

537 - def is_empty(self):

538 """Check if the container is empty.""" 539 540 # Set attributes. 541 if hasattr(self, 'file_name'): return False 542 if hasattr(self, 'file_path'): return False 543 if hasattr(self, 'file_mol_num'): return False 544 if hasattr(self, 'file_model'): return False 545 546 # Internal data structures. 547 if not self.atom_num == []: return False 548 if not self.atom_name == []: return False 549 if not self.bonded == []: return False 550 if not self.chain_id == []: return False 551 if not self.element == []: return False 552 if not self.pdb_record == []: return False 553 if not self.res_name == []: return False 554 if not self.res_num == []: return False 555 if not self.seg_id == []: return False 556 if not self.x == []: return False 557 if not self.y == []: return False 558 if not self.z == []: return False 559 560 # Ok, now this thing must be empty. 561 return True

562 563

564 - def last_residue(self):

565 """Return the number of the last residue. 566 567 @return: The last residue number. 568 @rtype: int 569 """ 570 571 # No residues yet. 572 if not len(self.res_num): 573 return 0 574 575 # Return the number. 576 return self.res_num[-1]

577 578

579 - def loop_residues(self):

580 """Generator method for looping over the individual residues of the molecule. 581 582 @return: The residue name and number. 583 @rtype: str, int 584 """ 585 586 # Loop over the atoms. 587 last_res = None 588 for i in range(len(self.atom_num)): 589 # A new residue. 590 if self.res_num[i] != last_res: 591 last_res = self.res_num[i] 592 yield self.res_name[i], self.res_num[i]

593 594

595 - def merge(self, mol_cont=None):

596 """Merge the contents of the given molecule container into here. 597 598 @keyword mol_cont: The data structure for the molecule to merge. 599 @type mol_cont: MolContainer instance 600 """ 601 602 # The current index. 603 curr_index = len(self.atom_num) 604 605 # Loop over all data. 606 for i in range(len(mol_cont.atom_num)): 607 # Add the atom. 608 self.atom_add(atom_num=curr_index+i+1, atom_name=mol_cont.atom_name[i], res_name=mol_cont.res_name[i], res_num=mol_cont.res_num[i], pos=[mol_cont.x[i], mol_cont.y[i], mol_cont.z[i]], element=mol_cont.element[i], chain_id=mol_cont.chain_id[i], pdb_record=mol_cont.pdb_record[i]) 609 610 # Connect the atoms. 611 for j in range(len(mol_cont.bonded[i])): 612 self.atom_connect(index1=i+curr_index+1, index2=mol_cont.bonded[i][j]+curr_index+1)

613 614

615 - def to_xml(self, doc, element):

616 """Create XML elements for the contents of this molecule container. 617 618 @param doc: The XML document object. 619 @type doc: xml.dom.minidom.Document instance 620 @param element: The element to add the molecule XML elements to. 621 @type element: XML element object 622 """ 623 624 # Create an XML element for this molecule and add it to the higher level element. 625 mol_element = doc.createElement('mol_cont') 626 element.appendChild(mol_element) 627 628 # Set the molecule attributes. 629 mol_element.setAttribute('desc', 'Molecule container') 630 mol_element.setAttribute('name', str(self.mol_name)) 631 632 # Add all simple python objects within the MolContainer to the XML element. 633 fill_object_contents(doc, mol_element, object=self, blacklist=list(self.__class__.__dict__.keys()))

638 """List type data container for holding the different molecules of one model.""" 639

640 - def __repr__(self):

641 """The string representation of the object. 642 643 Rather than using the standard Python conventions (either the string representation of the 644 value or the "<...desc...>" notation), a rich-formatted description of the object is given. 645 """ 646 647 text = "Molecules.\n\n" 648 text = text + "%-8s%-8s" % ("Index", "Name") + "\n" 649 for i in range(len(self)): 650 text = text + "%-8i%-8s" % (i, self[i].mol_name) + "\n" 651 return text

652 653

654 - def add_item(self, mol_name=None, mol_cont=None):

655 """Append the given MolContainer instance to the MolList. 656 657 @keyword mol_name: The molecule number. 658 @type mol_name: int 659 @keyword mol_cont: The data structure for the molecule. 660 @type mol_cont: MolContainer instance 661 @return: The new molecule container. 662 @rtype: MolContainer instance 663 """ 664 665 # If no molecule data exists, replace the empty first molecule with this molecule (just a renaming). 666 if len(self) and self.is_empty(): 667 self[0].mol_name = mol_name 668 669 # Otherwise append an empty MolContainer. 670 else: 671 # Test if the molecule already exists. 672 for i in range(len(self)): 673 if self[i].mol_name == mol_name: 674 raise RelaxError("The molecule '%s' already exists." % mol_name) 675 676 # Append an empty MolContainer. 677 self.append(mol_cont) 678 679 # Set the name. 680 self[-1].mol_name = mol_name 681 682 # Return the container. 683 return self[-1]

684 685

686 - def is_empty(self):

687 """Method for testing if this MolList object is empty. 688 689 @return: True if this list only has one MolContainer and the molecule name has not 690 been set, False otherwise. 691 @rtype: bool 692 """ 693 694 # No MolContainers. 695 if len(self) == 0: 696 return True 697 698 # There is only one MolContainer and it is empty. 699 if len(self) == 1 and hasattr(self[0], 'is_empty') and self[0].is_empty(): 700 return True 701 702 # Otherwise. 703 return False

704 705

706 - def from_xml(self, mol_nodes, file_version=1):

707 """Recreate a molecule list data structure from the XML molecule nodes. 708 709 @param mol_nodes: The molecule XML nodes. 710 @type mol_nodes: xml.dom.minicompat.NodeList instance 711 @keyword file_version: The relax XML version of the XML file. 712 @type file_version: int 713 """ 714 715 # Test if empty. 716 if not self.is_empty(): 717 raise RelaxFromXMLNotEmptyError(self.__class__.__name__) 718 719 # Loop over the molecules. 720 for mol_node in mol_nodes: 721 # Initialise a MolContainer instance. 722 mol_cont = MolContainer() 723 724 # Get the molecule name. 725 name = mol_node.getAttribute('name') 726 if name == 'None': 727 name = None 728 729 # Add the molecule to the MolList structure. 730 self.add_item(mol_name=name, mol_cont=mol_cont) 731 732 # Execute the specific MolContainer from_xml() method. 733 self[-1].from_xml(mol_node, file_version=file_version)

734 735

736 - def merge_item(self, mol_name=None, mol_cont=None):

737 """Mege the given MolContainer instance into a pre-existing molecule container. 738 739 @keyword mol_name: The molecule number. 740 @type mol_name: int 741 @keyword mol_cont: The data structure for the molecule. 742 @type mol_cont: MolContainer instance 743 @return: The new molecule container. 744 @rtype: MolContainer instance 745 """ 746 747 # Find the molecule to merge. 748 index = None 749 for i in range(len(self)): 750 if self[i].mol_name == mol_name: 751 index = i 752 break 753 754 # No molecule found. 755 if index == None: 756 raise RelaxError("The molecule '%s' to merge with cannot be found." % mol_name) 757 758 # Merge the molecules. 759 self[index].merge(mol_cont) 760 761 # Return the container. 762 return self[index]

763 764

765 - def to_xml(self, doc, element):

766 """Create XML elements for each molecule. 767 768 @param doc: The XML document object. 769 @type doc: xml.dom.minidom.Document instance 770 @param element: The element to add the molecule XML elements to. 771 @type element: XML element object 772 """ 773 774 # Loop over the molecules. 775 for i in range(len(self)): 776 # Add the molecule data. 777 self[i].to_xml(doc, element)

Source Code for Module lib.structure.internal.molecules