lib.structure.internal.molecules

39 """The container for the molecular information. 40 41 The structural data object for this class is a container possessing a number of different arrays 42 corresponding to different structural information. These objects include: 43 44 - atom_num: The atom name. 45 - atom_name: The atom name. 46 - bonded: Each element an array of bonded atom indices. 47 - chain_id: The chain ID. 48 - element: The element symbol. 49 - pdb_record: The optional PDB record name (one of ATOM, HETATM, or TER). 50 - res_name: The residue name. 51 - res_num: The residue number. 52 - seg_id: The segment ID. 53 - x: The x coordinate of the atom. 54 - y: The y coordinate of the atom. 55 - z: The z coordinate of the atom. 56 57 All arrays should be of equal length so that an atom index can retrieve all the corresponding 58 data. Only the atom identification string is compulsory, all other arrays can contain None. 59 """ 60 61

62 - def __init__(self):

63 """Initialise the molecular container.""" 64 65 # The atom num (array of int). 66 self.atom_num = [] 67 68 # The atom name (array of str). 69 self.atom_name = [] 70 71 # The bonded atom indices (array of arrays of int). 72 self.bonded = [] 73 74 # The chain ID (array of str). 75 self.chain_id = [] 76 77 # The element symbol (array of str). 78 self.element = [] 79 80 # The optional PDB record name (array of str). 81 self.pdb_record = [] 82 83 # The residue name (array of str). 84 self.res_name = [] 85 86 # The residue number (array of int). 87 self.res_num = [] 88 89 # The segment ID (array of int). 90 self.seg_id = [] 91 92 # The x coordinate (array of float). 93 self.x = [] 94 95 # The y coordinate (array of float). 96 self.y = [] 97 98 # The z coordinate (array of float). 99 self.z = []

100 101

102 - def _atom_index(self, atom_num):

103 """Find the atom index corresponding to the given atom number. 104 105 @param atom_num: The atom number to find the index of. 106 @type atom_num: int 107 @return: The atom index corresponding to the atom. 108 @rtype: int 109 """ 110 111 # Loop over the atoms. 112 for j in range(len(self.atom_num)): 113 # Return the index. 114 if self.atom_num[j] == atom_num: 115 return j

116 117

118 - def _det_pdb_element(self, atom_name):

119 """Try to determine the element from the PDB atom name. 120 121 @param atom_name: The PDB atom name. 122 @type atom_name: str 123 @return: The element name, or None if unsuccessful. 124 @rtype: str or None 125 """ 126 127 # Strip away the "'" character (for RNA, etc.). 128 element = atom_name.strip("'") 129 130 # Strip away atom numbering, from the front and end. 131 element = element.strip(digits) 132 133 # Amino acid atom translation table (note, numbers have been stripped already!). 134 table = {'C': ['CA', 'CB', 'CG', 'CD', 'CE', 'CH', 'CZ'], 135 'N': ['ND', 'NE', 'NH', 'NZ'], 136 'H': ['HA', 'HB', 'HG', 'HD', 'HE', 'HH', 'HT', 'HZ'], 137 'O': ['OG', 'OD', 'OE', 'OH', 'OT'], 138 'S': ['SD', 'SG'] 139 } 140 141 # Translate amino acids. 142 for key in table: 143 if element in table[key]: 144 element = key 145 break 146 147 # Return the element if it is in the periodic table. 148 if periodic_table.has_element(symbol=element): 149 return element 150 151 # Else, throw a warning. 152 warn(RelaxWarning("Cannot determine the element associated with atom '%s'." % atom_name))

153 154

155 - def _parse_gaussian_record(self, record):

156 """Parse the Gaussian log record string and return an array of the corresponding atomic information. 157 158 The format of the Gaussian log records is:: 159 __________________________________________________________________________________________ 160 | | | | 161 | Columns | Data type | Description | 162 |_________|______________|_______________________________________________________________| 163 | | | | 164 | 1 | int | "Center Number" - the sequential atom number. | 165 | 2 | int | "Atomic Number" - the atomic number. | 166 | 3 | int | "Atomic Type" - the atomic type? | 167 | 4 | float | X coordinate in Angstrom | 168 | 5 | float | Y coordinate in Angstrom | 169 | 6 | float | Z coordinate in Angstrom | 170 |_________|______________|_______________________________________________________________| 171 172 173 @param record: The single line Gaussian record. 174 @type record: str 175 @return: The list of atomic information 176 @rtype: list of str 177 """ 178 179 # Skip the header. 180 if search("---------", record): 181 return None 182 if search("Center", record): 183 return None 184 if search("Number", record): 185 return None 186 187 # Initialise. 188 word = record.split() 189 190 # Proper records. 191 if len(word) == 6: 192 # Convert strings to numbers. 193 atom_number = int(word[0]) 194 atomic_num = int(word[1]) 195 x = float(word[3]) 196 y = float(word[4]) 197 z = float(word[5]) 198 199 # Return the atomic info. 200 return atom_number, atomic_num, x, y, z

201 202

203 - def _parse_xyz_record(self, record):

204 """Parse the XYZ record string and return an array of the corresponding atomic information. 205 206 The format of the XYZ records is:: 207 __________________________________________________________________________________________ 208 | | | | | 209 | Columns | Data type | Field | Definition | 210 |_________|______________|______________|________________________________________________| 211 | | | | | 212 | 1 | String | element | | 213 | 2 | Real | x | Orthogonal coordinates for X in Angstroms | 214 | 3 | Real | y | Orthogonal coordinates for Y in Angstroms | 215 | 4 | Real | z | Orthogonal coordinates for Z in Angstroms | 216 |_________|______________|______________|________________________________________________| 217 218 219 @param record: The single line PDB record. 220 @type record: str 221 @return: The list of atomic information 222 @rtype: list of str 223 """ 224 225 # Initialise. 226 fields = [] 227 word = record.split() 228 229 # ATOM and HETATM records. 230 if len(word)==4: 231 # Split up the record. 232 fields.append(word[0]) 233 fields.append(word[1]) 234 fields.append(word[2]) 235 fields.append(word[3]) 236 237 # Loop over the fields. 238 for i in range(len(fields)): 239 # Strip all whitespace. 240 fields[i] = fields[i].strip() 241 242 # Replace nothingness with None. 243 if fields[i] == '': 244 fields[i] = None 245 246 # Convert strings to numbers. 247 if fields[1]: 248 fields[1] = float(fields[1]) 249 if fields[2]: 250 fields[2] = float(fields[2]) 251 if fields[3]: 252 fields[3] = float(fields[3]) 253 254 # Return the atomic info. 255 return fields

256 257

258 - def _sort(self):

259 """Sort all structural data.""" 260 261 # Create an index list for sorting the structural data. 262 indices = list(range(len(self.atom_name))) 263 indices.sort(key=self._sort_key) 264 265 # Sort all lists. 266 self.atom_num = [self.atom_num[i] for i in indices] 267 self.atom_name = [self.atom_name[i] for i in indices] 268 self.bonded = [self.bonded[i] for i in indices] 269 self.chain_id = [self.chain_id[i] for i in indices] 270 self.element = [self.element[i] for i in indices] 271 self.pdb_record = [self.pdb_record[i] for i in indices] 272 self.res_name = [self.res_name[i] for i in indices] 273 self.res_num = [self.res_num[i] for i in indices] 274 self.seg_id = [self.seg_id[i] for i in indices] 275 self.x = [self.x[i] for i in indices] 276 self.y = [self.y[i] for i in indices] 277 self.z = [self.z[i] for i in indices] 278 279 # Change the bonded numbers, as the indices are now different. 280 for i in range(len(self.bonded)): 281 for j in range(len(self.bonded[i])): 282 self.bonded[i][j] = indices.index(self.bonded[i][j])

283 284

285 - def _sort_key(self, i):

286 """Return the information for sorting the sequence data.""" 287 288 # Python 3 - return 0 instead of None. 289 if self.res_num[i] == None: 290 return 0 291 292 # Sort based on residue number. 293 return self.res_num[i]

294 295

296 - def atom_add(self, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):

297 """Method for adding an atom to the structural data object. 298 299 This method will create the key-value pair for the given atom. 300 301 302 @keyword atom_name: The atom name, e.g. 'H1'. 303 @type atom_name: str or None 304 @keyword res_name: The residue name. 305 @type res_name: str or None 306 @keyword res_num: The residue number. 307 @type res_num: int or None 308 @keyword pos: The position vector of coordinates. 309 @type pos: list (length = 3) 310 @keyword element: The element symbol. 311 @type element: str or None 312 @keyword atom_num: The atom number. 313 @type atom_num: int or None 314 @keyword chain_id: The chain identifier. 315 @type chain_id: str or None 316 @keyword segment_id: The segment identifier. 317 @type segment_id: str or None 318 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'. 319 @type pdb_record: str or None 320 @return: The index of the added atom. 321 @rtype: int 322 """ 323 324 # Append to all the arrays. 325 self.atom_num.append(atom_num) 326 self.atom_name.append(atom_name) 327 self.bonded.append([]) 328 self.chain_id.append(chain_id) 329 self.element.append(element) 330 self.pdb_record.append(pdb_record) 331 self.res_name.append(res_name) 332 self.res_num.append(res_num) 333 self.seg_id.append(segment_id) 334 self.x.append(pos[0]) 335 self.y.append(pos[1]) 336 self.z.append(pos[2]) 337 338 # Return the index. 339 return len(self.atom_num) - 1

340 341

342 - def atom_connect(self, index1=None, index2=None):

343 """Method for connecting two atoms within the data structure object. 344 345 This method will append index2 to the array at bonded[index1] and vice versa. 346 347 348 @keyword index1: The index of the first atom. 349 @type index1: int 350 @keyword index2: The index of the second atom. 351 @type index2: int 352 """ 353 354 # Update the bonded array structure, if necessary. 355 if index2 not in self.bonded[index1]: 356 self.bonded[index1].append(index2) 357 if index1 not in self.bonded[index2]: 358 self.bonded[index2].append(index1)

359 360

361 - def fill_object_from_gaussian(self, records):

362 """Method for generating a complete Structure_container object from the given Gaussian log records. 363 364 @param records: A list of structural Gaussian log records. 365 @type records: list of str 366 """ 367 368 # Loop over the records. 369 for record in records: 370 # Parse the record. 371 data = self._parse_gaussian_record(record) 372 373 # Nothing to do. 374 if data == None: 375 continue 376 377 # Unpack. 378 atom_number, atomic_num, x, y, z = data 379 380 # Translate the atomic number to the atom name. 381 atom_name = periodic_table.lookup_symbol(atomic_num) 382 383 # Add. 384 self.atom_add(atom_name=atom_name, atom_num=atom_number, pos=[x, y, z], element=atom_name)

385 386

387 - def fill_object_from_pdb(self, records, alt_loc_select=None):

388 """Method for generating a complete Structure_container object from the given PDB records. 389 390 @param records: A list of structural PDB records. 391 @type records: list of str 392 @keyword alt_loc_select: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use. 393 @type alt_loc_select: str or None 394 """ 395 396 # Loop over the records. 397 water = [] 398 missing_connect = [] 399 for record in records: 400 # Nothing to do. 401 if not record or record == '\n': 402 continue 403 404 # Add the atom. 405 if record[:4] == 'ATOM' or record[:6] == 'HETATM': 406 # Parse the record. 407 if record[:4] == 'ATOM': 408 record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.atom(record) 409 if record[:6] == 'HETATM': 410 record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.hetatm(record) 411 412 # Skip waters. 413 if res_name == 'HOH': 414 water.append(res_seq) 415 continue 416 417 # Handle the alternate locations. 418 if alt_loc != None: 419 # Don't know what to do. 420 if alt_loc_select == None: 421 raise RelaxError("Multiple alternate location indicators are present in the PDB file, but the desired coordinate set has not been specified.") 422 423 # Skip non-matching locations. 424 if alt_loc != alt_loc_select: 425 continue 426 427 # Attempt at determining the element, if missing. 428 if not element: 429 element = self._det_pdb_element(name) 430 431 # Add. 432 self.atom_add(pdb_record=record_type, atom_num=serial, atom_name=name, res_name=res_name, chain_id=chain_id, res_num=res_seq, pos=[x, y, z], element=element) 433 434 # Connect atoms. 435 if record[:6] == 'CONECT': 436 # Parse the record. 437 record_type, serial, bonded1, bonded2, bonded3, bonded4 = pdb_read.conect(record) 438 439 # Loop over the atoms of the record. 440 for bonded in [bonded1, bonded2, bonded3, bonded4]: 441 # Skip if there is no record. 442 if not bonded: 443 continue 444 445 # The atom indices. 446 serial_index = self._atom_index(serial) 447 bonded_index = self._atom_index(bonded) 448 449 # Skip broken CONECT records (for when the record points to a non-existent atom). 450 if serial_index == None: 451 if serial not in missing_connect: 452 missing_connect.append(serial) 453 continue 454 if bonded_index == None: 455 if bonded not in missing_connect: 456 missing_connect.append(bonded) 457 continue 458 459 # Make the connection. 460 self.atom_connect(index1=serial_index, index2=bonded_index) 461 462 # Warnings. 463 if len(missing_connect): 464 missing_connect.sort() 465 warn(RelaxWarning("The following atom numbers from the CONECT records cannot be found within the ATOM and HETATM records: %s." % missing_connect)) 466 if len(water): 467 warn(RelaxWarning("Skipping the water molecules HOH %s." % water))

468 469

470 - def fill_object_from_xyz(self, records):

471 """Method for generating a complete Structure_container object from the given xyz records. 472 473 @param records: A list of structural xyz records. 474 @type records: list of str 475 """ 476 477 # initialisation for atom number 478 atom_number = 1 479 480 # Loop over the records. 481 for record in records: 482 # Parse the record. 483 record = self._parse_xyz_record(record) 484 485 # Nothing to do. 486 if not record: 487 continue 488 489 # Add the atom. 490 if len(record) == 4: 491 # Add. 492 self.atom_add(atom_name=record[0], atom_num=atom_number, pos=[record[1], record[2], record[3]], element=record[0]) 493 494 # Increment of atom number 495 atom_number = atom_number + 1

496 497

498 - def from_xml(self, mol_node, file_version=1):

499 """Recreate the MolContainer from the XML molecule node. 500 501 @param mol_node: The molecule XML node. 502 @type mol_node: xml.dom.minicompat.NodeList instance 503 @keyword file_version: The relax XML version of the XML file. 504 @type file_version: int 505 """ 506 507 # Recreate the current molecule container. 508 xml_to_object(mol_node, self, file_version=file_version)

509 510

511 - def is_empty(self):

512 """Check if the container is empty.""" 513 514 # Set attributes. 515 if hasattr(self, 'file_name'): return False 516 if hasattr(self, 'file_path'): return False 517 if hasattr(self, 'file_mol_num'): return False 518 if hasattr(self, 'file_model'): return False 519 520 # Internal data structures. 521 if not self.atom_num == []: return False 522 if not self.atom_name == []: return False 523 if not self.bonded == []: return False 524 if not self.chain_id == []: return False 525 if not self.element == []: return False 526 if not self.pdb_record == []: return False 527 if not self.res_name == []: return False 528 if not self.res_num == []: return False 529 if not self.seg_id == []: return False 530 if not self.x == []: return False 531 if not self.y == []: return False 532 if not self.z == []: return False 533 534 # Ok, now this thing must be empty. 535 return True

536 537

538 - def last_residue(self):

539 """Return the number of the last residue. 540 541 @return: The last residue number. 542 @rtype: int 543 """ 544 545 # No residues yet. 546 if not len(self.res_num): 547 return 0 548 549 # Return the number. 550 return self.res_num[-1]

551 552

553 - def loop_residues(self):

554 """Generator method for looping over the individual residues of the molecule. 555 556 @return: The residue name and number. 557 @rtype: str, int 558 """ 559 560 # Loop over the atoms. 561 last_res = None 562 for i in range(len(self.atom_num)): 563 # A new residue. 564 if self.res_num[i] != last_res: 565 last_res = self.res_num[i] 566 yield self.res_name[i], self.res_num[i]

567 568

569 - def merge(self, mol_cont=None):

570 """Merge the contents of the given molecule container into here. 571 572 @keyword mol_cont: The data structure for the molecule to merge. 573 @type mol_cont: MolContainer instance 574 """ 575 576 # The current index. 577 curr_index = len(self.atom_num) 578 579 # Loop over all data. 580 for i in range(len(mol_cont.atom_num)): 581 # Add the atom. 582 self.atom_add(atom_num=curr_index+i+1, atom_name=mol_cont.atom_name[i], res_name=mol_cont.res_name[i], res_num=mol_cont.res_num[i], pos=[mol_cont.x[i], mol_cont.y[i], mol_cont.z[i]], element=mol_cont.element[i], chain_id=mol_cont.chain_id[i], pdb_record=mol_cont.pdb_record[i]) 583 584 # Connect the atoms. 585 for j in range(len(mol_cont.bonded[i])): 586 self.atom_connect(index1=i+curr_index+1, index2=mol_cont.bonded[i][j]+curr_index+1)

587 588

589 - def to_xml(self, doc, element):

590 """Create XML elements for the contents of this molecule container. 591 592 @param doc: The XML document object. 593 @type doc: xml.dom.minidom.Document instance 594 @param element: The element to add the molecule XML elements to. 595 @type element: XML element object 596 """ 597 598 # Create an XML element for this molecule and add it to the higher level element. 599 mol_element = doc.createElement('mol_cont') 600 element.appendChild(mol_element) 601 602 # Set the molecule attributes. 603 mol_element.setAttribute('desc', 'Molecule container') 604 mol_element.setAttribute('name', str(self.mol_name)) 605 606 # Add all simple python objects within the MolContainer to the XML element. 607 fill_object_contents(doc, mol_element, object=self, blacklist=list(self.__class__.__dict__.keys()))

612 """List type data container for holding the different molecules of one model.""" 613

614 - def __repr__(self):

615 """The string representation of the object. 616 617 Rather than using the standard Python conventions (either the string representation of the 618 value or the "<...desc...>" notation), a rich-formatted description of the object is given. 619 """ 620 621 text = "Molecules.\n\n" 622 text = text + "%-8s%-8s" % ("Index", "Name") + "\n" 623 for i in range(len(self)): 624 text = text + "%-8i%-8s" % (i, self[i].mol_name) + "\n" 625 return text

626 627

628 - def add_item(self, mol_name=None, mol_cont=None):

629 """Append the given MolContainer instance to the MolList. 630 631 @keyword mol_name: The molecule number. 632 @type mol_name: int 633 @keyword mol_cont: The data structure for the molecule. 634 @type mol_cont: MolContainer instance 635 @return: The new molecule container. 636 @rtype: MolContainer instance 637 """ 638 639 # If no molecule data exists, replace the empty first molecule with this molecule (just a renaming). 640 if len(self) and self.is_empty(): 641 self[0].mol_name = mol_name 642 643 # Otherwise append an empty MolContainer. 644 else: 645 # Test if the molecule already exists. 646 for i in range(len(self)): 647 if self[i].mol_name == mol_name: 648 raise RelaxError("The molecule '%s' already exists." % mol_name) 649 650 # Append an empty MolContainer. 651 self.append(mol_cont) 652 653 # Set the name. 654 self[-1].mol_name = mol_name 655 656 # Return the container. 657 return self[-1]

658 659

660 - def is_empty(self):

661 """Method for testing if this MolList object is empty. 662 663 @return: True if this list only has one MolContainer and the molecule name has not 664 been set, False otherwise. 665 @rtype: bool 666 """ 667 668 # No MolContainers. 669 if len(self) == 0: 670 return True 671 672 # There is only one MolContainer and it is empty. 673 if len(self) == 1 and hasattr(self[0], 'is_empty') and self[0].is_empty(): 674 return True 675 676 # Otherwise. 677 return False

678 679

680 - def from_xml(self, mol_nodes, file_version=1):

681 """Recreate a molecule list data structure from the XML molecule nodes. 682 683 @param mol_nodes: The molecule XML nodes. 684 @type mol_nodes: xml.dom.minicompat.NodeList instance 685 @keyword file_version: The relax XML version of the XML file. 686 @type file_version: int 687 """ 688 689 # Test if empty. 690 if not self.is_empty(): 691 raise RelaxFromXMLNotEmptyError(self.__class__.__name__) 692 693 # Loop over the molecules. 694 for mol_node in mol_nodes: 695 # Initialise a MolContainer instance. 696 mol_cont = MolContainer() 697 698 # Get the molecule name. 699 name = mol_node.getAttribute('name') 700 if name == 'None': 701 name = None 702 703 # Add the molecule to the MolList structure. 704 self.add_item(mol_name=name, mol_cont=mol_cont) 705 706 # Execute the specific MolContainer from_xml() method. 707 self[-1].from_xml(mol_node, file_version=file_version)

708 709

710 - def merge_item(self, mol_name=None, mol_cont=None):

711 """Mege the given MolContainer instance into a pre-existing molecule container. 712 713 @keyword mol_name: The molecule number. 714 @type mol_name: int 715 @keyword mol_cont: The data structure for the molecule. 716 @type mol_cont: MolContainer instance 717 @return: The new molecule container. 718 @rtype: MolContainer instance 719 """ 720 721 # Find the molecule to merge. 722 index = None 723 for i in range(len(self)): 724 if self[i].mol_name == mol_name: 725 index = i 726 break 727 728 # No molecule found. 729 if index == None: 730 raise RelaxError("The molecule '%s' to merge with cannot be found." % mol_name) 731 732 # Merge the molecules. 733 self[index].merge(mol_cont) 734 735 # Return the container. 736 return self[index]

737 738

739 - def to_xml(self, doc, element):

740 """Create XML elements for each molecule. 741 742 @param doc: The XML document object. 743 @type doc: xml.dom.minidom.Document instance 744 @param element: The element to add the molecule XML elements to. 745 @type element: XML element object 746 """ 747 748 # Loop over the molecules. 749 for i in range(len(self)): 750 # Add the molecule data. 751 self[i].to_xml(doc, element)

Source Code for Module lib.structure.internal.molecules