lib.structure.internal.molecules

39 """The container for the molecular information. 40 41 The structural data object for this class is a container possessing a number of different arrays 42 corresponding to different structural information. These objects include: 43 44 - atom_num: The atom name. 45 - atom_name: The atom name. 46 - bonded: Each element an array of bonded atom indices. 47 - chain_id: The chain ID. 48 - element: The element symbol. 49 - pdb_record: The optional PDB record name (one of ATOM, HETATM, or TER). 50 - res_name: The residue name. 51 - res_num: The residue number. 52 - seg_id: The segment ID. 53 - x: The x coordinate of the atom. 54 - y: The y coordinate of the atom. 55 - z: The z coordinate of the atom. 56 57 All arrays should be of equal length so that an atom index can retrieve all the corresponding 58 data. Only the atom identification string is compulsory, all other arrays can contain None. 59 """ 60 61

62 - def __init__(self):

63 """Initialise the molecular container.""" 64 65 # The atom num (array of int). 66 self.atom_num = [] 67 68 # The atom name (array of str). 69 self.atom_name = [] 70 71 # The bonded atom indices (array of arrays of int). 72 self.bonded = [] 73 74 # The chain ID (array of str). 75 self.chain_id = [] 76 77 # The element symbol (array of str). 78 self.element = [] 79 80 # The optional PDB record name (array of str). 81 self.pdb_record = [] 82 83 # The residue name (array of str). 84 self.res_name = [] 85 86 # The residue number (array of int). 87 self.res_num = [] 88 89 # The segment ID (array of int). 90 self.seg_id = [] 91 92 # The x coordinate (array of float). 93 self.x = [] 94 95 # The y coordinate (array of float). 96 self.y = [] 97 98 # The z coordinate (array of float). 99 self.z = []

100 101

102 - def _atom_index(self, atom_num):

103 """Find the atom index corresponding to the given atom number. 104 105 @param atom_num: The atom number to find the index of. 106 @type atom_num: int 107 @return: The atom index corresponding to the atom. 108 @rtype: int 109 """ 110 111 # Loop over the atoms. 112 for j in range(len(self.atom_num)): 113 # Return the index. 114 if self.atom_num[j] == atom_num: 115 return j 116 117 # Should not be here, the PDB connect records are incorrect. 118 warn(RelaxWarning("The atom number " + repr(atom_num) + " from the CONECT record cannot be found within the ATOM and HETATM records."))

119 120

121 - def _det_pdb_element(self, atom_name):

122 """Try to determine the element from the PDB atom name. 123 124 @param atom_name: The PDB atom name. 125 @type atom_name: str 126 @return: The element name, or None if unsuccessful. 127 @rtype: str or None 128 """ 129 130 # Strip away the "'" character (for RNA, etc.). 131 element = atom_name.strip("'") 132 133 # Strip away atom numbering, from the front and end. 134 element = element.strip(digits) 135 136 # Amino acid atom translation table (note, numbers have been stripped already!). 137 table = {'C': ['CA', 'CB', 'CG', 'CD', 'CE', 'CH', 'CZ'], 138 'N': ['ND', 'NE', 'NH', 'NZ'], 139 'H': ['HA', 'HB', 'HG', 'HD', 'HE', 'HH', 'HT', 'HZ'], 140 'O': ['OG', 'OD', 'OE', 'OH', 'OT'], 141 'S': ['SD', 'SG'] 142 } 143 144 # Translate amino acids. 145 for key in list(table.keys()): 146 if element in table[key]: 147 element = key 148 break 149 150 # Allowed element list. 151 elements = ['H', 'C', 'N', 'O', 'F', 'P', 'S'] 152 153 # Return the element, if in the list. 154 if element in elements: 155 return element 156 157 # Else, throw a warning. 158 warn(RelaxWarning("Cannot determine the element associated with atom '%s'." % atom_name))

159 160

161 - def _parse_gaussian_record(self, record):

162 """Parse the Gaussian log record string and return an array of the corresponding atomic information. 163 164 The format of the Gaussian log records is:: 165 __________________________________________________________________________________________ 166 | | | | 167 | Columns | Data type | Description | 168 |_________|______________|_______________________________________________________________| 169 | | | | 170 | 1 | int | "Center Number" - the sequential atom number. | 171 | 2 | int | "Atomic Number" - the atomic number. | 172 | 3 | int | "Atomic Type" - the atomic type? | 173 | 4 | float | X coordinate in Angstrom | 174 | 5 | float | Y coordinate in Angstrom | 175 | 6 | float | Z coordinate in Angstrom | 176 |_________|______________|_______________________________________________________________| 177 178 179 @param record: The single line Gaussian record. 180 @type record: str 181 @return: The list of atomic information 182 @rtype: list of str 183 """ 184 185 # Skip the header. 186 if search("---------", record): 187 return None 188 if search("Center", record): 189 return None 190 if search("Number", record): 191 return None 192 193 # Initialise. 194 word = record.split() 195 196 # Proper records. 197 if len(word) == 6: 198 # Convert strings to numbers. 199 atom_number = int(word[0]) 200 atomic_num = int(word[1]) 201 x = float(word[3]) 202 y = float(word[4]) 203 z = float(word[5]) 204 205 # Return the atomic info. 206 return atom_number, atomic_num, x, y, z

207 208

209 - def _parse_xyz_record(self, record):

210 """Parse the XYZ record string and return an array of the corresponding atomic information. 211 212 The format of the XYZ records is:: 213 __________________________________________________________________________________________ 214 | | | | | 215 | Columns | Data type | Field | Definition | 216 |_________|______________|______________|________________________________________________| 217 | | | | | 218 | 1 | String | element | | 219 | 2 | Real | x | Orthogonal coordinates for X in Angstroms | 220 | 3 | Real | y | Orthogonal coordinates for Y in Angstroms | 221 | 4 | Real | z | Orthogonal coordinates for Z in Angstroms | 222 |_________|______________|______________|________________________________________________| 223 224 225 @param record: The single line PDB record. 226 @type record: str 227 @return: The list of atomic information 228 @rtype: list of str 229 """ 230 231 # Initialise. 232 fields = [] 233 word = record.split() 234 235 # ATOM and HETATM records. 236 if len(word)==4: 237 # Split up the record. 238 fields.append(word[0]) 239 fields.append(word[1]) 240 fields.append(word[2]) 241 fields.append(word[3]) 242 243 # Loop over the fields. 244 for i in range(len(fields)): 245 # Strip all whitespace. 246 fields[i] = fields[i].strip() 247 248 # Replace nothingness with None. 249 if fields[i] == '': 250 fields[i] = None 251 252 # Convert strings to numbers. 253 if fields[1]: 254 fields[1] = float(fields[1]) 255 if fields[2]: 256 fields[2] = float(fields[2]) 257 if fields[3]: 258 fields[3] = float(fields[3]) 259 260 # Return the atomic info. 261 return fields

262 263

264 - def atom_add(self, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):

265 """Method for adding an atom to the structural data object. 266 267 This method will create the key-value pair for the given atom. 268 269 270 @keyword atom_name: The atom name, e.g. 'H1'. 271 @type atom_name: str or None 272 @keyword res_name: The residue name. 273 @type res_name: str or None 274 @keyword res_num: The residue number. 275 @type res_num: int or None 276 @keyword pos: The position vector of coordinates. 277 @type pos: list (length = 3) 278 @keyword element: The element symbol. 279 @type element: str or None 280 @keyword atom_num: The atom number. 281 @type atom_num: int or None 282 @keyword chain_id: The chain identifier. 283 @type chain_id: str or None 284 @keyword segment_id: The segment identifier. 285 @type segment_id: str or None 286 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'. 287 @type pdb_record: str or None 288 @return: The index of the added atom. 289 @rtype: int 290 """ 291 292 # Append to all the arrays. 293 self.atom_num.append(atom_num) 294 self.atom_name.append(atom_name) 295 self.bonded.append([]) 296 self.chain_id.append(chain_id) 297 self.element.append(element) 298 self.pdb_record.append(pdb_record) 299 self.res_name.append(res_name) 300 self.res_num.append(res_num) 301 self.seg_id.append(segment_id) 302 self.x.append(pos[0]) 303 self.y.append(pos[1]) 304 self.z.append(pos[2]) 305 306 # Return the index. 307 return len(self.atom_num) - 1

308 309

310 - def atom_connect(self, index1=None, index2=None):

311 """Method for connecting two atoms within the data structure object. 312 313 This method will append index2 to the array at bonded[index1] and vice versa. 314 315 316 @keyword index1: The index of the first atom. 317 @type index1: int 318 @keyword index2: The index of the second atom. 319 @type index2: int 320 """ 321 322 # Update the bonded array structure, if necessary. 323 if index2 not in self.bonded[index1]: 324 self.bonded[index1].append(index2) 325 if index1 not in self.bonded[index2]: 326 self.bonded[index2].append(index1)

327 328

329 - def fill_object_from_gaussian(self, records):

330 """Method for generating a complete Structure_container object from the given Gaussian log records. 331 332 @param records: A list of structural Gaussian log records. 333 @type records: list of str 334 """ 335 336 # Loop over the records. 337 for record in records: 338 # Parse the record. 339 data = self._parse_gaussian_record(record) 340 341 # Nothing to do. 342 if data == None: 343 continue 344 345 # Unpack. 346 atom_number, atomic_num, x, y, z = data 347 348 # Translate the atomic number to the atom name. 349 atom_name = periodic_table.lookup_z_to_symbol(atomic_num) 350 351 # Add. 352 self.atom_add(atom_name=atom_name, atom_num=atom_number, pos=[x, y, z], element=atom_name)

353 354

355 - def fill_object_from_pdb(self, records, alt_loc_select=None):

356 """Method for generating a complete Structure_container object from the given PDB records. 357 358 @param records: A list of structural PDB records. 359 @type records: list of str 360 @keyword alt_loc_select: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use. 361 @type alt_loc_select: str or None 362 """ 363 364 # Loop over the records. 365 for record in records: 366 # Nothing to do. 367 if not record or record == '\n': 368 continue 369 370 # Add the atom. 371 if record[:4] == 'ATOM' or record[:6] == 'HETATM': 372 # Parse the record. 373 if record[:4] == 'ATOM': 374 record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.atom(record) 375 if record[:6] == 'HETATM': 376 record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.hetatm(record) 377 378 # Handle the alternate locations. 379 if alt_loc != None: 380 # Don't know what to do. 381 if alt_loc_select == None: 382 raise RelaxError("Multiple alternate location indicators are present in the PDB file, but the desired coordinate set has not been specified.") 383 384 # Skip non-matching locations. 385 if alt_loc != alt_loc_select: 386 continue 387 388 # Attempt at determining the element, if missing. 389 if not element: 390 element = self._det_pdb_element(name) 391 392 # Add. 393 self.atom_add(pdb_record=record_type, atom_num=serial, atom_name=name, res_name=res_name, chain_id=chain_id, res_num=res_seq, pos=[x, y, z], element=element) 394 395 # Connect atoms. 396 if record[:6] == 'CONECT': 397 # Parse the record. 398 record_type, serial, bonded1, bonded2, bonded3, bonded4 = pdb_read.conect(record) 399 400 # Loop over the atoms of the record. 401 for bonded in [bonded1, bonded2, bonded3, bonded4]: 402 # Skip if there is no record. 403 if not bonded: 404 continue 405 406 # Skip broken CONECT records (for when the record points to a non-existent atom). 407 if self._atom_index(serial) == None or self._atom_index(bonded) == None: 408 continue 409 410 # Make the connection. 411 self.atom_connect(index1=self._atom_index(serial), index2=self._atom_index(bonded))

412 413

414 - def fill_object_from_xyz(self, records):

415 """Method for generating a complete Structure_container object from the given xyz records. 416 417 @param records: A list of structural xyz records. 418 @type records: list of str 419 """ 420 421 # initialisation for atom number 422 atom_number = 1 423 424 # Loop over the records. 425 for record in records: 426 # Parse the record. 427 record = self._parse_xyz_record(record) 428 429 # Nothing to do. 430 if not record: 431 continue 432 433 # Add the atom. 434 if len(record) == 4: 435 # Add. 436 self.atom_add(atom_name=record[0], atom_num=atom_number, pos=[record[1], record[2], record[3]], element=record[0]) 437 438 # Increment of atom number 439 atom_number = atom_number + 1

440 441

442 - def from_xml(self, mol_node, file_version=1):

443 """Recreate the MolContainer from the XML molecule node. 444 445 @param mol_node: The molecule XML node. 446 @type mol_node: xml.dom.minicompat.NodeList instance 447 @keyword file_version: The relax XML version of the XML file. 448 @type file_version: int 449 """ 450 451 # Recreate the current molecule container. 452 xml_to_object(mol_node, self, file_version=file_version)

453 454

455 - def is_empty(self):

456 """Check if the container is empty.""" 457 458 # Set attributes. 459 if hasattr(self, 'mol_name'): return False 460 if hasattr(self, 'file_name'): return False 461 if hasattr(self, 'file_path'): return False 462 if hasattr(self, 'file_mol_num'): return False 463 if hasattr(self, 'file_model'): return False 464 465 # Internal data structures. 466 if not self.atom_num == []: return False 467 if not self.atom_name == []: return False 468 if not self.bonded == []: return False 469 if not self.chain_id == []: return False 470 if not self.element == []: return False 471 if not self.pdb_record == []: return False 472 if not self.res_name == []: return False 473 if not self.res_num == []: return False 474 if not self.seg_id == []: return False 475 if not self.x == []: return False 476 if not self.y == []: return False 477 if not self.z == []: return False 478 479 # Ok, now this thing must be empty. 480 return True

481 482

483 - def last_residue(self):

484 """Return the number of the last residue. 485 486 @return: The last residue number. 487 @rtype: int 488 """ 489 490 # Return the number. 491 return self.res_num[-1]

492 493

494 - def merge(self, mol_cont=None):

495 """Merge the contents of the given molecule container into here. 496 497 @keyword mol_cont: The data structure for the molecule to merge. 498 @type mol_cont: MolContainer instance 499 """ 500 501 # The current index. 502 curr_index = len(self.atom_num) 503 504 # Loop over all data. 505 for i in range(len(mol_cont.atom_num)): 506 # Add the atom. 507 self.atom_add(atom_num=curr_index+i+1, atom_name=mol_cont.atom_name[i], res_name=mol_cont.res_name[i], res_num=mol_cont.res_num[i], pos=[mol_cont.x[i], mol_cont.y[i], mol_cont.z[i]], element=mol_cont.element[i], chain_id=mol_cont.chain_id[i], pdb_record=mol_cont.pdb_record[i]) 508 509 # Connect the atoms. 510 for j in range(len(mol_cont.bonded[i])): 511 self.atom_connect(index1=i+curr_index+1, index2=mol_cont.bonded[i][j]+curr_index+1)

512 513

514 - def to_xml(self, doc, element):

515 """Create XML elements for the contents of this molecule container. 516 517 @param doc: The XML document object. 518 @type doc: xml.dom.minidom.Document instance 519 @param element: The element to add the molecule XML elements to. 520 @type element: XML element object 521 """ 522 523 # Create an XML element for this molecule and add it to the higher level element. 524 mol_element = doc.createElement('mol_cont') 525 element.appendChild(mol_element) 526 527 # Set the molecule attributes. 528 mol_element.setAttribute('desc', 'Molecule container') 529 mol_element.setAttribute('name', str(self.mol_name)) 530 531 # Add all simple python objects within the MolContainer to the XML element. 532 fill_object_contents(doc, mol_element, object=self, blacklist=list(self.__class__.__dict__.keys()))

537 """List type data container for holding the different molecules of one model.""" 538

539 - def __repr__(self):

540 """The string representation of the object. 541 542 Rather than using the standard Python conventions (either the string representation of the 543 value or the "<...desc...>" notation), a rich-formatted description of the object is given. 544 """ 545 546 text = "Molecules.\n\n" 547 text = text + "%-8s%-8s" % ("Index", "Name") + "\n" 548 for i in range(len(self)): 549 text = text + "%-8i%-8s" % (i, self[i].mol_name) + "\n" 550 return text

551 552

553 - def add_item(self, mol_name=None, mol_cont=None):

554 """Append the given MolContainer instance to the MolList. 555 556 @keyword mol_name: The molecule number. 557 @type mol_name: int 558 @keyword mol_cont: The data structure for the molecule. 559 @type mol_cont: MolContainer instance 560 @return: The new molecule container. 561 @rtype: MolContainer instance 562 """ 563 564 # If no molecule data exists, replace the empty first molecule with this molecule (just a renaming). 565 if len(self) and self.is_empty(): 566 self[0].mol_name = mol_name 567 568 # Otherwise append an empty MolContainer. 569 else: 570 # Test if the molecule already exists. 571 for i in range(len(self)): 572 if self[i].mol_name == mol_name: 573 raise RelaxError("The molecule '%s' already exists." % mol_name) 574 575 # Append an empty MolContainer. 576 self.append(mol_cont) 577 578 # Set the name. 579 self[-1].mol_name = mol_name 580 581 # Return the container. 582 return self[-1]

583 584

585 - def is_empty(self):

586 """Method for testing if this MolList object is empty. 587 588 @return: True if this list only has one MolContainer and the molecule name has not 589 been set, False otherwise. 590 @rtype: bool 591 """ 592 593 # No MolContainers. 594 if len(self) == 0: 595 return True 596 597 # There is only one MolContainer and it is empty. 598 if len(self) == 1 and hasattr(self[0], 'is_empty') and self[0].is_empty(): 599 return True 600 601 # Otherwise. 602 return False

603 604

605 - def from_xml(self, mol_nodes, file_version=1):

606 """Recreate a molecule list data structure from the XML molecule nodes. 607 608 @param mol_nodes: The molecule XML nodes. 609 @type mol_nodes: xml.dom.minicompat.NodeList instance 610 @keyword file_version: The relax XML version of the XML file. 611 @type file_version: int 612 """ 613 614 # Test if empty. 615 if not self.is_empty(): 616 raise RelaxFromXMLNotEmptyError(self.__class__.__name__) 617 618 # Loop over the molecules. 619 for mol_node in mol_nodes: 620 # Initialise a MolContainer instance. 621 mol_cont = MolContainer() 622 623 # Get the molecule name. 624 name = mol_node.getAttribute('name') 625 if name == 'None': 626 name = None 627 628 # Add the molecule to the MolList structure. 629 self.add_item(mol_name=name, mol_cont=mol_cont) 630 631 # Execute the specific MolContainer from_xml() method. 632 self[-1].from_xml(mol_node, file_version=file_version)

633 634

635 - def merge_item(self, mol_name=None, mol_cont=None):

636 """Mege the given MolContainer instance into a pre-existing molecule container. 637 638 @keyword mol_name: The molecule number. 639 @type mol_name: int 640 @keyword mol_cont: The data structure for the molecule. 641 @type mol_cont: MolContainer instance 642 @return: The new molecule container. 643 @rtype: MolContainer instance 644 """ 645 646 # Find the molecule to merge. 647 index = None 648 for i in range(len(self)): 649 if self[i].mol_name == mol_name: 650 index = i 651 break 652 653 # No molecule found. 654 if index == None: 655 raise RelaxError("The molecule '%s' to merge with cannot be found." % mol_name) 656 657 # Merge the molecules. 658 self[index].merge(mol_cont) 659 660 # Return the container. 661 return self[index]

662 663

664 - def to_xml(self, doc, element):

665 """Create XML elements for each molecule. 666 667 @param doc: The XML document object. 668 @type doc: xml.dom.minidom.Document instance 669 @param element: The element to add the molecule XML elements to. 670 @type element: XML element object 671 """ 672 673 # Loop over the molecules. 674 for i in range(len(self)): 675 # Add the molecule data. 676 self[i].to_xml(doc, element)

Source Code for Module lib.structure.internal.molecules