lib.structure.internal.molecules

"""The container for the molecular information.

The structural data object for this class is a container possessing a number of different arrays
corresponding to different structural information.  These objects include:

    - atom_num:  The atom number.
    - atom_name:  The atom name.
    - bonded:  Each element an array of bonded atom indices.
    - chain_id:  The chain ID.
    - element:  The element symbol.
    - pdb_record:  The optional PDB record name (one of ATOM, HETATM, or TER).
    - res_name:  The residue name.
    - res_num:  The residue number.
    - seg_id:  The segment ID.
    - x:  The x coordinate of the atom.
    - y:  The y coordinate of the atom.
    - z:  The z coordinate of the atom.

All arrays should be of equal length so that an atom index can retrieve all the corresponding
data.  Only the atom identification string is compulsory, all other arrays can contain None.
"""

def __init__(self):
    """Set up the empty per-atom arrays of the molecular container.

    Each attribute is created as its own empty list.  The lists are filled in parallel, one
    element per atom, by the atom adding method.
    """

    # The per-atom arrays, in creation order, with the data each one holds.
    arrays = (
        'atom_num',     # The atom number.
        'atom_name',    # The atom name.
        'bonded',       # The bonded atom indices (one array per atom).
        'chain_id',     # The chain ID.
        'element',      # The element symbol.
        'pdb_record',   # The optional PDB record name.
        'res_name',     # The residue name.
        'res_num',      # The residue number.
        'seg_id',       # The segment ID.
        'x',            # The x coordinate.
        'y',            # The y coordinate.
        'z',            # The z coordinate.
    )

    # Give every array a distinct empty list.
    for name in arrays:
        setattr(self, name, [])

98 99

def _atom_index(self, atom_num):
    """Translate an atom number into its position within the atom arrays.

    The first atom whose number equals the given one is used.  If no atom matches, a RelaxWarning
    is issued and None is returned.

    @param atom_num:    The atom number to find the index of.
    @type atom_num:     int
    @return:            The atom index corresponding to the atom, or None if it is not present.
    @rtype:             int or None
    """

    # Scan the atom numbers, stopping at the first match.
    for index, number in enumerate(self.atom_num):
        if number == atom_num:
            return index

    # No match, so the PDB connect records are incorrect.
    warn(RelaxWarning("The atom number " + repr(atom_num) + " from the CONECT record cannot be found within the ATOM and HETATM records."))
    return None

117 118

def _det_pdb_element(self, atom_name):
    """Try to determine the element from the PDB atom name.

    The name is reduced by removing prime characters and digits from both ends, translated via a
    table of amino acid atom names, and then checked against the list of supported elements.  If
    the element cannot be determined, a RelaxWarning is issued.

    @param atom_name:   The PDB atom name.
    @type atom_name:    str
    @return:            The element name, or None if unsuccessful.
    @rtype:             str or None
    """

    # Strip away the "'" character (for RNA, etc.) together with the atom numbering, from the
    # front and end.  Both sets are removed in one pass so that interleaved names such as "H5'1"
    # are fully reduced - stripping them one after the other leaves "H5'" behind.
    element = atom_name.strip("'" + digits)

    # Amino acid atom translation table (note, numbers have been stripped already!).
    table = {
        'C': ['CA', 'CB', 'CG', 'CD', 'CE', 'CH', 'CZ'],
        'N': ['ND', 'NE', 'NH', 'NZ'],
        'H': ['HA', 'HB', 'HG', 'HD', 'HE', 'HH', 'HT', 'HZ'],
        'O': ['OG', 'OD', 'OE', 'OH', 'OT'],
        'S': ['SD', 'SG'],
    }

    # Translate amino acids.
    for symbol, names in table.items():
        if element in names:
            element = symbol
            break

    # Return the element, if in the allowed element list.
    if element in ('H', 'C', 'N', 'O', 'F', 'P', 'S'):
        return element

    # Else, throw a warning.
    warn(RelaxWarning("Cannot determine the element associated with atom '%s'." % atom_name))
    return None

157 158

def _parse_xyz_record(self, record):
    """Split a single XYZ record into the element name and numeric coordinates.

    The record is split on whitespace.  A record of exactly four words is taken to be an atom
    line with the layout::

        element   x   y   z

    where the element is kept as a string and the three orthogonal coordinates are converted to
    floats.  Any other record yields an empty list.

    @param record:  The single line XYZ record.
    @type record:   str
    @return:        The list [element, x, y, z], or an empty list if the record does not consist
                    of four words.
    @rtype:         list
    """

    # Break the record into its whitespace separated words.
    words = record.split()

    # Not an atom line.
    if len(words) != 4:
        return []

    # The words can neither be empty nor contain whitespace, so no further cleaning is needed.
    # NOTE(review): a four word comment line reaches the float() calls and raises ValueError.
    name, x, y, z = words

    # The atomic info, with the coordinates as numbers.
    return [name, float(x), float(y), float(z)]

212 213

def atom_add(self, atom_name=None, res_name=None, res_num=None, pos=None, element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):
    """Method for adding an atom to the structural data object.

    One element is appended to every per-atom array, so that all the arrays remain of equal
    length.  The new atom starts with an empty array of bonded atoms.

    @keyword atom_name:     The atom name, e.g. 'H1'.
    @type atom_name:        str or None
    @keyword res_name:      The residue name.
    @type res_name:         str or None
    @keyword res_num:       The residue number.
    @type res_num:          int or None
    @keyword pos:           The position vector of coordinates.  If not supplied, all three
                            coordinates are stored as None.
    @type pos:              list (length = 3) or None
    @keyword element:       The element symbol.
    @type element:          str or None
    @keyword atom_num:      The atom number.
    @type atom_num:         int or None
    @keyword chain_id:      The chain identifier.
    @type chain_id:         str or None
    @keyword segment_id:    The segment identifier.
    @type segment_id:       str or None
    @keyword pdb_record:    The optional PDB record name, e.g. 'ATOM' or 'HETATM'.
    @type pdb_record:       str or None
    @return:                The index of the added atom.
    @rtype:                 int
    """

    # No position given (a None sentinel avoids sharing one mutable default between calls).
    if pos is None:
        pos = (None, None, None)

    # Append to all the arrays.
    self.atom_num.append(atom_num)
    self.atom_name.append(atom_name)
    self.bonded.append([])
    self.chain_id.append(chain_id)
    self.element.append(element)
    self.pdb_record.append(pdb_record)
    self.res_name.append(res_name)
    self.res_num.append(res_num)
    self.seg_id.append(segment_id)
    self.x.append(pos[0])
    self.y.append(pos[1])
    self.z.append(pos[2])

    # Return the index.
    return len(self.atom_num) - 1

258 259

def atom_connect(self, index1=None, index2=None):
    """Record a bond between two atoms of the data structure object.

    Each atom index is added to the bonded array of the other atom, unless it is already listed
    there.

    @keyword index1:    The index of the first atom.
    @type index1:       int
    @keyword index2:    The index of the second atom.
    @type index2:       int
    """

    # Handle both directions of the bond, first atom then second.
    for atom, partner in ((index1, index2), (index2, index1)):
        # The atoms already bonded to this one.
        partners = self.bonded[atom]

        # Update the bonded array structure, if necessary.
        if partner not in partners:
            partners.append(partner)

277 278

def fill_object_from_pdb(self, records, alt_loc_select=None):
    """Method for filling this container from the given PDB records.

    ATOM and HETATM records are parsed and added as atoms, CONECT records are translated into
    bonds between the already loaded atoms, and all other records are ignored.

    @param records:             A list of structural PDB records.
    @type records:              list of str
    @keyword alt_loc_select:    The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use.
    @type alt_loc_select:       str or None
    @raises RelaxError:         If a record carries an alternate location indicator but no value has been selected.
    """

    # Loop over the records.
    for record in records:
        # Nothing to do.
        if not record or record == '\n':
            continue

        # Add the atom.
        if record[:4] == 'ATOM' or record[:6] == 'HETATM':
            # Parse the record.
            if record[:4] == 'ATOM':
                record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.atom(record)
            if record[:6] == 'HETATM':
                record_type, serial, name, alt_loc, res_name, chain_id, res_seq, icode, x, y, z, occupancy, temp_factor, element, charge = pdb_read.hetatm(record)

            # Handle the alternate locations.
            if alt_loc is not None:
                # Don't know what to do.
                if alt_loc_select is None:
                    raise RelaxError("Multiple alternate location indicators are present in the PDB file, but the desired coordinate set has not been specified.")

                # Skip non-matching locations.
                if alt_loc != alt_loc_select:
                    continue

            # Attempt at determining the element, if missing.
            if not element:
                element = self._det_pdb_element(name)

            # Add.
            self.atom_add(pdb_record=record_type, atom_num=serial, atom_name=name, res_name=res_name, chain_id=chain_id, res_num=res_seq, pos=[x, y, z], element=element)

        # Connect atoms.
        if record[:6] == 'CONECT':
            # Parse the record.
            record_type, serial, bonded1, bonded2, bonded3, bonded4 = pdb_read.conect(record)

            # Loop over the atoms of the record.
            for bonded in [bonded1, bonded2, bonded3, bonded4]:
                # Skip if there is no record.
                if not bonded:
                    continue

                # Look up each atom once only, as every lookup is a scan of all atoms.  Broken
                # CONECT records (pointing to a non-existent atom) are skipped, and the second
                # lookup is not performed when the first has already failed.
                index1 = self._atom_index(serial)
                if index1 is None:
                    continue
                index2 = self._atom_index(bonded)
                if index2 is None:
                    continue

                # Make the connection.
                self.atom_connect(index1=index1, index2=index2)

336 337

def fill_object_from_xyz(self, records):
    """Method for filling this container from the given xyz records.

    Every record which parses into the four atom fields is added as an atom.  The element name
    is used as the atom name as well, and the atoms are numbered consecutively from 1.

    @param records: A list of structural xyz records.
    @type records:  list of str
    """

    # The number for the next atom.
    next_number = 1

    # Loop over the records.
    for line in records:
        # Parse the record.
        fields = self._parse_xyz_record(line)

        # Nothing to do (not an atom record).
        if not fields or len(fields) != 4:
            continue

        # Unpack the atomic information.
        name, x, y, z = fields

        # Add the atom.
        self.atom_add(atom_name=name, atom_num=next_number, pos=[x, y, z], element=name)

        # Move to the next atom number.
        next_number += 1

364 365

def from_xml(self, mol_node, file_version=1):
    """Recreate the MolContainer from the XML molecule node.

    All the work is delegated to xml_to_object(), which is handed the XML node together with this
    container as the object to fill.

    @param mol_node:        The molecule XML node.
    @type mol_node:         xml.dom.minicompat.NodeList instance
    @keyword file_version:  The relax XML version of the XML file.
    @type file_version:     int
    """

    # Recreate the current molecule container.
    xml_to_object(mol_node, self, file_version=file_version)

377 378

def is_empty(self):
    """Check if the container is empty.

    The container counts as empty when none of the molecule or file attributes have been set and
    every per-atom array equals the empty list.

    @return:    True if the container holds no data, False otherwise.
    @rtype:     bool
    """

    # The presence of any of the set attributes means that the container is in use.
    for name in ('mol_name', 'file_name', 'file_path', 'file_mol_num', 'file_model'):
        if hasattr(self, name):
            return False

    # The internal data structures, checked one at a time in this order.
    for name in ('atom_num', 'atom_name', 'bonded', 'chain_id', 'element', 'pdb_record', 'res_name', 'res_num', 'seg_id', 'x', 'y', 'z'):
        if not getattr(self, name) == []:
            return False

    # Ok, now this thing must be empty.
    return True

405 406

def last_residue(self):
    """Return the number of the last residue.

    This is the final element of the residue number array, i.e. the residue number of the most
    recently added atom.

    @return:    The last residue number.
    @rtype:     int
    """

    # Return the number.
    # NOTE(review): with no atoms present the array is empty and this indexing will fail - confirm that callers never hit this.
    return self.res_num[-1]

416 417

def merge(self, mol_cont=None):
    """Merge the contents of the given molecule container into here.

    The atoms of the given container are appended after the current atoms and renumbered to
    continue on from the current atom count.  The bonds are then recreated between the new atoms.
    The given container is not modified.

    @keyword mol_cont:  The data structure for the molecule to merge.
    @type mol_cont:     MolContainer instance
    """

    # The index at which the first merged atom will be placed.
    curr_index = len(self.atom_num)

    # Add all of the atoms first, so that every bond partner exists before the connections are made.
    for i in range(len(mol_cont.atom_num)):
        self.atom_add(atom_num=curr_index+i+1, atom_name=mol_cont.atom_name[i], res_name=mol_cont.res_name[i], res_num=mol_cont.res_num[i], pos=[mol_cont.x[i], mol_cont.y[i], mol_cont.z[i]], element=mol_cont.element[i], chain_id=mol_cont.chain_id[i], segment_id=mol_cont.seg_id[i], pdb_record=mol_cont.pdb_record[i])

    # Connect the atoms.  The bonded arrays hold 0-based atom indices, so these are only shifted
    # by the number of pre-existing atoms (unlike the 1-based atom numbers above).
    for i, partners in enumerate(mol_cont.bonded):
        for partner in partners:
            self.atom_connect(index1=curr_index+i, index2=curr_index+partner)

436 437

def to_xml(self, doc, element):
    """Write this molecule container into the XML document.

    A 'mol_cont' element is created below the given element, labelled with a description and the
    molecule name, and then handed to fill_object_contents() together with this container.  The
    names defined on the class itself are blacklisted from that call.

    @param doc:     The XML document object.
    @type doc:      xml.dom.minidom.Document instance
    @param element: The element to add the molecule XML elements to.
    @type element:  XML element object
    """

    # The XML element for this molecule, attached to the higher level element.
    mol_node = doc.createElement('mol_cont')
    element.appendChild(mol_node)

    # The molecule attributes.
    mol_node.setAttribute('desc', 'Molecule container')
    mol_node.setAttribute('name', str(self.mol_name))

    # The names belonging to the class rather than the instance.
    class_names = list(self.__class__.__dict__)

    # Add all simple python objects within the MolContainer to the XML element.
    fill_object_contents(doc, mol_node, object=self, blacklist=class_names)

461 """List type data container for holding the different molecules of one model.""" 462

def __repr__(self):
    """The string representation of the object.

    Instead of the usual Python conventions, a small table is returned listing the index and
    name of each molecule in the list, one molecule per line, below a title and a header line.

    @return:    The formatted table of molecules.
    @rtype:     str
    """

    # The title (followed by an empty line) and the table header.
    lines = ["Molecules.\n", "%-8s%-8s" % ("Index", "Name")]

    # One row per molecule.
    for index, mol in enumerate(self):
        lines.append("%-8i%-8s" % (index, mol.mol_name))

    # Every line, including the last, is newline terminated.
    return "\n".join(lines) + "\n"

475 476

def add_item(self, mol_name=None, mol_cont=None):
    """Add a named molecule to the MolList.

    If the list consists solely of an empty placeholder molecule, that molecule is simply renamed
    and the given container is not used.  Otherwise the given container is appended and named.

    @keyword mol_name:  The molecule name.
    @type mol_name:     str or None
    @keyword mol_cont:  The data structure for the molecule.
    @type mol_cont:     MolContainer instance
    @return:            The last molecule container of the list.
    @rtype:             MolContainer instance
    @raises RelaxError: If a molecule of the same name is already present.
    """

    # If no molecule data exists, replace the empty first molecule with this molecule (just a renaming).
    if len(self) and self.is_empty():
        self[0].mol_name = mol_name
        return self[-1]

    # Test if the molecule already exists.
    if any(mol.mol_name == mol_name for mol in self):
        raise RelaxError("The molecule '%s' already exists." % mol_name)

    # Append the container, then set its name.
    self.append(mol_cont)
    self[-1].mol_name = mol_name

    # Return the container.
    return self[-1]

507 508

def is_empty(self):
    """Method for testing if this MolList object is empty.

    @return:    True if the list has no elements, or if its single element provides an is_empty()
                method which reports it as empty.  False otherwise.
    @rtype:     bool
    """

    # The number of molecules.
    count = len(self)

    # No MolContainers.
    if count == 0:
        return True

    # Multiple molecules can never be empty.
    if count != 1:
        return False

    # There is only one MolContainer, so the answer is whether it is empty.
    only = self[0]
    if hasattr(only, 'is_empty') and only.is_empty():
        return True
    return False

527 528

def from_xml(self, mol_nodes, file_version=1):
    """Recreate a molecule list data structure from the XML molecule nodes.

    For every node a fresh MolContainer is created, added to the list under the name stored in
    the node's 'name' attribute, and the last container of the list is then restored from the
    node.

    @param mol_nodes:                   The molecule XML nodes.
    @type mol_nodes:                    xml.dom.minicompat.NodeList instance
    @keyword file_version:              The relax XML version of the XML file.
    @type file_version:                 int
    @raises RelaxFromXMLNotEmptyError:  If this list is not empty.
    """

    # Refuse to overwrite existing data.
    if not self.is_empty():
        raise RelaxFromXMLNotEmptyError(self.__class__.__name__)

    # Loop over the molecules.
    for node in mol_nodes:
        # A new MolContainer instance for the molecule.
        container = MolContainer()

        # The molecule name, where the text 'None' stands for a missing name.
        stored = node.getAttribute('name')
        mol_name = None if stored == 'None' else stored

        # Add the molecule to the MolList structure.
        self.add_item(mol_name=mol_name, mol_cont=container)

        # Execute the specific MolContainer from_xml() method.
        self[-1].from_xml(node, file_version=file_version)

557 558

def merge_item(self, mol_name=None, mol_cont=None):
    """Merge the given MolContainer instance into a pre-existing molecule container.

    The first molecule of the list with a matching name is the one merged into.

    @keyword mol_name:  The name of the molecule to merge into.
    @type mol_name:     str or None
    @keyword mol_cont:  The data structure for the molecule.
    @type mol_cont:     MolContainer instance
    @return:            The molecule container which was merged into.
    @rtype:             MolContainer instance
    @raises RelaxError: If no molecule of the given name exists.
    """

    # Find the molecule to merge.
    for target in self:
        if target.mol_name == mol_name:
            # Merge the molecules.
            target.merge(mol_cont)

            # Return the container.
            return target

    # No molecule found.
    raise RelaxError("The molecule '%s' to merge with cannot be found." % mol_name)

586 587

def to_xml(self, doc, element):
    """Create XML elements for each molecule.

    The to_xml() method of every molecule in the list is called in turn, in list order, with the
    same document and parent element.

    @param doc:     The XML document object.
    @type doc:      xml.dom.minidom.Document instance
    @param element: The element to add the molecule XML elements to.
    @type element:  XML element object
    """

    # Add the data of each molecule.
    for mol in self:
        mol.to_xml(doc, element)

Source Code for Module lib.structure.internal.molecules