lib.structure.pdb

1 ############################################################################### 2 # # 3 # Copyright (C) 2013-2015 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax (http://www.nmr-relax.com). # 6 # # 7 # This program is free software: you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation, either version 3 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # This program is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 19 # # 20 ############################################################################### 21 22 # Module docstring. 23 """Module for creating PDB records. 24 25 This module currently used the PDB format version 3.30 from July, 2011 U{http://www.wwpdb.org/documentation/file-format/format33/v3.3.html}. 26 """ 27 28 # Python module imports. 29 from textwrap import wrap 30 31 # relax module imports. 32 from lib.errors import RelaxError 33 34

35 -def _handle_atom_name(name):

36 """Handle the funky PDB atom name alignment. 37 38 From the PDB format documents: 39 40 - "Alignment of one-letter atom name such as C starts at column 14, while two-letter atom name such as FE starts at column 13." 41 42 43 @param name: The atom name. 44 @type name: str or None 45 @return: The whitespace padded and PDB formatted atom name. This will be exactly 4 characters. 46 @rtype: str 47 """ 48 49 # Handle none. 50 if name == None: 51 name = " " 52 53 # Single letter name. 54 if len(name) == 1: 55 name = " %s " % name 56 57 # Two letter name. 58 elif len(name) == 2: 59 name = "%s " % name 60 61 # Three letter name. 62 elif len(name) == 3: 63 name = "%s " % name 64 65 # Return the name. 66 return name

67 68

69 -def _handle_none(value):

70 """Auxiliary function for handling values of None. 71 72 @param value: The value to convert. 73 @type value: anything 74 @return: If the value is None, then an empty string. Otherwise the original value is returned. 75 @rtype: anything 76 """ 77 78 # Handle None. 79 if value == None: 80 return '' 81 82 # Normal value. 83 return value

84 85

86 -def _handle_text(text):

87 """Auxiliary function for handling text values. 88 89 This will convert None to empty strings and make sure everything is capitalised. 90 91 92 @param text: The text to convert. 93 @type text: anything 94 @return: If the text is None, then an empty string. All text will be capitalised. 95 @rtype: anything 96 """ 97 98 # Handle None. 99 if text == None: 100 return '' 101 102 # Return capitalised text. 103 return text.upper()

104 105

106 -def _record_validate(record):

107 """Check that the record is ok. 108 109 @param record: The PDB record as text. 110 @type record: str 111 @raises RelaxError: If the record is not exactly 80 characters long. 112 """ 113 114 # Check the length. 115 if len(record) != 80: 116 if len(record) < 80: 117 raise RelaxError("The PDB record '%s' is too short." % record) 118 else: 119 raise RelaxError("The PDB record '%s' is too long." % record) 120 121 # Check for nan. 122 if 'nan' in record: 123 raise RelaxError("The PDB record '%s' contains NaN values." % record)

124 125

def atom(file, serial='', name='', alt_loc='', res_name='', chain_id='', res_seq='', icode='', x='', y='', z='', occupancy=1.0, temp_factor=0.0, element='', charge=''):
    """Generate the ATOM record.

    The following is the PDB v3.3 documentation U{http://www.wwpdb.org/documentation/file-format/format33/sect9.html#ATOM}.

    ATOM
    ====

    Overview
    --------

    The ATOM records present the atomic coordinates for standard amino acids and nucleotides.  They also present the occupancy and temperature factor for each atom.  Non-polymer chemical coordinates use the HETATM record type.  The element symbol is always present on each ATOM record; charge is optional.

    Changes in ATOM/HETATM records result from the standardization atom and residue nomenclature.  This nomenclature is described in the Chemical Component Dictionary (U{ftp://ftp.wwpdb.org/pub/pdb/data/monomers}).


    Record Format
    -------------

    The format is::
        __________________________________________________________________________________________
        |         |              |              |                                                |
        | Columns | Data type    | Field        | Definition                                     |
        |_________|______________|______________|________________________________________________|
        |         |              |              |                                                |
        | 1 -  6  | Record name  | "ATOM"       |                                                |
        | 7 - 11  | Integer      | serial       | Atom serial number.                            |
        | 13 - 16 | Atom         | name         | Atom name.                                     |
        | 17      | Character    | altLoc       | Alternate location indicator.                  |
        | 18 - 20 | Residue name | resName      | Residue name.                                  |
        | 22      | Character    | chainID      | Chain identifier.                              |
        | 23 - 26 | Integer      | resSeq       | Residue sequence number.                       |
        | 27      | AChar        | iCode        | Code for insertion of residues.                |
        | 31 - 38 | Real(8.3)    | x            | Orthogonal coordinates for X in Angstroms.     |
        | 39 - 46 | Real(8.3)    | y            | Orthogonal coordinates for Y in Angstroms.     |
        | 47 - 54 | Real(8.3)    | z            | Orthogonal coordinates for Z in Angstroms.     |
        | 55 - 60 | Real(6.2)    | occupancy    | Occupancy.                                     |
        | 61 - 66 | Real(6.2)    | tempFactor   | Temperature factor.                            |
        | 77 - 78 | LString(2)   | element      | Element symbol, right-justified.               |
        | 79 - 80 | LString(2)   | charge       | Charge on the atom.                            |
        |_________|______________|______________|________________________________________________|


    Details
    -------

    ATOM records for proteins are listed from amino to carboxyl terminus.

    Nucleic acid residues are listed from the 5' to the 3' terminus.

    Alignment of one-letter atom name such as C starts at column 14, while two-letter atom name such as FE starts at column 13.

    Atom nomenclature begins with atom type.

    No ordering is specified for polysaccharides.

    Non-blank alphanumerical character is used for chain identifier.

    The list of ATOM records in a chain is terminated by a TER record.

    If more than one model is present in the entry, each model is delimited by MODEL and ENDMDL records.

    AltLoc is the place holder to indicate alternate conformation.  The alternate conformation can be in the entire polymer chain, or several residues or partial residue (several atoms within one residue).  If an atom is provided in more than one position, then a non-blank alternate location indicator must be used for each of the atomic positions.  Within a residue, all atoms that are associated with each other in a given conformation are assigned the same alternate position indicator.  There are two ways of representing alternate conformation - either at atom level or at residue level (see examples).

    For atoms that are in alternate sites indicated by the alternate site indicator, sorting of atoms in the ATOM/HETATM list uses the following general rules:

        - In the simple case that involves a few atoms or a few residues with alternate sites, the coordinates occur one after the other in the entry.
        - In the case of a large heterogen groups which are disordered, the atoms for each conformer are listed together.

    Alphabet letters are commonly used for insertion code.  The insertion code is used when two residues have the same numbering.  The combination of residue numbering and insertion code defines the unique residue.

    If the depositor provides the data, then the isotropic B value is given for the temperature factor.

    If there are neither isotropic B values from the depositor, nor anisotropic temperature factors in ANISOU, then the default value of 0.0 is used for the temperature factor.

    Columns 79 - 80 indicate any charge on the atom, e.g., 2+, 1-.  In most cases, these are blank.

    For refinements with program REFMAC prior 5.5.0042 which use TLS refinement, the values of B may include only the TLS contribution to the isotropic temperature factor rather than the full isotropic value.


    Verification/Validation/Value Authority Control
    -----------------------------------------------

    The ATOM/HETATM records are checked for PDB file format, sequence information, and packing.


    Relationships to Other Record Types
    -----------------------------------

    The ATOM records are compared to the corresponding sequence database.  Sequence discrepancies appear in the SEQADV record.  Missing atoms are annotated in the remarks.  HETATM records are formatted in the same way as ATOM records.  The sequence implied by ATOM records must be identical to that given in SEQRES, with the exception that residues that have no coordinates, e.g., due to disorder, must appear in SEQRES.


    Example
    -------

    Example 1 (alternate conformations at the atom level)::

                 1         2         3         4         5         6         7         8
        12345678901234567890123456789012345678901234567890123456789012345678901234567890
        ATOM     32  N  AARG A  -3      11.281  86.699  94.383  0.50 35.88           N
        ATOM     33  N  BARG A  -3      11.296  86.721  94.521  0.50 35.60           N
        ATOM     34  CA AARG A  -3      12.353  85.696  94.456  0.50 36.67           C
        ATOM     35  CA BARG A  -3      12.333  85.862  95.041  0.50 36.42           C
        ATOM     50  NH1AARG A  -3      12.218  84.840  88.007  0.50 40.76           N
        ATOM     51  NH1BARG A  -3      14.338  86.056  88.706  0.50 40.23           N

    Example 2 (alternate conformations at the residue level)::

                 1         2         3         4         5         6         7         8
        12345678901234567890123456789012345678901234567890123456789012345678901234567890
        ATOM     32  N  AARG A  -3      11.281  86.699  94.383  0.50 35.88           N
        ATOM     33  CA AARG A  -3      12.353  85.696  94.456  0.50 36.67           C
        ATOM     41  NH1AARG A  -3      12.218  84.840  88.007  0.50 40.76           N
        ATOM     43  N  BARG A  -3      11.296  86.721  94.521  0.50 35.60           N
        ATOM     44  CA BARG A  -3      12.333  85.862  95.041  0.50 36.42           C
        ATOM     52  NH1BARG A  -3      14.338  86.056  88.706  0.50 40.23           N


    Implementation notes
    --------------------

    The coordinates are clamped to the range -999.999 to 9999.999 so that they fit into the 8 character wide Real(8.3) fields.  The coordinates, occupancy and temperature factor are formatted as floating point numbers, so these must be supplied as numbers.  The final record is checked by _record_validate() before being written, followed by a newline character.


    @param file:            The file to write the record to.
    @type file:             file object
    @keyword serial:        The atom serial number.
    @type serial:           int
    @keyword name:          The atom name.
    @type name:             str
    @keyword alt_loc:       The alternate location indicator.
    @type alt_loc:          str
    @keyword res_name:      The residue name.
    @type res_name:         str
    @keyword chain_id:      The chain identifier.
    @type chain_id:         str
    @keyword res_seq:       The sequence number.
    @type res_seq:          int
    @keyword icode:         The insertion code.
    @type icode:            str
    @keyword x:             Orthogonal coordinates for X in Angstroms.
    @type x:                float
    @keyword y:             Orthogonal coordinates for Y in Angstroms.
    @type y:                float
    @keyword z:             Orthogonal coordinates for Z in Angstroms.
    @type z:                float
    @keyword occupancy:     Occupancy.
    @type occupancy:        float
    @keyword temp_factor:   Temperature factor.
    @type temp_factor:      float
    @keyword element:       Element symbol.
    @type element:          str
    @keyword charge:        Charge on the atom.
    @type charge:           int or str
    @raises RelaxError:     If the formatted record fails the checks of _record_validate().
    """

    # Coordinate bounds (the limits of the 8.3 fixed-width fields).
    pdb_min = -999.999
    pdb_max = 9999.999
    coord = [x, y, z]
    for i, value in enumerate(coord):
        # Unset coordinates are left untouched.
        if value == '':
            continue

        # Clamp to the representable range.
        if value < pdb_min:
            coord[i] = pdb_min
        elif value > pdb_max:
            coord[i] = pdb_max

    # The record layout, with the unused column ranges spelt out explicitly so that the blank runs cannot be miscounted.
    record_format = (
        "%-6s%5s %-4s%1s%3s %1s%4s%1s"      # Columns 1 - 27.
        + ' '*3                             # Columns 28 - 30 (unused).
        + "%8.3f%8.3f%8.3f%6.2f%6.2f"       # Columns 31 - 66.
        + ' '*10                            # Columns 67 - 76 (unused).
        + "%2s%2s"                          # Columns 77 - 80.
    )

    # The formatted record.
    record = record_format % (
        'ATOM',
        _handle_none(serial),
        _handle_atom_name(name),
        _handle_none(alt_loc),
        _handle_none(res_name),
        _handle_none(chain_id),
        _handle_none(res_seq),
        _handle_none(icode),
        _handle_none(coord[0]),
        _handle_none(coord[1]),
        _handle_none(coord[2]),
        _handle_none(occupancy),
        _handle_none(temp_factor),
        _handle_none(element),
        _handle_none(charge)
    )

    # Validate.
    _record_validate(record)

    # Write out the formatted record.
    file.write(record)
    file.write('\n')

341 342

def conect(file, serial='', bonded1='', bonded2='', bonded3='', bonded4=''):
    """Generate the CONECT record.

    The following is the PDB v3.3 documentation U{http://www.wwpdb.org/documentation/file-format/format33/sect10.html#CONECT}.

    CONECT
    ======

    Overview
    --------

    The CONECT records specify connectivity between atoms for which coordinates are supplied.  The connectivity is described using the atom serial number as shown in the entry.  CONECT records are mandatory for HET groups (excluding water) and for other bonds not specified in the standard residue connectivity table.  These records are generated automatically.


    Record Format
    -------------

    The format is::
        ______________________________________________________________________________________________
        |         |              |              |                                                    |
        | Columns | Data type    | Field        | Definition                                         |
        |_________|______________|______________|____________________________________________________|
        |         |              |              |                                                    |
        | 1 -  6  | Record name  | "CONECT"     |                                                    |
        | 7 - 11  | Integer      | serial       | Atom serial number                                 |
        | 12 - 16 | Integer      | serial       | Serial number of bonded atom                       |
        | 17 - 21 | Integer      | serial       | Serial number of bonded atom                       |
        | 22 - 26 | Integer      | serial       | Serial number of bonded atom                       |
        | 27 - 31 | Integer      | serial       | Serial number of bonded atom                       |
        |_________|______________|______________|____________________________________________________|


    Details
    -------

    CONECT records are present for:

        - Intra-residue connectivity within non-standard (HET) residues (excluding water).
        - Inter-residue connectivity of HET groups to standard groups (including water) or to other HET groups.
        - Disulfide bridges specified in the SSBOND records have corresponding records.

    No differentiation is made between atoms with delocalized charges (excess negative or positive charge).

    Atoms specified in the CONECT records have the same numbers as given in the coordinate section.

    All atoms connected to the atom with serial number in columns 7 - 11 are listed in the remaining fields of the record.

    If more than four fields are required for non-hydrogen and non-salt bridges, a second CONECT record with the same atom serial number in columns 7 - 11 will be used.

    These CONECT records occur in increasing order of the atom serial numbers they carry in columns 7 - 11.  The target-atom serial numbers carried on these records also occur in increasing order.

    The connectivity list given here is redundant in that each bond indicated is given twice, once with each of the two atoms involved specified in columns 7 - 11.

    For hydrogen bonds, when the hydrogen atom is present in the coordinates, a CONECT record between the hydrogen atom and its acceptor atom is generated.

    For NMR entries, CONECT records for one model are generated describing heterogen connectivity and others for LINK records assuming that all models are homogeneous models.


    Verification/Validation/Value Authority Control
    -----------------------------------------------

    Connectivity is checked for unusual bond lengths.


    Relationships to Other Record Types
    -----------------------------------

    CONECT records must be present in an entry that contains either non-standard groups or disulfide bonds.


    Example
    -------

    Example 1::

                 1         2         3         4         5         6         7         8
        12345678901234567890123456789012345678901234567890123456789012345678901234567890
        CONECT 1179  746 1184 1195 1203
        CONECT 1179 1211 1222
        CONECT 1021  544 1017 1020 1022


    Known Problems
    --------------

    CONECT records involving atoms for which the coordinates are not present in the entry (e.g., symmetry-generated) are not given.

    CONECT records involving atoms for which the coordinates are missing due to disorder, are also not provided.


    @param file:        The file to write the record to.
    @type file:         file object
    @keyword serial:    The atom serial number.
    @type serial:       int
    @keyword bonded1:   The serial number of the bonded atom.
    @type bonded1:      int
    @keyword bonded2:   The serial number of the bonded atom.
    @type bonded2:      int
    @keyword bonded3:   The serial number of the bonded atom.
    @type bonded3:      int
    @keyword bonded4:   The serial number of the bonded atom.
    @type bonded4:      int
    """

    # The five serial number fields, each right-justified in 5 columns (columns 7 - 31).
    serial_fields = ''.join(["%5s" % _handle_none(number) for number in (serial, bonded1, bonded2, bonded3, bonded4)])

    # Assemble the record:  the record name, the serial numbers, then the blank columns 32 - 80.
    record = "%-6s" % 'CONECT' + serial_fields + "%49s" % ''

    # Validate.
    _record_validate(record)

    # Write out the formatted record, then the line terminator.
    for chunk in (record, '\n'):
        file.write(chunk)

463 464

def end(file):
    """Generate the END record.

    The following is the PDB v3.3 documentation U{http://www.wwpdb.org/documentation/file-format/format33/sect11.html#END}.

    END
    ===

    Overview
    --------

    The END record marks the end of the PDB file.


    Record Format
    -------------

    The format is::
        ______________________________________________________________________________________________
        |         |              |              |                                                    |
        | Columns | Data type    | Field        | Definition                                         |
        |_________|______________|______________|____________________________________________________|
        |         |              |              |                                                    |
        | 1 -  6  | Record name  | "END   "     |                                                    |
        |_________|______________|______________|____________________________________________________|


    Details
    -------

    END is the final record of a coordinate entry.


    Verification/Validation/Value Authority Control
    -----------------------------------------------

    END must appear in every coordinate entry.


    Relationships to Other Record Types
    -----------------------------------

    This is the final record in the entry.


    Example
    -------

    Example 1::

                 1         2         3         4         5         6         7         8
        12345678901234567890123456789012345678901234567890123456789012345678901234567890
        END


    @param file:        The file to write the record to.
    @type file:         file object
    """

    # The record name padded out with blanks to the full 80 columns.
    record = 'END'.ljust(80)

    # Validate.
    _record_validate(record)

    # Write out the formatted record, then the line terminator.
    for chunk in (record, '\n'):
        file.write(chunk)

533 534

def endmdl(file):
    """Generate the ENDMDL record.

    The following is the PDB v3.3 documentation U{http://www.wwpdb.org/documentation/file-format/format33/v3.3.html}.

    ENDMDL
    ======

    Overview
    --------

    The ENDMDL records are paired with MODEL records to group individual structures found in a coordinate entry.


    Record Format
    -------------

    The format is::
        ______________________________________________________________________________________________
        |         |              |              |                                                    |
        | Columns | Data type    | Field        | Definition                                         |
        |_________|______________|______________|____________________________________________________|
        |         |              |              |                                                    |
        | 1 -  6  | Record name  | "ENDMDL"     |                                                    |
        |_________|______________|______________|____________________________________________________|


    Details
    -------

    MODEL/ENDMDL records are used only when more than one structure is presented in the entry, as is often the case with NMR entries.

    All the models in a multi-model entry must represent the same structure.

    Every MODEL record has an associated ENDMDL record.


    Verification/Validation/Value Authority Control
    -----------------------------------------------

    Entries with multiple structures in the NUMMDL record are checked for corresponding pairs of MODEL/ENDMDL records, and for consecutively numbered models.


    Relationships to Other Record Types
    -----------------------------------

    There must be a corresponding MODEL record.

    In the case of an NMR entry, the NUMMDL record states the number of model structures that are present in the individual entry.


    Example
    -------

    Example 1::

                 1         2         3         4         5         6         7         8
        12345678901234567890123456789012345678901234567890123456789012345678901234567890
        ...
        ATOM  14550 1HG  GLU   122     -14.364  14.787 -14.258  1.00  0.00           H
        ATOM  14551 2HG  GLU   122     -13.794  13.738 -12.961  1.00  0.00           H
        TER   14552      GLU   122
        ENDMDL
        MODEL        9
        ATOM  14553  N   SER     1     -28.280   1.567  12.004  1.00  0.00           N
        ATOM  14554  CA  SER     1     -27.749   0.392  11.256  1.00  0.00           C
        ...
        ATOM  16369 1HG  GLU   122      -3.757  18.546  -8.439  1.00  0.00           H
        ATOM  16370 2HG  GLU   122      -3.066  17.166  -7.584  1.00  0.00           H
        TER   16371      GLU   122
        ENDMDL


    @param file:        The file to write the record to.
    @type file:         file object
    """

    # The record name padded out with blanks to the full 80 columns.
    record = 'ENDMDL'.ljust(80)

    # Validate.
    _record_validate(record)

    # Write out the formatted record, then the line terminator.
    for chunk in (record, '\n'):
        file.write(chunk)

623 624

def formul(file, comp_num='', het_id='', continuation='', asterisk='', text=''):
    """Generate the FORMUL record.

    The following is the PDB v3.3 documentation U{http://www.wwpdb.org/documentation/file-format/format33/sect4.html#FORMUL}.

    FORMUL
    ======

    Overview
    --------

    The FORMUL record presents the chemical formula and charge of a non-standard group.


    Record Format
    -------------

    The format is::
        ______________________________________________________________________________________________
        |         |              |              |                                                    |
        | Columns | Data type    | Field        | Definition                                         |
        |_________|______________|______________|____________________________________________________|
        |         |              |              |                                                    |
        | 1 -  6  | Record name  | "FORMUL"     |                                                    |
        | 9 - 10  | Integer      | compNum      | Component number.                                  |
        | 13 - 15 | LString(3)   | hetID        | Het identifier.                                    |
        | 17 - 18 | Integer      | continuation | Continuation number.                               |
        | 19      | Character    | asterisk     | "*" for water.                                     |
        | 20 - 70 | String       | text         | Chemical formula.                                  |
        |_________|______________|______________|____________________________________________________|


    Details
    -------

    The elements of the chemical formula are given in the order following Hill ordering.  The order of elements depends on whether carbon is present or not.  If carbon is present, the order should be: C, then H, then the other elements in alphabetical order of their symbol.  If carbon is not present, the elements are listed purely in alphabetic order of their symbol.  This is the 'Hill' system used by Chemical Abstracts.

    The number of each atom type present immediately follows its chemical symbol without an intervening blank space.  There will be no number indicated if there is only one atom for a particular atom type.

    Each set of SEQRES records and each HET group is assigned a component number in an entry.  These numbers are assigned serially, beginning with 1 for the first set of SEQRES records.  In addition:

        - If a HET group is presented on a SEQRES record its FORMUL is assigned the component number of the chain in which it appears.
        - If the HET group occurs more than once and is not presented on SEQRES records, the component number of its first occurrence is used.

    All occurrences of the HET group within a chain are grouped together with a multiplier.  The remaining occurrences are also grouped with a multiplier.  The sum of the multipliers is the number equaling the number of times that that HET group appears in the entry.

    A continuation field is provided in the event that more space is needed for the formula.  Columns 17 - 18 are used in order to maintain continuity with the existing format.


    Verification/Validation/Value Authority Control
    -----------------------------------------------

    For each het group that appears in the entry, the corresponding HET, HETNAM, FORMUL, HETATM, and CONECT records must appear.  The FORMUL record is generated automatically by PDB processing programs using the het group template file and information from HETATM records.  UNL, UNK and UNX will not be listed in FORMUL even though these het groups present in the coordinate section.


    Relationships to Other Record Types
    -----------------------------------

    For each het group that appears in the entry, the corresponding HET, HETNAM, FORMUL, HETATM, and CONECT records must appear.


    Example
    -------

    Example 1::

                 1         2         3         4         5         6         7         8
        12345678901234567890123456789012345678901234567890123456789012345678901234567890
        FORMUL   3   MG    2(MG 2+)
        FORMUL   5  SO4    6(O4 S 2-)
        FORMUL  13  HOH   *360(H2 O)

        FORMUL   3  NAP    2(C21 H28 N7 O17 P3)
        FORMUL   4  FOL    2(C19 H19 N7 O6)
        FORMUL   5  1PE    C10 H22 O6

        FORMUL   2  NX5    C14 H10 O2 CL2 S


    @param file:            The file to write the record to.
    @type file:             file object
    @keyword comp_num:      The component number.
    @type comp_num:         int
    @keyword het_id:        The Het identifier.
    @type het_id:           str
    @keyword continuation:  Allows concatenation of multiple records.
    @type continuation:     int
    @keyword asterisk:      "*" for water.
    @type asterisk:         str
    @keyword text:          Text describing the Het group.
    @type text:             str
    @raises RelaxError:     If the formatted record fails the checks of _record_validate().
    """

    # The record layout, with the blank column ranges spelt out explicitly so that the blank runs cannot be miscounted.
    record_format = (
        "%-6s"              # Columns 1 - 6, the record name.
        + ' '*2             # Columns 7 - 8 (unused).
        + "%2s"             # Columns 9 - 10, the component number.
        + ' '*2             # Columns 11 - 12 (unused).
        + "%3s"             # Columns 13 - 15, the Het identifier.
        + ' '               # Column 16 (unused).
        + "%2s%1s%-51s"     # Columns 17 - 70, the continuation, asterisk and formula.
        + ' '*10            # Columns 71 - 80 (unused).
    )

    # The formatted record (a separate name is used to avoid rebinding the 'text' argument).
    record = record_format % (
        'FORMUL',
        _handle_none(comp_num),
        _handle_none(het_id),
        _handle_none(continuation),
        _handle_none(asterisk),
        _handle_none(text)
    )

    # Validate.
    _record_validate(record)

    # Write out the formatted record.
    file.write(record)
    file.write('\n')

735 736

def helix(file, ser_num='', helix_id='', init_res_name='', init_chain_id='', init_seq_num='', init_icode='', end_res_name='', end_chain_id='', end_seq_num='', end_icode='', helix_class='', comment='', length=''):
    """Generate the HELIX record.

    The following is the PDB v3.3 documentation U{http://www.wwpdb.org/documentation/file-format/format33/sect5.html#HELIX}.

    HELIX
    =====

    Overview
    --------

    HELIX records are used to identify the position of helices in the molecule.  Helices are named, numbered, and classified by type.  The residues where the helix begins and ends are noted, as well as the total length.


    Record Format
    -------------

    The format is::
        ______________________________________________________________________________________________
        |         |              |              |                                                    |
        | Columns | Data type    | Field        | Definition                                         |
        |_________|______________|______________|____________________________________________________|
        |         |              |              |                                                    |
        | 1 -  6  | Record name  | "HELIX "     |                                                    |
        | 8 - 10  | Integer      | serNum       | Serial number of the helix. This starts at 1 and   |
        |         |              |              | increases incrementally.                           |
        | 12 - 14 | LString(3)   | helixID      | Helix identifier. In addition to a serial number,  |
        |         |              |              | each helix is given an alphanumeric character      |
        |         |              |              | helix identifier.                                  |
        | 16 - 18 | Residue name | initResName  | Name of the initial residue.                       |
        | 20      | Character    | initChainID  | Chain identifier for the chain containing this     |
        |         |              |              | helix.                                             |
        | 22 - 25 | Integer      | initSeqNum   | Sequence number of the initial residue.            |
        | 26      | AChar        | initICode    | Insertion code of the initial residue.             |
        | 28 - 30 | Residue name | endResName   | Name of the terminal residue of the helix.         |
        | 32      | Character    | endChainID   | Chain identifier for the chain containing this     |
        |         |              |              | helix.                                             |
        | 34 - 37 | Integer      | endSeqNum    | Sequence number of the terminal residue.           |
        | 38      | AChar        | endICode     | Insertion code of the terminal residue.            |
        | 39 - 40 | Integer      | helixClass   | Helix class (see below).                           |
        | 41 - 70 | String       | comment      | Comment about this helix.                          |
        | 72 - 76 | Integer      | length       | Length of this helix.                              |
        |_________|______________|______________|____________________________________________________|


    Details
    -------

    Additional HELIX records with different serial numbers and identifiers occur if more than one helix is present.

    The initial residue of the helix is the N-terminal residue.

    Helices are classified as follows::

        _____________________________________________________
        |                               |   CLASS NUMBER    |
        | TYPE OF HELIX                 | (COLUMNS 39 - 40) |
        |_______________________________|___________________|
        |                               |                   |
        | Right-handed alpha (default)  |         1         |
        | Right-handed omega            |         2         |
        | Right-handed pi               |         3         |
        | Right-handed gamma            |         4         |
        | Right-handed 3 - 10           |         5         |
        | Left-handed alpha             |         6         |
        | Left-handed omega             |         7         |
        | Left-handed gamma             |         8         |
        | 2 - 7 ribbon/helix            |         9         |
        | Polyproline                   |        10         |
        |_______________________________|___________________|


    Relationships to Other Record Types
    -----------------------------------

    There may be related information in the REMARKs.


    Example
    -------

    Example 1::

                 1         2         3         4         5         6         7         8
        12345678901234567890123456789012345678901234567890123456789012345678901234567890
        HELIX    1  HA GLY A   86  GLY A   94  1                                   9
        HELIX    2  HB GLY B   86  GLY B   94  1                                   9

        HELIX   21  21 PRO J  385  LEU J  388  5                                   4
        HELIX   22  22 PHE J  397  PHE J  402  5                                   6


    @param file:            The file to write the record to.
    @type file:             file object
    @keyword ser_num:       The helix serial number
    @type ser_num:          int
    @keyword helix_id:      The helix identifier
    @type helix_id:         str
    @keyword init_res_name: The name of the initial residue
    @type init_res_name:    str
    @keyword init_chain_id: The chain identifier
    @type init_chain_id:    str
    @keyword init_seq_num:  The sequence number of the initial residue
    @type init_seq_num:     int
    @keyword init_icode:    The insertion code of the initial residue
    @type init_icode:       str
    @keyword end_res_name:  The name of the terminal residue
    @type end_res_name:     str
    @keyword end_chain_id:  The chain identifier
    @type end_chain_id:     str
    @keyword end_seq_num:   The sequence number of the terminal residue
    @type end_seq_num:      int
    @keyword end_icode:     The insertion code of the terminal residue
    @type end_icode:        str
    @keyword helix_class:   The helix class
    @type helix_class:      int
    @keyword comment:       The comment
    @type comment:          str
    @keyword length:        The helix length.
    @type length:           int
    @raises RelaxError:     If the formatted record fails the checks of _record_validate().
    """

    # The record layout.  Columns 1 - 76 hold the fields (the single blanks are the unused columns between them), and the unused columns 77 - 80 are spelt out explicitly so that the trailing blank run cannot be miscounted.
    record_format = "%-6s %3s %3s %3s %1s %4s%1s %3s %1s %4s%1s%2s%30s %5s" + ' '*4

    # The formatted record.
    record = record_format % (
        'HELIX',
        _handle_none(ser_num),
        _handle_none(helix_id),
        _handle_none(init_res_name),
        _handle_none(init_chain_id),
        _handle_none(init_seq_num),
        _handle_none(init_icode),
        _handle_none(end_res_name),
        _handle_none(end_chain_id),
        _handle_none(end_seq_num),
        _handle_none(end_icode),
        _handle_none(helix_class),
        _handle_none(comment),
        _handle_none(length)
    )

    # Validate.
    _record_validate(record)

    # Write out the formatted record.
    file.write(record)
    file.write('\n')

883 884

def het(file, het_id='', chain_id='', seq_num='', icode='', num_het_atoms='', text=''):
    """Generate the HET record.

    The following is the PDB v3.3 documentation U{http://www.wwpdb.org/documentation/file-format/format33/sect4.html#HET}.

    HET
    ===

    Overview
    --------

    HET records are used to describe non-standard residues, such as prosthetic groups, inhibitors, solvent molecules, and ions for which coordinates are supplied.  Groups are considered HET if they are not part of a biological polymer described in SEQRES and considered to be a molecule bound to the polymer, or they are a chemical species that constitute part of a biological polymer and is not one of the following:

        - standard amino acids, or
        - standard nucleic acids (C, G, A, U, I, DC, DG, DA, DU, DT and DI), or
        - unknown amino acid (UNK) or nucleic acid (N) where UNK and N are used to indicate the unknown residue name.

    HET records also describe chemical components for which the chemical identity is unknown, in which case the group is assigned the hetID UNL (Unknown Ligand).

    The heterogen section of a PDB formatted file contains the complete description of non-standard residues in the entry.


    Record Format
    -------------

    The format is::
     ______________________________________________________________________________________________
     |         |              |              |                                                    |
     | Columns | Data type    | Field        | Definition                                         |
     |_________|______________|______________|____________________________________________________|
     |         |              |              |                                                    |
     |  1 -  6 | Record name  | "HET   "     |                                                    |
     |  8 - 10 | LString(3)   | hetID        | Het identifier, right-justified.                   |
     | 13      | Character    | ChainID      | Chain identifier.                                  |
     | 14 - 17 | Integer      | seqNum       | Sequence number.                                   |
     | 18      | AChar        | iCode        | Insertion code.                                    |
     | 21 - 25 | Integer      | numHetAtoms  | Number of HETATM records for the group present in  |
     |         |              |              | the entry.                                         |
     | 31 - 70 | String       | text         | Text describing Het group.                         |
     |_________|______________|______________|____________________________________________________|


    Details
    -------

    Each HET group is assigned a hetID of not more than three (3) alphanumeric characters.  The sequence number, chain identifier, insertion code, and number of coordinate records are given for each occurrence of the HET group in the entry.  The chemical name of the HET group is given in the HETNAM record and synonyms for the chemical name are given in the HETSYN records, see U{ftp://ftp.wwpdb.org/pub/pdb/data/monomers}.

    There is a separate HET record for each occurrence of the HET group in an entry.

    A particular HET group is represented in the PDB archive with a unique hetID.

    PDB entries do not have HET records for water molecules, deuterated water, or methanol (when used as solvent).

    Unknown atoms or ions will be represented as UNX with the chemical formula X1.  Unknown ligands are UNL; unknown amino acids are UNK.


    Verification/Validation/Value Authority Control
    -----------------------------------------------

    For each het group that appears in the entry, the wwPDB checks that the corresponding HET, HETNAM, HETSYN, FORMUL, HETATM, and CONECT records appear, if applicable.  The HET record is generated automatically using the Chemical Component Dictionary and information from the HETATM records.

    Each unique hetID represents a unique molecule.


    Relationships to Other Record Types
    -----------------------------------

    For each het group that appears in the entry, there must be corresponding HET, HETNAM, HETSYN, FORMUL, HETATM, and CONECT records.  LINK records may also be created.


    Example
    -------

    Example 1::

                 1         2         3         4         5         6         7         8
        12345678901234567890123456789012345678901234567890123456789012345678901234567890
        HET    TRS  B 975       8

        HET    UDP  A1457      25
        HET    B3P  A1458      19

        HET    NAG  Y   3      15
        HET    FUC  Y   4      10
        HET    NON  Y   5      12
        HET    UNK  A 161       1


    @param file:                The file to write the record to.
    @type file:                 file object
    @keyword het_id:            The Het identifier.
    @type het_id:               str
    @keyword chain_id:          The chain identifier.
    @type chain_id:             str
    @keyword seq_num:           The sequence number.
    @type seq_num:              int
    @keyword icode:             The insertion code.
    @type icode:                str
    @keyword num_het_atoms:     The number of HETATM records for the group present in the entry.
    @type num_het_atoms:        int
    @keyword text:              Text describing the Het group.
    @type text:                 str
    """

    # The formatted record.  The literal spaces are part of the fixed 80 column layout:
    #   col 7 (1 space), cols 11-12 (2), cols 19-20 (2), cols 26-30 (5), cols 71-80 (blank padding).
    # NOTE(review): _handle_none() and _handle_text() are module helpers defined outside this block - presumably they map None to an empty string and normalise the free text; confirm against their definitions.
    record = "%-6s %3s  %1s%4s%1s  %5s     %-40s%10s" % (
        'HET',
        _handle_none(het_id),
        _handle_none(chain_id),
        _handle_none(seq_num),
        _handle_none(icode),
        _handle_none(num_het_atoms),
        _handle_text(text),
        ''
    )

    # Validate (module helper defined outside this block).
    _record_validate(record)

    # Write out the formatted record.
    file.write(record)
    file.write('\n')

1007 1008

def hetatm(file, serial='', name='', alt_loc='', res_name='', chain_id='', res_seq='', icode='', x='', y='', z='', occupancy=1.0, temp_factor=0.0, element='', charge=''):
    """Generate the HETATM record.

    The following is the PDB v3.3 documentation U{http://www.wwpdb.org/documentation/file-format/format33/sect9.html#HETATM}.

    HETATM
    ======

    Overview
    --------

    Non-polymer or other "non-standard" chemical coordinates, such as water molecules or atoms presented in HET groups use the HETATM record type.  They also present the occupancy and temperature factor for each atom.  The ATOM records present the atomic coordinates for standard residues.  The element symbol is always present on each HETATM record; charge is optional.

    Changes in ATOM/HETATM records will require standardization in atom and residue nomenclature.  This nomenclature is described in the Chemical Component Dictionary, U{ftp://ftp.wwpdb.org/pub/pdb/data/monomers}.


    Record Format
    -------------

    The format is::
     ______________________________________________________________________________________________
     |         |              |              |                                                    |
     | Columns | Data type    | Field        | Definition                                         |
     |_________|______________|______________|____________________________________________________|
     |         |              |              |                                                    |
     |  1 -  6 | Record name  | "HETATM"     |                                                    |
     |  7 - 11 | Integer      | serial       | Atom serial number.                                |
     | 13 - 16 | Atom         | name         | Atom name.                                         |
     | 17      | Character    | altLoc       | Alternate location indicator.                      |
     | 18 - 20 | Residue name | resName      | Residue name.                                      |
     | 22      | Character    | chainID      | Chain identifier.                                  |
     | 23 - 26 | Integer      | resSeq       | Residue sequence number.                           |
     | 27      | AChar        | iCode        | Code for insertion of residues.                    |
     | 31 - 38 | Real(8.3)    | x            | Orthogonal coordinates for X.                      |
     | 39 - 46 | Real(8.3)    | y            | Orthogonal coordinates for Y.                      |
     | 47 - 54 | Real(8.3)    | z            | Orthogonal coordinates for Z.                      |
     | 55 - 60 | Real(6.2)    | occupancy    | Occupancy.                                         |
     | 61 - 66 | Real(6.2)    | tempFactor   | Temperature factor.                                |
     | 77 - 78 | LString(2)   | element      | Element symbol; right-justified.                   |
     | 79 - 80 | LString(2)   | charge       | Charge on the atom.                                |
     |_________|______________|______________|____________________________________________________|


    Details
    -------

    The x, y, z coordinates are in Angstrom units.

    No ordering is specified for polysaccharides.

    See the HET section of this document regarding naming of heterogens.  See the Chemical Component Dictionary for residue names, formulas, and topology of the HET groups that have appeared so far in the PDB (see U{ftp://ftp.wwpdb.org/pub/pdb/data/monomers}).

    If the depositor provides the data, then the isotropic B value is given for the temperature factor.

    If there are neither isotropic B values provided by the depositor, nor anisotropic temperature factors in ANISOU, then the default value of 0.0 is used for the temperature factor.

    Insertion codes and element naming are fully described in the ATOM section of this document.


    Verification/Validation/Value Authority Control
    -----------------------------------------------

    Processing programs check ATOM/HETATM records for PDB file format, sequence information, and packing.


    Relationships to Other Record Types
    -----------------------------------

    HETATM records must have corresponding HET, HETNAM, FORMUL and CONECT records, except for waters.


    Example
    -------

    Example 1::

                 1         2         3         4         5         6         7         8
        12345678901234567890123456789012345678901234567890123456789012345678901234567890
        HETATM 8237 MG    MG A1001      13.872  -2.555 -29.045  1.00 27.36          MG

        HETATM 3835 FE   HEM A   1      17.140   3.115  15.066  1.00 14.14          FE
        HETATM 8238  S   SO4 A2001      10.885 -15.746 -14.404  1.00 47.84           S
        HETATM 8239  O1  SO4 A2001      11.191 -14.833 -15.531  1.00 50.12           O
        HETATM 8240  O2  SO4 A2001       9.576 -16.338 -14.706  1.00 48.55           O
        HETATM 8241  O3  SO4 A2001      11.995 -16.703 -14.431  1.00 49.88           O
        HETATM 8242  O4  SO4 A2001      10.932 -15.073 -13.100  1.00 49.91           O


    @param file:            The file to write the record to.
    @type file:             file object
    @keyword serial:        The atom serial number.
    @type serial:           int
    @keyword name:          The atom name.
    @type name:             str
    @keyword alt_loc:       The alternate location indicator.
    @type alt_loc:          str
    @keyword res_name:      The residue name.
    @type res_name:         str
    @keyword chain_id:      The chain identifier.
    @type chain_id:         str
    @keyword res_seq:       The sequence number.
    @type res_seq:          int
    @keyword icode:         The insertion code.
    @type icode:            str
    @keyword x:             Orthogonal coordinates for X in Angstroms.
    @type x:                float
    @keyword y:             Orthogonal coordinates for Y in Angstroms.
    @type y:                float
    @keyword z:             Orthogonal coordinates for Z in Angstroms.
    @type z:                float
    @keyword occupancy:     Occupancy.
    @type occupancy:        float
    @keyword temp_factor:   Temperature factor.
    @type temp_factor:      float
    @keyword element:       Element symbol.
    @type element:          str
    @keyword charge:        Charge on the atom.
    @type charge:           int
    """

    # Coordinate bounds - the most extreme values that still fit into the 8 character Real(8.3) fields.
    pdb_min = -999.999
    pdb_max = 9999.999

    # Clamp the coordinates to these bounds, skipping any left at the '' default.
    coord = [x, y, z]
    for i, value in enumerate(coord):
        if value != '' and value < pdb_min:
            coord[i] = pdb_min
        if value != '' and value > pdb_max:
            coord[i] = pdb_max

    # The formatted record.  The literal spaces are part of the fixed 80 column layout:
    #   col 12 (1 space), col 21 (1), cols 28-30 (3) and cols 67-76 (10).
    # NOTE(review): the %8.3f and %6.2f conversions require numbers, so the coordinates must be supplied unless _handle_none() (defined outside this block) converts the '' default - confirm.
    record = "%-6s%5s %4s%1s%3s %1s%4s%1s   %8.3f%8.3f%8.3f%6.2f%6.2f          %2s%2s" % (
        'HETATM',
        _handle_none(serial),
        _handle_none(name),
        _handle_none(alt_loc),
        _handle_none(res_name),
        _handle_none(chain_id),
        _handle_none(res_seq),
        _handle_none(icode),
        _handle_none(coord[0]),
        _handle_none(coord[1]),
        _handle_none(coord[2]),
        _handle_none(occupancy),
        _handle_none(temp_factor),
        _handle_none(element),
        _handle_none(charge)
    )

    # Validate (module helper defined outside this block).
    _record_validate(record)

    # Write out the formatted record.
    file.write(record)
    file.write('\n')

1164 1165

def hetnam(file, continuation='', het_id='', text=''):
    """Generate the HETNAM record.

    The following is the PDB v3.3 documentation U{http://www.wwpdb.org/documentation/file-format/format33/sect4.html#HETNAM}.

    HETNAM
    ======

    Overview
    --------

    This record gives the chemical name of the compound with the given hetID.


    Record Format
    -------------

    The format is::
     ______________________________________________________________________________________________
     |         |              |              |                                                    |
     | Columns | Data type    | Field        | Definition                                         |
     |_________|______________|______________|____________________________________________________|
     |         |              |              |                                                    |
     |  1 -  6 | Record name  | "HETNAM"     |                                                    |
     |  9 - 10 | Continuation | continuation | Allows concatenation of multiple records.          |
     | 12 - 14 | LString(3)   | hetID        | Het identifier, right-justified.                   |
     | 16 - 70 | String       | text         | Chemical name.                                     |
     |_________|______________|______________|____________________________________________________|


    Details
    -------

    Each hetID is assigned a unique chemical name for the HETNAM record, see U{ftp://ftp.wwpdb.org/pub/pdb/data/monomers}.

    Other names for the group are given on HETSYN records.

    PDB entries follow IUPAC/IUB naming conventions to describe groups systematically.

    The special character "~" is used to indicate superscript in a heterogen name.  For example: N6 will be listed in the HETNAM section as N~6~, with the ~ character indicating both the start and end of the superscript in the name, e.g.:

        - N-(BENZYLSULFONYL)SERYL-N~1~-{4-[AMINO(IMINO)METHYL]BENZYL}GLYCINAMIDE

    Continuation of chemical names onto subsequent records is allowed.

    Only one HETNAM record is included for a given hetID, even if the same hetID appears on more than one HET record.


    Verification/Validation/Value Authority Control
    -----------------------------------------------

    For each het group that appears in the entry, the corresponding HET, HETNAM, FORMUL, HETATM, and CONECT records must appear.  The HETNAM record is generated automatically using the Chemical Component Dictionary and information from HETATM records.


    Relationships to Other Record Types
    -----------------------------------

    For each het group that appears in the entry, there must be corresponding HET, HETNAM, FORMUL, HETATM, and CONECT records.  HETSYN and LINK records may also be created.


    Example
    -------

    Example 1::

                 1         2         3         4         5         6         7         8
        12345678901234567890123456789012345678901234567890123456789012345678901234567890
        HETNAM     NAG N-ACETYL-D-GLUCOSAMINE
        HETNAM     SAD BETA-METHYLENE SELENAZOLE-4-CARBOXAMIDE ADENINE
        HETNAM   2 SAD DINUCLEOTIDE

        HETNAM     UDP URIDINE-5'-DIPHOSPHATE

        HETNAM     UNX UNKNOWN ATOM OR ION
        HETNAM     UNL UNKNOWN LIGAND

        HETNAM     B3P 2-[3-(2-HYDROXY-1,1-DIHYDROXYMETHYL-ETHYLAMINO)-
        HETNAM   2 B3P PROPYLAMINO]-2-HYDROXYMETHYL-PROPANE-1,3-DIOL


    @param file:            The file to write the record to.
    @type file:             file object
    @keyword continuation:  Allows concatenation of multiple records.
    @type continuation:     int
    @keyword het_id:        The Het identifier.
    @type het_id:           str
    @keyword text:          The chemical name.
    @type text:             str
    """

    # The formatted record.  The literal spaces are part of the fixed 80 column layout:
    #   cols 7-8 (2 spaces), col 11 (1), col 15 (1) and cols 71-80 (blank padding).
    # NOTE(review): _handle_none() and _handle_text() are module helpers defined outside this block - presumably they map None to an empty string and normalise the free text; confirm against their definitions.
    record = "%-6s  %2s %3s %-55s%10s" % (
        'HETNAM',
        _handle_none(continuation),
        _handle_none(het_id),
        _handle_text(text),
        ''
    )

    # Validate (module helper defined outside this block).
    _record_validate(record)

    # Write out the formatted record.
    file.write(record)
    file.write('\n')

1271 1272

def master(file, num_remark=0, num_het=0, num_helix=0, num_sheet=0, num_turn=0, num_site=0, num_xform=0, num_coord=0, num_ter=0, num_conect=0, num_seq=0):
    """Generate the MASTER record.

    The following is the PDB v3.3 documentation U{http://www.wwpdb.org/documentation/file-format/format33/sect11.html#MASTER}.

    MASTER
    ======

    Overview
    --------

    The MASTER record is a control record for bookkeeping.  It lists the number of lines in the coordinate entry or file for selected record types.  MASTER records only the first model when there are multiple models in the coordinates.


    Record Format
    -------------

    The format is::
     ______________________________________________________________________________________________
     |         |              |              |                                                    |
     | Columns | Data type    | Field        | Definition                                         |
     |_________|______________|______________|____________________________________________________|
     |         |              |              |                                                    |
     |  1 -  6 | Record name  | "MASTER"     |                                                    |
     | 11 - 15 | Integer      | numRemark    | Number of REMARK records                           |
     | 16 - 20 | Integer      | "0"          |                                                    |
     | 21 - 25 | Integer      | numHet       | Number of HET records                              |
     | 26 - 30 | Integer      | numHelix     | Number of HELIX records                            |
     | 31 - 35 | Integer      | numSheet     | Number of SHEET records                            |
     | 36 - 40 | Integer      | numTurn      | deprecated                                         |
     | 41 - 45 | Integer      | numSite      | Number of SITE records                             |
     | 46 - 50 | Integer      | numXform     | Number of coordinate transformation records        |
     |         |              |              | (ORIGX+SCALE+MTRIX)                                |
     | 51 - 55 | Integer      | numCoord     | Number of atomic coordinate records (ATOM+HETATM)  |
     | 56 - 60 | Integer      | numTer       | Number of TER records                              |
     | 61 - 65 | Integer      | numConect    | Number of CONECT records                           |
     | 66 - 70 | Integer      | numSeq       | Number of SEQRES records                           |
     |_________|______________|______________|____________________________________________________|


    Details
    -------

    MASTER gives checksums of the number of records in the entry, for selected record types.

    MASTER records only the first model when there are multiple models in the coordinates.


    Verification/Validation/Value Authority Control
    -----------------------------------------------

    The MASTER line is automatically generated.


    Relationships to Other Record Types
    -----------------------------------

    MASTER presents a checksum of the lines present for each of the record types listed above.


    Example
    -------

    Example 1::

                 1         2         3         4         5         6         7         8
        12345678901234567890123456789012345678901234567890123456789012345678901234567890
        MASTER       40    0    0    0    0    0    0    6 2930    2    0   29


    @param file:            The file to write the record to.
    @type file:             file object
    @keyword num_remark:    The number of REMARK records.
    @type num_remark:       int
    @keyword num_het:       The number of HET records.
    @type num_het:          int
    @keyword num_helix:     The number of HELIX records.
    @type num_helix:        int
    @keyword num_sheet:     The number of SHEET records.
    @type num_sheet:        int
    @keyword num_turn:      Deprecated.
    @type num_turn:         int
    @keyword num_site:      The number of SITE records.
    @type num_site:         int
    @keyword num_xform:     The number of coordinate transformation records (ORIGX+SCALE+MTRIX).
    @type num_xform:        int
    @keyword num_coord:     The number of atomic coordinate records (ATOM+HETATM).
    @type num_coord:        int
    @keyword num_ter:       The number of TER records.
    @type num_ter:          int
    @keyword num_conect:    The number of CONECT records.
    @type num_conect:       int
    @keyword num_seq:       The number of SEQRES records.
    @type num_seq:          int
    """

    # The formatted record.  Columns 7-10 are blank (4 spaces), followed by twelve 5 character integer fields (columns 11-70) and 10 characters of blank padding (columns 71-80).
    # The second integer field (columns 16-20) is the fixed "0" from the format table.
    # NOTE(review): _handle_none() is a module helper defined outside this block - presumably it maps None to an empty string; confirm against its definition.
    record = "%-6s    %5s%5s%5s%5s%5s%5s%5s%5s%5s%5s%5s%5s%10s" % (
        'MASTER',
        _handle_none(num_remark),
        0,
        _handle_none(num_het),
        _handle_none(num_helix),
        _handle_none(num_sheet),
        _handle_none(num_turn),
        _handle_none(num_site),
        _handle_none(num_xform),
        _handle_none(num_coord),
        _handle_none(num_ter),
        _handle_none(num_conect),
        _handle_none(num_seq),
        ''
    )

    # Validate (module helper defined outside this block).
    _record_validate(record)

    # Write out the formatted record.
    file.write(record)
    file.write('\n')

1393 1394

def model(file, serial=''):
    """Generate the MODEL record.

    The following is the PDB v3.3 documentation U{http://www.wwpdb.org/documentation/file-format/format33/sect9.html#MODEL}.

    MODEL
    =====

    Overview
    --------

    The MODEL record specifies the model serial number when multiple models of the same structure are presented in a single coordinate entry, as is often the case with structures determined by NMR.


    Record Format
    -------------

    The format is::
     ______________________________________________________________________________________________
     |         |              |              |                                                    |
     | Columns | Data type    | Field        | Definition                                         |
     |_________|______________|______________|____________________________________________________|
     |         |              |              |                                                    |
     |  1 -  6 | Record name  | "MODEL "     |                                                    |
     | 11 - 14 | Integer      | serial       | Model serial number.                               |
     |_________|______________|______________|____________________________________________________|


    Details
    -------

    This record is used only when more than one model appears in an entry.  Generally, it is employed mainly for NMR structures.  The chemical connectivity should be the same for each model.  ATOM, HETATM, ANISOU, and TER records for each model structure and are interspersed as needed between MODEL and ENDMDL records.

    The numbering of models is sequential, beginning with 1.

    All models in a deposition should be superimposed in an appropriate author determined manner and only one superposition method should be used.  Structures from different experiments, or different domains of a structure should not be superimposed and deposited as models of a deposition.

    All models in an NMR ensemble should be homogeneous - each model should have the exact same atoms (hydrogen and heavy atoms), sequence and chemistry.

    All models in an NMR entry should have hydrogen atoms.

    Deposition of minimized average structure must be accompanied with ensemble and must be homogeneous with ensemble.

    A model cannot have more than 99,999 atoms.  Where the entry does not contain an ensemble of models, then the entry cannot have more than 99,999 atoms.  Entries that go beyond this atom limit must be split into multiple entries, each containing no more than the limits specified above.


    Verification/Validation/Value Authority Control
    -----------------------------------------------

    Entries with multiple models in the NUMMDL record are checked for corresponding pairs of MODEL/ENDMDL records, and for consecutively numbered models.


    Relationships to Other Record Types
    -----------------------------------

    Each MODEL must have a corresponding ENDMDL record.


    Examples
    --------

    Example 1::

                 1         2         3         4         5         6         7         8
        12345678901234567890123456789012345678901234567890123456789012345678901234567890
        MODEL        1
        ATOM      1  N   ALA A   1      11.104   6.134  -6.504  1.00  0.00           N
        ATOM      2  CA  ALA A   1      11.639   6.071  -5.147  1.00  0.00           C
        ...
        ...
        ...
        ATOM    293 1HG  GLU A  18     -14.861  -4.847   0.361  1.00  0.00           H
        ATOM    294 2HG  GLU A  18     -13.518  -3.769   0.084  1.00  0.00           H
        TER     295      GLU A  18
        ENDMDL
        MODEL        2
        ATOM    296  N   ALA A   1      10.883   6.779  -6.464  1.00  0.00           N
        ATOM    297  CA  ALA A   1      11.451   6.531  -5.142  1.00  0.00           C
        ...
        ...
        ATOM    588 1HG  GLU A  18     -13.363  -4.163  -2.372  1.00  0.00           H
        ATOM    589 2HG  GLU A  18     -12.634  -3.023  -3.475  1.00  0.00           H
        TER     590      GLU A  18
        ENDMDL

    Example 2::

                 1         2         3         4         5         6         7         8
        12345678901234567890123456789012345678901234567890123456789012345678901234567890
        MODEL        1
        ATOM      1  N  AALA A   1      72.883  57.697  56.410  0.50 83.80           N
        ATOM      2  CA AALA A   1      73.796  56.531  56.644  0.50 84.78           C
        ATOM      3  C  AALA A   1      74.549  56.551  57.997  0.50 85.05           C
        ATOM      4  O  AALA A   1      73.951  56.413  59.075  0.50 84.77           O
        ...
        ...
        ...
        HETATM37900  O  AHOH   490     -24.915 147.513  36.413  0.50 41.86           O
        HETATM37901  O  AHOH   491     -28.699 130.471  22.248  0.50 36.06           O
        HETATM37902  O  AHOH   492     -33.309 184.488  26.176  0.50 15.00           O
        ENDMDL
        MODEL        2
        ATOM      1  N  BALA A   1      72.883  57.697  56.410  0.50 83.80           N
        ATOM      2  CA BALA A   1      73.796  56.531  56.644  0.50 84.78           C
        ATOM      3  C  BALA A   1      74.549  56.551  57.997  0.50 85.05           C
        ATOM      4  O  BALA A   1      73.951  56.413  59.075  0.50 84.77           O
        ATOM      5  CB BALA A   1      74.804  56.369  55.453  0.50 84.29           C
        ATOM      6  N  BASP A   2      75.872  56.703  57.905  0.50 85.59           N
        ATOM      7  CA BASP A   2      76.801  56.651  59.048  0.50 85.67           C
        ATOM      8  C  BASP A   2      76.283  57.361  60.309  0.50 84.80           C
        ...


    @param file:        The file to write the record to.
    @type file:         file object
    @keyword serial:    The model serial number.
    @type serial:       int
    """

    # The formatted record.  Columns 7-10 are blank (4 spaces), the serial number fills columns 11-14 and the remaining 66 columns are blank padding.
    # NOTE(review): the %4i conversion requires a number, so a serial must be supplied unless _handle_none() (defined outside this block) converts the '' default - confirm.
    record = "%-6s    %4i%66s" % (
        'MODEL',
        _handle_none(serial),
        ''
    )

    # Validate (module helper defined outside this block).
    _record_validate(record)

    # Write out the formatted record.
    file.write(record)
    file.write('\n')

1527 1528

def remark(file, num='', remark=''):
    """Generate the REMARK record.

    The following is the PDB v3.3 documentation U{http://www.wwpdb.org/documentation/file-format/format33/remarks.html}.

    REMARK
    ======

    Overview
    --------

    REMARK records present experimental details, annotations, comments, and information not included in other records.  In a number of cases, REMARKs are used to expand the contents of other record types.  A new level of structure is being used for some REMARK records.  This is expected to facilitate searching and will assist in the conversion to a relational database.

    The very first line of every set of REMARK records is used as a spacer to aid in reading::

     ______________________________________________________________________________________________
     |         |             |             |                                                      |
     | Columns | Data type   | Field       | Definition                                           |
     |_________|_____________|_____________|______________________________________________________|
     |         |             |             |                                                      |
     |  1 -  6 | Record name | "REMARK"    |                                                      |
     |  8 - 10 | Integer     | remarkNum   | Remark number. It is not an error for remark n to    |
     |         |             |             | exist in an entry when remark n-1 does not.          |
     | 12 - 79 | LString     | empty       | Left as white space in first line of each new        |
     |         |             |             | remark.                                              |
     |_________|_____________|_____________|______________________________________________________|


    Behaviour
    ---------

    If the remark is None, a single blank spacer record carrying only the remark number is written.  Otherwise the remark text is converted to upper case and wrapped into lines of at most 68 characters, one REMARK record being written per wrapped line.  An empty remark string wraps to no lines at all, hence nothing is written in that case.


    @param file:        The file to write the record to.
    @type file:         file object
    @keyword num:       The remarkNum value.
    @type num:          int
    @keyword remark:    The remark, or None for a blank spacer record.
    @type remark:       str or None
    """

    # The 80 column layout - record name (1-6), remark number (8-10), text (12-79) and a blank final column.
    layout = "%-6s %3s %-68s "

    # Handle empty lines (the blank spacer record).
    if remark is None:
        lines = [layout % ("REMARK", num, '')]

    # The formatted records, splitting across lines if needed.
    else:
        lines = [layout % ("REMARK", num, line.upper()) for line in wrap(remark, 68)]

    # Output all lines.
    for text in lines:
        # Validate (module helper defined outside this block).
        _record_validate(text)

        # Write out the formatted record.
        file.write(text)
        file.write('\n')

1585 1586

1587 -def sheet(file, strand='', sheet_id='', num_strands='', init_res_name='', init_chain_id='', init_seq_num='', init_icode='', end_res_name='', end_chain_id='', end_seq_num='', end_icode='', sense='', cur_atom='', cur_res_name='', cur_chain_id='', cur_res_seq='', cur_icode='', prev_atom='', prev_res_name='', prev_chain_id='', prev_res_seq='', prev_icode=''):

1588 """Generate the SHEET record. 1589 1590 The following is the PDB v3.3 documentation U{http://www.wwpdb.org/documentation/file-format/format33/sect5.html#SHEET}. 1591 1592 SHEET 1593 ===== 1594 1595 Overview 1596 -------- 1597 1598 SHEET records are used to identify the position of sheets in the molecule. Sheets are both named and numbered. The residues where the sheet begins and ends are noted. 1599 1600 1601 Record Format 1602 ------------- 1603 1604 The format is:: 1605 ______________________________________________________________________________________________ 1606 | | | | | 1607 | Columns | Data type | Field | Definition | 1608 |_________|______________|______________|____________________________________________________| 1609 | | | | | 1610 | 1 - 6 | Record name | "SHEET " | | 1611 | 8 - 10 | Integer | strand | Strand number which starts at 1 for each strand | 1612 | | | | within a sheet and increases by one. | 1613 | 12 - 14 | LString(3) | sheetID | Sheet identifier. | 1614 | 15 - 16 | Integer | numStrands | Number of strands in sheet. | 1615 | 18 - 20 | Residue name | initResName | Residue name of initial residue. | 1616 | 22 | Character | initChainID | Chain identifier of initial residue in strand. | 1617 | 23 - 26 | Integer | initSeqNum | Sequence number of initial residue in strand. | 1618 | 27 | AChar | initICode | Insertion code of initial residue in strand. | 1619 | 29 - 31 | Residue name | endResName | Residue name of terminal residue. | 1620 | 33 | Character | endChainID | Chain identifier of terminal residue. | 1621 | 34 - 37 | Integer | endSeqNum | Sequence number of terminal residue. | 1622 | 38 | AChar | endICode | Insertion code of terminal residue. | 1623 | 39 - 40 | Integer | sense | Sense of strand with respect to previous strand in | 1624 | | | | the sheet. 0 if first strand, 1 if parallel, and | 1625 | | | | -1 if anti-parallel. | 1626 | 42 - 45 | Atom | curAtom | Registration. Atom name in current strand. 
| 1627 | 46 - 48 | Residue name | curResName | Registration. Residue name in current strand. | 1628 | 50 | Character | curChainId | Registration. Chain identifier in current strand. | 1629 | 51 - 54 | Integer | curResSeq | Registration. Residue sequence number in current | 1630 | | | | strand. | 1631 | 55 | AChar | curICode | Registration. Insertion code in current strand. | 1632 | 57 - 60 | Atom | prevAtom | Registration. Atom name in previous strand. | 1633 | 61 - 63 | Residue name | prevResName | Registration. Residue name in previous strand. | 1634 | 65 | Character | prevChainId | Registration. Chain identifier in previous strand.| 1635 | 66 - 69 | Integer | prevResSeq | Registration. Residue sequence number in previous | 1636 | | | | strand. | 1637 | 70 | AChar | prevICode | Registration. Insertion code in previous strand. | 1638 |_________|______________|______________|____________________________________________________| 1639 1640 1641 Details 1642 ------- 1643 1644 The initial residue for a strand is its N-terminus. Strand registration information is provided in columns 39 - 70. Strands are listed starting with one edge of the sheet and continuing to the spatially adjacent strand. 1645 1646 The sense in columns 39 - 40 indicates whether strand n is parallel (sense = 1) or anti-parallel (sense = -1) to strand n-1. Sense is equal to zero (0) for the first strand of a sheet. 1647 1648 The registration (columns 42 - 70) of strand n to strand n-1 may be specified by one hydrogen bond between each such pair of strands. This is done by providing the hydrogen bonding between the current and previous strands. No register information should be provided for the first strand. 1649 1650 Split strands, or strands with two or more runs of residues from discontinuous parts of the amino acid sequence, are explicitly listed. Detail description can be included in the REMARK 700. 
1651 1652 1653 Relationships to Other Record Types 1654 ----------------------------------- 1655 1656 If the entry contains bifurcated sheets or beta-barrels, the relevant REMARK 700 records must be provided. See the REMARK section for details. 1657 1658 1659 Examples 1660 -------- 1661 1662 Example 1:: 1663 1664 1 2 3 4 5 6 7 8 1665 12345678901234567890123456789012345678901234567890123456789012345678901234567890 1666 SHEET 1 A 5 THR A 107 ARG A 110 0 1667 SHEET 2 A 5 ILE A 96 THR A 99 -1 N LYS A 98 O THR A 107 1668 SHEET 3 A 5 ARG A 87 SER A 91 -1 N LEU A 89 O TYR A 97 1669 SHEET 4 A 5 TRP A 71 ASP A 75 -1 N ALA A 74 O ILE A 88 1670 SHEET 5 A 5 GLY A 52 PHE A 56 -1 N PHE A 56 O TRP A 71 1671 SHEET 1 B 5 THR B 107 ARG B 110 0 1672 SHEET 2 B 5 ILE B 96 THR B 99 -1 N LYS B 98 O THR B 107 1673 SHEET 3 B 5 ARG B 87 SER B 91 -1 N LEU B 89 O TYR B 97 1674 SHEET 4 B 5 TRP B 71 ASP B 75 -1 N ALA B 74 O ILE B 88 1675 SHEET 5 B 5 GLY B 52 ILE B 55 -1 N ASP B 54 O GLU B 73 1676 1677 The sheet presented as BS1 below is an eight-stranded beta-barrel. This is represented by a nine-stranded sheet in which the first and last strands are identical:: 1678 1679 SHEET 1 BS1 9 VAL 13 ILE 17 0 1680 SHEET 2 BS1 9 ALA 70 ILE 73 1 O TRP 72 N ILE 17 1681 SHEET 3 BS1 9 LYS 127 PHE 132 1 O ILE 129 N ILE 73 1682 SHEET 4 BS1 9 GLY 221 ASP 225 1 O GLY 221 N ILE 130 1683 SHEET 5 BS1 9 VAL 248 GLU 253 1 O PHE 249 N ILE 222 1684 SHEET 6 BS1 9 LEU 276 ASP 278 1 N LEU 277 O GLY 252 1685 SHEET 7 BS1 9 TYR 310 THR 318 1 O VAL 317 N ASP 278 1686 SHEET 8 BS1 9 VAL 351 TYR 356 1 O VAL 351 N THR 318 1687 SHEET 9 BS1 9 VAL 13 ILE 17 1 N VAL 14 O PRO 352 1688 1689 The sheet structure of this example is bifurcated. In order to represent this feature, two sheets are defined. 
Strands 2 and 3 of BS7 and BS8 are identical:: 1690 1691 SHEET 1 BS7 3 HIS 662 THR 665 0 1692 SHEET 2 BS7 3 LYS 639 LYS 648 -1 N PHE 643 O HIS 662 1693 SHEET 3 BS7 3 ASN 596 VAL 600 -1 N TYR 598 O ILE 646 1694 SHEET 1 BS8 3 ASN 653 TRP 656 0 1695 SHEET 2 BS8 3 LYS 639 LYS 648 -1 N LYS 647 O THR 655 1696 SHEET 3 BS8 3 ASN 596 VAL 600 -1 N TYR 598 O ILE 646 1697 1698 1699 @param file: The file to write the record to. 1700 @type file: file object 1701 @keyword strand: The strand number. 1702 @type strand: int 1703 @keyword sheet_id: The sheet identifier. 1704 @type sheet_id: str 1705 @keyword num_strands: The number of strands in sheet. 1706 @type num_strands: int 1707 @keyword init_res_name: The residue name of initial residue. 1708 @type init_res_name: str 1709 @keyword init_chain_id: The chain identifier of initial residue in strand. 1710 @type init_chain_id: str 1711 @keyword init_seq_num: The sequence number of initial residue in strand. 1712 @type init_seq_num: int 1713 @keyword init_icode: The insertion code of initial residue in strand. 1714 @type init_icode: str 1715 @keyword end_res_name: The residue name of terminal residue. 1716 @type end_res_name: str 1717 @keyword end_chain_id: The chain identifier of terminal residue. 1718 @type end_chain_id: str 1719 @keyword end_seq_num: The sequence number of terminal residue. 1720 @type end_seq_num: int 1721 @keyword end_icode: The insertion code of terminal residue. 1722 @type end_icode: str 1723 @keyword sense: The sense of strand with respect to previous strand. 1724 @type sense: int 1725 @keyword cur_atom: The atom name in current strand. 1726 @type cur_atom: str 1727 @keyword cur_res_name: The residue name in current strand. 1728 @type cur_res_name: str 1729 @keyword cur_chain_id: The chain identifier in current strand. 1730 @type cur_chain_id: str 1731 @keyword cur_res_seq: The residue sequence number in current strand. 1732 @type cur_res_seq: int 1733 @keyword cur_icode: The insertion code in current strand. 
1734 @type cur_icode: str 1735 @keyword prev_atom: The atom name in previous strand. 1736 @type prev_atom: str 1737 @keyword prev_res_name: The residue name in previous strand. 1738 @type prev_res_name: str 1739 @keyword prev_chain_id: The chain identifier in previous strand. 1740 @type prev_chain_id: str 1741 @keyword prev_res_seq: The residue sequence number in previous strand. 1742 @type prev_res_seq: int 1743 @keyword prev_icode: The insertion code in previous strand. 1744 @type prev_icode: str 1745 """ 1746 1747 # The formatted record. 1748 text = "%-6s %3s %3s%2s %3s %1s%4s%1s %3s %1s%4s%1s%2s %4s%3s %1s%4s%1s %4s%3s %1s%4s%1s%10s" % ( 1749 'SHEET', 1750 _handle_none(strand), 1751 _handle_none(sheet_id), 1752 _handle_none(num_strands), 1753 _handle_none(init_res_name), 1754 _handle_none(init_chain_id), 1755 _handle_none(init_seq_num), 1756 _handle_none(init_icode), 1757 _handle_none(end_res_name), 1758 _handle_none(end_chain_id), 1759 _handle_none(end_seq_num), 1760 _handle_none(end_icode), 1761 _handle_none(sense), 1762 _handle_atom_name(cur_atom), 1763 _handle_none(cur_res_name), 1764 _handle_none(cur_chain_id), 1765 _handle_none(cur_res_seq), 1766 _handle_none(cur_icode), 1767 _handle_atom_name(prev_atom), 1768 _handle_none(prev_res_name), 1769 _handle_none(prev_chain_id), 1770 _handle_none(prev_res_seq), 1771 _handle_none(prev_icode), 1772 '' 1773 ) 1774 1775 # Validate. 1776 _record_validate(text) 1777 1778 # Write out the formatted record. 1779 file.write(text) 1780 file.write('\n')

1781 1782

def ter(file, serial='', res_name='', chain_id='', res_seq='', icode=''):
    """Generate the TER record.

    The following is the PDB v3.3 documentation U{http://www.wwpdb.org/documentation/file-format/format33/sect9.html#TER}.

    TER
    ===

    Overview
    --------

    The TER record indicates the end of a list of ATOM/HETATM records for a chain.


    Record Format
    -------------

    The format is::
     ______________________________________________________________________________________________
     |         |              |              |                                                    |
     | Columns | Data type    | Field        | Definition                                         |
     |_________|______________|______________|____________________________________________________|
     |         |              |              |                                                    |
     |  1 -  6 | Record name  | "TER   "     |                                                    |
     |  7 - 11 | Integer      | serial       | Serial number.                                     |
     | 18 - 20 | Residue name | resName      | Residue name.                                      |
     | 22      | Character    | chainID      | Chain identifier.                                  |
     | 23 - 26 | Integer      | resSeq       | Residue sequence number.                           |
     | 27      | AChar        | iCode        | Insertion code.                                    |
     |_________|______________|______________|____________________________________________________|


    Details
    -------

    Every chain of ATOM/HETATM records presented on SEQRES records is terminated with a TER record.

    The TER records occur in the coordinate section of the entry, and indicate the last residue presented for each polypeptide and/or nucleic acid chain for which there are determined coordinates.  For proteins, the residue defined on the TER record is the carboxy-terminal residue; for nucleic acids it is the 3'-terminal residue.

    For a cyclic molecule, the choice of termini is arbitrary.

    Terminal oxygen atoms are presented as OXT for proteins, and as O5' or OP3 for nucleic acids.  These atoms are present only if the last residue in the polymer is truly the last residue in the SEQRES.

    The TER record has the same residue name, chain identifier, sequence number and insertion code as the terminal residue.  The serial number of the TER record is one number greater than the serial number of the ATOM/HETATM preceding the TER.


    Verification/Validation/Value Authority Control
    -----------------------------------------------

    TER must appear at the terminal carboxyl end or 3' end of a chain.  For proteins, there is usually a terminal oxygen, labeled OXT.  The validation program checks for the occurrence of TER and OXT records.


    Relationships to Other Record Types
    -----------------------------------

    The residue name appearing on the TER record must be the same as the residue name of the immediately preceding ATOM or non-water HETATM record.


    Example
    -------

    Example 1::

                 1         2         3         4         5         6         7         8
        12345678901234567890123456789012345678901234567890123456789012345678901234567890
        ATOM    601  N   LEU A  75     -17.070 -16.002   2.409  1.00 55.63           N
        ATOM    602  CA  LEU A  75     -16.343 -16.746   3.444  1.00 55.50           C
        ATOM    603  C   LEU A  75     -16.499 -18.263   3.300  1.00 55.55           C
        ATOM    604  O   LEU A  75     -16.645 -18.789   2.195  1.00 55.50           O
        ATOM    605  CB  LEU A  75     -16.776 -16.283   4.844  1.00 55.51           C
        TER     606      LEU A  75
        ...
        ATOM   1185  O   LEU B  75      26.292  -4.310  16.940  1.00 55.45           O
        ATOM   1186  CB  LEU B  75      23.881  -1.551  16.797  1.00 55.32           C
        TER    1187      LEU B  75
        HETATM 1188  H2  SRT A1076     -17.263  11.260  28.634  1.00 59.62           H
        HETATM 1189  HA  SRT A1076     -19.347  11.519  28.341  1.00 59.42           H
        HETATM 1190  H3  SRT A1076     -17.157  14.303  28.677  1.00 58.00           H
        HETATM 1191  HB  SRT A1076     -15.110  13.610  28.816  1.00 57.77           H
        HETATM 1192  O1  SRT A1076     -17.028  11.281  31.131  1.00 62.63           O

        ATOM    295  HB2 ALA A  18       4.601  -9.393   7.275  1.00  0.00           H
        ATOM    296  HB3 ALA A  18       3.340  -9.147   6.043  1.00  0.00           H
        TER     297      ALA A  18
        ENDMDL


    Implementation notes
    --------------------

    The record is assembled with a fixed-width format string whose field widths sum to 80 characters (6 + 5 + 6 + 3 + 1 + 1 + 4 + 1 + 53), the final field being blank padding for columns 28 - 80.  The text is handed to _record_validate() before anything is written to the file, and a single newline is written after the record.


    @param file:        The file to write the record to.
    @type file:         file object
    @keyword serial:    The atom serial number.
    @type serial:       int
    @keyword res_name:  The residue name.
    @type res_name:     str
    @keyword chain_id:  The chain identifier.
    @type chain_id:     str
    @keyword res_seq:   The sequence number.
    @type res_seq:      int
    @keyword icode:     The insertion code.
    @type icode:        str
    """

    # The formatted record.  The six literal blanks after the serial number pad the unused columns 12 - 17 so
    # that the residue name lands in columns 18 - 20, as required by the record format table above.  The single
    # literal blank before the chain ID is the unused column 21.
    text = "%-6s%5s      %3s %1s%4s%1s%53s" % (
        'TER',
        _handle_none(serial),
        _handle_none(res_name),
        _handle_none(chain_id),
        _handle_none(res_seq),
        _handle_none(icode),
        ''
    )

    # Validate before writing, so that nothing is written if the validator rejects the record.
    _record_validate(text)

    # Write out the formatted record.
    file.write(text)
    file.write('\n')

1901

Source Code for Module lib.structure.pdb_write