generic_fns.structure.main

1 ############################################################################### 2 # # 3 # Copyright (C) 2003-2013 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax (http://www.nmr-relax.com). # 6 # # 7 # This program is free software: you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation, either version 3 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # This program is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 19 # # 20 ############################################################################### 21 22 # Python module imports. 23 from math import sqrt 24 from minfx.generic import generic_minimise 25 from numpy import array, dot, float64, ndarray, zeros 26 from numpy.linalg import norm 27 from os import F_OK, access 28 from re import search 29 import sys 30 from warnings import warn 31 32 # relax module imports. 33 from generic_fns import molmol, relax_re 34 from generic_fns.interatomic import interatomic_loop 35 from generic_fns.mol_res_spin import create_spin, exists_mol_res_spin_data, generate_spin_id_unique, linear_ave, return_molecule, return_residue, return_spin, spin_loop 36 from generic_fns import pipes 37 from generic_fns.structure.api_base import Displacements 38 from generic_fns.structure.internal import Internal 39 from generic_fns.structure.scientific import Scientific_data 40 from generic_fns.structure.statistics import atomic_rmsd 41 from generic_fns.structure.superimpose import fit_to_first, fit_to_mean 42 from maths_fns.ens_pivot_finder import Pivot_finder 43 from relax_errors import RelaxError, RelaxFileError, RelaxNoPdbError, RelaxNoSequenceError 44 from relax_io import get_file_path, open_write_file, write_data, write_spin_data 45 from relax_warnings import RelaxWarning, RelaxNoPDBFileWarning, RelaxZeroVectorWarning 46 47

48 -def add_atom(mol_name=None, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):

49 """Add a new atom to the structural data object. 50 51 @keyword mol_name: The name of the molecule. 52 @type mol_name: str 53 @keyword atom_name: The atom name, e.g. 'H1'. 54 @type atom_name: str or None 55 @keyword res_name: The residue name. 56 @type res_name: str or None 57 @keyword res_num: The residue number. 58 @type res_num: int or None 59 @keyword pos: The position vector of coordinates. If a rank-2 array is supplied, the length of the first dimension must match the number of models. 60 @type pos: rank-1 or rank-2 array or list of float 61 @keyword element: The element symbol. 62 @type element: str or None 63 @keyword atom_num: The atom number. 64 @type atom_num: int or None 65 @keyword chain_id: The chain identifier. 66 @type chain_id: str or None 67 @keyword segment_id: The segment identifier. 68 @type segment_id: str or None 69 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'. 70 @type pdb_record: str or None 71 """ 72 73 # Test if the current data pipe exists. 74 pipes.test() 75 76 # Place the structural object into the relax data store if needed. 77 if not hasattr(cdp, 'structure'): 78 cdp.structure = Internal() 79 80 # Add the atoms. 81 cdp.structure.add_atom(mol_name=mol_name, atom_name=atom_name, res_name=res_name, res_num=res_num, pos=pos, element=element, atom_num=atom_num, chain_id=chain_id, segment_id=segment_id, pdb_record=pdb_record)

82 83

84 -def add_model(model_num=None):

85 """Add a new model to the empty structural data object.""" 86 87 # Test if the current data pipe exists. 88 pipes.test() 89 90 # Place the structural object into the relax data store if needed. 91 if not hasattr(cdp, 'structure'): 92 cdp.structure = Internal() 93 94 # The structural object can only be the internal object. 95 if cdp.structure.id != 'internal': 96 raise RelaxError("Models can only be added to the internal structural object.") 97 98 # Check the structural object is empty. 99 if cdp.structure.num_molecules() != 0: 100 raise RelaxError("The internal structural object is not empty.") 101 102 # Add a model. 103 cdp.structure.structural_data.add_item(model_num=model_num) 104 print("Created the empty model number %s." % model_num)

105 106

107 -def connect_atom(index1=None, index2=None):

108 """Connect two atoms. 109 110 @keyword index1: The global index of the first atom. 111 @type index1: str 112 @keyword index2: The global index of the first atom. 113 @type index2: str 114 """ 115 116 # Test if the current data pipe exists. 117 pipes.test() 118 119 # Place the structural object into the relax data store if needed. 120 if not hasattr(cdp, 'structure'): 121 cdp.structure = Internal() 122 123 # Add the atoms. 124 cdp.structure.connect_atom(index1=index1, index2=index2)

125 126

127 -def delete(atom_id=None):

128 """Delete structural data. 129 130 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. If not given, then all structural data will be deleted. 131 @type atom_id: str or None 132 """ 133 134 # Test if the current data pipe exists. 135 pipes.test() 136 137 # Run the object method. 138 if hasattr(cdp, 'structure'): 139 print("Deleting structural data from the current pipe.") 140 cdp.structure.delete(atom_id=atom_id) 141 else: 142 print("No structures are present.") 143 144 # Then remove any spin specific structural info. 145 print("Deleting all spin specific structural info.") 146 for spin in spin_loop(selection=atom_id): 147 # Delete positional information. 148 if hasattr(spin, 'pos'): 149 del spin.pos 150 151 # Then remove any interatomic vector structural info. 152 print("Deleting all interatomic vectors.") 153 for interatom in interatomic_loop(selection1=atom_id): 154 # Delete bond vectors. 155 if hasattr(interatom, 'vector'): 156 del interatom.vector

157 158

159 -def displacement(model_from=None, model_to=None, atom_id=None, centroid=None):

160 """Calculate the rotational and translational displacement between two structural models. 161 162 @keyword model_from: The optional model number for the starting position of the displacement. 163 @type model_from: int or None 164 @keyword model_to: The optional model number for the ending position of the displacement. 165 @type model_to: int or None 166 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. 167 @type atom_id: str or None 168 @keyword centroid: An alternative position of the centroid, used for studying pivoted systems. 169 @type centroid: list of float or numpy rank-1, 3D array 170 """ 171 172 # Test if the current data pipe exists. 173 pipes.test() 174 175 # Convert the model_from and model_to args to lists, is supplied. 176 if model_from != None: 177 model_from = [model_from] 178 if model_to != None: 179 model_to = [model_to] 180 181 # Create a list of all models. 182 models = [] 183 for model in cdp.structure.model_loop(): 184 models.append(model.num) 185 186 # Set model_from or model_to to all models if None. 187 if model_from == None: 188 model_from = models 189 if model_to == None: 190 model_to = models 191 192 # Initialise the data structure. 193 if not hasattr(cdp.structure, 'displacements'): 194 cdp.structure.displacements = Displacements() 195 196 # Loop over the starting models. 197 for i in range(len(model_from)): 198 # Assemble the atomic coordinates. 199 coord_from = [] 200 for pos in cdp.structure.atom_loop(atom_id=atom_id, model_num=model_from[i], pos_flag=True): 201 coord_from.append(pos) 202 203 # Loop over the ending models. 204 for j in range(len(model_to)): 205 # Assemble the atomic coordinates. 206 coord_to = [] 207 for pos in cdp.structure.atom_loop(atom_id=atom_id, model_num=model_to[j], pos_flag=True): 208 coord_to.append(pos) 209 210 # Send to the base container for the calculations. 211 cdp.structure.displacements._calculate(model_from=model_from[i], model_to=model_to[j], coord_from=array(coord_from), coord_to=array(coord_to), centroid=centroid)

212 213

214 -def find_pivot(models=None, atom_id=None, init_pos=None, func_tol=1e-5, box_limit=200):

215 """Superimpose a set of structural models. 216 217 @keyword models: The list of models to use. If set to None, then all models will be used. 218 @type models: list of int or None 219 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. 220 @type atom_id: str or None 221 @keyword init_pos: The starting pivot position for the pivot point optimisation. 222 @type init_pos: list of float or numpy rank-1, 3D array 223 @keyword func_tol: The function tolerance which, when reached, terminates optimisation. Setting this to None turns of the check. 224 @type func_tol: None or float 225 @keyword box_limit: The simplex optimisation used in this function is constrained withing a box of +/- x Angstrom containing the pivot point using the logarithmic barrier function. This argument is the value of x. 226 @type box_limit: int 227 """ 228 229 # Test if the current data pipe exists. 230 pipes.test() 231 232 # Initialised the starting position if needed. 233 if init_pos == None: 234 init_pos = zeros(3, float64) 235 init_pos = array(init_pos) 236 237 # Validate the models. 238 cdp.structure.validate_models() 239 240 # Create a list of all models. 241 if models == None: 242 models = [] 243 for model in cdp.structure.model_loop(): 244 models.append(model.num) 245 246 # Assemble the atomic coordinates of all models. 247 coord = [] 248 for model in models: 249 coord.append([]) 250 for pos in cdp.structure.atom_loop(atom_id=atom_id, model_num=model, pos_flag=True): 251 coord[-1].append(pos) 252 coord[-1] = array(coord[-1]) 253 coord = array(coord) 254 255 # Linear constraints for the pivot position (between -1000 and 1000 Angstrom). 256 A = zeros((6, 3), float64) 257 b = zeros(6, float64) 258 for i in range(3): 259 A[2*i, i] = 1 260 A[2*i+1, i] = -1 261 b[2*i] = -box_limit 262 b[2*i+1] = -box_limit 263 264 # The target function. 265 finder = Pivot_finder(models, coord) 266 results = generic_minimise(func=finder.func, x0=init_pos, min_algor='Log barrier', min_options=('simplex',), A=A, b=b, func_tol=func_tol, print_flag=1) 267 268 # No result. 269 if results == None: 270 return 271 272 # Store the data. 273 cdp.structure.pivot = results 274 275 # Print out. 276 print("Motional pivot found at: %s" % results)

277 278

279 -def get_pos(spin_id=None, str_id=None, ave_pos=False):

280 """Load the spins from the structural object into the relax data store. 281 282 @keyword spin_id: The molecule, residue, and spin identifier string. 283 @type spin_id: str 284 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number. 285 @type str_id: int or str 286 @keyword ave_pos: A flag specifying if the average atom position or the atom position from all loaded structures is loaded into the SpinContainer. 287 @type ave_pos: bool 288 """ 289 290 # Test if the current data pipe exists. 291 pipes.test() 292 293 # Test if the structure exists. 294 if not hasattr(cdp, 'structure') or not cdp.structure.num_models() or not cdp.structure.num_molecules(): 295 raise RelaxNoPdbError 296 297 # Loop over all atoms of the spin_id selection. 298 model_index = -1 299 last_model = None 300 data = [] 301 for model_num, mol_name, res_num, res_name, atom_num, atom_name, element, pos in cdp.structure.atom_loop(atom_id=spin_id, str_id=str_id, model_num_flag=True, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True, ave=ave_pos): 302 # Update the model info. 303 if last_model != model_num: 304 model_index = model_index + 1 305 last_model = model_num 306 307 # Remove the '+' regular expression character from the mol, res, and spin names! 308 if mol_name and search('\+', mol_name): 309 mol_name = mol_name.replace('+', '') 310 if res_name and search('\+', res_name): 311 res_name = res_name.replace('+', '') 312 if atom_name and search('\+', atom_name): 313 atom_name = atom_name.replace('+', '') 314 315 # The spin identification string. 316 id = generate_spin_id_unique(res_num=res_num, res_name=None, spin_num=atom_num, spin_name=atom_name) 317 318 # Get the spin container. 319 spin_cont = return_spin(id) 320 321 # Skip the spin if it doesn't exist. 322 if spin_cont == None: 323 continue 324 325 # Add the position vector to the spin container. 326 if ave_pos: 327 spin_cont.pos = pos 328 else: 329 if not hasattr(spin_cont, 'pos'): 330 spin_cont.pos = [] 331 spin_cont.pos.append(pos) 332 333 # Store the data for a printout at the end. 334 data.append([id, repr(pos)]) 335 336 # No positions found. 337 if not len(data): 338 raise RelaxError("No positional information matching the spin ID '%s' could be found." % spin_id) 339 340 # Update pseudo-atoms. 341 for spin in spin_loop(): 342 if hasattr(spin, 'members'): 343 # Get the spin positions. 344 positions = [] 345 for atom in spin.members: 346 # Get the spin container. 347 subspin = return_spin(atom) 348 349 # Test that the spin exists. 350 if subspin == None: 351 raise RelaxNoSpinError(atom) 352 353 # Test the position. 354 if not hasattr(subspin, 'pos') or not subspin.pos: 355 raise RelaxError("Positional information is not available for the atom '%s'." % atom) 356 357 # Alias the position. 358 pos = subspin.pos 359 360 # Convert to a list of lists if not already. 361 multi_model = True 362 if type(pos[0]) in [float, float64]: 363 multi_model = False 364 pos = [pos] 365 366 # Store the position. 367 positions.append([]) 368 for i in range(len(pos)): 369 positions[-1].append(pos[i].tolist()) 370 371 # The averaging. 372 if spin.averaging == 'linear': 373 # Average pos. 374 ave = linear_ave(positions) 375 376 # Convert to the correct structure. 377 if multi_model: 378 spin.pos = ave 379 else: 380 spin.pos = ave[0] 381 382 # Print out. 383 write_data(out=sys.stdout, headings=["Spin_ID", "Position"], data=data)

384 385

386 -def load_spins(spin_id=None, str_id=None, mol_name_target=None, ave_pos=False):

387 """Load the spins from the structural object into the relax data store. 388 389 @keyword spin_id: The molecule, residue, and spin identifier string. 390 @type spin_id: str 391 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number. 392 @type str_id: int or str 393 @keyword mol_name: The name of target molecule container, overriding the name of the loaded structures 394 @type mol_name: str or None 395 @keyword ave_pos: A flag specifying if the average atom position or the atom position from all loaded structures is loaded into the SpinContainer. 396 @type ave_pos: bool 397 """ 398 399 # Test if the current data pipe exists. 400 pipes.test() 401 402 # Test if the structure exists. 403 if not hasattr(cdp, 'structure') or not cdp.structure.num_models() or not cdp.structure.num_molecules(): 404 raise RelaxNoPdbError 405 406 # Print out. 407 print("Adding the following spins to the relax data store.\n") 408 409 # Init the data for printing out. 410 mol_names = [] 411 res_nums = [] 412 res_names = [] 413 spin_nums = [] 414 spin_names = [] 415 416 # Initialise data for the atom loop. 417 model_index = -1 418 last_model = 1000000 419 420 # Loop over all atoms of the spin_id selection. 421 for model_num, mol_name, res_num, res_name, atom_num, atom_name, element, pos in cdp.structure.atom_loop(atom_id=spin_id, str_id=str_id, model_num_flag=True, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True, ave=ave_pos): 422 # Override the molecule name. 423 if mol_name_target: 424 mol_name = mol_name_target 425 426 # Update the model info. 427 if last_model != model_num: 428 model_index = model_index + 1 429 last_model = model_num 430 431 # Remove the '+' regular expression character from the mol, res, and spin names! 432 if mol_name and search('\+', mol_name): 433 mol_name = mol_name.replace('+', '') 434 if res_name and search('\+', res_name): 435 res_name = res_name.replace('+', '') 436 if atom_name and search('\+', atom_name): 437 atom_name = atom_name.replace('+', '') 438 439 # Generate a spin ID for the current atom. 440 id = generate_spin_id_unique(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=atom_num, spin_name=atom_name) 441 442 # Create the spin. 443 try: 444 spin_cont = create_spin(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=atom_num, spin_name=atom_name) 445 except RelaxError: 446 # Throw a warning if still on the first model. 447 if not model_index: 448 warn(RelaxWarning("The spin '%s' already exists." % id)) 449 continue 450 451 # Otherwise, get the spin container. 452 spin_cont = return_spin(id) 453 454 # Append all the spin ID info for the first model for printing later. 455 if model_index == 0: 456 if mol_name_target: 457 mol_names.append(mol_name_target) 458 else: 459 mol_names.append(mol_name) 460 res_nums.append(res_num) 461 res_names.append(res_name) 462 spin_nums.append(atom_num) 463 spin_names.append(atom_name) 464 465 # Convert the position vector to a numpy array. 466 pos = array(pos, float64) 467 468 # Average position vector (already averaged across models in the atom_loop). 469 if ave_pos: 470 spin_cont.pos = pos 471 472 # All positions. 473 else: 474 # Initialise. 475 if not hasattr(spin_cont, 'pos'): 476 spin_cont.pos = [] 477 478 # Add the current model's position. 479 spin_cont.pos.append(pos) 480 481 # Add the element. 482 if not model_index: 483 spin_cont.element = element 484 485 # Catch no data. 486 if len(mol_names) == 0: 487 warn(RelaxWarning("No spins matching the '%s' ID string could be found." % spin_id)) 488 return 489 490 # Print out. 491 write_spin_data(file=sys.stdout, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names)

492 493

494 -def read_pdb(file=None, dir=None, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, parser='internal', alt_loc=None, verbosity=1, merge=False, fail=True):

495 """The PDB loading function. 496 497 Parsers 498 ======= 499 500 A number of parsers are available for reading PDB files. These include: 501 502 - 'scientific', the Scientific Python PDB parser. 503 - 'internal', a low quality yet fast PDB parser built into relax. 504 505 506 @keyword file: The name of the PDB file to read. 507 @type file: str 508 @keyword dir: The directory where the PDB file is located. If set to None, then the file will be searched for in the current directory. 509 @type dir: str or None 510 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are determined differently by the different parsers, but are numbered consecutively from 1. If set to None, then all molecules will be loaded. 511 @type read_mol: None, int, or list of int 512 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 513 @type set_mol_name: None, str, or list of str 514 @keyword read_model: The PDB model to extract from the file. If set to None, then all models will be loaded. 515 @type read_model: None, int, or list of int 516 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the PDB model numbers will be preserved, if they exist. 517 @type set_model_num: None, int, or list of int 518 @keyword parser: The parser to be used to read the PDB file. 519 @type parser: str 520 @keyword alt_loc: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use. 521 @type alt_loc: str or None 522 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 523 @type verbosity: int 524 @keyword merge: A flag which if set to True will try to merge the PDB structure into the currently loaded structures. 525 @type merge: bool 526 @keyword fail: A flag which, if True, will cause a RelaxError to be raised if the PDB file does not exist. If False, then a RelaxWarning will be trown instead. 527 @type fail: bool 528 @raise RelaxFileError: If the fail flag is set, then a RelaxError is raised if the PDB file does not exist. 529 """ 530 531 # Test if the current data pipe exists. 532 pipes.test() 533 534 # The file path. 535 file_path = get_file_path(file, dir) 536 537 # Try adding file extensions to the end of the file path, if the file can't be found. 538 file_path_orig = file_path 539 if not access(file_path, F_OK): 540 # List of possible extensions. 541 for ext in ['.pdb', '.gz', '.pdb.gz', '.bz2', '.pdb.bz2']: 542 # Add the extension if the file can be found. 543 if access(file_path+ext, F_OK): 544 file_path = file_path + ext 545 546 # Test if the file exists. 547 if not access(file_path, F_OK): 548 if fail: 549 raise RelaxFileError('PDB', file_path_orig) 550 else: 551 warn(RelaxNoPDBFileWarning(file_path)) 552 return 553 554 # Check that the parser is the same as the currently loaded PDB files. 555 if hasattr(cdp, 'structure') and cdp.structure.id != parser: 556 raise RelaxError("The " + repr(parser) + " parser does not match the " + repr(cdp.structure.id) + " parser of the PDB loaded into the current pipe.") 557 558 # Place the parser specific structural object into the relax data store. 559 if not hasattr(cdp, 'structure'): 560 if parser == 'scientific': 561 cdp.structure = Scientific_data() 562 elif parser == 'internal': 563 cdp.structure = Internal() 564 565 # Load the structures. 566 cdp.structure.load_pdb(file_path, read_mol=read_mol, set_mol_name=set_mol_name, read_model=read_model, set_model_num=set_model_num, alt_loc=alt_loc, verbosity=verbosity, merge=merge) 567 568 # Load into Molmol (if running). 569 molmol.molmol_obj.open_pdb()

570 571

572 -def read_xyz(file=None, dir=None, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, verbosity=1, fail=True):

573 """The XYZ loading function. 574 575 576 @keyword file: The name of the XYZ file to read. 577 @type file: str 578 @keyword dir: The directory where the XYZ file is located. If set to None, then the 579 file will be searched for in the current directory. 580 @type dir: str or None 581 @keyword read_mol: The molecule(s) to read from the file, independent of model. 582 If set to None, then all molecules will be loaded. 583 @type read_mol: None, int, or list of int 584 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then 585 the molecules will be automatically labelled based on the file name or 586 other information. 587 @type set_mol_name: None, str, or list of str 588 @keyword read_model: The XYZ model to extract from the file. If set to None, then all models 589 will be loaded. 590 @type read_model: None, int, or list of int 591 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the 592 XYZ model numbers will be preserved, if they exist. 593 @type set_model_num: None, int, or list of int 594 @keyword fail: A flag which, if True, will cause a RelaxError to be raised if the XYZ 595 file does not exist. If False, then a RelaxWarning will be trown 596 instead. 597 @type fail: bool 598 @keyword verbosity: The amount of information to print to screen. Zero corresponds to 599 minimal output while higher values increase the amount of output. The 600 default value is 1. 601 @type verbosity: int 602 @raise RelaxFileError: If the fail flag is set, then a RelaxError is raised if the XYZ file 603 does not exist. 604 """ 605 606 # Test if the current data pipe exists. 607 pipes.test() 608 609 # The file path. 610 file_path = get_file_path(file, dir) 611 612 # Try adding '.xyz' to the end of the file path, if the file can't be found. 613 if not access(file_path, F_OK): 614 file_path_orig = file_path 615 file_path = file_path + '.xyz' 616 617 # Test if the file exists. 618 if not access(file_path, F_OK): 619 if fail: 620 raise RelaxFileError('XYZ', file_path_orig) 621 else: 622 warn(RelaxNoPDBFileWarning(file_path)) 623 return 624 625 # Place the structural object into the relax data store. 626 if not hasattr(cdp, 'structure'): 627 cdp.structure = Internal() 628 629 # Load the structures. 630 cdp.structure.load_xyz(file_path, read_mol=read_mol, set_mol_name=set_mol_name, read_model=read_model, set_model_num=set_model_num, verbosity=verbosity)

631 632

633 -def rmsd(atom_id=None, models=None):

634 """Calculate the RMSD between the loaded models. 635 636 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 637 @type atom_id: str or None 638 @keyword models: The list of models to calculate the RMDS of. If set to None, then all models will be used. 639 @type models: list of int or None 640 @return: The RMSD value. 641 @rtype: float 642 """ 643 644 # Test if the current data pipe exists. 645 pipes.test() 646 647 # Create a list of all models. 648 if models == None: 649 models = [] 650 for model in cdp.structure.model_loop(): 651 models.append(model.num) 652 653 # Assemble the atomic coordinates of all models. 654 coord = [] 655 for model in models: 656 coord.append([]) 657 for pos in cdp.structure.atom_loop(atom_id=atom_id, model_num=model, pos_flag=True): 658 coord[-1].append(pos) 659 coord[-1] = array(coord[-1]) 660 661 # Calculate the RMSD. 662 cdp.structure.rmsd = atomic_rmsd(coord, verbosity=1) 663 664 # Return the RMSD. 665 return cdp.structure.rmsd

666 667

668 -def rotate(R=None, origin=None, model=None, atom_id=None):

669 """Rotate the structural data about the origin by the specified forwards rotation. 670 671 @keyword R: The forwards rotation matrix. 672 @type R: numpy 3D, rank-2 array or a 3x3 list of floats 673 @keyword origin: The origin of the rotation. If not supplied, the origin will be set to [0, 0, 0]. 674 @type origin: numpy 3D, rank-1 array or list of len 3 or None 675 @keyword model: The model to rotate. If None, all models will be rotated. 676 @type model: int 677 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 678 @type atom_id: str or None 679 """ 680 681 # Test if the current data pipe exists. 682 pipes.test() 683 684 # Test if the structure exists. 685 if not hasattr(cdp, 'structure') or not cdp.structure.num_models() or not cdp.structure.num_molecules(): 686 raise RelaxNoPdbError 687 688 # Set the origin if not supplied. 689 if origin == None: 690 origin = [0, 0, 0] 691 692 # Convert the args to numpy float data structures. 693 R = array(R, float64) 694 origin = array(origin, float64) 695 696 # Call the specific code. 697 cdp.structure.rotate(R=R, origin=origin, model=model, atom_id=atom_id)

698 699

700 -def set_vector(spin=None, xh_vect=None):

701 """Place the XH unit vector into the spin container object. 702 703 @keyword spin: The spin container object. 704 @type spin: SpinContainer instance 705 @keyword xh_vect: The unit vector parallel to the XH bond. 706 @type xh_vect: array of len 3 707 """ 708 709 # Place the XH unit vector into the container. 710 spin.xh_vect = xh_vect

711 712

713 -def superimpose(models=None, method='fit to mean', atom_id=None, centroid=None):

714 """Superimpose a set of structural models. 715 716 @keyword models: The list of models to superimpose. If set to None, then all models will be used. 717 @type models: list of int or None 718 @keyword method: The superimposition method. It must be one of 'fit to mean' or 'fit to first'. 719 @type method: str 720 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. 721 @type atom_id: str or None 722 @keyword centroid: An alternative position of the centroid to allow for different superpositions, for example of pivot point motions. 723 @type centroid: list of float or numpy rank-1, 3D array 724 """ 725 726 # Check the method. 727 allowed = ['fit to mean', 'fit to first'] 728 if method not in allowed: 729 raise RelaxError("The superimposition method '%s' is unknown. It must be one of %s." % (method, allowed)) 730 731 # Test if the current data pipe exists. 732 pipes.test() 733 734 # Validate the models. 735 cdp.structure.validate_models() 736 737 # Create a list of all models. 738 if models == None: 739 models = [] 740 for model in cdp.structure.model_loop(): 741 models.append(model.num) 742 743 # Assemble the atomic coordinates of all models. 744 coord = [] 745 for model in models: 746 coord.append([]) 747 for pos in cdp.structure.atom_loop(atom_id=atom_id, model_num=model, pos_flag=True): 748 coord[-1].append(pos) 749 coord[-1] = array(coord[-1]) 750 751 # The different algorithms. 752 if method == 'fit to mean': 753 T, R, pivot = fit_to_mean(models=models, coord=coord, centroid=centroid) 754 elif method == 'fit to first': 755 T, R, pivot = fit_to_first(models=models, coord=coord, centroid=centroid) 756 757 758 # Update to the new coordinates. 759 for i in range(len(models)): 760 # Translate the molecule first (the rotational pivot is defined in the first model). 761 translate(T=T[i], model=models[i]) 762 763 # Rotate the molecule. 764 rotate(R=R[i], origin=pivot[i], model=models[i])

765 766

767 -def translate(T=None, model=None, atom_id=None):

768 """Shift the structural data by the specified translation vector. 769 770 @keyword T: The translation vector. 771 @type T: numpy rank-1, 3D array or list of float 772 @keyword model: The model to translate. If None, all models will be rotated. 773 @type model: int or None 774 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 775 @type atom_id: str or None 776 """ 777 778 # Test if the current data pipe exists. 779 pipes.test() 780 781 # Test if the structure exists. 782 if not hasattr(cdp, 'structure') or not cdp.structure.num_models() or not cdp.structure.num_molecules(): 783 raise RelaxNoPdbError 784 785 # Convert the args to numpy float data structures. 786 T = array(T, float64) 787 788 # Call the specific code. 789 cdp.structure.translate(T=T, model=model, atom_id=atom_id)

790 791

792 -def vectors(spin_id1=None, spin_id2=None, model=None, verbosity=1, ave=True, unit=True):

793 """Extract the bond vectors from the loaded structures and store them in the spin container. 794 795 @keyword spin_id1: The spin identifier string of the first spin of the pair. 796 @type spin_id1: str 797 @keyword spin_id2: The spin identifier string of the second spin of the pair. 798 @type spin_id2: str 799 @keyword model: The model to extract the vector from. If None, all vectors will be extracted. 800 @type model: str 801 @keyword verbosity: The higher the value, the more information is printed to screen. 802 @type verbosity: int 803 @keyword ave: A flag which if True will cause the average of all vectors to be extracted. 804 @type ave: bool 805 @keyword unit: A flag which if True will cause the function to calculate the unit vectors. 806 @type unit: bool 807 """ 808 809 # Test if the current data pipe exists. 810 pipes.test() 811 812 # Test if the PDB file has been loaded. 813 if not hasattr(cdp, 'structure'): 814 raise RelaxNoPdbError 815 816 # Test if sequence data is loaded. 817 if not exists_mol_res_spin_data(): 818 raise RelaxNoSequenceError 819 820 # Print out. 821 if verbosity: 822 # Number of models. 823 num_models = cdp.structure.num_models() 824 825 # Multiple models loaded. 826 if num_models > 1: 827 if model: 828 print("Extracting vectors for model '%s'." % model) 829 else: 830 print("Extracting vectors for all %s models." % num_models) 831 if ave: 832 print("Averaging all vectors.") 833 834 # Single model loaded. 835 else: 836 print("Extracting vectors from the single model.") 837 838 # Unit vectors. 839 if unit: 840 print("Calculating the unit vectors.") 841 842 # Loop over the spins. 843 no_vectors = True 844 for spin, mol_name, res_num, res_name in spin_loop(selection=spin_id, full_info=True): 845 # Skip deselected spins. 846 if not spin.select: 847 continue 848 849 # The spin identification string. The residue name and spin num is not included to allow molecules with point mutations to be used as different models. 850 id = generate_spin_id_unique(res_num=res_num, res_name=None, spin_name=spin.name, spin_num=spin.num) 851 852 # Test that the spin number or name are set (one or both are essential for the identification of the atom). 853 if spin.num == None and spin.name == None: 854 warn(RelaxWarning("Either the spin number or name must be set for the spin " + repr(id) + " to identify the corresponding atom in the molecule.")) 855 continue 856 857 # The bond vector already exists. 858 if hasattr(spin, 'vector'): 859 obj = getattr(spin, 'vector') 860 if obj != None: 861 warn(RelaxWarning("The bond vector for the spin " + repr(id) + " already exists.")) 862 continue 863 864 # Get the bond info. 865 bond_vectors, attached_name, warnings = cdp.structure.bond_vectors(attached_atom=attached, model_num=model, res_num=res_num, spin_name=spin.name, spin_num=spin.num, return_name=True, return_warnings=True) 866 867 # No attached atom. 868 if not bond_vectors: 869 # Warning messages. 870 if warnings: 871 warn(RelaxWarning(warnings + " (atom ID " + repr(id) + ").")) 872 873 # Skip the spin. 874 continue 875 876 # Set the attached atom name. 877 if not hasattr(spin, 'attached_atom'): 878 spin.attached_atom = attached_name 879 elif spin.attached_atom != attached_name: 880 raise RelaxError("The " + repr(spin.attached_atom) + " atom already attached to the spin does not match the attached atom " + repr(attached_name) + ".") 881 882 # Initialise the average vector. 883 if ave: 884 ave_vector = zeros(3, float64) 885 886 # Loop over the individual vectors. 887 for i in range(len(bond_vectors)): 888 # Unit vector. 889 if unit: 890 # Normalisation factor. 891 norm_factor = norm(bond_vectors[i]) 892 893 # Test for zero length. 894 if norm_factor == 0.0: 895 warn(RelaxZeroVectorWarning(id)) 896 897 # Calculate the normalised vector. 898 else: 899 bond_vectors[i] = bond_vectors[i] / norm_factor 900 901 # Sum the vectors. 902 if ave: 903 ave_vector = ave_vector + bond_vectors[i] 904 905 # Average. 906 if ave: 907 vector = ave_vector / float(len(bond_vectors)) 908 else: 909 vector = bond_vectors 910 911 # Convert to a single vector if needed. 912 if len(vector) == 1: 913 vector = vector[0] 914 915 # Set the vector. 916 setattr(spin, 'vector', vector) 917 918 # We have a vector! 919 no_vectors = False 920 921 # Print out of modified spins. 922 if verbosity: 923 # The number of vectors. 924 num = len(bond_vectors) 925 plural = 's' 926 if num == 1: 927 plural = '' 928 929 if spin.name: 930 print("Extracted %s %s-%s vector%s for the spin '%s'." % (num, spin.name, attached_name, plural, id)) 931 else: 932 print("Extracted %s %s-%s vector%s for the spin '%s'." % (num, spin.num, attached_name, plural, id)) 933 934 # Right, catch the problem of missing vectors to prevent massive user confusion! 935 if no_vectors: 936 raise RelaxError("No vectors could be extracted.")

937 938

939 -def web_of_motion(file=None, dir=None, models=None, force=False):

940 """Create a PDB representation of the motion between a set of models. 941 942 This will create a PDB file containing the atoms of all models, with identical atoms links using CONECT records. This function only supports the internal structural object. 943 944 @keyword file: The name of the PDB file to write. 945 @type file: str 946 @keyword dir: The directory where the PDB file will be placed. If set to None, then the file will be placed in the current directory. 947 @type dir: str or None 948 @keyword models: The optional list of models to restrict this to. 949 @type models: list of int or None 950 @keyword force: The force flag which if True will cause the file to be overwritten. 951 @type force: bool 952 """ 953 954 # Test if the current data pipe exists. 955 pipes.test() 956 957 # Test if the structure exists. 958 if not hasattr(cdp, 'structure') or not cdp.structure.num_models() or not cdp.structure.num_molecules(): 959 raise RelaxNoPdbError 960 961 # Validate the models. 962 cdp.structure.validate_models() 963 964 # Check the structural object type. 965 if cdp.structure.id != 'internal': 966 raise RelaxError("The %s structure type is not supported." % cdp.structure.id) 967 968 # Initialise the structural object. 969 web = Internal() 970 971 # The model list. 972 if models == None: 973 models = [] 974 for k in range(len(cdp.structure.structural_data)): 975 models.append(cdp.structure.structural_data[k].num) 976 977 # Loop over the molecules. 978 for i in range(len(cdp.structure.structural_data[0].mol)): 979 # Alias the molecule of the first model. 980 mol1 = cdp.structure.structural_data[0].mol[i] 981 982 # Loop over the atoms. 983 for j in range(len(mol1.atom_name)): 984 # Loop over the models. 985 for k in range(len(cdp.structure.structural_data)): 986 # Skip the model. 987 if cdp.structure.structural_data[k].num not in models: 988 continue 989 990 # Alias. 991 mol = cdp.structure.structural_data[k].mol[i] 992 993 # Add the atom. 994 web.add_atom(mol_name=mol1.mol_name, atom_name=mol.atom_name[j], res_name=mol.res_name[j], res_num=mol.res_num[j], pos=[mol.x[j], mol.y[j], mol.z[j]], element=mol.element[j], chain_id=mol.chain_id[j], segment_id=mol.seg_id[j], pdb_record=mol.pdb_record[j]) 995 996 # Loop over the models again, this time twice. 997 for k in range(len(models)): 998 for l in range(len(models)): 999 # Skip identical atoms. 1000 if k == l: 1001 continue 1002 1003 # The atom index. 1004 index1 = j*len(models) + k 1005 index2 = j*len(models) + l 1006 1007 # Connect to the previous atoms. 1008 web.connect_atom(mol_name=mol1.mol_name, index1=index1, index2=index2) 1009 1010 # Append the PDB extension if needed. 1011 if isinstance(file, str): 1012 # The file path. 1013 file = get_file_path(file, dir) 1014 1015 # Add '.pdb' to the end of the file path if it isn't there yet. 1016 if not search(".pdb$", file): 1017 file += '.pdb' 1018 1019 # Open the file for writing. 1020 file = open_write_file(file, force=force) 1021 1022 # Write the structure. 1023 web.write_pdb(file)

1024 1025

1026 -def write_pdb(file=None, dir=None, model_num=None, compress_type=0, force=False):

1027 """The PDB writing function. 1028 1029 @keyword file: The name of the PDB file to write. 1030 @type file: str 1031 @keyword dir: The directory where the PDB file will be placed. If set to None, then the file will be placed in the current directory. 1032 @type dir: str or None 1033 @keyword model_num: The model to place into the PDB file. If not supplied, then all models will be placed into the file. 1034 @type model_num: None or int 1035 @keyword compress_type: The compression type. The integer values correspond to the compression type: 0, no compression; 1, Bzip2 compression; 2, Gzip compression. 1036 @type compress_type: int 1037 @keyword force: The force flag which if True will cause the file to be overwritten. 1038 @type force: bool 1039 """ 1040 1041 # Test if the current data pipe exists. 1042 pipes.test() 1043 1044 # Check if the structural object exists. 1045 if not hasattr(cdp, 'structure'): 1046 raise RelaxError("No structural data is present in the current data pipe.") 1047 1048 # Check if the structural object is writable. 1049 if cdp.structure.id in ['scientific']: 1050 raise RelaxError("The structures from the " + cdp.structure.id + " parser are not writable.") 1051 1052 # The file path. 1053 file_path = get_file_path(file, dir) 1054 1055 # Add '.pdb' to the end of the file path if it isn't there yet. 1056 if not search(".pdb$", file_path): 1057 file_path = file_path + '.pdb' 1058 1059 # Open the file for writing. 1060 file = open_write_file(file_path, compress_type=compress_type, force=force) 1061 1062 # Write the structures. 1063 cdp.structure.write_pdb(file, model_num=model_num)

1064

Source Code for Module generic_fns.structure.main