pipe_control.structure.main

1 ############################################################################### 2 # # 3 # Copyright (C) 2003-2014 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax (http://www.nmr-relax.com). # 6 # # 7 # This program is free software: you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation, either version 3 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # This program is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 19 # # 20 ############################################################################### 21 22 # Python module imports. 23 from minfx.generic import generic_minimise 24 from numpy import array, float64, zeros 25 from numpy.linalg import norm 26 from os import F_OK, access, getcwd 27 from re import search 28 import sys 29 from warnings import warn 30 31 # relax module imports. 32 from lib.errors import RelaxError, RelaxFileError, RelaxNoPdbError, RelaxNoSequenceError 33 from lib.io import get_file_path, open_write_file, write_data 34 from lib.sequence import write_spin_data 35 from lib.structure.internal.displacements import Displacements 36 from lib.structure.internal.object import Internal 37 from lib.structure.represent.diffusion_tensor import diffusion_tensor 38 from lib.structure.statistics import atomic_rmsd 39 from lib.structure.superimpose import fit_to_first, fit_to_mean 40 from lib.warnings import RelaxWarning, RelaxNoPDBFileWarning, RelaxZeroVectorWarning 41 from pipe_control import molmol, pipes 42 from pipe_control.interatomic import interatomic_loop 43 from pipe_control.mol_res_spin import create_spin, exists_mol_res_spin_data, generate_spin_id_unique, linear_ave, return_spin, spin_loop 44 from pipe_control.structure.mass import pipe_centre_of_mass 45 from status import Status; status = Status() 46 from target_functions.ens_pivot_finder import Pivot_finder 47 48

49 -def add_atom(mol_name=None, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):

50 """Add a new atom to the structural data object. 51 52 @keyword mol_name: The name of the molecule. 53 @type mol_name: str 54 @keyword atom_name: The atom name, e.g. 'H1'. 55 @type atom_name: str or None 56 @keyword res_name: The residue name. 57 @type res_name: str or None 58 @keyword res_num: The residue number. 59 @type res_num: int or None 60 @keyword pos: The position vector of coordinates. If a rank-2 array is supplied, the length of the first dimension must match the number of models. 61 @type pos: rank-1 or rank-2 array or list of float 62 @keyword element: The element symbol. 63 @type element: str or None 64 @keyword atom_num: The atom number. 65 @type atom_num: int or None 66 @keyword chain_id: The chain identifier. 67 @type chain_id: str or None 68 @keyword segment_id: The segment identifier. 69 @type segment_id: str or None 70 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'. 71 @type pdb_record: str or None 72 """ 73 74 # Test if the current data pipe exists. 75 pipes.test() 76 77 # Place the structural object into the relax data store if needed. 78 if not hasattr(cdp, 'structure'): 79 cdp.structure = Internal() 80 81 # Add the atoms. 82 cdp.structure.add_atom(mol_name=mol_name, atom_name=atom_name, res_name=res_name, res_num=res_num, pos=pos, element=element, atom_num=atom_num, chain_id=chain_id, segment_id=segment_id, pdb_record=pdb_record)

83 84

85 -def add_model(model_num=None):

86 """Add a new model to the empty structural data object.""" 87 88 # Test if the current data pipe exists. 89 pipes.test() 90 91 # Place the structural object into the relax data store if needed. 92 if not hasattr(cdp, 'structure'): 93 cdp.structure = Internal() 94 95 # Check the structural object is empty. 96 if cdp.structure.num_molecules() != 0: 97 raise RelaxError("The internal structural object is not empty.") 98 99 # Add a model. 100 cdp.structure.structural_data.add_item(model_num=model_num) 101 print("Created the empty model number %s." % model_num)

102 103

104 -def connect_atom(index1=None, index2=None):

105 """Connect two atoms. 106 107 @keyword index1: The global index of the first atom. 108 @type index1: str 109 @keyword index2: The global index of the first atom. 110 @type index2: str 111 """ 112 113 # Test if the current data pipe exists. 114 pipes.test() 115 116 # Place the structural object into the relax data store if needed. 117 if not hasattr(cdp, 'structure'): 118 cdp.structure = Internal() 119 120 # Add the atoms. 121 cdp.structure.connect_atom(index1=index1, index2=index2)

122 123

124 -def com(model=None):

125 """Calculate the centre of mass of all structures. 126 127 @keyword model: Only use a specific model. 128 @type model: int or None 129 """ 130 131 # Test if the current data pipe exists. 132 pipes.test() 133 134 # Calculate and store the centre of mass. 135 cdp.com = pipe_centre_of_mass(model=model)

136 137

138 -def create_diff_tensor_pdb(scale=1.8e-6, file=None, dir=None, force=False):

139 """Create the PDB representation of the diffusion tensor. 140 141 @keyword scale: The scaling factor for the diffusion tensor. 142 @type scale: float 143 @keyword file: The name of the PDB file to create. 144 @type file: str 145 @keyword dir: The name of the directory to place the PDB file into. 146 @type dir: str 147 @keyword force: Flag which if set to True will overwrite any pre-existing file. 148 @type force: bool 149 """ 150 151 # Test if the current data pipe exists. 152 pipes.test() 153 154 # Calculate the centre of mass. 155 com = pipe_centre_of_mass() 156 157 # Create the structural object. 158 structure = Internal() 159 160 # Create an array of data pipes to loop over (hybrid support). 161 if cdp.pipe_type == 'hybrid': 162 pipe_list = cdp.hybrid_pipes 163 else: 164 pipe_list = [pipes.cdp_name()] 165 166 # The molecule names. 167 if cdp.pipe_type == 'hybrid': 168 mol_names = [] 169 for pipe in pipe_list: 170 mol_names.append('diff_tensor_' % pipe) 171 else: 172 mol_names = ['diff_tensor'] 173 174 # Loop over the pipes. 175 for pipe_index in range(len(pipe_list)): 176 # Get the pipe container. 177 pipe = pipes.get_pipe(pipe_list[pipe_index]) 178 179 # Test if the diffusion tensor data is loaded. 180 if not hasattr(pipe, 'diff_tensor'): 181 raise RelaxNoTensorError('diffusion') 182 183 # Test if a structure has been loaded. 184 if not hasattr(cdp, 'structure'): 185 raise RelaxNoPdbError 186 187 # Add a new structure. 188 structure.add_molecule(name=mol_names[pipe_index]) 189 190 # Alias the single molecule from the single model. 191 mol = structure.get_molecule(mol_names[pipe_index]) 192 193 # The diffusion tensor type. 194 diff_type = pipe.diff_tensor.type 195 if diff_type == 'spheroid': 196 diff_type = pipe.diff_tensor.spheroid_type 197 198 # Simulation info. 199 sim_num = None 200 if hasattr(pipe.diff_tensor, 'tm_sim'): 201 # The number. 202 sim_num = len(pipe.diff_tensor.tm_sim) 203 204 # Tensor axes. 205 axes = [] 206 sim_axes = [] 207 if diff_type in ['oblate', 'prolate']: 208 axes.append(pipe.diff_tensor.Dpar * pipe.diff_tensor.Dpar_unit) 209 sim_axes.append([]) 210 if sim_num != None: 211 for i in range(sim_num): 212 sim_axes[0].append(pipe.diff_tensor.Dpar_sim[i] * pipe.diff_tensor.Dpar_unit_sim[i]) 213 214 if diff_type == 'ellipsoid': 215 axes.append(pipe.diff_tensor.Dx * pipe.diff_tensor.Dx_unit) 216 axes.append(pipe.diff_tensor.Dy * pipe.diff_tensor.Dy_unit) 217 axes.append(pipe.diff_tensor.Dz * pipe.diff_tensor.Dz_unit) 218 sim_axes.append([]) 219 sim_axes.append([]) 220 sim_axes.append([]) 221 if sim_num != None: 222 for i in range(sim_num): 223 sim_axes[0].append(pipe.diff_tensor.Dx_sim[i] * pipe.diff_tensor.Dx_unit_sim[i]) 224 sim_axes[1].append(pipe.diff_tensor.Dy_sim[i] * pipe.diff_tensor.Dy_unit_sim[i]) 225 sim_axes[2].append(pipe.diff_tensor.Dz_sim[i] * pipe.diff_tensor.Dz_unit_sim[i]) 226 227 # Create the object. 228 diffusion_tensor(mol=mol, tensor=pipe.diff_tensor.tensor, tensor_diag=pipe.diff_tensor.tensor_diag, diff_type=diff_type, rotation=pipe.diff_tensor.rotation, axes=axes, sim_axes=sim_axes, com=com, scale=scale) 229 230 231 # Create the PDB file. 232 ###################### 233 234 # Print out. 235 print("\nGenerating the PDB file.") 236 237 # Create the PDB file. 238 tensor_pdb_file = open_write_file(file, dir, force=force) 239 structure.write_pdb(tensor_pdb_file) 240 tensor_pdb_file.close() 241 242 # Add the file to the results file list. 243 if not hasattr(cdp, 'result_files'): 244 cdp.result_files = [] 245 if dir == None: 246 dir = getcwd() 247 cdp.result_files.append(['diff_tensor_pdb', 'Diffusion tensor PDB', get_file_path(file, dir)]) 248 status.observers.result_file.notify()

249 250

251 -def delete(atom_id=None, verbosity=1, spin_info=True):

252 """Delete structural data. 253 254 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. If not given, then all structural data will be deleted. 255 @type atom_id: str or None 256 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 257 @type verbosity: int 258 @keyword spin_info: A flag which if True will cause all structural information in the spin containers and interatomic data containers to be deleted as well. If False, then only the 3D structural data will be deleted. 259 @type spin_info: bool 260 """ 261 262 # Test if the current data pipe exists. 263 pipes.test() 264 265 # Run the object method. 266 if hasattr(cdp, 'structure'): 267 if verbosity: 268 print("Deleting structural data from the current pipe.") 269 cdp.structure.delete(atom_id=atom_id, verbosity=verbosity) 270 elif verbosity: 271 print("No structures are present.") 272 273 # Skip the rest. 274 if not spin_info: 275 return 276 277 # Then remove any spin specific structural info. 278 if verbosity: 279 print("Deleting all spin specific structural info.") 280 for spin in spin_loop(selection=atom_id): 281 # Delete positional information. 282 if hasattr(spin, 'pos'): 283 del spin.pos 284 285 # Then remove any interatomic vector structural info. 286 if verbosity: 287 print("Deleting all interatomic vectors.") 288 for interatom in interatomic_loop(selection1=atom_id): 289 # Delete bond vectors. 290 if hasattr(interatom, 'vector'): 291 del interatom.vector

292 293

294 -def displacement(model_from=None, model_to=None, atom_id=None, centroid=None):

295 """Calculate the rotational and translational displacement between two structural models. 296 297 @keyword model_from: The optional model number for the starting position of the displacement. 298 @type model_from: int or None 299 @keyword model_to: The optional model number for the ending position of the displacement. 300 @type model_to: int or None 301 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. 302 @type atom_id: str or None 303 @keyword centroid: An alternative position of the centroid, used for studying pivoted systems. 304 @type centroid: list of float or numpy rank-1, 3D array 305 """ 306 307 # Test if the current data pipe exists. 308 pipes.test() 309 310 # Convert the model_from and model_to args to lists, is supplied. 311 if model_from != None: 312 model_from = [model_from] 313 if model_to != None: 314 model_to = [model_to] 315 316 # Create a list of all models. 317 models = [] 318 for model in cdp.structure.model_loop(): 319 models.append(model.num) 320 321 # Set model_from or model_to to all models if None. 322 if model_from == None: 323 model_from = models 324 if model_to == None: 325 model_to = models 326 327 # Initialise the data structure. 328 if not hasattr(cdp.structure, 'displacements'): 329 cdp.structure.displacements = Displacements() 330 331 # Loop over the starting models. 332 for i in range(len(model_from)): 333 # Assemble the atomic coordinates. 334 coord_from = [] 335 for pos in cdp.structure.atom_loop(atom_id=atom_id, model_num=model_from[i], pos_flag=True): 336 coord_from.append(pos[0]) 337 338 # Loop over the ending models. 339 for j in range(len(model_to)): 340 # Assemble the atomic coordinates. 341 coord_to = [] 342 for pos in cdp.structure.atom_loop(atom_id=atom_id, model_num=model_to[j], pos_flag=True): 343 coord_to.append(pos[0]) 344 345 # Send to the base container for the calculations. 346 cdp.structure.displacements._calculate(model_from=model_from[i], model_to=model_to[j], coord_from=array(coord_from), coord_to=array(coord_to), centroid=centroid)

347 348

349 -def find_pivot(models=None, atom_id=None, init_pos=None, func_tol=1e-5, box_limit=200):

350 """Superimpose a set of structural models. 351 352 @keyword models: The list of models to use. If set to None, then all models will be used. 353 @type models: list of int or None 354 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. 355 @type atom_id: str or None 356 @keyword init_pos: The starting pivot position for the pivot point optimisation. 357 @type init_pos: list of float or numpy rank-1, 3D array 358 @keyword func_tol: The function tolerance which, when reached, terminates optimisation. Setting this to None turns of the check. 359 @type func_tol: None or float 360 @keyword box_limit: The simplex optimisation used in this function is constrained withing a box of +/- x Angstrom containing the pivot point using the logarithmic barrier function. This argument is the value of x. 361 @type box_limit: int 362 """ 363 364 # Test if the current data pipe exists. 365 pipes.test() 366 367 # Initialised the starting position if needed. 368 if init_pos == None: 369 init_pos = zeros(3, float64) 370 init_pos = array(init_pos) 371 372 # Validate the models. 373 cdp.structure.validate_models() 374 375 # Create a list of all models. 376 if models == None: 377 models = [] 378 for model in cdp.structure.model_loop(): 379 models.append(model.num) 380 381 # Assemble the atomic coordinates of all models. 382 coord = [] 383 for model in models: 384 coord.append([]) 385 for pos in cdp.structure.atom_loop(atom_id=atom_id, model_num=model, pos_flag=True): 386 coord[-1].append(pos[0]) 387 coord[-1] = array(coord[-1]) 388 coord = array(coord) 389 390 # Linear constraints for the pivot position (between -1000 and 1000 Angstrom). 391 A = zeros((6, 3), float64) 392 b = zeros(6, float64) 393 for i in range(3): 394 A[2*i, i] = 1 395 A[2*i+1, i] = -1 396 b[2*i] = -box_limit 397 b[2*i+1] = -box_limit 398 399 # The target function. 400 finder = Pivot_finder(models, coord) 401 results = generic_minimise(func=finder.func, x0=init_pos, min_algor='Log barrier', min_options=('simplex',), A=A, b=b, func_tol=func_tol, print_flag=1) 402 403 # No result. 404 if results == None: 405 return 406 407 # Store the data. 408 cdp.structure.pivot = results 409 410 # Print out. 411 print("Motional pivot found at: %s" % results)

412 413

414 -def get_pos(spin_id=None, str_id=None, ave_pos=False):

415 """Load the spins from the structural object into the relax data store. 416 417 @keyword spin_id: The molecule, residue, and spin identifier string. 418 @type spin_id: str 419 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number. 420 @type str_id: int or str 421 @keyword ave_pos: A flag specifying if the average atom position or the atom position from all loaded structures is loaded into the SpinContainer. 422 @type ave_pos: bool 423 """ 424 425 # Test if the current data pipe exists. 426 pipes.test() 427 428 # Test if the structure exists. 429 if not hasattr(cdp, 'structure') or not cdp.structure.num_models() or not cdp.structure.num_molecules(): 430 raise RelaxNoPdbError 431 432 # Loop over all atoms of the spin_id selection. 433 data = [] 434 for mol_name, res_num, res_name, atom_num, atom_name, element, pos in cdp.structure.atom_loop(atom_id=spin_id, str_id=str_id, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True, ave=ave_pos): 435 # Remove the '+' regular expression character from the mol, res, and spin names! 436 if mol_name and search('\+', mol_name): 437 mol_name = mol_name.replace('+', '') 438 if res_name and search('\+', res_name): 439 res_name = res_name.replace('+', '') 440 if atom_name and search('\+', atom_name): 441 atom_name = atom_name.replace('+', '') 442 443 # The spin identification string. 444 id = generate_spin_id_unique(res_num=res_num, res_name=None, spin_num=atom_num, spin_name=atom_name) 445 446 # Get the spin container. 447 spin_cont = return_spin(id) 448 449 # Skip the spin if it doesn't exist. 450 if spin_cont == None: 451 continue 452 453 # Add the position vector to the spin container. 454 spin_cont.pos = pos 455 456 # Store the data for a printout at the end. 457 data.append([id, repr(pos)]) 458 459 # No positions found. 460 if not len(data): 461 raise RelaxError("No positional information matching the spin ID '%s' could be found." % spin_id) 462 463 # Update pseudo-atoms. 464 for spin in spin_loop(): 465 if hasattr(spin, 'members'): 466 # Get the spin positions. 467 positions = [] 468 for atom in spin.members: 469 # Get the spin container. 470 subspin = return_spin(atom) 471 472 # Test that the spin exists. 473 if subspin == None: 474 raise RelaxNoSpinError(atom) 475 476 # Test the position. 477 if not hasattr(subspin, 'pos') or subspin.pos == None or not len(subspin.pos): 478 raise RelaxError("Positional information is not available for the atom '%s'." % atom) 479 480 # Alias the position. 481 pos = subspin.pos 482 483 # Convert to a list of lists if not already. 484 multi_model = True 485 if type(pos[0]) in [float, float64]: 486 multi_model = False 487 pos = [pos] 488 489 # Store the position. 490 positions.append([]) 491 for i in range(len(pos)): 492 positions[-1].append(pos[i].tolist()) 493 494 # The averaging. 495 if spin.averaging == 'linear': 496 # Average pos. 497 ave = linear_ave(positions) 498 499 # Convert to the correct structure. 500 if multi_model: 501 spin.pos = ave 502 else: 503 spin.pos = ave[0] 504 505 # Print out. 506 write_data(out=sys.stdout, headings=["Spin_ID", "Position"], data=data)

507 508

509 -def load_spins(spin_id=None, str_id=None, mol_name_target=None, ave_pos=False):

510 """Load the spins from the structural object into the relax data store. 511 512 @keyword spin_id: The molecule, residue, and spin identifier string. 513 @type spin_id: str 514 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number. 515 @type str_id: int or str 516 @keyword mol_name_target: The name of target molecule container, overriding the name of the loaded structures 517 @type mol_name_target: str or None 518 @keyword ave_pos: A flag specifying if the average atom position or the atom position from all loaded structures is loaded into the SpinContainer. 519 @type ave_pos: bool 520 """ 521 522 # Test if the current data pipe exists. 523 pipes.test() 524 525 # Test if the structure exists. 526 if not hasattr(cdp, 'structure') or not cdp.structure.num_models() or not cdp.structure.num_molecules(): 527 raise RelaxNoPdbError 528 529 # Print out. 530 print("Adding the following spins to the relax data store.\n") 531 532 # Init the data for printing out. 533 mol_names = [] 534 res_nums = [] 535 res_names = [] 536 spin_nums = [] 537 spin_names = [] 538 539 # Loop over all atoms of the spin_id selection. 540 for mol_name, res_num, res_name, atom_num, atom_name, element, pos in cdp.structure.atom_loop(atom_id=spin_id, str_id=str_id, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True, ave=ave_pos): 541 # Override the molecule name. 542 if mol_name_target: 543 mol_name = mol_name_target 544 545 # Remove the '+' regular expression character from the mol, res, and spin names! 546 if mol_name and search('\+', mol_name): 547 mol_name = mol_name.replace('+', '') 548 if res_name and search('\+', res_name): 549 res_name = res_name.replace('+', '') 550 if atom_name and search('\+', atom_name): 551 atom_name = atom_name.replace('+', '') 552 553 # Generate a spin ID for the current atom. 554 id = generate_spin_id_unique(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=atom_num, spin_name=atom_name) 555 556 # Create the spin. 557 try: 558 spin_cont = create_spin(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=atom_num, spin_name=atom_name) 559 560 # Otherwise, get the spin container. 561 except RelaxError: 562 spin_cont = return_spin(id) 563 564 # Append all the spin ID info for printing later. 565 if mol_name_target: 566 mol_names.append(mol_name_target) 567 else: 568 mol_names.append(mol_name) 569 res_nums.append(res_num) 570 res_names.append(res_name) 571 spin_nums.append(atom_num) 572 spin_names.append(atom_name) 573 574 # Position vector. 575 spin_cont.pos = pos 576 577 # Add the element. 578 spin_cont.element = element 579 580 # Catch no data. 581 if len(mol_names) == 0: 582 warn(RelaxWarning("No spins matching the '%s' ID string could be found." % spin_id)) 583 return 584 585 # Print out. 586 write_spin_data(file=sys.stdout, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names)

587 588

589 -def read_gaussian(file=None, dir=None, set_mol_name=None, set_model_num=None, verbosity=1, fail=True):

590 """Read structures from a Gaussian log file. 591 592 593 @keyword file: The name of the Gaussian log file to read. 594 @type file: str 595 @keyword dir: The directory where the Gaussian log file is located. If set to None, then the file will be searched for in the current directory. 596 @type dir: str or None 597 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 598 @type set_mol_name: None, str, or list of str 599 @keyword set_model_num: Set the model number of the loaded molecule. 600 @type set_model_num: None, int, or list of int 601 @keyword fail: A flag which, if True, will cause a RelaxError to be raised if the Gaussian log file does not exist. If False, then a RelaxWarning will be trown instead. 602 @type fail: bool 603 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 604 @type verbosity: int 605 @raise RelaxFileError: If the fail flag is set, then a RelaxError is raised if the Gaussian log file does not exist. 606 """ 607 608 # Test if the current data pipe exists. 609 pipes.test() 610 611 # The file path. 612 file_path = get_file_path(file, dir) 613 614 # Try adding '.log' to the end of the file path, if the file can't be found. 615 if not access(file_path, F_OK): 616 file_path_orig = file_path 617 file_path = file_path + '.log' 618 619 # Test if the file exists. 620 if not access(file_path, F_OK): 621 if fail: 622 raise RelaxFileError('Gaussian log', file_path_orig) 623 else: 624 warn(RelaxNoPDBFileWarning(file_path)) 625 return 626 627 # Place the structural object into the relax data store. 628 if not hasattr(cdp, 'structure'): 629 cdp.structure = Internal() 630 631 # Load the structures. 632 cdp.structure.load_gaussian(file_path, set_mol_name=set_mol_name, set_model_num=set_model_num, verbosity=verbosity)

633 634

635 -def read_pdb(file=None, dir=None, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, alt_loc=None, verbosity=1, merge=False, fail=True):

636 """The PDB loading function. 637 638 @keyword file: The name of the PDB file to read. 639 @type file: str 640 @keyword dir: The directory where the PDB file is located. If set to None, then the file will be searched for in the current directory. 641 @type dir: str or None 642 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are numbered consecutively from 1. If set to None, then all molecules will be loaded. 643 @type read_mol: None, int, or list of int 644 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 645 @type set_mol_name: None, str, or list of str 646 @keyword read_model: The PDB model to extract from the file. If set to None, then all models will be loaded. 647 @type read_model: None, int, or list of int 648 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the PDB model numbers will be preserved, if they exist. 649 @type set_model_num: None, int, or list of int 650 @keyword alt_loc: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use. 651 @type alt_loc: str or None 652 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 653 @type verbosity: int 654 @keyword merge: A flag which if set to True will try to merge the PDB structure into the currently loaded structures. 655 @type merge: bool 656 @keyword fail: A flag which, if True, will cause a RelaxError to be raised if the PDB file does not exist. If False, then a RelaxWarning will be trown instead. 657 @type fail: bool 658 @raise RelaxFileError: If the fail flag is set, then a RelaxError is raised if the PDB file does not exist. 659 """ 660 661 # Test if the current data pipe exists. 662 pipes.test() 663 664 # The file path. 665 file_path = get_file_path(file, dir) 666 667 # Try adding file extensions to the end of the file path, if the file can't be found. 668 file_path_orig = file_path 669 if not access(file_path, F_OK): 670 # List of possible extensions. 671 for ext in ['.pdb', '.gz', '.pdb.gz', '.bz2', '.pdb.bz2']: 672 # Add the extension if the file can be found. 673 if access(file_path+ext, F_OK): 674 file_path = file_path + ext 675 676 # Test if the file exists. 677 if not access(file_path, F_OK): 678 if fail: 679 raise RelaxFileError('PDB', file_path_orig) 680 else: 681 warn(RelaxNoPDBFileWarning(file_path)) 682 return 683 684 # Place the internal structural object into the relax data store. 685 if not hasattr(cdp, 'structure'): 686 cdp.structure = Internal() 687 688 # Load the structures. 689 cdp.structure.load_pdb(file_path, read_mol=read_mol, set_mol_name=set_mol_name, read_model=read_model, set_model_num=set_model_num, alt_loc=alt_loc, verbosity=verbosity, merge=merge) 690 691 # Load into Molmol (if running). 692 molmol.molmol_obj.open_pdb()

693 694

695 -def read_xyz(file=None, dir=None, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, verbosity=1, fail=True):

696 """The XYZ loading function. 697 698 699 @keyword file: The name of the XYZ file to read. 700 @type file: str 701 @keyword dir: The directory where the XYZ file is located. If set to None, then the 702 file will be searched for in the current directory. 703 @type dir: str or None 704 @keyword read_mol: The molecule(s) to read from the file, independent of model. 705 If set to None, then all molecules will be loaded. 706 @type read_mol: None, int, or list of int 707 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then 708 the molecules will be automatically labelled based on the file name or 709 other information. 710 @type set_mol_name: None, str, or list of str 711 @keyword read_model: The XYZ model to extract from the file. If set to None, then all models 712 will be loaded. 713 @type read_model: None, int, or list of int 714 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the 715 XYZ model numbers will be preserved, if they exist. 716 @type set_model_num: None, int, or list of int 717 @keyword fail: A flag which, if True, will cause a RelaxError to be raised if the XYZ 718 file does not exist. If False, then a RelaxWarning will be trown 719 instead. 720 @type fail: bool 721 @keyword verbosity: The amount of information to print to screen. Zero corresponds to 722 minimal output while higher values increase the amount of output. The 723 default value is 1. 724 @type verbosity: int 725 @raise RelaxFileError: If the fail flag is set, then a RelaxError is raised if the XYZ file 726 does not exist. 727 """ 728 729 # Test if the current data pipe exists. 730 pipes.test() 731 732 # The file path. 733 file_path = get_file_path(file, dir) 734 735 # Try adding '.xyz' to the end of the file path, if the file can't be found. 736 if not access(file_path, F_OK): 737 file_path_orig = file_path 738 file_path = file_path + '.xyz' 739 740 # Test if the file exists. 741 if not access(file_path, F_OK): 742 if fail: 743 raise RelaxFileError('XYZ', file_path_orig) 744 else: 745 warn(RelaxNoPDBFileWarning(file_path)) 746 return 747 748 # Place the structural object into the relax data store. 749 if not hasattr(cdp, 'structure'): 750 cdp.structure = Internal() 751 752 # Load the structures. 753 cdp.structure.load_xyz(file_path, read_mol=read_mol, set_mol_name=set_mol_name, read_model=read_model, set_model_num=set_model_num, verbosity=verbosity)

754 755

756 -def rmsd(atom_id=None, models=None):

757 """Calculate the RMSD between the loaded models. 758 759 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 760 @type atom_id: str or None 761 @keyword models: The list of models to calculate the RMDS of. If set to None, then all models will be used. 762 @type models: list of int or None 763 @return: The RMSD value. 764 @rtype: float 765 """ 766 767 # Test if the current data pipe exists. 768 pipes.test() 769 770 # Create a list of all models. 771 if models == None: 772 models = [] 773 for model in cdp.structure.model_loop(): 774 models.append(model.num) 775 776 # Assemble the atomic coordinates of all models. 777 coord = [] 778 for model in models: 779 coord.append([]) 780 for pos in cdp.structure.atom_loop(atom_id=atom_id, model_num=model, pos_flag=True): 781 coord[-1].append(pos[0]) 782 coord[-1] = array(coord[-1]) 783 784 # Calculate the RMSD. 785 cdp.structure.rmsd = atomic_rmsd(coord, verbosity=1) 786 787 # Return the RMSD. 788 return cdp.structure.rmsd

789 790

791 -def rotate(R=None, origin=None, model=None, atom_id=None):

792 """Rotate the structural data about the origin by the specified forwards rotation. 793 794 @keyword R: The forwards rotation matrix. 795 @type R: numpy 3D, rank-2 array or a 3x3 list of floats 796 @keyword origin: The origin of the rotation. If not supplied, the origin will be set to [0, 0, 0]. 797 @type origin: numpy 3D, rank-1 array or list of len 3 or None 798 @keyword model: The model to rotate. If None, all models will be rotated. 799 @type model: int 800 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 801 @type atom_id: str or None 802 """ 803 804 # Test if the current data pipe exists. 805 pipes.test() 806 807 # Test if the structure exists. 808 if not hasattr(cdp, 'structure') or not cdp.structure.num_models() or not cdp.structure.num_molecules(): 809 raise RelaxNoPdbError 810 811 # Set the origin if not supplied. 812 if origin == None: 813 origin = [0, 0, 0] 814 815 # Convert the args to numpy float data structures. 816 R = array(R, float64) 817 origin = array(origin, float64) 818 819 # Call the specific code. 820 cdp.structure.rotate(R=R, origin=origin, model=model, atom_id=atom_id)

821 822

823 -def set_vector(spin=None, xh_vect=None):

824 """Place the XH unit vector into the spin container object. 825 826 @keyword spin: The spin container object. 827 @type spin: SpinContainer instance 828 @keyword xh_vect: The unit vector parallel to the XH bond. 829 @type xh_vect: array of len 3 830 """ 831 832 # Place the XH unit vector into the container. 833 spin.xh_vect = xh_vect

834 835

836 -def superimpose(models=None, method='fit to mean', atom_id=None, centre_type="centroid", centroid=None):

837 """Superimpose a set of structural models. 838 839 @keyword models: The list of models to superimpose. If set to None, then all models will be used. 840 @type models: list of int or None 841 @keyword method: The superimposition method. It must be one of 'fit to mean' or 'fit to first'. 842 @type method: str 843 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. 844 @type atom_id: str or None 845 @keyword centre_type: The type of centre to superimpose over. This can either be the standard centroid superimposition or the CoM could be used instead. 846 @type centre_type: str 847 @keyword centroid: An alternative position of the centroid to allow for different superpositions, for example of pivot point motions. 848 @type centroid: list of float or numpy rank-1, 3D array 849 """ 850 851 # Check the method. 852 allowed = ['fit to mean', 'fit to first'] 853 if method not in allowed: 854 raise RelaxError("The superimposition method '%s' is unknown. It must be one of %s." % (method, allowed)) 855 856 # Check the type. 857 allowed = ['centroid', 'CoM'] 858 if centre_type not in allowed: 859 raise RelaxError("The superimposition centre type '%s' is unknown. It must be one of %s." % (centre_type, allowed)) 860 861 # Test if the current data pipe exists. 862 pipes.test() 863 864 # Validate the models. 865 cdp.structure.validate_models() 866 867 # Create a list of all models. 868 if models == None: 869 models = [] 870 for model in cdp.structure.model_loop(): 871 models.append(model.num) 872 873 # Assemble the atomic coordinates of all models. 874 coord = [] 875 for model in models: 876 coord.append([]) 877 for pos in cdp.structure.atom_loop(atom_id=atom_id, model_num=model, pos_flag=True): 878 coord[-1].append(pos[0]) 879 coord[-1] = array(coord[-1]) 880 881 # Assemble the element types. 882 elements = [] 883 for elem in cdp.structure.atom_loop(atom_id=atom_id, model_num=model, element_flag=True): 884 elements.append(elem) 885 886 # The different algorithms. 887 if method == 'fit to mean': 888 T, R, pivot = fit_to_mean(models=models, coord=coord, centre_type=centre_type, elements=elements, centroid=centroid) 889 elif method == 'fit to first': 890 T, R, pivot = fit_to_first(models=models, coord=coord, centre_type=centre_type, elements=elements, centroid=centroid) 891 892 893 # Update to the new coordinates. 894 for i in range(len(models)): 895 # Translate the molecule first (the rotational pivot is defined in the first model). 896 translate(T=T[i], model=models[i]) 897 898 # Rotate the molecule. 899 rotate(R=R[i], origin=pivot[i], model=models[i])

900 901

902 -def translate(T=None, model=None, atom_id=None):

903 """Shift the structural data by the specified translation vector. 904 905 @keyword T: The translation vector. 906 @type T: numpy rank-1, 3D array or list of float 907 @keyword model: The model to translate. If None, all models will be rotated. 908 @type model: int or None 909 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 910 @type atom_id: str or None 911 """ 912 913 # Test if the current data pipe exists. 914 pipes.test() 915 916 # Test if the structure exists. 917 if not hasattr(cdp, 'structure') or not cdp.structure.num_models() or not cdp.structure.num_molecules(): 918 raise RelaxNoPdbError 919 920 # Convert the args to numpy float data structures. 921 T = array(T, float64) 922 923 # Call the specific code. 924 cdp.structure.translate(T=T, model=model, atom_id=atom_id)

925 926

927 -def vectors(spin_id1=None, spin_id2=None, model=None, verbosity=1, ave=True, unit=True):

928 """Extract the bond vectors from the loaded structures and store them in the spin container. 929 930 @keyword spin_id1: The spin identifier string of the first spin of the pair. 931 @type spin_id1: str 932 @keyword spin_id2: The spin identifier string of the second spin of the pair. 933 @type spin_id2: str 934 @keyword model: The model to extract the vector from. If None, all vectors will be extracted. 935 @type model: str 936 @keyword verbosity: The higher the value, the more information is printed to screen. 937 @type verbosity: int 938 @keyword ave: A flag which if True will cause the average of all vectors to be extracted. 939 @type ave: bool 940 @keyword unit: A flag which if True will cause the function to calculate the unit vectors. 941 @type unit: bool 942 """ 943 944 # Test if the current data pipe exists. 945 pipes.test() 946 947 # Test if the PDB file has been loaded. 948 if not hasattr(cdp, 'structure'): 949 raise RelaxNoPdbError 950 951 # Test if sequence data is loaded. 952 if not exists_mol_res_spin_data(): 953 raise RelaxNoSequenceError 954 955 # Print out. 956 if verbosity: 957 # Number of models. 958 num_models = cdp.structure.num_models() 959 960 # Multiple models loaded. 961 if num_models > 1: 962 if model: 963 print("Extracting vectors for model '%s'." % model) 964 else: 965 print("Extracting vectors for all %s models." % num_models) 966 if ave: 967 print("Averaging all vectors.") 968 969 # Single model loaded. 970 else: 971 print("Extracting vectors from the single model.") 972 973 # Unit vectors. 974 if unit: 975 print("Calculating the unit vectors.") 976 977 # Loop over the spins. 978 no_vectors = True 979 for spin, mol_name, res_num, res_name in spin_loop(selection=spin_id, full_info=True): 980 # Skip deselected spins. 981 if not spin.select: 982 continue 983 984 # The spin identification string. The residue name and spin num is not included to allow molecules with point mutations to be used as different models. 985 id = generate_spin_id_unique(res_num=res_num, res_name=None, spin_name=spin.name, spin_num=spin.num) 986 987 # Test that the spin number or name are set (one or both are essential for the identification of the atom). 988 if spin.num == None and spin.name == None: 989 warn(RelaxWarning("Either the spin number or name must be set for the spin " + repr(id) + " to identify the corresponding atom in the molecule.")) 990 continue 991 992 # The bond vector already exists. 993 if hasattr(spin, 'vector'): 994 obj = getattr(spin, 'vector') 995 if obj != None: 996 warn(RelaxWarning("The bond vector for the spin " + repr(id) + " already exists.")) 997 continue 998 999 # Get the bond info. 1000 bond_vectors, attached_name, warnings = cdp.structure.bond_vectors(attached_atom=attached, model_num=model, res_num=res_num, spin_name=spin.name, spin_num=spin.num, return_name=True, return_warnings=True) 1001 id2 = generate_spin_id_unique(res_num=res_num, res_name=None, spin_name=spin.name) 1002 1003 # No attached atom. 1004 if not bond_vectors: 1005 # Warning messages. 1006 if warnings: 1007 warn(RelaxWarning(warnings + " (atom ID " + repr(id) + ").")) 1008 1009 # Skip the spin. 1010 continue 1011 1012 # Set the attached atom name. 1013 if not hasattr(spin, 'attached_atom'): 1014 spin.attached_atom = attached_name 1015 elif spin.attached_atom != attached_name: 1016 raise RelaxError("The " + repr(spin.attached_atom) + " atom already attached to the spin does not match the attached atom " + repr(attached_name) + ".") 1017 1018 # Initialise the average vector. 1019 if ave: 1020 ave_vector = zeros(3, float64) 1021 1022 # Loop over the individual vectors. 1023 for i in range(len(bond_vectors)): 1024 # Unit vector. 1025 if unit: 1026 # Normalisation factor. 1027 norm_factor = norm(bond_vectors[i]) 1028 1029 # Test for zero length. 1030 if norm_factor == 0.0: 1031 warn(RelaxZeroVectorWarning(spin_id1=id, spin_id2=id2)) 1032 1033 # Calculate the normalised vector. 1034 else: 1035 bond_vectors[i] = bond_vectors[i] / norm_factor 1036 1037 # Sum the vectors. 1038 if ave: 1039 ave_vector = ave_vector + bond_vectors[i] 1040 1041 # Average. 1042 if ave: 1043 vector = ave_vector / float(len(bond_vectors)) 1044 else: 1045 vector = bond_vectors 1046 1047 # Convert to a single vector if needed. 1048 if len(vector) == 1: 1049 vector = vector[0] 1050 1051 # Set the vector. 1052 setattr(spin, 'vector', vector) 1053 1054 # We have a vector! 1055 no_vectors = False 1056 1057 # Print out of modified spins. 1058 if verbosity: 1059 # The number of vectors. 1060 num = len(bond_vectors) 1061 plural = 's' 1062 if num == 1: 1063 plural = '' 1064 1065 if spin.name: 1066 print("Extracted %s %s-%s vector%s for the spin '%s'." % (num, spin.name, attached_name, plural, id)) 1067 else: 1068 print("Extracted %s %s-%s vector%s for the spin '%s'." % (num, spin.num, attached_name, plural, id)) 1069 1070 # Right, catch the problem of missing vectors to prevent massive user confusion! 1071 if no_vectors: 1072 raise RelaxError("No vectors could be extracted.")

1073 1074

1075 -def web_of_motion(file=None, dir=None, models=None, force=False):

1076 """Create a PDB representation of the motion between a set of models. 1077 1078 This will create a PDB file containing the atoms of all models, with identical atoms links using CONECT records. This function only supports the internal structural object. 1079 1080 @keyword file: The name of the PDB file to write. 1081 @type file: str 1082 @keyword dir: The directory where the PDB file will be placed. If set to None, then the file will be placed in the current directory. 1083 @type dir: str or None 1084 @keyword models: The optional list of models to restrict this to. 1085 @type models: list of int or None 1086 @keyword force: The force flag which if True will cause the file to be overwritten. 1087 @type force: bool 1088 """ 1089 1090 # Test if the current data pipe exists. 1091 pipes.test() 1092 1093 # Test if the structure exists. 1094 if not hasattr(cdp, 'structure') or not cdp.structure.num_models() or not cdp.structure.num_molecules(): 1095 raise RelaxNoPdbError 1096 1097 # Validate the models. 1098 cdp.structure.validate_models() 1099 1100 # Initialise the structural object. 1101 web = Internal() 1102 1103 # The model list. 1104 if models == None: 1105 models = [] 1106 for k in range(len(cdp.structure.structural_data)): 1107 models.append(cdp.structure.structural_data[k].num) 1108 1109 # Loop over the molecules. 1110 for i in range(len(cdp.structure.structural_data[0].mol)): 1111 # Alias the molecule of the first model. 1112 mol1 = cdp.structure.structural_data[0].mol[i] 1113 1114 # Loop over the atoms. 1115 for j in range(len(mol1.atom_name)): 1116 # Loop over the models. 1117 for k in range(len(cdp.structure.structural_data)): 1118 # Skip the model. 1119 if cdp.structure.structural_data[k].num not in models: 1120 continue 1121 1122 # Alias. 1123 mol = cdp.structure.structural_data[k].mol[i] 1124 1125 # Add the atom. 1126 web.add_atom(mol_name=mol1.mol_name, atom_name=mol.atom_name[j], res_name=mol.res_name[j], res_num=mol.res_num[j], pos=[mol.x[j], mol.y[j], mol.z[j]], element=mol.element[j], chain_id=mol.chain_id[j], segment_id=mol.seg_id[j], pdb_record=mol.pdb_record[j]) 1127 1128 # Loop over the models again, this time twice. 1129 for k in range(len(models)): 1130 for l in range(len(models)): 1131 # Skip identical atoms. 1132 if k == l: 1133 continue 1134 1135 # The atom index. 1136 index1 = j*len(models) + k 1137 index2 = j*len(models) + l 1138 1139 # Connect to the previous atoms. 1140 web.connect_atom(mol_name=mol1.mol_name, index1=index1, index2=index2) 1141 1142 # Append the PDB extension if needed. 1143 if isinstance(file, str): 1144 # The file path. 1145 file = get_file_path(file, dir) 1146 1147 # Add '.pdb' to the end of the file path if it isn't there yet. 1148 if not search(".pdb$", file): 1149 file += '.pdb' 1150 1151 # Open the file for writing. 1152 file = open_write_file(file, force=force) 1153 1154 # Write the structure. 1155 web.write_pdb(file)

1156 1157

1158 -def write_pdb(file=None, dir=None, model_num=None, compress_type=0, force=False):

1159 """The PDB writing function. 1160 1161 @keyword file: The name of the PDB file to write. This can also be a file instance. 1162 @type file: str or file instance 1163 @keyword dir: The directory where the PDB file will be placed. If set to None, then the file will be placed in the current directory. 1164 @type dir: str or None 1165 @keyword model_num: The model to place into the PDB file. If not supplied, then all models will be placed into the file. 1166 @type model_num: None or int 1167 @keyword compress_type: The compression type. The integer values correspond to the compression type: 0, no compression; 1, Bzip2 compression; 2, Gzip compression. 1168 @type compress_type: int 1169 @keyword force: The force flag which if True will cause the file to be overwritten. 1170 @type force: bool 1171 """ 1172 1173 # Test if the current data pipe exists. 1174 pipes.test() 1175 1176 # Check if the structural object exists. 1177 if not hasattr(cdp, 'structure'): 1178 raise RelaxError("No structural data is present in the current data pipe.") 1179 1180 # Path handling. 1181 if isinstance(file, str): 1182 # The file path. 1183 file = get_file_path(file, dir) 1184 1185 # Add '.pdb' to the end of the file path if it isn't there yet. 1186 if not search(".pdb$", file): 1187 file = file + '.pdb' 1188 1189 # Open the file for writing. 1190 file = open_write_file(file, compress_type=compress_type, force=force) 1191 1192 # Write the structures. 1193 cdp.structure.write_pdb(file, model_num=model_num)

1194

Source Code for Module pipe_control.structure.main