pipe_control.structure.main

1 ############################################################################### 2 # # 3 # Copyright (C) 2003-2013 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax (http://www.nmr-relax.com). # 6 # # 7 # This program is free software: you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation, either version 3 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # This program is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 19 # # 20 ############################################################################### 21 22 # Python module imports. 23 from minfx.generic import generic_minimise 24 from numpy import array, float64, zeros 25 from numpy.linalg import norm 26 from os import F_OK, access, getcwd 27 from re import search 28 import sys 29 from warnings import warn 30 31 # relax module imports. 32 from lib.errors import RelaxError, RelaxFileError, RelaxNoPdbError, RelaxNoSequenceError 33 from lib.io import get_file_path, open_write_file, write_data, write_spin_data 34 from lib.structure.internal.displacements import Displacements 35 from lib.structure.internal.object import Internal 36 from lib.structure.represent.diffusion_tensor import diffusion_tensor 37 from lib.structure.statistics import atomic_rmsd 38 from lib.structure.superimpose import fit_to_first, fit_to_mean 39 from lib.warnings import RelaxWarning, RelaxNoPDBFileWarning, RelaxZeroVectorWarning 40 from pipe_control import molmol, pipes 41 from pipe_control.interatomic import interatomic_loop 42 from pipe_control.mol_res_spin import create_spin, exists_mol_res_spin_data, generate_spin_id_unique, linear_ave, return_spin, spin_loop 43 from pipe_control.structure.mass import pipe_centre_of_mass 44 from status import Status; status = Status() 45 from target_functions.ens_pivot_finder import Pivot_finder 46 47

48 -def add_atom(mol_name=None, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):

49 """Add a new atom to the structural data object. 50 51 @keyword mol_name: The name of the molecule. 52 @type mol_name: str 53 @keyword atom_name: The atom name, e.g. 'H1'. 54 @type atom_name: str or None 55 @keyword res_name: The residue name. 56 @type res_name: str or None 57 @keyword res_num: The residue number. 58 @type res_num: int or None 59 @keyword pos: The position vector of coordinates. If a rank-2 array is supplied, the length of the first dimension must match the number of models. 60 @type pos: rank-1 or rank-2 array or list of float 61 @keyword element: The element symbol. 62 @type element: str or None 63 @keyword atom_num: The atom number. 64 @type atom_num: int or None 65 @keyword chain_id: The chain identifier. 66 @type chain_id: str or None 67 @keyword segment_id: The segment identifier. 68 @type segment_id: str or None 69 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'. 70 @type pdb_record: str or None 71 """ 72 73 # Test if the current data pipe exists. 74 pipes.test() 75 76 # Place the structural object into the relax data store if needed. 77 if not hasattr(cdp, 'structure'): 78 cdp.structure = Internal() 79 80 # Add the atoms. 81 cdp.structure.add_atom(mol_name=mol_name, atom_name=atom_name, res_name=res_name, res_num=res_num, pos=pos, element=element, atom_num=atom_num, chain_id=chain_id, segment_id=segment_id, pdb_record=pdb_record)

82 83

84 -def add_model(model_num=None):

85 """Add a new model to the empty structural data object.""" 86 87 # Test if the current data pipe exists. 88 pipes.test() 89 90 # Place the structural object into the relax data store if needed. 91 if not hasattr(cdp, 'structure'): 92 cdp.structure = Internal() 93 94 # Check the structural object is empty. 95 if cdp.structure.num_molecules() != 0: 96 raise RelaxError("The internal structural object is not empty.") 97 98 # Add a model. 99 cdp.structure.structural_data.add_item(model_num=model_num) 100 print("Created the empty model number %s." % model_num)

101 102

103 -def connect_atom(index1=None, index2=None):

104 """Connect two atoms. 105 106 @keyword index1: The global index of the first atom. 107 @type index1: str 108 @keyword index2: The global index of the first atom. 109 @type index2: str 110 """ 111 112 # Test if the current data pipe exists. 113 pipes.test() 114 115 # Place the structural object into the relax data store if needed. 116 if not hasattr(cdp, 'structure'): 117 cdp.structure = Internal() 118 119 # Add the atoms. 120 cdp.structure.connect_atom(index1=index1, index2=index2)

121 122

123 -def create_cone_pdb(mol=None, cone=None, start_res=1, apex=None, axis=None, R=None, inc=None, scale=30.0, distribution='regular', file=None, dir=None, force=False, axis_flag=True):

124 """Create a PDB representation of the given cone object. 125 126 @keyword mol: The molecule container. 127 @type mol: MolContainer instance 128 @keyword cone: The cone object. This should provide the limit_check() method with determines the limits of the distribution accepting two arguments, the polar angle phi and the azimuthal angle theta, and return True if the point is in the limits or False if outside. It should also provide the theta_max() method for returning the theta value for the given phi, the phi_max() method for returning the phi value for the given theta. 129 @type cone: class instance 130 @keyword start_res: The starting residue number. 131 @type start_res: str 132 @keyword apex: The apex of the cone. 133 @type apex: rank-1, 3D numpy array 134 @keyword axis: The central axis of the cone. If not supplied, the z-axis will be used. 135 @type axis: rank-1, 3D numpy array 136 @keyword R: The rotation matrix. 137 @type R: rank-2, 3D numpy array 138 @keyword inc: The increment number used to determine the number of latitude and longitude lines. 139 @type inc: int 140 @keyword scale: The scaling factor to stretch the unit cone by. 141 @type scale: float 142 @keyword distribution: The type of point distribution to use. This can be 'uniform' or 'regular'. 143 @type distribution: str 144 @keyword file: The name of the PDB file to create. 145 @type file: str 146 @keyword dir: The name of the directory to place the PDB file into. 147 @type dir: str 148 @keyword force: Flag which if set to True will overwrite any pre-existing file. 149 @type force: bool 150 @keyword axis_flag: A flag which if True will create the cone's axis. 151 @type axis_flag: bool 152 """ 153 154 # No molecule supplied. 155 if mol == None: 156 # Create the structural object. 157 structure = Internal() 158 159 # Add a molecule. 160 structure.add_molecule(name='cone') 161 162 # Alias the single molecule from the single model. 163 mol = structure.structural_data[0].mol[0] 164 165 # Create the object. 166 cone(mol=mol, cone=cone, start_res=start_res, apex=apex, axis=axis, R=R, inc=inc, scale=scale, distribution=distribution, axis_flag=axis_flag) 167 168 # Create the PDB file. 169 if file != None: 170 print("\nGenerating the PDB file.") 171 pdb_file = open_write_file(file_name=file, dir=dir, force=force) 172 structure.write_pdb(pdb_file) 173 pdb_file.close() 174 175 # Add the file to the results file list. 176 if not hasattr(cdp, 'result_files'): 177 cdp.result_files = [] 178 cdp.result_files.append(['cone_pdb', 'Cone PDB', get_file_path(file, dir)]) 179 status.observers.result_file.notify()

180 181

182 -def create_diff_tensor_pdb(scale=1.8e-6, file=None, dir=None, force=False):

183 """Create the PDB representation of the diffusion tensor. 184 185 @keyword scale: The scaling factor for the diffusion tensor. 186 @type scale: float 187 @keyword file: The name of the PDB file to create. 188 @type file: str 189 @keyword dir: The name of the directory to place the PDB file into. 190 @type dir: str 191 @keyword force: Flag which if set to True will overwrite any pre-existing file. 192 @type force: bool 193 """ 194 195 # Test if the current data pipe exists. 196 pipes.test() 197 198 # Calculate the centre of mass. 199 com = pipe_centre_of_mass() 200 201 # Create the structural object. 202 structure = Internal() 203 204 # Create an array of data pipes to loop over (hybrid support). 205 if cdp.pipe_type == 'hybrid': 206 pipe_list = cdp.hybrid_pipes 207 else: 208 pipe_list = [pipes.cdp_name()] 209 210 # The molecule names. 211 if cdp.pipe_type == 'hybrid': 212 mol_names = [] 213 for pipe in pipe_list: 214 mol_names.append('diff_tensor_' % pipe) 215 else: 216 mol_names = ['diff_tensor'] 217 218 # Loop over the pipes. 219 for pipe_index in range(len(pipe_list)): 220 # Get the pipe container. 221 pipe = pipes.get_pipe(pipe_list[pipe_index]) 222 223 # Test if the diffusion tensor data is loaded. 224 if not hasattr(pipe, 'diff_tensor'): 225 raise RelaxNoTensorError('diffusion') 226 227 # Test if a structure has been loaded. 228 if not hasattr(cdp, 'structure'): 229 raise RelaxNoPdbError 230 231 # Add a new structure. 232 structure.add_molecule(name=mol_names[pipe_index]) 233 234 # Alias the single molecule from the single model. 235 mol = structure.get_molecule(mol_names[pipe_index]) 236 237 # The diffusion tensor type. 238 diff_type = pipe.diff_tensor.type 239 if diff_type == 'spheroid': 240 diff_type = pipe.diff_tensor.spheroid_type 241 242 # Simulation info. 243 sim_num = None 244 if hasattr(pipe.diff_tensor, 'tm_sim'): 245 # The number. 246 sim_num = len(pipe.diff_tensor.tm_sim) 247 248 # Tensor axes. 249 axes = [] 250 sim_axes = [] 251 if diff_type in ['oblate', 'prolate']: 252 axes.append(pipe.diff_tensor.Dpar * pipe.diff_tensor.Dpar_unit) 253 sim_axes.append([]) 254 if sim_num != None: 255 for i in range(sim_num): 256 sim_axes[0].append(pipe.diff_tensor.Dpar_sim[i] * pipe.diff_tensor.Dpar_unit_sim[i]) 257 258 if diff_type == 'ellipsoid': 259 axes.append(pipe.diff_tensor.Dx * pipe.diff_tensor.Dx_unit) 260 axes.append(pipe.diff_tensor.Dy * pipe.diff_tensor.Dy_unit) 261 axes.append(pipe.diff_tensor.Dz * pipe.diff_tensor.Dz_unit) 262 sim_axes.append([]) 263 sim_axes.append([]) 264 sim_axes.append([]) 265 if sim_num != None: 266 for i in range(sim_num): 267 sim_axes[0].append(pipe.diff_tensor.Dx_sim[i] * pipe.diff_tensor.Dx_unit_sim[i]) 268 sim_axes[1].append(pipe.diff_tensor.Dy_sim[i] * pipe.diff_tensor.Dy_unit_sim[i]) 269 sim_axes[2].append(pipe.diff_tensor.Dz_sim[i] * pipe.diff_tensor.Dz_unit_sim[i]) 270 271 # Create the object. 272 diffusion_tensor(mol=mol, tensor=pipe.diff_tensor.tensor, tensor_diag=pipe.diff_tensor.tensor_diag, diff_type=diff_type, rotation=pipe.diff_tensor.rotation, axes=axes, sim_axes=sim_axes, com=com, scale=scale) 273 274 275 # Create the PDB file. 276 ###################### 277 278 # Print out. 279 print("\nGenerating the PDB file.") 280 281 # Create the PDB file. 282 tensor_pdb_file = open_write_file(file, dir, force=force) 283 structure.write_pdb(tensor_pdb_file) 284 tensor_pdb_file.close() 285 286 # Add the file to the results file list. 287 if not hasattr(cdp, 'result_files'): 288 cdp.result_files = [] 289 if dir == None: 290 dir = getcwd() 291 cdp.result_files.append(['diff_tensor_pdb', 'Diffusion tensor PDB', get_file_path(file, dir)]) 292 status.observers.result_file.notify()

293 294

295 -def delete(atom_id=None):

296 """Delete structural data. 297 298 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. If not given, then all structural data will be deleted. 299 @type atom_id: str or None 300 """ 301 302 # Test if the current data pipe exists. 303 pipes.test() 304 305 # Run the object method. 306 if hasattr(cdp, 'structure'): 307 print("Deleting structural data from the current pipe.") 308 cdp.structure.delete(atom_id=atom_id) 309 else: 310 print("No structures are present.") 311 312 # Then remove any spin specific structural info. 313 print("Deleting all spin specific structural info.") 314 for spin in spin_loop(selection=atom_id): 315 # Delete positional information. 316 if hasattr(spin, 'pos'): 317 del spin.pos 318 319 # Then remove any interatomic vector structural info. 320 print("Deleting all interatomic vectors.") 321 for interatom in interatomic_loop(selection1=atom_id): 322 # Delete bond vectors. 323 if hasattr(interatom, 'vector'): 324 del interatom.vector

325 326

327 -def displacement(model_from=None, model_to=None, atom_id=None, centroid=None):

328 """Calculate the rotational and translational displacement between two structural models. 329 330 @keyword model_from: The optional model number for the starting position of the displacement. 331 @type model_from: int or None 332 @keyword model_to: The optional model number for the ending position of the displacement. 333 @type model_to: int or None 334 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. 335 @type atom_id: str or None 336 @keyword centroid: An alternative position of the centroid, used for studying pivoted systems. 337 @type centroid: list of float or numpy rank-1, 3D array 338 """ 339 340 # Test if the current data pipe exists. 341 pipes.test() 342 343 # Convert the model_from and model_to args to lists, is supplied. 344 if model_from != None: 345 model_from = [model_from] 346 if model_to != None: 347 model_to = [model_to] 348 349 # Create a list of all models. 350 models = [] 351 for model in cdp.structure.model_loop(): 352 models.append(model.num) 353 354 # Set model_from or model_to to all models if None. 355 if model_from == None: 356 model_from = models 357 if model_to == None: 358 model_to = models 359 360 # Initialise the data structure. 361 if not hasattr(cdp.structure, 'displacements'): 362 cdp.structure.displacements = Displacements() 363 364 # Loop over the starting models. 365 for i in range(len(model_from)): 366 # Assemble the atomic coordinates. 367 coord_from = [] 368 for pos in cdp.structure.atom_loop(atom_id=atom_id, model_num=model_from[i], pos_flag=True): 369 coord_from.append(pos[0]) 370 371 # Loop over the ending models. 372 for j in range(len(model_to)): 373 # Assemble the atomic coordinates. 374 coord_to = [] 375 for pos in cdp.structure.atom_loop(atom_id=atom_id, model_num=model_to[j], pos_flag=True): 376 coord_to.append(pos[0]) 377 378 # Send to the base container for the calculations. 379 cdp.structure.displacements._calculate(model_from=model_from[i], model_to=model_to[j], coord_from=array(coord_from), coord_to=array(coord_to), centroid=centroid)

380 381

382 -def find_pivot(models=None, atom_id=None, init_pos=None, func_tol=1e-5, box_limit=200):

383 """Superimpose a set of structural models. 384 385 @keyword models: The list of models to use. If set to None, then all models will be used. 386 @type models: list of int or None 387 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. 388 @type atom_id: str or None 389 @keyword init_pos: The starting pivot position for the pivot point optimisation. 390 @type init_pos: list of float or numpy rank-1, 3D array 391 @keyword func_tol: The function tolerance which, when reached, terminates optimisation. Setting this to None turns of the check. 392 @type func_tol: None or float 393 @keyword box_limit: The simplex optimisation used in this function is constrained withing a box of +/- x Angstrom containing the pivot point using the logarithmic barrier function. This argument is the value of x. 394 @type box_limit: int 395 """ 396 397 # Test if the current data pipe exists. 398 pipes.test() 399 400 # Initialised the starting position if needed. 401 if init_pos == None: 402 init_pos = zeros(3, float64) 403 init_pos = array(init_pos) 404 405 # Validate the models. 406 cdp.structure.validate_models() 407 408 # Create a list of all models. 409 if models == None: 410 models = [] 411 for model in cdp.structure.model_loop(): 412 models.append(model.num) 413 414 # Assemble the atomic coordinates of all models. 415 coord = [] 416 for model in models: 417 coord.append([]) 418 for pos in cdp.structure.atom_loop(atom_id=atom_id, model_num=model, pos_flag=True): 419 coord[-1].append(pos[0]) 420 coord[-1] = array(coord[-1]) 421 coord = array(coord) 422 423 # Linear constraints for the pivot position (between -1000 and 1000 Angstrom). 424 A = zeros((6, 3), float64) 425 b = zeros(6, float64) 426 for i in range(3): 427 A[2*i, i] = 1 428 A[2*i+1, i] = -1 429 b[2*i] = -box_limit 430 b[2*i+1] = -box_limit 431 432 # The target function. 433 finder = Pivot_finder(models, coord) 434 results = generic_minimise(func=finder.func, x0=init_pos, min_algor='Log barrier', min_options=('simplex',), A=A, b=b, func_tol=func_tol, print_flag=1) 435 436 # No result. 437 if results == None: 438 return 439 440 # Store the data. 441 cdp.structure.pivot = results 442 443 # Print out. 444 print("Motional pivot found at: %s" % results)

445 446

447 -def get_pos(spin_id=None, str_id=None, ave_pos=False):

448 """Load the spins from the structural object into the relax data store. 449 450 @keyword spin_id: The molecule, residue, and spin identifier string. 451 @type spin_id: str 452 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number. 453 @type str_id: int or str 454 @keyword ave_pos: A flag specifying if the average atom position or the atom position from all loaded structures is loaded into the SpinContainer. 455 @type ave_pos: bool 456 """ 457 458 # Test if the current data pipe exists. 459 pipes.test() 460 461 # Test if the structure exists. 462 if not hasattr(cdp, 'structure') or not cdp.structure.num_models() or not cdp.structure.num_molecules(): 463 raise RelaxNoPdbError 464 465 # Loop over all atoms of the spin_id selection. 466 data = [] 467 for mol_name, res_num, res_name, atom_num, atom_name, element, pos in cdp.structure.atom_loop(atom_id=spin_id, str_id=str_id, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True, ave=ave_pos): 468 # Remove the '+' regular expression character from the mol, res, and spin names! 469 if mol_name and search('\+', mol_name): 470 mol_name = mol_name.replace('+', '') 471 if res_name and search('\+', res_name): 472 res_name = res_name.replace('+', '') 473 if atom_name and search('\+', atom_name): 474 atom_name = atom_name.replace('+', '') 475 476 # The spin identification string. 477 id = generate_spin_id_unique(res_num=res_num, res_name=None, spin_num=atom_num, spin_name=atom_name) 478 479 # Get the spin container. 480 spin_cont = return_spin(id) 481 482 # Skip the spin if it doesn't exist. 483 if spin_cont == None: 484 continue 485 486 # Add the position vector to the spin container. 487 spin_cont.pos = pos 488 489 # Store the data for a printout at the end. 490 data.append([id, repr(pos)]) 491 492 # No positions found. 493 if not len(data): 494 raise RelaxError("No positional information matching the spin ID '%s' could be found." % spin_id) 495 496 # Update pseudo-atoms. 497 for spin in spin_loop(): 498 if hasattr(spin, 'members'): 499 # Get the spin positions. 500 positions = [] 501 for atom in spin.members: 502 # Get the spin container. 503 subspin = return_spin(atom) 504 505 # Test that the spin exists. 506 if subspin == None: 507 raise RelaxNoSpinError(atom) 508 509 # Test the position. 510 if not hasattr(subspin, 'pos') or subspin.pos == None or not len(subspin.pos): 511 raise RelaxError("Positional information is not available for the atom '%s'." % atom) 512 513 # Alias the position. 514 pos = subspin.pos 515 516 # Convert to a list of lists if not already. 517 multi_model = True 518 if type(pos[0]) in [float, float64]: 519 multi_model = False 520 pos = [pos] 521 522 # Store the position. 523 positions.append([]) 524 for i in range(len(pos)): 525 positions[-1].append(pos[i].tolist()) 526 527 # The averaging. 528 if spin.averaging == 'linear': 529 # Average pos. 530 ave = linear_ave(positions) 531 532 # Convert to the correct structure. 533 if multi_model: 534 spin.pos = ave 535 else: 536 spin.pos = ave[0] 537 538 # Print out. 539 write_data(out=sys.stdout, headings=["Spin_ID", "Position"], data=data)

540 541

542 -def load_spins(spin_id=None, str_id=None, mol_name_target=None, ave_pos=False):

543 """Load the spins from the structural object into the relax data store. 544 545 @keyword spin_id: The molecule, residue, and spin identifier string. 546 @type spin_id: str 547 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number. 548 @type str_id: int or str 549 @keyword mol_name_target: The name of target molecule container, overriding the name of the loaded structures 550 @type mol_name_target: str or None 551 @keyword ave_pos: A flag specifying if the average atom position or the atom position from all loaded structures is loaded into the SpinContainer. 552 @type ave_pos: bool 553 """ 554 555 # Test if the current data pipe exists. 556 pipes.test() 557 558 # Test if the structure exists. 559 if not hasattr(cdp, 'structure') or not cdp.structure.num_models() or not cdp.structure.num_molecules(): 560 raise RelaxNoPdbError 561 562 # Print out. 563 print("Adding the following spins to the relax data store.\n") 564 565 # Init the data for printing out. 566 mol_names = [] 567 res_nums = [] 568 res_names = [] 569 spin_nums = [] 570 spin_names = [] 571 572 # Loop over all atoms of the spin_id selection. 573 for mol_name, res_num, res_name, atom_num, atom_name, element, pos in cdp.structure.atom_loop(atom_id=spin_id, str_id=str_id, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True, ave=ave_pos): 574 # Override the molecule name. 575 if mol_name_target: 576 mol_name = mol_name_target 577 578 # Remove the '+' regular expression character from the mol, res, and spin names! 579 if mol_name and search('\+', mol_name): 580 mol_name = mol_name.replace('+', '') 581 if res_name and search('\+', res_name): 582 res_name = res_name.replace('+', '') 583 if atom_name and search('\+', atom_name): 584 atom_name = atom_name.replace('+', '') 585 586 # Generate a spin ID for the current atom. 587 id = generate_spin_id_unique(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=atom_num, spin_name=atom_name) 588 589 # Create the spin. 590 try: 591 spin_cont = create_spin(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=atom_num, spin_name=atom_name) 592 593 # Otherwise, get the spin container. 594 except RelaxError: 595 spin_cont = return_spin(id) 596 597 # Append all the spin ID info for printing later. 598 if mol_name_target: 599 mol_names.append(mol_name_target) 600 else: 601 mol_names.append(mol_name) 602 res_nums.append(res_num) 603 res_names.append(res_name) 604 spin_nums.append(atom_num) 605 spin_names.append(atom_name) 606 607 # Position vector. 608 spin_cont.pos = pos 609 610 # Add the element. 611 spin_cont.element = element 612 613 # Catch no data. 614 if len(mol_names) == 0: 615 warn(RelaxWarning("No spins matching the '%s' ID string could be found." % spin_id)) 616 return 617 618 # Print out. 619 write_spin_data(file=sys.stdout, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names)

620 621

622 -def read_gaussian(file=None, dir=None, set_mol_name=None, set_model_num=None, verbosity=1, fail=True):

623 """Read structures from a Gaussian log file. 624 625 626 @keyword file: The name of the Gaussian log file to read. 627 @type file: str 628 @keyword dir: The directory where the Gaussian log file is located. If set to None, then the file will be searched for in the current directory. 629 @type dir: str or None 630 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 631 @type set_mol_name: None, str, or list of str 632 @keyword set_model_num: Set the model number of the loaded molecule. 633 @type set_model_num: None, int, or list of int 634 @keyword fail: A flag which, if True, will cause a RelaxError to be raised if the Gaussian log file does not exist. If False, then a RelaxWarning will be trown instead. 635 @type fail: bool 636 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 637 @type verbosity: int 638 @raise RelaxFileError: If the fail flag is set, then a RelaxError is raised if the Gaussian log file does not exist. 639 """ 640 641 # Test if the current data pipe exists. 642 pipes.test() 643 644 # The file path. 645 file_path = get_file_path(file, dir) 646 647 # Try adding '.log' to the end of the file path, if the file can't be found. 648 if not access(file_path, F_OK): 649 file_path_orig = file_path 650 file_path = file_path + '.log' 651 652 # Test if the file exists. 653 if not access(file_path, F_OK): 654 if fail: 655 raise RelaxFileError('Gaussian log', file_path_orig) 656 else: 657 warn(RelaxNoPDBFileWarning(file_path)) 658 return 659 660 # Place the structural object into the relax data store. 661 if not hasattr(cdp, 'structure'): 662 cdp.structure = Internal() 663 664 # Load the structures. 665 cdp.structure.load_gaussian(file_path, set_mol_name=set_mol_name, set_model_num=set_model_num, verbosity=verbosity)

666 667

668 -def read_pdb(file=None, dir=None, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, alt_loc=None, verbosity=1, merge=False, fail=True):

669 """The PDB loading function. 670 671 @keyword file: The name of the PDB file to read. 672 @type file: str 673 @keyword dir: The directory where the PDB file is located. If set to None, then the file will be searched for in the current directory. 674 @type dir: str or None 675 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are numbered consecutively from 1. If set to None, then all molecules will be loaded. 676 @type read_mol: None, int, or list of int 677 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 678 @type set_mol_name: None, str, or list of str 679 @keyword read_model: The PDB model to extract from the file. If set to None, then all models will be loaded. 680 @type read_model: None, int, or list of int 681 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the PDB model numbers will be preserved, if they exist. 682 @type set_model_num: None, int, or list of int 683 @keyword alt_loc: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use. 684 @type alt_loc: str or None 685 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 686 @type verbosity: int 687 @keyword merge: A flag which if set to True will try to merge the PDB structure into the currently loaded structures. 688 @type merge: bool 689 @keyword fail: A flag which, if True, will cause a RelaxError to be raised if the PDB file does not exist. If False, then a RelaxWarning will be trown instead. 690 @type fail: bool 691 @raise RelaxFileError: If the fail flag is set, then a RelaxError is raised if the PDB file does not exist. 692 """ 693 694 # Test if the current data pipe exists. 695 pipes.test() 696 697 # The file path. 698 file_path = get_file_path(file, dir) 699 700 # Try adding file extensions to the end of the file path, if the file can't be found. 701 file_path_orig = file_path 702 if not access(file_path, F_OK): 703 # List of possible extensions. 704 for ext in ['.pdb', '.gz', '.pdb.gz', '.bz2', '.pdb.bz2']: 705 # Add the extension if the file can be found. 706 if access(file_path+ext, F_OK): 707 file_path = file_path + ext 708 709 # Test if the file exists. 710 if not access(file_path, F_OK): 711 if fail: 712 raise RelaxFileError('PDB', file_path_orig) 713 else: 714 warn(RelaxNoPDBFileWarning(file_path)) 715 return 716 717 # Place the internal structural object into the relax data store. 718 if not hasattr(cdp, 'structure'): 719 cdp.structure = Internal() 720 721 # Load the structures. 722 cdp.structure.load_pdb(file_path, read_mol=read_mol, set_mol_name=set_mol_name, read_model=read_model, set_model_num=set_model_num, alt_loc=alt_loc, verbosity=verbosity, merge=merge) 723 724 # Load into Molmol (if running). 725 molmol.molmol_obj.open_pdb()

726 727

728 -def read_xyz(file=None, dir=None, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, verbosity=1, fail=True):

729 """The XYZ loading function. 730 731 732 @keyword file: The name of the XYZ file to read. 733 @type file: str 734 @keyword dir: The directory where the XYZ file is located. If set to None, then the 735 file will be searched for in the current directory. 736 @type dir: str or None 737 @keyword read_mol: The molecule(s) to read from the file, independent of model. 738 If set to None, then all molecules will be loaded. 739 @type read_mol: None, int, or list of int 740 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then 741 the molecules will be automatically labelled based on the file name or 742 other information. 743 @type set_mol_name: None, str, or list of str 744 @keyword read_model: The XYZ model to extract from the file. If set to None, then all models 745 will be loaded. 746 @type read_model: None, int, or list of int 747 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the 748 XYZ model numbers will be preserved, if they exist. 749 @type set_model_num: None, int, or list of int 750 @keyword fail: A flag which, if True, will cause a RelaxError to be raised if the XYZ 751 file does not exist. If False, then a RelaxWarning will be trown 752 instead. 753 @type fail: bool 754 @keyword verbosity: The amount of information to print to screen. Zero corresponds to 755 minimal output while higher values increase the amount of output. The 756 default value is 1. 757 @type verbosity: int 758 @raise RelaxFileError: If the fail flag is set, then a RelaxError is raised if the XYZ file 759 does not exist. 760 """ 761 762 # Test if the current data pipe exists. 763 pipes.test() 764 765 # The file path. 766 file_path = get_file_path(file, dir) 767 768 # Try adding '.xyz' to the end of the file path, if the file can't be found. 769 if not access(file_path, F_OK): 770 file_path_orig = file_path 771 file_path = file_path + '.xyz' 772 773 # Test if the file exists. 774 if not access(file_path, F_OK): 775 if fail: 776 raise RelaxFileError('XYZ', file_path_orig) 777 else: 778 warn(RelaxNoPDBFileWarning(file_path)) 779 return 780 781 # Place the structural object into the relax data store. 782 if not hasattr(cdp, 'structure'): 783 cdp.structure = Internal() 784 785 # Load the structures. 786 cdp.structure.load_xyz(file_path, read_mol=read_mol, set_mol_name=set_mol_name, read_model=read_model, set_model_num=set_model_num, verbosity=verbosity)

787 788

789 -def rmsd(atom_id=None, models=None):

790 """Calculate the RMSD between the loaded models. 791 792 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 793 @type atom_id: str or None 794 @keyword models: The list of models to calculate the RMDS of. If set to None, then all models will be used. 795 @type models: list of int or None 796 @return: The RMSD value. 797 @rtype: float 798 """ 799 800 # Test if the current data pipe exists. 801 pipes.test() 802 803 # Create a list of all models. 804 if models == None: 805 models = [] 806 for model in cdp.structure.model_loop(): 807 models.append(model.num) 808 809 # Assemble the atomic coordinates of all models. 810 coord = [] 811 for model in models: 812 coord.append([]) 813 for pos in cdp.structure.atom_loop(atom_id=atom_id, model_num=model, pos_flag=True): 814 coord[-1].append(pos[0]) 815 coord[-1] = array(coord[-1]) 816 817 # Calculate the RMSD. 818 cdp.structure.rmsd = atomic_rmsd(coord, verbosity=1) 819 820 # Return the RMSD. 821 return cdp.structure.rmsd

822 823

824 -def rotate(R=None, origin=None, model=None, atom_id=None):

825 """Rotate the structural data about the origin by the specified forwards rotation. 826 827 @keyword R: The forwards rotation matrix. 828 @type R: numpy 3D, rank-2 array or a 3x3 list of floats 829 @keyword origin: The origin of the rotation. If not supplied, the origin will be set to [0, 0, 0]. 830 @type origin: numpy 3D, rank-1 array or list of len 3 or None 831 @keyword model: The model to rotate. If None, all models will be rotated. 832 @type model: int 833 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 834 @type atom_id: str or None 835 """ 836 837 # Test if the current data pipe exists. 838 pipes.test() 839 840 # Test if the structure exists. 841 if not hasattr(cdp, 'structure') or not cdp.structure.num_models() or not cdp.structure.num_molecules(): 842 raise RelaxNoPdbError 843 844 # Set the origin if not supplied. 845 if origin == None: 846 origin = [0, 0, 0] 847 848 # Convert the args to numpy float data structures. 849 R = array(R, float64) 850 origin = array(origin, float64) 851 852 # Call the specific code. 853 cdp.structure.rotate(R=R, origin=origin, model=model, atom_id=atom_id)

854 855

856 -def set_vector(spin=None, xh_vect=None):

857 """Place the XH unit vector into the spin container object. 858 859 @keyword spin: The spin container object. 860 @type spin: SpinContainer instance 861 @keyword xh_vect: The unit vector parallel to the XH bond. 862 @type xh_vect: array of len 3 863 """ 864 865 # Place the XH unit vector into the container. 866 spin.xh_vect = xh_vect

867 868

869 -def superimpose(models=None, method='fit to mean', atom_id=None, centroid=None):

870 """Superimpose a set of structural models. 871 872 @keyword models: The list of models to superimpose. If set to None, then all models will be used. 873 @type models: list of int or None 874 @keyword method: The superimposition method. It must be one of 'fit to mean' or 'fit to first'. 875 @type method: str 876 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. 877 @type atom_id: str or None 878 @keyword centroid: An alternative position of the centroid to allow for different superpositions, for example of pivot point motions. 879 @type centroid: list of float or numpy rank-1, 3D array 880 """ 881 882 # Check the method. 883 allowed = ['fit to mean', 'fit to first'] 884 if method not in allowed: 885 raise RelaxError("The superimposition method '%s' is unknown. It must be one of %s." % (method, allowed)) 886 887 # Test if the current data pipe exists. 888 pipes.test() 889 890 # Validate the models. 891 cdp.structure.validate_models() 892 893 # Create a list of all models. 894 if models == None: 895 models = [] 896 for model in cdp.structure.model_loop(): 897 models.append(model.num) 898 899 # Assemble the atomic coordinates of all models. 900 coord = [] 901 for model in models: 902 coord.append([]) 903 for pos in cdp.structure.atom_loop(atom_id=atom_id, model_num=model, pos_flag=True): 904 coord[-1].append(pos[0]) 905 coord[-1] = array(coord[-1]) 906 907 # The different algorithms. 908 if method == 'fit to mean': 909 T, R, pivot = fit_to_mean(models=models, coord=coord, centroid=centroid) 910 elif method == 'fit to first': 911 T, R, pivot = fit_to_first(models=models, coord=coord, centroid=centroid) 912 913 914 # Update to the new coordinates. 915 for i in range(len(models)): 916 # Translate the molecule first (the rotational pivot is defined in the first model). 917 translate(T=T[i], model=models[i]) 918 919 # Rotate the molecule. 920 rotate(R=R[i], origin=pivot[i], model=models[i])

921 922

923 -def translate(T=None, model=None, atom_id=None):

924 """Shift the structural data by the specified translation vector. 925 926 @keyword T: The translation vector. 927 @type T: numpy rank-1, 3D array or list of float 928 @keyword model: The model to translate. If None, all models will be rotated. 929 @type model: int or None 930 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 931 @type atom_id: str or None 932 """ 933 934 # Test if the current data pipe exists. 935 pipes.test() 936 937 # Test if the structure exists. 938 if not hasattr(cdp, 'structure') or not cdp.structure.num_models() or not cdp.structure.num_molecules(): 939 raise RelaxNoPdbError 940 941 # Convert the args to numpy float data structures. 942 T = array(T, float64) 943 944 # Call the specific code. 945 cdp.structure.translate(T=T, model=model, atom_id=atom_id)

946 947

948 -def vectors(spin_id1=None, spin_id2=None, model=None, verbosity=1, ave=True, unit=True):

949 """Extract the bond vectors from the loaded structures and store them in the spin container. 950 951 @keyword spin_id1: The spin identifier string of the first spin of the pair. 952 @type spin_id1: str 953 @keyword spin_id2: The spin identifier string of the second spin of the pair. 954 @type spin_id2: str 955 @keyword model: The model to extract the vector from. If None, all vectors will be extracted. 956 @type model: str 957 @keyword verbosity: The higher the value, the more information is printed to screen. 958 @type verbosity: int 959 @keyword ave: A flag which if True will cause the average of all vectors to be extracted. 960 @type ave: bool 961 @keyword unit: A flag which if True will cause the function to calculate the unit vectors. 962 @type unit: bool 963 """ 964 965 # Test if the current data pipe exists. 966 pipes.test() 967 968 # Test if the PDB file has been loaded. 969 if not hasattr(cdp, 'structure'): 970 raise RelaxNoPdbError 971 972 # Test if sequence data is loaded. 973 if not exists_mol_res_spin_data(): 974 raise RelaxNoSequenceError 975 976 # Print out. 977 if verbosity: 978 # Number of models. 979 num_models = cdp.structure.num_models() 980 981 # Multiple models loaded. 982 if num_models > 1: 983 if model: 984 print("Extracting vectors for model '%s'." % model) 985 else: 986 print("Extracting vectors for all %s models." % num_models) 987 if ave: 988 print("Averaging all vectors.") 989 990 # Single model loaded. 991 else: 992 print("Extracting vectors from the single model.") 993 994 # Unit vectors. 995 if unit: 996 print("Calculating the unit vectors.") 997 998 # Loop over the spins. 999 no_vectors = True 1000 for spin, mol_name, res_num, res_name in spin_loop(selection=spin_id, full_info=True): 1001 # Skip deselected spins. 1002 if not spin.select: 1003 continue 1004 1005 # The spin identification string. The residue name and spin num is not included to allow molecules with point mutations to be used as different models. 1006 id = generate_spin_id_unique(res_num=res_num, res_name=None, spin_name=spin.name, spin_num=spin.num) 1007 1008 # Test that the spin number or name are set (one or both are essential for the identification of the atom). 1009 if spin.num == None and spin.name == None: 1010 warn(RelaxWarning("Either the spin number or name must be set for the spin " + repr(id) + " to identify the corresponding atom in the molecule.")) 1011 continue 1012 1013 # The bond vector already exists. 1014 if hasattr(spin, 'vector'): 1015 obj = getattr(spin, 'vector') 1016 if obj != None: 1017 warn(RelaxWarning("The bond vector for the spin " + repr(id) + " already exists.")) 1018 continue 1019 1020 # Get the bond info. 1021 bond_vectors, attached_name, warnings = cdp.structure.bond_vectors(attached_atom=attached, model_num=model, res_num=res_num, spin_name=spin.name, spin_num=spin.num, return_name=True, return_warnings=True) 1022 id2 = generate_spin_id_unique(res_num=res_num, res_name=None, spin_name=spin.name) 1023 1024 # No attached atom. 1025 if not bond_vectors: 1026 # Warning messages. 1027 if warnings: 1028 warn(RelaxWarning(warnings + " (atom ID " + repr(id) + ").")) 1029 1030 # Skip the spin. 1031 continue 1032 1033 # Set the attached atom name. 1034 if not hasattr(spin, 'attached_atom'): 1035 spin.attached_atom = attached_name 1036 elif spin.attached_atom != attached_name: 1037 raise RelaxError("The " + repr(spin.attached_atom) + " atom already attached to the spin does not match the attached atom " + repr(attached_name) + ".") 1038 1039 # Initialise the average vector. 1040 if ave: 1041 ave_vector = zeros(3, float64) 1042 1043 # Loop over the individual vectors. 1044 for i in range(len(bond_vectors)): 1045 # Unit vector. 1046 if unit: 1047 # Normalisation factor. 1048 norm_factor = norm(bond_vectors[i]) 1049 1050 # Test for zero length. 1051 if norm_factor == 0.0: 1052 warn(RelaxZeroVectorWarning(spin_id1=id, spin_id2=id2)) 1053 1054 # Calculate the normalised vector. 1055 else: 1056 bond_vectors[i] = bond_vectors[i] / norm_factor 1057 1058 # Sum the vectors. 1059 if ave: 1060 ave_vector = ave_vector + bond_vectors[i] 1061 1062 # Average. 1063 if ave: 1064 vector = ave_vector / float(len(bond_vectors)) 1065 else: 1066 vector = bond_vectors 1067 1068 # Convert to a single vector if needed. 1069 if len(vector) == 1: 1070 vector = vector[0] 1071 1072 # Set the vector. 1073 setattr(spin, 'vector', vector) 1074 1075 # We have a vector! 1076 no_vectors = False 1077 1078 # Print out of modified spins. 1079 if verbosity: 1080 # The number of vectors. 1081 num = len(bond_vectors) 1082 plural = 's' 1083 if num == 1: 1084 plural = '' 1085 1086 if spin.name: 1087 print("Extracted %s %s-%s vector%s for the spin '%s'." % (num, spin.name, attached_name, plural, id)) 1088 else: 1089 print("Extracted %s %s-%s vector%s for the spin '%s'." % (num, spin.num, attached_name, plural, id)) 1090 1091 # Right, catch the problem of missing vectors to prevent massive user confusion! 1092 if no_vectors: 1093 raise RelaxError("No vectors could be extracted.")

1094 1095

1096 -def web_of_motion(file=None, dir=None, models=None, force=False):

1097 """Create a PDB representation of the motion between a set of models. 1098 1099 This will create a PDB file containing the atoms of all models, with identical atoms links using CONECT records. This function only supports the internal structural object. 1100 1101 @keyword file: The name of the PDB file to write. 1102 @type file: str 1103 @keyword dir: The directory where the PDB file will be placed. If set to None, then the file will be placed in the current directory. 1104 @type dir: str or None 1105 @keyword models: The optional list of models to restrict this to. 1106 @type models: list of int or None 1107 @keyword force: The force flag which if True will cause the file to be overwritten. 1108 @type force: bool 1109 """ 1110 1111 # Test if the current data pipe exists. 1112 pipes.test() 1113 1114 # Test if the structure exists. 1115 if not hasattr(cdp, 'structure') or not cdp.structure.num_models() or not cdp.structure.num_molecules(): 1116 raise RelaxNoPdbError 1117 1118 # Validate the models. 1119 cdp.structure.validate_models() 1120 1121 # Initialise the structural object. 1122 web = Internal() 1123 1124 # The model list. 1125 if models == None: 1126 models = [] 1127 for k in range(len(cdp.structure.structural_data)): 1128 models.append(cdp.structure.structural_data[k].num) 1129 1130 # Loop over the molecules. 1131 for i in range(len(cdp.structure.structural_data[0].mol)): 1132 # Alias the molecule of the first model. 1133 mol1 = cdp.structure.structural_data[0].mol[i] 1134 1135 # Loop over the atoms. 1136 for j in range(len(mol1.atom_name)): 1137 # Loop over the models. 1138 for k in range(len(cdp.structure.structural_data)): 1139 # Skip the model. 1140 if cdp.structure.structural_data[k].num not in models: 1141 continue 1142 1143 # Alias. 1144 mol = cdp.structure.structural_data[k].mol[i] 1145 1146 # Add the atom. 1147 web.add_atom(mol_name=mol1.mol_name, atom_name=mol.atom_name[j], res_name=mol.res_name[j], res_num=mol.res_num[j], pos=[mol.x[j], mol.y[j], mol.z[j]], element=mol.element[j], chain_id=mol.chain_id[j], segment_id=mol.seg_id[j], pdb_record=mol.pdb_record[j]) 1148 1149 # Loop over the models again, this time twice. 1150 for k in range(len(models)): 1151 for l in range(len(models)): 1152 # Skip identical atoms. 1153 if k == l: 1154 continue 1155 1156 # The atom index. 1157 index1 = j*len(models) + k 1158 index2 = j*len(models) + l 1159 1160 # Connect to the previous atoms. 1161 web.connect_atom(mol_name=mol1.mol_name, index1=index1, index2=index2) 1162 1163 # Append the PDB extension if needed. 1164 if isinstance(file, str): 1165 # The file path. 1166 file = get_file_path(file, dir) 1167 1168 # Add '.pdb' to the end of the file path if it isn't there yet. 1169 if not search(".pdb$", file): 1170 file += '.pdb' 1171 1172 # Open the file for writing. 1173 file = open_write_file(file, force=force) 1174 1175 # Write the structure. 1176 web.write_pdb(file)

1177 1178

1179 -def write_pdb(file=None, dir=None, model_num=None, compress_type=0, force=False):

1180 """The PDB writing function. 1181 1182 @keyword file: The name of the PDB file to write. This can also be a file instance. 1183 @type file: str or file instance 1184 @keyword dir: The directory where the PDB file will be placed. If set to None, then the file will be placed in the current directory. 1185 @type dir: str or None 1186 @keyword model_num: The model to place into the PDB file. If not supplied, then all models will be placed into the file. 1187 @type model_num: None or int 1188 @keyword compress_type: The compression type. The integer values correspond to the compression type: 0, no compression; 1, Bzip2 compression; 2, Gzip compression. 1189 @type compress_type: int 1190 @keyword force: The force flag which if True will cause the file to be overwritten. 1191 @type force: bool 1192 """ 1193 1194 # Test if the current data pipe exists. 1195 pipes.test() 1196 1197 # Check if the structural object exists. 1198 if not hasattr(cdp, 'structure'): 1199 raise RelaxError("No structural data is present in the current data pipe.") 1200 1201 # Path handling. 1202 if isinstance(file, str): 1203 # The file path. 1204 file = get_file_path(file, dir) 1205 1206 # Add '.pdb' to the end of the file path if it isn't there yet. 1207 if not search(".pdb$", file): 1208 file = file + '.pdb' 1209 1210 # Open the file for writing. 1211 file = open_write_file(file, compress_type=compress_type, force=force) 1212 1213 # Write the structures. 1214 cdp.structure.write_pdb(file, model_num=model_num)

1215

Source Code for Module pipe_control.structure.main