pipe_control.structure.main

61 """Add a new atom to the structural data object. 62 63 @keyword mol_name: The name of the molecule. 64 @type mol_name: str 65 @keyword atom_name: The atom name, e.g. 'H1'. 66 @type atom_name: str or None 67 @keyword res_name: The residue name. 68 @type res_name: str or None 69 @keyword res_num: The residue number. 70 @type res_num: int or None 71 @keyword pos: The position vector of coordinates. If a rank-2 array is supplied, the length of the first dimension must match the number of models. 72 @type pos: rank-1 or rank-2 array or list of float 73 @keyword element: The element symbol. 74 @type element: str or None 75 @keyword atom_num: The atom number. 76 @type atom_num: int or None 77 @keyword chain_id: The chain identifier. 78 @type chain_id: str or None 79 @keyword segment_id: The segment identifier. 80 @type segment_id: str or None 81 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'. 82 @type pdb_record: str or None 83 """ 84 85 # Test if the current data pipe exists. 86 check_pipe() 87 88 # Place the structural object into the relax data store if needed. 89 if not hasattr(cdp, 'structure'): 90 cdp.structure = Internal() 91 92 # Add the atoms. 93 cdp.structure.add_atom(mol_name=mol_name, atom_name=atom_name, res_name=res_name, res_num=res_num, pos=pos, element=element, atom_num=atom_num, chain_id=chain_id, segment_id=segment_id, pdb_record=pdb_record, sort=True)

116 """Assemble the common atomic coordinates taking sequence alignments into account. 117 118 @keyword pipes: The data pipes to assemble the coordinates from. 119 @type pipes: None or list of str 120 @keyword models: The list of models for each data pipe. The number of elements must match the pipes argument. If set to None, then all models will be used. 121 @type models: None or list of lists of int 122 @keyword molecules: The list of molecules for each data pipe. The number of elements must match the pipes argument. 123 @type molecules: None or list of lists of str 124 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. 125 @type atom_id: str or None 126 @keyword lists: A flag which if true will cause the object ID list per molecule, the model number list per molecule, and the molecule name list per molecule to also be returned. 127 @type lists: bool 128 @return: The array of atomic coordinates (first dimension is the model and/or molecule, the second are the atoms, and the third are the coordinates); a list of unique IDs for each structural object, model, and molecule; the common list of molecule names; the common list of residue names; the common list of residue numbers; the common list of atom names; the common list of element names. 129 @rtype: numpy rank-3 float64 array, list of str, list of str, list of str, list of int, list of str, list of str 130 """ 131 132 # Assemble the structural objects. 133 objects, object_names, pipes = assemble_structural_objects(pipes=pipes, models=models, molecules=molecules) 134 135 # Assemble the atomic coordinates of all molecules. 136 ids, object_id_list, model_list, molecule_list, atom_pos, mol_names, res_names, res_nums, atom_names, elements, one_letter_codes, num_mols = assemble_atomic_coordinates(objects=objects, object_names=object_names, molecules=molecules, models=models, atom_id=atom_id) 137 138 # No data. 139 if mol_names == []: 140 if atom_id != None: 141 raise RelaxError("No structural data matching the atom ID string '%s' can be found." % atom_id) 142 else: 143 raise RelaxError("No structural data can be found.") 144 145 # Are all molecules the same? 146 same_mol = True 147 for mol in molecule_list: 148 if mol != molecule_list[0]: 149 same_mol = False 150 151 # Handle sequence alignments - retrieve the alignment. 152 align = None 153 if hasattr(ds, 'sequence_alignments'): 154 align = ds.sequence_alignments.find_alignment(object_ids=object_id_list, models=model_list, molecules=molecule_list, sequences=one_letter_codes) 155 if align != None: 156 # Printout. 157 print("\nSequence alignment found - common atoms will be determined based on this MSA:") 158 for i in range(len(align.object_ids)): 159 print(align.strings[i]) 160 161 # Alias the required data structures. 162 strings = align.strings 163 gaps = align.gaps 164 165 # Handle sequence alignments - no alignment required. 166 elif len(objects) == 1 and same_mol: 167 # Printout. 168 print("\nSequence alignment disabled as only models with identical molecule, residue and atomic sequences are being superimposed.") 169 170 # Set the one letter codes to be the alignment strings. 171 strings = one_letter_codes 172 173 # Create an empty gap data structure. 174 gaps = [] 175 for mol_index in range(num_mols): 176 gaps.append([]) 177 for i in range(len(one_letter_codes[mol_index])): 178 gaps[mol_index].append(0) 179 180 # Handle sequence alignments - fall back alignment based on residue numbering. 181 else: 182 # Printout. 183 print("\nSequence alignment cannot be found - falling back to a residue number based alignment.") 184 185 # Convert the residue number data structure. 186 res_num_list = [] 187 for mol_index in range(num_mols): 188 res_num_list.append([]) 189 for i in range(len(one_letter_codes[mol_index])): 190 key = list(res_nums[mol_index][i].keys())[0] 191 res_num_list[mol_index].append(res_nums[mol_index][i][key]) 192 193 # Sequence alignment. 194 strings, gaps = msa_residue_numbers(one_letter_codes, residue_numbers=res_num_list) 195 196 # Create the residue skipping data structure. 197 skip = msa_residue_skipping(strings=strings, gaps=gaps) 198 199 # Assemble and return the atomic coordinates and common atom information. 200 coord, mol_name_common, res_name_common, res_num_common, atom_name_common, element_common = assemble_coord_array(atom_pos=atom_pos, mol_names=mol_names, res_names=res_names, res_nums=res_nums, atom_names=atom_names, elements=elements, sequences=one_letter_codes, skip=skip) 201 if lists: 202 return coord, ids, object_id_list, model_list, molecule_list, mol_name_common, res_name_common, res_num_common, atom_name_common, element_common 203 else: 204 return coord, ids, mol_name_common, res_name_common, res_num_common, atom_name_common, element_common

208 """Assemble the atomic coordinates. 209 210 @keyword pipes: The data pipes to assemble the coordinates from. 211 @type pipes: None or list of str 212 @keyword models: The list of models for each data pipe. The number of elements must match the pipes argument. If set to None, then all models will be used. 213 @type models: None or list of lists of int 214 @keyword molecules: The list of molecules for each data pipe. The number of elements must match the pipes argument. 215 @type molecules: None or list of lists of str 216 @return: The structural objects, structural object names, and data pipes list. 217 @rtype: list of lib.structure.internal.object.Internal instances, list of str, list of str 218 """ 219 220 # The data pipes to use. 221 if pipes == None: 222 pipes = [cdp_name()] 223 num_pipes = len(pipes) 224 225 # Checks. 226 for pipe in pipes: 227 check_pipe(pipe) 228 229 # Check the models and molecules arguments. 230 if models != None: 231 if len(models) != num_pipes: 232 raise RelaxError("The %i elements of the models argument does not match the %i data pipes." % (len(models), num_pipes)) 233 if molecules != None: 234 if len(molecules) != num_pipes: 235 raise RelaxError("The %i elements of the molecules argument does not match the %i data pipes." % (len(molecules), num_pipes)) 236 237 # Assemble the structural objects. 238 objects = [] 239 object_names = [] 240 for pipe_index in range(len(pipes)): 241 dp = get_pipe(pipes[pipe_index]) 242 objects.append(dp.structure) 243 object_names.append(pipes[pipe_index]) 244 245 # Return the structural objects, object names, and the new pipes list. 246 return objects, object_names, pipes

250 """Write out a correlation matrix of pairwise interatomic distance fluctuations between different structures. 251 252 @keyword pipes: The data pipes to generate the interatomic distance fluctuation correlation matrix for. 253 @type pipes: None or list of str 254 @keyword models: The list of models to generate the interatomic distance fluctuation correlation matrix for. The number of elements must match the pipes argument. If set to None, then all models will be used. 255 @type models: None or list of lists of int 256 @keyword molecules: The list of molecules to generate the interatomic distance fluctuation correlation matrix for. The number of elements must match the pipes argument. 257 @type molecules: None or list of lists of str 258 @keyword atom_id: The atom identification string of the coordinates of interest. This matches the spin ID string format. 259 @type atom_id: str or None 260 @keyword measure: The type of fluctuation to measure. This can be either 'distance' or 'angle'. 261 @type measure: str 262 @keyword file: The name of the file to write. 263 @type file: str 264 @keyword format: The output format. This can be set to "text" for text file output, or "gnuplot" for creating a gnuplot script. 265 @type format: str 266 @keyword dir: The directory where the file will be placed. If set to None, then the file will be placed in the current directory. 267 @type dir: str or None 268 @keyword force: The force flag which if True will cause the file to be overwritten. 269 @type force: bool 270 """ 271 272 # Checks. 273 check_pipe() 274 check_structure() 275 allowed_measures = ['distance', 'angle', 'parallax shift'] 276 if measure not in allowed_measures: 277 raise RelaxError("The measure '%s' must be one of %s." % (measure, allowed_measures)) 278 279 # Assemble the structural coordinates. 280 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id) 281 282 # The number of dimensions. 283 n = len(atom_names) 284 m = len(coord) 285 286 # Check that more than one structure is present. 287 if not m > 1: 288 raise RelaxError("Two or more structures are required.") 289 290 # The labels as spin ID strings. 291 labels = [] 292 for i in range(n): 293 labels.append(generate_spin_id_unique(mol_name=mol_names[i], res_num=res_nums[i], res_name=res_names[i], spin_name=atom_names[i])) 294 295 # Initialise the SD matrix and other structures. 296 matrix = zeros((n, n), float64) 297 dist = zeros(m, float64) 298 vectors = zeros((m, 3), float64) 299 parallax_vectors = zeros((m, 3), float64) 300 angles = zeros(m, float64) 301 302 # Generate the pairwise distance SD matrix. 303 if measure == 'distance': 304 for i in range(n): 305 for j in range(n): 306 # Only calculate the upper triangle to avoid duplicate calculations. 307 if j > i: 308 continue 309 310 # The interatomic distances between each structure. 311 for k in range(m): 312 dist[k] = norm(coord[k, i] - coord[k, j]) 313 314 # Calculate and store the corrected sample standard deviation. 315 matrix[i, j] = matrix[j, i] = std(dist, ddof=1) 316 317 # Generate the pairwise angle SD matrix. 318 elif measure == 'angle': 319 # Loop over the atom pairs. 320 for i in range(n): 321 for j in range(n): 322 # Only calculate the upper triangle to avoid duplicate calculations. 323 if j > i: 324 continue 325 326 # The interatomic vectors between each structure. 327 for k in range(m): 328 vectors[k] = coord[k, i] - coord[k, j] 329 330 # The average vector. 331 ave_vect = average(vectors, axis=0) 332 333 # The intervector angles. 334 for k in range(m): 335 angles[k] = vector_angle_atan2(ave_vect, vectors[k]) 336 337 # Calculate and store the corrected sample standard deviation. 338 matrix[i, j] = matrix[j, i] = std(angles, ddof=1) 339 340 # Generate the pairwise parallax shift SD matrix. 341 elif measure == 'parallax shift': 342 # Loop over the atom pairs. 343 for i in range(n): 344 for j in range(n): 345 # Only calculate the upper triangle to avoid duplicate calculations. 346 if j > i: 347 continue 348 349 # The interatomic vectors between each structure. 350 for k in range(m): 351 vectors[k] = coord[k, i] - coord[k, j] 352 353 # The average vector. 354 ave_vect = average(vectors, axis=0) 355 356 # Catch the zero vector. 357 length = norm(ave_vect) 358 if length == 0.0: 359 matrix[i, j] = matrix[j, i] = 0.0 360 continue 361 362 # The unit average vector. 363 unit = ave_vect / length 364 365 # The parallax shift. 366 for k in range(m): 367 # The projection onto the average vector. 368 proj = dot(vectors[k], unit) * unit 369 370 # The distance shift. 371 dist[k] = norm(vectors[k] - proj) 372 373 # Calculate and store the corrected sample standard deviation. 374 matrix[i, j] = matrix[j, i] = std(dist, ddof=1) 375 376 # Call the plotting API. 377 correlation_matrix(format=format, matrix=matrix, labels=labels, file=file, dir=dir, force=force)

381 """Calculate a mean structure. 382 383 @keyword pipes: The data pipes containing structures to average. 384 @type pipes: None or list of str 385 @keyword models: The list of models for each data pipe. The number of elements must match the pipes argument. If set to None, then all models will be used. 386 @type models: None or list of lists of int 387 @keyword molecules: The list of molecules for each data pipe. The number of elements must match the pipes argument. 388 @type molecules: None or list of lists of str 389 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. 390 @type atom_id: str or None 391 @keyword set_mol_name: The molecule name for the averaged molecule. 392 @type set_mol_name: None or str 393 @keyword set_model_num: The model number for the averaged molecule. 394 @type set_model_num: None or int 395 """ 396 397 # Checks. 398 check_pipe() 399 400 # Assemble the structural coordinates. 401 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id) 402 403 # Calculate the mean structure. 404 struct = mean(coord, axis=0) 405 406 # Place the structural object into the relax data store if needed. 407 if not hasattr(cdp, 'structure'): 408 cdp.structure = Internal() 409 410 # Store the data. 411 cdp.structure.add_coordinates(coord=struct, mol_names=mol_names, res_names=res_names, res_nums=res_nums, atom_names=atom_names, elements=elements, set_mol_name=set_mol_name, set_model_num=set_model_num)

454 """Create the PDB representation of the diffusion tensor. 455 456 @keyword scale: The scaling factor for the diffusion tensor. 457 @type scale: float 458 @keyword file: The name of the PDB file to create. 459 @type file: str 460 @keyword dir: The name of the directory to place the PDB file into. 461 @type dir: str 462 @keyword force: Flag which if set to True will overwrite any pre-existing file. 463 @type force: bool 464 """ 465 466 # Test if the current data pipe exists. 467 check_pipe() 468 469 # Calculate the centre of mass. 470 if hasattr(cdp, 'structure') and not cdp.structure.empty(): 471 com = pipe_centre_of_mass() 472 else: 473 com = zeros(3, float64) 474 475 # Create the structural object. 476 structure = Internal() 477 478 # Create an array of data pipes to loop over (hybrid support). 479 if cdp.pipe_type == 'hybrid': 480 pipe_list = cdp.hybrid_pipes 481 else: 482 pipe_list = [pipes.cdp_name()] 483 484 # The molecule names. 485 if cdp.pipe_type == 'hybrid': 486 mol_names = [] 487 for pipe in pipe_list: 488 mol_names.append('diff_tensor_' % pipe) 489 else: 490 mol_names = ['diff_tensor'] 491 492 # Loop over the pipes. 493 for pipe_index in range(len(pipe_list)): 494 # Get the pipe container. 495 pipe = pipes.get_pipe(pipe_list[pipe_index]) 496 497 # Test if the diffusion tensor data is loaded. 498 if not hasattr(pipe, 'diff_tensor'): 499 raise RelaxNoTensorError('diffusion') 500 501 # Add a new structure. 502 structure.add_molecule(name=mol_names[pipe_index]) 503 504 # Alias the single molecule from the single model. 505 mol = structure.get_molecule(mol_names[pipe_index]) 506 507 # The diffusion tensor type. 508 diff_type = pipe.diff_tensor.type 509 if diff_type == 'spheroid': 510 diff_type = pipe.diff_tensor.spheroid_type 511 512 # Simulation info. 513 sim_num = None 514 if hasattr(pipe.diff_tensor, 'tm_sim'): 515 # The number. 516 sim_num = len(pipe.diff_tensor.tm_sim) 517 518 # Tensor axes. 519 axes = [] 520 sim_axes = [] 521 if diff_type in ['oblate', 'prolate']: 522 axes.append(pipe.diff_tensor.Dpar * pipe.diff_tensor.Dpar_unit) 523 sim_axes.append([]) 524 if sim_num != None: 525 for i in range(sim_num): 526 sim_axes[0].append(pipe.diff_tensor.Dpar_sim[i] * pipe.diff_tensor.Dpar_unit_sim[i]) 527 528 if diff_type == 'ellipsoid': 529 axes.append(pipe.diff_tensor.Dx * pipe.diff_tensor.Dx_unit) 530 axes.append(pipe.diff_tensor.Dy * pipe.diff_tensor.Dy_unit) 531 axes.append(pipe.diff_tensor.Dz * pipe.diff_tensor.Dz_unit) 532 sim_axes.append([]) 533 sim_axes.append([]) 534 sim_axes.append([]) 535 if sim_num != None: 536 for i in range(sim_num): 537 sim_axes[0].append(pipe.diff_tensor.Dx_sim[i] * pipe.diff_tensor.Dx_unit_sim[i]) 538 sim_axes[1].append(pipe.diff_tensor.Dy_sim[i] * pipe.diff_tensor.Dy_unit_sim[i]) 539 sim_axes[2].append(pipe.diff_tensor.Dz_sim[i] * pipe.diff_tensor.Dz_unit_sim[i]) 540 541 # Create the object. 542 diffusion_tensor(mol=mol, tensor=pipe.diff_tensor.tensor, tensor_diag=pipe.diff_tensor.tensor_diag, diff_type=diff_type, rotation=pipe.diff_tensor.rotation, axes=axes, sim_axes=sim_axes, com=com, scale=scale) 543 544 545 # Create the PDB file. 546 ###################### 547 548 # Print out. 549 print("\nGenerating the PDB file.") 550 551 # Create the PDB file. 552 tensor_pdb_file = open_write_file(file, dir, force=force) 553 structure.write_pdb(tensor_pdb_file) 554 tensor_pdb_file.close() 555 556 # Add the file to the results file list. 557 if not hasattr(cdp, 'result_files'): 558 cdp.result_files = [] 559 if dir == None: 560 dir = getcwd() 561 cdp.result_files.append(['diff_tensor_pdb', 'Diffusion tensor PDB', get_file_path(file, dir)]) 562 status.observers.result_file.notify()

566 """Delete structural data. 567 568 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. If not given, then all structural data will be deleted. 569 @type atom_id: str or None 570 @keyword model: Individual structural models from a loaded ensemble can be deleted by specifying the model number. 571 @type model: None or int 572 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 573 @type verbosity: int 574 @keyword spin_info: A flag which if True will cause all structural information in the spin containers and interatomic data containers to be deleted as well. If False, then only the 3D structural data will be deleted. 575 @type spin_info: bool 576 """ 577 578 # Test if the current data pipe exists. 579 check_pipe() 580 581 # Run the object method. 582 if hasattr(cdp, 'structure'): 583 if verbosity: 584 print("Deleting structural data from the current pipe.") 585 selection = None 586 if atom_id != None: 587 selection = cdp.structure.selection(atom_id=atom_id) 588 cdp.structure.delete(model=model, selection=selection, verbosity=verbosity) 589 elif verbosity: 590 print("No structures are present.") 591 592 # Skip the rest. 593 if not spin_info: 594 return 595 596 # Then remove any spin specific structural info. 597 if verbosity: 598 print("Deleting all spin specific structural info.") 599 for spin in spin_loop(selection=atom_id): 600 # Delete positional information. 601 if hasattr(spin, 'pos'): 602 del spin.pos 603 604 # Then remove any interatomic vector structural info. 605 if verbosity: 606 print("Deleting all interatomic vectors.") 607 for interatom in interatomic_loop(selection1=atom_id): 608 # Delete bond vectors. 609 if hasattr(interatom, 'vector'): 610 del interatom.vector

634 """Calculate the rotational and translational displacement between structures or models. 635 636 All results will be placed into the current data pipe cdp.structure.displacements data structure. 637 638 639 @keyword pipes: The data pipes to determine the displacements for. 640 @type pipes: None or list of str 641 @keyword models: The list of models to determine the displacements for. The number of elements must match the pipes argument. If set to None, then all models will be used. 642 @type models: None or list of lists of int 643 @keyword molecules: The list of molecules to determine the displacements for. The number of elements must match the pipes argument. 644 @type molecules: None or list of lists of str 645 @keyword atom_id: The atom identification string of the coordinates of interest. This matches the spin ID string format. 646 @type atom_id: str or None 647 @keyword centroid: An alternative position of the centroid, used for studying pivoted systems. 648 @type centroid: list of float or numpy rank-1, 3D array 649 """ 650 651 # Test if the current data pipe exists. 652 check_pipe() 653 654 # Assemble the structural coordinates. 655 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id) 656 657 # Initialise the data structure. 658 if not hasattr(cdp.structure, 'displacments'): 659 cdp.structure.displacements = Displacements() 660 661 # Double loop over all structures, sending the data to the base container for the calculations. 662 for i in range(len(ids)): 663 for j in range(len(ids)): 664 cdp.structure.displacements._calculate(id_from=ids[i], id_to=ids[j], coord_from=coord[i], coord_to=coord[j], centroid=centroid)

668 """Find the pivoted motion of a set of structural models or structures. 669 670 The pivot will be placed into the current data pipe cdp.structure.pivot data structure. 671 672 673 @keyword pipes: The data pipes to use in the motional pivot algorithm. 674 @type pipes: None or list of str 675 @keyword models: The list of models to use. The number of elements must match the pipes argument. If set to None, then all models will be used. 676 @type models: None or list of lists of int 677 @keyword molecules: The list of molecules to find the pivoted motion for. The number of elements must match the pipes argument. 678 @type molecules: None or list of lists of str 679 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. 680 @type atom_id: str or None 681 @keyword init_pos: The starting pivot position for the pivot point optimisation. 682 @type init_pos: list of float or numpy rank-1, 3D array 683 @keyword func_tol: The function tolerance which, when reached, terminates optimisation. Setting this to None turns of the check. 684 @type func_tol: None or float 685 @keyword box_limit: The simplex optimisation used in this function is constrained withing a box of +/- x Angstrom containing the pivot point using the logarithmic barrier function. This argument is the value of x. 686 @type box_limit: int 687 """ 688 689 # Test if the current data pipe exists. 690 check_pipe() 691 692 # Initialised the starting position if needed. 693 if init_pos == None: 694 init_pos = zeros(3, float64) 695 init_pos = array(init_pos) 696 697 # Assemble the structural coordinates. 698 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id) 699 700 # Linear constraints for the pivot position (between -1000 and 1000 Angstrom). 701 A = zeros((6, 3), float64) 702 b = zeros(6, float64) 703 for i in range(3): 704 A[2*i, i] = 1 705 A[2*i+1, i] = -1 706 b[2*i] = -box_limit 707 b[2*i+1] = -box_limit 708 709 # The target function. 710 finder = Pivot_finder(list(range(len(coord))), coord) 711 results = generic_minimise(func=finder.func, x0=init_pos, min_algor='Log barrier', min_options=('simplex',), A=A, b=b, func_tol=func_tol, print_flag=1) 712 713 # No result. 714 if results is None: 715 return 716 717 # Store the data. 718 cdp.structure.pivot = results 719 720 # Print out. 721 print("Motional pivot found at: %s" % results)

725 """Load the spins from the structural object into the relax data store. 726 727 @keyword spin_id: The molecule, residue, and spin identifier string. 728 @type spin_id: str 729 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number. 730 @type str_id: int or str 731 @keyword ave_pos: A flag specifying if the average atom position or the atom position from all loaded structures is loaded into the SpinContainer. 732 @type ave_pos: bool 733 """ 734 735 # Checks. 736 check_pipe() 737 check_structure() 738 739 # The selection object. 740 selection = cdp.structure.selection(atom_id=spin_id) 741 742 # Loop over all atoms of the spin_id selection. 743 data = [] 744 for mol_name, res_num, res_name, atom_num, atom_name, element, pos in cdp.structure.atom_loop(selection=selection, str_id=str_id, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True, ave=ave_pos): 745 # Remove the '+' regular expression character from the mol, res, and spin names! 746 if mol_name and search('\+', mol_name): 747 mol_name = mol_name.replace('+', '') 748 if res_name and search('\+', res_name): 749 res_name = res_name.replace('+', '') 750 if atom_name and search('\+', atom_name): 751 atom_name = atom_name.replace('+', '') 752 753 # The spin identification string. 754 id = generate_spin_id_unique(res_num=res_num, res_name=None, spin_num=atom_num, spin_name=atom_name) 755 756 # Get the spin container. 757 spin_cont = return_spin(id) 758 759 # Skip the spin if it doesn't exist. 760 if spin_cont == None: 761 continue 762 763 # Add the position vector to the spin container. 764 spin_cont.pos = pos 765 766 # Store the data for a printout at the end. 767 data.append([id, repr(pos)]) 768 769 # No positions found. 770 if not len(data): 771 raise RelaxError("No positional information matching the spin ID '%s' could be found." % spin_id) 772 773 # Update pseudo-atoms. 774 for spin in spin_loop(): 775 if hasattr(spin, 'members'): 776 # Get the spin positions. 777 positions = [] 778 for atom in spin.members: 779 # Get the spin container. 780 subspin = return_spin(atom) 781 782 # Test that the spin exists. 783 if subspin == None: 784 raise RelaxNoSpinError(atom) 785 786 # Test the position. 787 if not hasattr(subspin, 'pos') or subspin.pos is None or not len(subspin.pos): 788 raise RelaxError("Positional information is not available for the atom '%s'." % atom) 789 790 # Alias the position. 791 pos = subspin.pos 792 793 # Convert to a list of lists if not already. 794 multi_model = True 795 if type(pos[0]) in [float, float64]: 796 multi_model = False 797 pos = [pos] 798 799 # Store the position. 800 positions.append([]) 801 for i in range(len(pos)): 802 positions[-1].append(pos[i].tolist()) 803 804 # The averaging. 805 if spin.averaging == 'linear': 806 # Average pos. 807 ave = linear_ave(positions) 808 809 # Convert to the correct structure. 810 if multi_model: 811 spin.pos = ave 812 else: 813 spin.pos = ave[0] 814 815 # Print out. 816 write_data(out=sys.stdout, headings=["Spin_ID", "Position"], data=data)

820 """Load the spins from the structural object into the relax data store. 821 822 @keyword spin_id: The molecule, residue, and spin identifier string. 823 @type spin_id: str 824 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number. 825 @type str_id: int or str 826 @keyword from_mols: The list of similar, but not necessarily identical molecules to load spin information from. 827 @type from_mols: list of str or None 828 @keyword mol_name_target: The name of target molecule container, overriding the name of the loaded structures 829 @type mol_name_target: str or None 830 @keyword ave_pos: A flag specifying if the average atom position or the atom position from all loaded structures is loaded into the SpinContainer. 831 @type ave_pos: bool 832 @keyword spin_num: A flag specifying if the spin number should be loaded. 833 @type spin_num: bool 834 """ 835 836 # The multi-molecule case. 837 if from_mols != None: 838 load_spins_multi_mol(spin_id=spin_id, str_id=str_id, from_mols=from_mols, mol_name_target=mol_name_target, ave_pos=ave_pos, spin_num=spin_num) 839 return 840 841 # Checks. 842 check_pipe() 843 check_structure() 844 845 # Print out. 846 print("Adding the following spins to the relax data store.\n") 847 848 # Init the data for printing out. 849 mol_names = [] 850 res_nums = [] 851 res_names = [] 852 spin_nums = [] 853 spin_names = [] 854 855 # Loop over all atoms of the spin_id selection. 856 selection = cdp.structure.selection(atom_id=spin_id) 857 for mol_name, res_num, res_name, atom_num, atom_name, element, pos in cdp.structure.atom_loop(selection=selection, str_id=str_id, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True, ave=ave_pos): 858 # Override the molecule name. 859 if mol_name_target: 860 mol_name = mol_name_target 861 862 # No spin number. 863 if not spin_num: 864 atom_num = None 865 866 # Remove the '+' regular expression character from the mol, res, and spin names! 867 if mol_name and search('\+', mol_name): 868 mol_name = mol_name.replace('+', '') 869 if res_name and search('\+', res_name): 870 res_name = res_name.replace('+', '') 871 if atom_name and search('\+', atom_name): 872 atom_name = atom_name.replace('+', '') 873 874 # Generate a spin ID for the current atom. 875 id = generate_spin_id_unique(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=atom_num, spin_name=atom_name) 876 877 # Create the spin. 878 try: 879 spin_cont = create_spin(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=atom_num, spin_name=atom_name) 880 881 # Otherwise, get the spin container. 882 except RelaxError: 883 spin_cont = return_spin(id) 884 885 # Append all the spin ID info for printing later. 886 if mol_name_target: 887 mol_names.append(mol_name_target) 888 else: 889 mol_names.append(mol_name) 890 res_nums.append(res_num) 891 res_names.append(res_name) 892 spin_nums.append(atom_num) 893 spin_names.append(atom_name) 894 895 # Position vector. 896 if hasattr(spin_cont, 'pos') and spin_cont.pos != None and (spin_cont.pos.shape != pos.shape or (spin_cont.pos != pos).any()): 897 warn(RelaxWarning("Positional information already exists for the spin %s, appending the new positions." % id)) 898 spin_cont.pos = concatenate((spin_cont.pos, pos)) 899 else: 900 spin_cont.pos = pos 901 902 # Add the element. 903 spin_cont.element = element 904 905 # Catch no data. 906 if len(mol_names) == 0: 907 warn(RelaxWarning("No spins matching the '%s' ID string could be found." % spin_id)) 908 return 909 910 # Print out. 911 write_spin_data(file=sys.stdout, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names) 912 913 # Set the number of states for use in the specific analyses. 914 cdp.N = cdp.structure.num_models()

918 """Load the spins from the structural object into the relax data store. 919 920 @keyword spin_id: The molecule, residue, and spin identifier string. 921 @type spin_id: str 922 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number. 923 @type str_id: int or str 924 @keyword from_mols: The list of similar, but not necessarily identical molecules to load spin information from. 925 @type from_mols: list of str or None 926 @keyword mol_name_target: The name of target molecule container, overriding the name of the loaded structures 927 @type mol_name_target: str or None 928 @keyword ave_pos: A flag specifying if the average atom position or the atom position from all loaded structures is loaded into the SpinContainer. 929 @type ave_pos: bool 930 @keyword spin_num: A flag specifying if the spin number should be loaded. 931 @type spin_num: bool 932 """ 933 934 # Checks. 935 check_pipe() 936 check_structure() 937 938 # The target molecule name must be supplied. 939 if mol_name_target == None: 940 raise RelaxError("The target molecule name must be supplied when specifying multiple molecules to load spins from.") 941 942 # Disallow multiple structural models. 943 if cdp.structure.num_models() != 1: 944 raise RelaxError("Only a single structural model is allowed when specifying multiple molecules to load spins from.") 945 946 # Split up the selection string. 947 if spin_id: 948 mol_token, res_token, spin_token = tokenise(spin_id) 949 if mol_token != None: 950 raise RelaxError("The spin ID string cannot contain molecular information when specifying multiple molecules to load spins from.") 951 952 # Print out. 953 print("Adding the following spins to the relax data store.\n") 954 955 # Initialise the data structures. 956 ids = [] 957 res_nums = {} 958 res_names = {} 959 spin_names = {} 960 positions = {} 961 elements = {} 962 963 # Loop over all target molecules. 964 for mol_name in from_mols: 965 # Add the molecule name as a key for the positions structure, and initialise as a dictionary for the spin IDs. 966 positions[mol_name] = {} 967 968 # Create a new spin ID with the molecule name. 969 new_id = '#' + mol_name 970 if spin_id != None: 971 new_id += spin_id 972 973 # Loop over all atoms of the new spin ID selection. 974 selection = cdp.structure.selection(atom_id=new_id) 975 for res_num, res_name, atom_num, atom_name, element, pos in cdp.structure.atom_loop(selection=selection, str_id=str_id, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True, ave=ave_pos): 976 # Remove the '+' regular expression character from the res and atom names. 977 if res_name and search('\+', res_name): 978 res_name = res_name.replace('+', '') 979 if atom_name and search('\+', atom_name): 980 atom_name = atom_name.replace('+', '') 981 982 # No spin number. 983 if not spin_num: 984 atom_num = None 985 986 # Generate a spin ID for the current atom. 987 id = generate_spin_id_unique(mol_name=mol_name_target, res_num=res_num, res_name=res_name, spin_name=atom_name) 988 989 # Store the position info in all cases, collapsing list of lists into single lists when needed. 990 if is_float(pos[0]): 991 positions[mol_name][id] = pos 992 else: 993 positions[mol_name][id] = pos[0] 994 995 # Not a new ID. 996 if id in ids: 997 continue 998 999 # Store the ID, residue, spin, element and position info. 1000 ids.append(id) 1001 res_nums[id] = res_num 1002 res_names[id] = res_name 1003 spin_names[id] = atom_name 1004 elements[id] = element 1005 1006 # Catch no data. 1007 if len(ids) == 0: 1008 warn(RelaxWarning("No spins matching the '%s' ID string could be found." % spin_id)) 1009 return 1010 1011 # Create the spin containers. 1012 mol_names2 = [] 1013 res_nums2 = [] 1014 res_names2 = [] 1015 spin_names2 = [] 1016 for id in ids: 1017 # Fetch the spin. 1018 spin_cont = return_spin(id) 1019 1020 # Create the spin if it does not exist. 1021 if spin_cont == None: 1022 spin_cont = create_spin(mol_name=mol_name_target, res_num=res_nums[id], res_name=res_names[id], spin_name=spin_names[id]) 1023 1024 # Position vector. 1025 spin_cont.pos = [] 1026 for mol_name in from_mols: 1027 if id in positions[mol_name]: 1028 spin_cont.pos.append(positions[mol_name][id]) 1029 else: 1030 spin_cont.pos.append(None) 1031 1032 # Add the element. 1033 spin_cont.element = elements[id] 1034 1035 # Update the structures for the printout. 1036 mol_names2.append(mol_name_target) 1037 res_nums2.append(res_nums[id]) 1038 res_names2.append(res_names[id]) 1039 spin_names2.append(spin_names[id]) 1040 1041 # Print out. 1042 write_spin_data(file=sys.stdout, mol_names=mol_names2, res_nums=res_nums2, res_names=res_names2, spin_names=spin_names2) 1043 1044 # Set the number of states for use in the specific analyses. 1045 cdp.N = len(from_mols)

1049 """PC analysis of the motions between all the loaded models. 1050 1051 @keyword pipes: The data pipes to perform the PC analysis on. 1052 @type pipes: None or list of str 1053 @keyword models: The list of models to perform the PC analysis on. The number of elements must match the pipes argument. If set to None, then all models will be used. 1054 @type models: None or list of lists of int 1055 @keyword molecules: The list of molecules to perform the PC analysis on. The number of elements must match the pipes argument. 1056 @type molecules: None or list of lists of str 1057 @keyword obs_pipes: The data pipes in the PC analysis which will have zero weight. These structures are for comparison. 1058 @type obs_pipes: None or list of str 1059 @keyword obs_models: The list of models in the PC analysis which will have zero weight. These structures are for comparison. The number of elements must match the pipes argument. If set to None, then all models will be used. 1060 @type obs_models: None or list of lists of int 1061 @keyword obs_molecules: The list of molecules in the PC analysis which will have zero weight. These structures are for comparison. The number of elements must match the pipes argument. 1062 @type obs_molecules: None or list of lists of str 1063 @keyword atom_id: The atom identification string of the coordinates of interest. This matches the spin ID string format. 1064 @type atom_id: str or None 1065 @keyword algorithm: The PCA algorithm to use (either 'eigen' or 'svd'). 1066 @type algorithm: str 1067 @keyword num_modes: The number of PCA modes to calculate. 1068 @type num_modes: int 1069 @keyword format: The graph format to use. 1070 @type format: str 1071 @keyword dir: The optional directory to place the graphs into. 1072 @type dir: str 1073 """ 1074 1075 # Checks. 1076 check_pipe() 1077 1078 # Assemble the structural coordinates. 1079 coord, ids, object_id_list, model_list, molecule_list, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id, lists=True) 1080 1081 # Determine the structure weights. 1082 M = len(coord) 1083 if obs_pipes == None: 1084 obs_pipes = [] 1085 weights = ones(M, float64) 1086 for struct in range(M): 1087 # Is the structure in the 'observing' lists? 1088 for i in range(len(obs_pipes)): 1089 # Matching data pipe. 1090 if object_id_list[struct] == obs_pipes[i]: 1091 # Matching molecules. 1092 if obs_molecules == None or molecule_list[struct] == obs_molecules[i]: 1093 # Matching models. 1094 if obs_models == None or model_list[struct] == obs_models[i]: 1095 weights[struct] = 0.0 1096 1097 # Perform the PC analysis. 1098 print("\n\nStarting the PCA analysis.\n") 1099 values, vectors, proj = pca_analysis(coord=coord, weights=weights, algorithm=algorithm, num_modes=num_modes) 1100 1101 # Store the values. 1102 cdp.structure.pca_values = values 1103 cdp.structure.pca_vectors = vectors 1104 cdp.structure.pca_proj = proj 1105 1106 # Generate the graphs. 1107 for mode in range(num_modes - 1): 1108 # Assemble the data. 1109 data = [[[]]] 1110 current = None 1111 labels = [] 1112 for struct in range(M): 1113 # Create a unique ID for pipe and molecule name. 1114 id = "%s - %s" % (object_id_list[struct], molecule_list[struct]) 1115 if current == None: 1116 current = id 1117 labels.append(current) 1118 1119 # Start a new set. 1120 if current != id: 1121 data[-1].append([]) 1122 current = id 1123 labels.append(current) 1124 1125 # Add the projection. 1126 data[-1][-1].append([proj[mode, struct], proj[mode+1, struct]]) 1127 1128 # The number of graph sets. 1129 sets = len(labels) 1130 1131 # Open the file for writing. 1132 file = open_write_file("graph_pc%s_pc%s.agr" % (mode+1, mode+2), dir=dir, force=True) 1133 1134 # The header. 1135 write_xy_header(format=format, file=file, title="Principle component projections", sets=[sets], set_names=[labels], axis_labels=[['PC mode %i (\cE\C)' % (mode+1), 'PC mode %i (\cE\C)' % (mode+2)]], linestyle=[[0]*sets]) 1136 1137 # The data. 1138 write_xy_data(format=format, data=data, file=file, graph_type='xy') 1139 1140 # Close the file. 1141 file.close()

1145 """Read structures from a Gaussian log file. 1146 1147 1148 @keyword file: The name of the Gaussian log file to read. 1149 @type file: str 1150 @keyword dir: The directory where the Gaussian log file is located. If set to None, then the file will be searched for in the current directory. 1151 @type dir: str or None 1152 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 1153 @type set_mol_name: None, str, or list of str 1154 @keyword set_model_num: Set the model number of the loaded molecule. 1155 @type set_model_num: None, int, or list of int 1156 @keyword fail: A flag which, if True, will cause a RelaxError to be raised if the Gaussian log file does not exist. If False, then a RelaxWarning will be trown instead. 1157 @type fail: bool 1158 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 1159 @type verbosity: int 1160 @raise RelaxFileError: If the fail flag is set, then a RelaxError is raised if the Gaussian log file does not exist. 1161 """ 1162 1163 # Test if the current data pipe exists. 1164 check_pipe() 1165 1166 # The file path. 1167 file_path = get_file_path(file, dir) 1168 1169 # Try adding '.log' to the end of the file path, if the file can't be found. 1170 if not access(file_path, F_OK): 1171 file_path_orig = file_path 1172 file_path = file_path + '.log' 1173 1174 # Test if the file exists. 1175 if not access(file_path, F_OK): 1176 if fail: 1177 raise RelaxFileError('Gaussian log', file_path_orig) 1178 else: 1179 warn(RelaxNoPDBFileWarning(file_path)) 1180 return 1181 1182 # Place the structural object into the relax data store. 1183 if not hasattr(cdp, 'structure'): 1184 cdp.structure = Internal() 1185 1186 # Load the structures. 1187 cdp.structure.load_gaussian(file_path, set_mol_name=set_mol_name, set_model_num=set_model_num, verbosity=verbosity)

1191 """The PDB loading function. 1192 1193 @keyword file: The name of the PDB file to read. 1194 @type file: str 1195 @keyword dir: The directory where the PDB file is located. If set to None, then the file will be searched for in the current directory. 1196 @type dir: str or None 1197 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are numbered consecutively from 1. If set to None, then all molecules will be loaded. 1198 @type read_mol: None, int, or list of int 1199 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information. 1200 @type set_mol_name: None, str, or list of str 1201 @keyword read_model: The PDB model to extract from the file. If set to None, then all models will be loaded. 1202 @type read_model: None, int, or list of int 1203 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the PDB model numbers will be preserved, if they exist. 1204 @type set_model_num: None, int, or list of int 1205 @keyword alt_loc: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use. 1206 @type alt_loc: str or None 1207 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1. 1208 @type verbosity: int 1209 @keyword merge: A flag which if set to True will try to merge the PDB structure into the currently loaded structures. 1210 @type merge: bool 1211 @keyword fail: A flag which, if True, will cause a RelaxError to be raised if the PDB file does not exist. If False, then a RelaxWarning will be trown instead. 1212 @type fail: bool 1213 @raise RelaxFileError: If the fail flag is set, then a RelaxError is raised if the PDB file does not exist. 1214 """ 1215 1216 # Test if the current data pipe exists. 1217 check_pipe() 1218 1219 # The file path. 1220 file_path = get_file_path(file, dir) 1221 1222 # Try adding file extensions to the end of the file path, if the file can't be found. 1223 file_path_orig = file_path 1224 if not access(file_path, F_OK): 1225 # List of possible extensions. 1226 for ext in ['.pdb', '.gz', '.pdb.gz', '.bz2', '.pdb.bz2']: 1227 # Add the extension if the file can be found. 1228 if access(file_path+ext, F_OK): 1229 file_path = file_path + ext 1230 1231 # Test if the file exists. 1232 if not access(file_path, F_OK): 1233 if fail: 1234 raise RelaxFileError('PDB', file_path_orig) 1235 else: 1236 warn(RelaxNoPDBFileWarning(file_path)) 1237 return 1238 1239 # Place the internal structural object into the relax data store. 1240 if not hasattr(cdp, 'structure'): 1241 cdp.structure = Internal() 1242 1243 # Load the structures. 1244 cdp.structure.load_pdb(file_path, read_mol=read_mol, set_mol_name=set_mol_name, read_model=read_model, set_model_num=set_model_num, alt_loc=alt_loc, verbosity=verbosity, merge=merge) 1245 1246 # Load into Molmol (if running). 1247 molmol.molmol_obj.open_pdb()

1251 """The XYZ loading function. 1252 1253 1254 @keyword file: The name of the XYZ file to read. 1255 @type file: str 1256 @keyword dir: The directory where the XYZ file is located. If set to None, then the 1257 file will be searched for in the current directory. 1258 @type dir: str or None 1259 @keyword read_mol: The molecule(s) to read from the file, independent of model. 1260 If set to None, then all molecules will be loaded. 1261 @type read_mol: None, int, or list of int 1262 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then 1263 the molecules will be automatically labelled based on the file name or 1264 other information. 1265 @type set_mol_name: None, str, or list of str 1266 @keyword read_model: The XYZ model to extract from the file. If set to None, then all models 1267 will be loaded. 1268 @type read_model: None, int, or list of int 1269 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the 1270 XYZ model numbers will be preserved, if they exist. 1271 @type set_model_num: None, int, or list of int 1272 @keyword fail: A flag which, if True, will cause a RelaxError to be raised if the XYZ 1273 file does not exist. If False, then a RelaxWarning will be trown 1274 instead. 1275 @type fail: bool 1276 @keyword verbosity: The amount of information to print to screen. Zero corresponds to 1277 minimal output while higher values increase the amount of output. The 1278 default value is 1. 1279 @type verbosity: int 1280 @raise RelaxFileError: If the fail flag is set, then a RelaxError is raised if the XYZ file 1281 does not exist. 1282 """ 1283 1284 # Test if the current data pipe exists. 1285 check_pipe() 1286 1287 # The file path. 1288 file_path = get_file_path(file, dir) 1289 1290 # Try adding '.xyz' to the end of the file path, if the file can't be found. 1291 if not access(file_path, F_OK): 1292 file_path_orig = file_path 1293 file_path = file_path + '.xyz' 1294 1295 # Test if the file exists. 1296 if not access(file_path, F_OK): 1297 if fail: 1298 raise RelaxFileError('XYZ', file_path_orig) 1299 else: 1300 warn(RelaxNoPDBFileWarning(file_path)) 1301 return 1302 1303 # Place the structural object into the relax data store. 1304 if not hasattr(cdp, 'structure'): 1305 cdp.structure = Internal() 1306 1307 # Load the structures. 1308 cdp.structure.load_xyz(file_path, read_mol=read_mol, set_mol_name=set_mol_name, read_model=read_model, set_model_num=set_model_num, verbosity=verbosity)

1312 """Calculate the RMSD between the loaded models. 1313 1314 The RMSD value will be placed into the current data pipe cdp.structure.rmsd data structure. 1315 1316 1317 @keyword pipes: The data pipes to determine the RMSD for. 1318 @type pipes: None or list of str 1319 @keyword models: The list of models to determine the RMSD for. The number of elements must match the pipes argument. If set to None, then all models will be used. 1320 @type models: None or list of lists of int 1321 @keyword molecules: The list of molecules to determine the RMSD for. The number of elements must match the pipes argument. 1322 @type molecules: None or list of lists of str 1323 @keyword atom_id: The atom identification string of the coordinates of interest. This matches the spin ID string format. 1324 @type atom_id: str or None 1325 @keyword atomic: A flag which if True will allow for per-atom RMSDs to be additionally calculated. 1326 @type atomic: bool 1327 @return: The RMSD value. 1328 @rtype: float 1329 """ 1330 1331 # Test if the current data pipe exists. 1332 check_pipe() 1333 1334 # Assemble the structural coordinates. 1335 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id) 1336 1337 # Per-atom RMSDs. 1338 if atomic: 1339 # Printout. 1340 print("\nCalculating the atomic-level RMSDs.") 1341 1342 # Calculate the per-atom RMSDs. 1343 rmsd = per_atom_rmsd(coord, verbosity=0) 1344 1345 # Loop over the atoms. 1346 for i in range(len(res_nums)): 1347 # The spin identification string. 1348 id = generate_spin_id_unique(mol_name=mol_names[i], res_num=res_nums[i], res_name=res_names[i], spin_num=i, spin_name=atom_names[i]) 1349 1350 # Get the spin container. 1351 spin_cont = return_spin(id) 1352 1353 # Skip the spin if it doesn't exist. 1354 if spin_cont == None: 1355 continue 1356 1357 # Store the value. 1358 spin_cont.pos_rmsd = rmsd[i] 1359 1360 # Calculate the RMSD. 1361 print("\nCalculating the global RMSD.") 1362 cdp.structure.rmsd = atomic_rmsd(coord, verbosity=1) 1363 1364 # Return the global RMSD. 1365 return cdp.structure.rmsd

1369 """Rotate the structural data about the origin by the specified forwards rotation. 1370 1371 @keyword R: The forwards rotation matrix. 1372 @type R: numpy 3D, rank-2 array or a 3x3 list of floats 1373 @keyword origin: The origin of the rotation. If not supplied, the origin will be set to [0, 0, 0]. 1374 @type origin: numpy 3D, rank-1 array or list of len 3 or None 1375 @keyword model: The model to rotate. If None, all models will be rotated. 1376 @type model: int 1377 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 1378 @type atom_id: str or None 1379 @keyword pipe_name: The name of the data pipe containing the structures to translate. This defaults to the current data pipe. 1380 @type pipe_name: None or str 1381 """ 1382 1383 # Defaults. 1384 if pipe_name == None: 1385 pipe_name = cdp_name() 1386 1387 # Checks. 1388 check_pipe(pipe_name) 1389 check_structure(pipe_name) 1390 1391 # Get the data pipe. 1392 dp = get_pipe(pipe_name) 1393 1394 # Set the origin if not supplied. 1395 if origin is None: 1396 origin = [0, 0, 0] 1397 1398 # Convert the args to numpy float data structures. 1399 R = array(R, float64) 1400 origin = array(origin, float64) 1401 1402 # Call the specific code. 1403 selection = dp.structure.selection(atom_id=atom_id) 1404 dp.structure.rotate(R=R, origin=origin, model=model, selection=selection) 1405 1406 # Final printout. 1407 if model != None: 1408 print("Rotated %i atoms of model %i." % (selection.count_atoms(), model)) 1409 else: 1410 print("Rotated %i atoms." % selection.count_atoms())

1414 """Superimpose a set of related, but not identical structures. 1415 1416 @keyword pipes: The data pipes to include in the alignment and superimposition. 1417 @type pipes: None or list of str 1418 @keyword models: The list of models to for each data pipe superimpose. The number of elements must match the pipes argument. If set to None, then all models will be used. 1419 @type models: list of lists of int or None 1420 @keyword molecules: The molecule names to include in the alignment and superimposition. The number of elements must match the pipes argument. 1421 @type molecules: None or list of str 1422 @keyword msa_algorithm: The multiple sequence alignment (MSA) algorithm to use. 1423 @type msa_algorithm: str 1424 @keyword pairwise_algorithm: The pairwise sequence alignment algorithm to use. 1425 @type pairwise_algorithm: str 1426 @keyword matrix: The substitution matrix to use. 1427 @type matrix: str 1428 @keyword gap_open_penalty: The penalty for introducing gaps, as a positive number. 1429 @type gap_open_penalty: float 1430 @keyword gap_extend_penalty: The penalty for extending a gap, as a positive number. 1431 @type gap_extend_penalty: float 1432 @keyword end_gap_open_penalty: The optional penalty for opening a gap at the end of a sequence. 1433 @type end_gap_open_penalty: float 1434 @keyword end_gap_extend_penalty: The optional penalty for extending a gap at the end of a sequence. 1435 @type end_gap_extend_penalty: float 1436 """ 1437 1438 # Assemble the structural objects. 1439 objects, object_names, pipes = assemble_structural_objects(pipes=pipes, models=models, molecules=molecules) 1440 1441 # Assemble the atomic coordinates of all molecules. 1442 ids, object_id_list, model_list, molecule_list, atom_pos, mol_names, res_names, res_nums, atom_names, elements, one_letter_codes, num_mols = assemble_atomic_coordinates(objects=objects, object_names=object_names, molecules=molecules, models=models) 1443 1444 # Convert the residue number data structure. 1445 res_num_list = [] 1446 for mol_index in range(num_mols): 1447 res_num_list.append([]) 1448 for i in range(len(one_letter_codes[mol_index])): 1449 key = list(res_nums[mol_index][i].keys())[0] 1450 res_num_list[mol_index].append(res_nums[mol_index][i][key]) 1451 1452 # MSA. 1453 strings, gaps = msa_general(one_letter_codes, residue_numbers=res_num_list, msa_algorithm=msa_algorithm, pairwise_algorithm=pairwise_algorithm, matrix=matrix, gap_open_penalty=gap_open_penalty, gap_extend_penalty=gap_extend_penalty, end_gap_open_penalty=end_gap_open_penalty, end_gap_extend_penalty=end_gap_extend_penalty) 1454 1455 # Set up the data store object. 1456 if not hasattr(ds, 'sequence_alignments'): 1457 ds.sequence_alignments = Sequence_alignments() 1458 1459 # Set some unused arguments to None for storage. 1460 if msa_algorithm == 'residue number': 1461 pairwise_algorithm = None 1462 matrix = None 1463 gap_open_penalty = None 1464 gap_extend_penalty = None 1465 end_gap_open_penalty = None 1466 end_gap_extend_penalty = None 1467 1468 # Store the alignment. 1469 ds.sequence_alignments.add(object_ids=object_id_list, models=model_list, molecules=molecule_list, sequences=one_letter_codes, strings=strings, gaps=gaps, msa_algorithm=msa_algorithm, pairwise_algorithm=pairwise_algorithm, matrix=matrix, gap_open_penalty=gap_open_penalty, gap_extend_penalty=gap_extend_penalty, end_gap_open_penalty=end_gap_open_penalty, end_gap_extend_penalty=end_gap_extend_penalty)

1486 """Generator function for looping over all internal structural objects, models and molecules. 1487 1488 @keyword pipes: The data pipes to loop over. 1489 @type pipes: None or list of str 1490 @keyword models: The list of models for each data pipe. The number of elements must match the pipes argument. If set to None, then all models will be used. 1491 @type models: None or list of lists of int 1492 @keyword molecules: The list of molecules for each data pipe. The number of elements must match the pipes argument. 1493 @type molecules: None or list of lists of str 1494 @keyword atom_id: The molecule, residue, and atom identifier string of the coordinates of interest. This matches the spin ID string format. 1495 @type atom_id: None or str 1496 @return: The data pipe index, model number, and molecule name. 1497 @rtype: int, int or None, str 1498 """ 1499 1500 # The data pipes to use. 1501 if pipes == None: 1502 pipes = [cdp_name()] 1503 num_pipes = len(pipes) 1504 1505 # Checks. 1506 for pipe in pipes: 1507 check_pipe(pipe) 1508 1509 # Check the models and molecules arguments. 1510 if models != None: 1511 if len(models) != num_pipes: 1512 raise RelaxError("The %i elements of the models argument does not match the %i data pipes." % (len(models), num_pipes)) 1513 if molecules != None: 1514 if len(molecules) != num_pipes: 1515 raise RelaxError("The %i elements of the molecules argument does not match the %i data pipes." % (len(molecules), num_pipes)) 1516 1517 # Assemble the structural objects. 1518 objects = [] 1519 for pipe_index in range(len(pipes)): 1520 dp = get_pipe(pipes[pipe_index]) 1521 objects.append(dp.structure) 1522 1523 # Call the library method to do all of the work. 1524 for pipe_index, model_num, mol_name in loop_coord_structures(objects=objects, models=models, molecules=molecules, atom_id=atom_id): 1525 yield pipe_index, model_num, mol_name

1529 """Superimpose a set of structures. 1530 1531 @keyword pipes: The data pipes to include in the alignment and superimposition. 1532 @type pipes: None or list of str 1533 @keyword models: The list of models to for each data pipe superimpose. The number of elements must match the pipes argument. If set to None, then all models will be used. 1534 @type models: list of lists of int or None 1535 @keyword molecules: The molecule names to include in the alignment and superimposition. The number of elements must match the pipes argument. 1536 @type molecules: None or list of str 1537 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. 1538 @type atom_id: str or None 1539 @keyword displace_id: The atom ID string for restricting the displacement to a subset of all atoms. If not set, then all atoms will be translated and rotated. This can be a list of atom IDs with each element corresponding to one of the structures. 1540 @type displace_id: None, str, or list of str 1541 @keyword method: The superimposition method. It must be one of 'fit to mean' or 'fit to first'. 1542 @type method: str 1543 @keyword centre_type: The type of centre to superimpose over. This can either be the standard centroid superimposition or the CoM could be used instead. 1544 @type centre_type: str 1545 @keyword centroid: An alternative position of the centroid to allow for different superpositions, for example of pivot point motions. 1546 @type centroid: list of float or numpy rank-1, 3D array 1547 """ 1548 1549 # Check the method. 1550 allowed = ['fit to mean', 'fit to first'] 1551 if method not in allowed: 1552 raise RelaxError("The superimposition method '%s' is unknown. It must be one of %s." % (method, allowed)) 1553 1554 # Check the type. 1555 allowed = ['centroid', 'CoM'] 1556 if centre_type not in allowed: 1557 raise RelaxError("The superimposition centre type '%s' is unknown. It must be one of %s." % (centre_type, allowed)) 1558 1559 # The data pipes to use. 1560 if pipes == None: 1561 pipes = [cdp_name()] 1562 1563 # Assemble the structural coordinates. 1564 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id) 1565 1566 # Catch missing data. 1567 if len(coord[0]) == 0: 1568 raise RelaxError("No common atoms could be found between the structures.") 1569 1570 # The different algorithms. 1571 if method == 'fit to mean': 1572 T, R, pivot = fit_to_mean(models=list(range(len(ids))), coord=coord, centre_type=centre_type, elements=elements, centroid=centroid) 1573 elif method == 'fit to first': 1574 T, R, pivot = fit_to_first(models=list(range(len(ids))), coord=coord, centre_type=centre_type, elements=elements, centroid=centroid) 1575 1576 # Loop over all pipes, models, and molecules. 1577 i = 0 1578 for pipe_index, model_num, mol_name in structure_loop(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id): 1579 # The current displacement ID. 1580 curr_displace_id = None 1581 if isinstance(displace_id, str): 1582 curr_displace_id = displace_id 1583 elif isinstance(displace_id, list): 1584 if len(displace_id) <= i: 1585 raise RelaxError("Not enough displacement ID strings have been provided.") 1586 curr_displace_id = displace_id[i] 1587 1588 # Add the molecule name to the displacement ID if required. 1589 id = curr_displace_id 1590 if id == None or (mol_name and not search('#', id)): 1591 if curr_displace_id == None: 1592 id = '#%s' % mol_name 1593 elif search('#', curr_displace_id): 1594 id = curr_displace_id 1595 else: 1596 id = '#%s%s' % (mol_name, curr_displace_id) 1597 1598 # Translate the molecule first (the rotational pivot is defined in the first model). 1599 translate(T=T[i], model=model_num, pipe_name=pipes[pipe_index], atom_id=id) 1600 1601 # Rotate the molecule. 1602 rotate(R=R[i], origin=pivot[i], model=model_num, pipe_name=pipes[pipe_index], atom_id=id) 1603 1604 # Increment the index. 1605 i += 1

1609 """Shift the structural data by the specified translation vector. 1610 1611 @keyword T: The translation vector. 1612 @type T: numpy rank-1, 3D array or list of float 1613 @keyword model: The model to translate. If None, all models will be rotated. 1614 @type model: int or None 1615 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used. 1616 @type atom_id: str or None 1617 @keyword pipe_name: The name of the data pipe containing the structures to translate. This defaults to the current data pipe. 1618 @type pipe_name: None or str 1619 """ 1620 1621 # Defaults. 1622 if pipe_name == None: 1623 pipe_name = cdp_name() 1624 1625 # Checks. 1626 check_pipe(pipe_name) 1627 check_structure(pipe_name) 1628 1629 # Get the data pipe. 1630 dp = get_pipe(pipe_name) 1631 1632 # Convert the args to numpy float data structures. 1633 T = array(T, float64) 1634 1635 # Call the specific code. 1636 selection = dp.structure.selection(atom_id=atom_id) 1637 dp.structure.translate(T=T, model=model, selection=selection) 1638 1639 # Final printout. 1640 if model != None: 1641 print("Translated %i atoms of model %i." % (selection.count_atoms(), model)) 1642 else: 1643 print("Translated %i atoms." % selection.count_atoms())

1647 """Extract the bond vectors from the loaded structures and store them in the spin container. 1648 1649 @keyword spin_id1: The spin identifier string of the first spin of the pair. 1650 @type spin_id1: str 1651 @keyword spin_id2: The spin identifier string of the second spin of the pair. 1652 @type spin_id2: str 1653 @keyword model: The model to extract the vector from. If None, all vectors will be extracted. 1654 @type model: str 1655 @keyword verbosity: The higher the value, the more information is printed to screen. 1656 @type verbosity: int 1657 @keyword ave: A flag which if True will cause the average of all vectors to be extracted. 1658 @type ave: bool 1659 @keyword unit: A flag which if True will cause the function to calculate the unit vectors. 1660 @type unit: bool 1661 """ 1662 1663 # Checks. 1664 check_pipe() 1665 check_structure() 1666 check_mol_res_spin_data() 1667 1668 # Print out. 1669 if verbosity: 1670 # Number of models. 1671 num_models = cdp.structure.num_models() 1672 1673 # Multiple models loaded. 1674 if num_models > 1: 1675 if model: 1676 print("Extracting vectors for model '%s'." % model) 1677 else: 1678 print("Extracting vectors for all %s models." % num_models) 1679 if ave: 1680 print("Averaging all vectors.") 1681 1682 # Single model loaded. 1683 else: 1684 print("Extracting vectors from the single model.") 1685 1686 # Unit vectors. 1687 if unit: 1688 print("Calculating the unit vectors.") 1689 1690 # Loop over the spins. 1691 no_vectors = True 1692 for spin, mol_name, res_num, res_name in spin_loop(selection=spin_id, full_info=True): 1693 # Skip deselected spins. 1694 if not spin.select: 1695 continue 1696 1697 # The spin identification string. The residue name and spin num is not included to allow molecules with point mutations to be used as different models. 1698 id = generate_spin_id_unique(res_num=res_num, res_name=None, spin_name=spin.name, spin_num=spin.num) 1699 1700 # Test that the spin number or name are set (one or both are essential for the identification of the atom). 1701 if spin.num == None and spin.name == None: 1702 warn(RelaxWarning("Either the spin number or name must be set for the spin " + repr(id) + " to identify the corresponding atom in the molecule.")) 1703 continue 1704 1705 # The bond vector already exists. 1706 if hasattr(spin, 'vector'): 1707 obj = getattr(spin, 'vector') 1708 if obj != None: 1709 warn(RelaxWarning("The bond vector for the spin " + repr(id) + " already exists.")) 1710 continue 1711 1712 # Get the bond info. 1713 bond_vectors, attached_name, warnings = cdp.structure.bond_vectors(attached_atom=attached, model_num=model, res_num=res_num, spin_name=spin.name, spin_num=spin.num, return_name=True, return_warnings=True) 1714 id2 = generate_spin_id_unique(res_num=res_num, res_name=None, spin_name=spin.name) 1715 1716 # No attached atom. 1717 if not bond_vectors: 1718 # Warning messages. 1719 if warnings: 1720 warn(RelaxWarning(warnings + " (atom ID " + repr(id) + ").")) 1721 1722 # Skip the spin. 1723 continue 1724 1725 # Set the attached atom name. 1726 if not hasattr(spin, 'attached_atom'): 1727 spin.attached_atom = attached_name 1728 elif spin.attached_atom != attached_name: 1729 raise RelaxError("The " + repr(spin.attached_atom) + " atom already attached to the spin does not match the attached atom " + repr(attached_name) + ".") 1730 1731 # Initialise the average vector. 1732 if ave: 1733 ave_vector = zeros(3, float64) 1734 1735 # Loop over the individual vectors. 1736 for i in range(len(bond_vectors)): 1737 # Unit vector. 1738 if unit: 1739 # Normalisation factor. 1740 norm_factor = norm(bond_vectors[i]) 1741 1742 # Test for zero length. 1743 if norm_factor == 0.0: 1744 warn(RelaxZeroVectorWarning(spin_id1=id, spin_id2=id2)) 1745 1746 # Calculate the normalised vector. 1747 else: 1748 bond_vectors[i] = bond_vectors[i] / norm_factor 1749 1750 # Sum the vectors. 1751 if ave: 1752 ave_vector = ave_vector + bond_vectors[i] 1753 1754 # Average. 1755 if ave: 1756 vector = ave_vector / float(len(bond_vectors)) 1757 else: 1758 vector = bond_vectors 1759 1760 # Convert to a single vector if needed. 1761 if len(vector) == 1: 1762 vector = vector[0] 1763 1764 # Set the vector. 1765 setattr(spin, 'vector', vector) 1766 1767 # We have a vector! 1768 no_vectors = False 1769 1770 # Print out of modified spins. 1771 if verbosity: 1772 # The number of vectors. 1773 num = len(bond_vectors) 1774 plural = 's' 1775 if num == 1: 1776 plural = '' 1777 1778 if spin.name: 1779 print("Extracted %s %s-%s vector%s for the spin '%s'." % (num, spin.name, attached_name, plural, id)) 1780 else: 1781 print("Extracted %s %s-%s vector%s for the spin '%s'." % (num, spin.num, attached_name, plural, id)) 1782 1783 # Right, catch the problem of missing vectors to prevent massive user confusion! 1784 if no_vectors: 1785 raise RelaxError("No vectors could be extracted.")

1789 """Create a PDB representation of the motion between a set of models. 1790 1791 This will create a PDB file containing the atoms of all models, with identical atoms links using CONECT records. This function only supports the internal structural object. 1792 1793 1794 @keyword pipes: The data pipes to generate the web between. 1795 @type pipes: None or list of str 1796 @keyword models: The list of models to generate the web between. The number of elements must match the pipes argument. If set to None, then all models will be used. 1797 @type models: None or list of lists of int 1798 @keyword molecules: The list of molecules to generate the web between. The number of elements must match the pipes argument. 1799 @type molecules: None or list of lists of str 1800 @keyword atom_id: The atom identification string of the coordinates of interest. This matches the spin ID string format. 1801 @type atom_id: str or None 1802 @keyword file: The name of the PDB file to write. 1803 @type file: str 1804 @keyword dir: The directory where the PDB file will be placed. If set to None, then the file will be placed in the current directory. 1805 @type dir: str or None 1806 @keyword force: The force flag which if True will cause the file to be overwritten. 1807 @type force: bool 1808 """ 1809 1810 # Checks. 1811 check_pipe() 1812 check_structure() 1813 1814 # Assemble the structural coordinates. 1815 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id) 1816 1817 # Check that more than one structure is present. 1818 if not len(coord) > 1: 1819 raise RelaxError("Two or more structures are required.") 1820 1821 # Initialise the structural object. 1822 web = Internal() 1823 1824 # Loop over the atoms. 1825 for atom_index in range(len(atom_names)): 1826 # Loop over the structures. 1827 for struct_index in range(len(ids)): 1828 # Add the atom. 1829 web.add_atom(mol_name=mol_names[atom_index], atom_name=atom_names[atom_index], res_name=res_names[atom_index], res_num=res_nums[atom_index], pos=coord[struct_index, atom_index], element=elements[atom_index], sort=False) 1830 1831 # Loop over the structures again, this time twice. 1832 for k in range(len(ids)): 1833 for l in range(len(ids)): 1834 # Skip identical atoms. 1835 if k == l: 1836 continue 1837 1838 # The atom index. 1839 index1 = atom_index*len(ids) + k 1840 index2 = atom_index*len(ids) + l 1841 1842 # Connect to the previous atoms. 1843 web.connect_atom(mol_name=mol_names[atom_index], index1=index1, index2=index2) 1844 1845 # Append the PDB extension if needed. 1846 if isinstance(file, str): 1847 # The file path. 1848 file = get_file_path(file, dir) 1849 1850 # Add '.pdb' to the end of the file path if it isn't there yet. 1851 if not search(".pdb$", file): 1852 file += '.pdb' 1853 1854 # Open the file for writing. 1855 file = open_write_file(file, force=force) 1856 1857 # Write the structure. 1858 web.write_pdb(file)

1862 """The PDB writing function. 1863 1864 @keyword file: The name of the PDB file to write. This can also be a file instance. 1865 @type file: str or file instance 1866 @keyword dir: The directory where the PDB file will be placed. If set to None, then the file will be placed in the current directory. 1867 @type dir: str or None 1868 @keyword model_num: The model to place into the PDB file. If not supplied, then all models will be placed into the file. 1869 @type model_num: None or int 1870 @keyword compress_type: The compression type. The integer values correspond to the compression type: 0, no compression; 1, Bzip2 compression; 2, Gzip compression. 1871 @type compress_type: int 1872 @keyword force: The force flag which if True will cause the file to be overwritten. 1873 @type force: bool 1874 """ 1875 1876 # Test if the current data pipe exists. 1877 check_pipe() 1878 1879 # Check if the structural object exists. 1880 if not hasattr(cdp, 'structure'): 1881 raise RelaxError("No structural data is present in the current data pipe.") 1882 1883 # Path handling. 1884 if isinstance(file, str): 1885 # The file path. 1886 file = get_file_path(file, dir) 1887 1888 # Add '.pdb' to the end of the file path if it isn't there yet. 1889 if not search(".pdb$", file): 1890 file = file + '.pdb' 1891 1892 # Open the file for writing. 1893 file = open_write_file(file, compress_type=compress_type, force=force) 1894 1895 # Write the structures. 1896 cdp.structure.write_pdb(file, model_num=model_num)

Source Code for Module pipe_control.structure.main