1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 from minfx.generic import generic_minimise
24 from numpy import array, average, concatenate, dot, float64, mean, ones, std, zeros
25 from numpy.linalg import norm
26 from os import F_OK, access, getcwd
27 from re import search
28 import sys
29 from warnings import warn
30
31
32 from data_store import Relax_data_store; ds = Relax_data_store()
33 from data_store.seq_align import Sequence_alignments
34 from lib.check_types import is_float
35 from lib.errors import RelaxError, RelaxFileError
36 from lib.geometry.vectors import vector_angle_atan2
37 from lib.io import get_file_path, open_write_file, write_data
38 from lib.plotting.api import correlation_matrix, write_xy_data, write_xy_header
39 from lib.selection import tokenise
40 from lib.sequence import write_spin_data
41 from lib.sequence_alignment.msa import msa_general, msa_residue_numbers, msa_residue_skipping
42 from lib.structure.internal.coordinates import assemble_atomic_coordinates, assemble_coord_array, loop_coord_structures
43 from lib.structure.internal.displacements import Displacements
44 from lib.structure.internal.object import Internal
45 from lib.structure.pca import pca_analysis
46 from lib.structure.represent.diffusion_tensor import diffusion_tensor
47 from lib.structure.statistics import atomic_rmsd, per_atom_rmsd
48 from lib.structure.superimpose import fit_to_first, fit_to_mean
49 from lib.warnings import RelaxWarning, RelaxNoPDBFileWarning, RelaxZeroVectorWarning
50 from pipe_control import molmol, pipes
51 from pipe_control.interatomic import interatomic_loop
52 from pipe_control.mol_res_spin import check_mol_res_spin_data, create_spin, generate_spin_id_unique, linear_ave, return_spin, spin_loop
53 from pipe_control.pipes import cdp_name, check_pipe, get_pipe
54 from pipe_control.structure.checks import check_structure
55 from pipe_control.structure.mass import pipe_centre_of_mass
56 from status import Status; status = Status()
57 from target_functions.ens_pivot_finder import Pivot_finder
58
59
60 -def add_atom(mol_name=None, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):
61 """Add a new atom to the structural data object.
62
63 @keyword mol_name: The name of the molecule.
64 @type mol_name: str
65 @keyword atom_name: The atom name, e.g. 'H1'.
66 @type atom_name: str or None
67 @keyword res_name: The residue name.
68 @type res_name: str or None
69 @keyword res_num: The residue number.
70 @type res_num: int or None
71 @keyword pos: The position vector of coordinates. If a rank-2 array is supplied, the length of the first dimension must match the number of models.
72 @type pos: rank-1 or rank-2 array or list of float
73 @keyword element: The element symbol.
74 @type element: str or None
75 @keyword atom_num: The atom number.
76 @type atom_num: int or None
77 @keyword chain_id: The chain identifier.
78 @type chain_id: str or None
79 @keyword segment_id: The segment identifier.
80 @type segment_id: str or None
81 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'.
82 @type pdb_record: str or None
83 """
84
85
86 check_pipe()
87
88
89 if not hasattr(cdp, 'structure'):
90 cdp.structure = Internal()
91
92
93 cdp.structure.add_atom(mol_name=mol_name, atom_name=atom_name, res_name=res_name, res_num=res_num, pos=pos, element=element, atom_num=atom_num, chain_id=chain_id, segment_id=segment_id, pdb_record=pdb_record, sort=True)
94
95
97 """Add a new model to the empty structural data object."""
98
99
100 check_pipe()
101
102
103 if not hasattr(cdp, 'structure'):
104 cdp.structure = Internal()
105
106
107 if cdp.structure.num_molecules() != 0:
108 raise RelaxError("The internal structural object is not empty.")
109
110
111 cdp.structure.structural_data.add_item(model_num=model_num)
112 print("Created the empty model number %s." % model_num)
113
114
116 """Assemble the common atomic coordinates taking sequence alignments into account.
117
118 @keyword pipes: The data pipes to assemble the coordinates from.
119 @type pipes: None or list of str
120 @keyword models: The list of models for each data pipe. The number of elements must match the pipes argument. If set to None, then all models will be used.
121 @type models: None or list of lists of int
122 @keyword molecules: The list of molecules for each data pipe. The number of elements must match the pipes argument.
123 @type molecules: None or list of lists of str
124 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format.
125 @type atom_id: str or None
126 @keyword lists: A flag which if true will cause the object ID list per molecule, the model number list per molecule, and the molecule name list per molecule to also be returned.
127 @type lists: bool
128 @return: The array of atomic coordinates (first dimension is the model and/or molecule, the second are the atoms, and the third are the coordinates); a list of unique IDs for each structural object, model, and molecule; the common list of molecule names; the common list of residue names; the common list of residue numbers; the common list of atom names; the common list of element names.
129 @rtype: numpy rank-3 float64 array, list of str, list of str, list of str, list of int, list of str, list of str
130 """
131
132
133 objects, object_names, pipes = assemble_structural_objects(pipes=pipes, models=models, molecules=molecules)
134
135
136 ids, object_id_list, model_list, molecule_list, atom_pos, mol_names, res_names, res_nums, atom_names, elements, one_letter_codes, num_mols = assemble_atomic_coordinates(objects=objects, object_names=object_names, molecules=molecules, models=models, atom_id=atom_id)
137
138
139 if mol_names == []:
140 if atom_id != None:
141 raise RelaxError("No structural data matching the atom ID string '%s' can be found." % atom_id)
142 else:
143 raise RelaxError("No structural data can be found.")
144
145
146 same_mol = True
147 for mol in molecule_list:
148 if mol != molecule_list[0]:
149 same_mol = False
150
151
152 align = None
153 if hasattr(ds, 'sequence_alignments'):
154 align = ds.sequence_alignments.find_alignment(object_ids=object_id_list, models=model_list, molecules=molecule_list, sequences=one_letter_codes)
155 if align != None:
156
157 print("\nSequence alignment found - common atoms will be determined based on this MSA:")
158 for i in range(len(align.object_ids)):
159 print(align.strings[i])
160
161
162 strings = align.strings
163 gaps = align.gaps
164
165
166 elif len(objects) == 1 and same_mol:
167
168 print("\nSequence alignment disabled as only models with identical molecule, residue and atomic sequences are being superimposed.")
169
170
171 strings = one_letter_codes
172
173
174 gaps = []
175 for mol_index in range(num_mols):
176 gaps.append([])
177 for i in range(len(one_letter_codes[mol_index])):
178 gaps[mol_index].append(0)
179
180
181 else:
182
183 print("\nSequence alignment cannot be found - falling back to a residue number based alignment.")
184
185
186 res_num_list = []
187 for mol_index in range(num_mols):
188 res_num_list.append([])
189 for i in range(len(one_letter_codes[mol_index])):
190 key = list(res_nums[mol_index][i].keys())[0]
191 res_num_list[mol_index].append(res_nums[mol_index][i][key])
192
193
194 strings, gaps = msa_residue_numbers(one_letter_codes, residue_numbers=res_num_list)
195
196
197 skip = msa_residue_skipping(strings=strings, gaps=gaps)
198
199
200 coord, mol_name_common, res_name_common, res_num_common, atom_name_common, element_common = assemble_coord_array(atom_pos=atom_pos, mol_names=mol_names, res_names=res_names, res_nums=res_nums, atom_names=atom_names, elements=elements, sequences=one_letter_codes, skip=skip)
201 if lists:
202 return coord, ids, object_id_list, model_list, molecule_list, mol_name_common, res_name_common, res_num_common, atom_name_common, element_common
203 else:
204 return coord, ids, mol_name_common, res_name_common, res_num_common, atom_name_common, element_common
205
206
208 """Assemble the atomic coordinates.
209
210 @keyword pipes: The data pipes to assemble the coordinates from.
211 @type pipes: None or list of str
212 @keyword models: The list of models for each data pipe. The number of elements must match the pipes argument. If set to None, then all models will be used.
213 @type models: None or list of lists of int
214 @keyword molecules: The list of molecules for each data pipe. The number of elements must match the pipes argument.
215 @type molecules: None or list of lists of str
216 @return: The structural objects, structural object names, and data pipes list.
217 @rtype: list of lib.structure.internal.object.Internal instances, list of str, list of str
218 """
219
220
221 if pipes == None:
222 pipes = [cdp_name()]
223 num_pipes = len(pipes)
224
225
226 for pipe in pipes:
227 check_pipe(pipe)
228
229
230 if models != None:
231 if len(models) != num_pipes:
232 raise RelaxError("The %i elements of the models argument does not match the %i data pipes." % (len(models), num_pipes))
233 if molecules != None:
234 if len(molecules) != num_pipes:
235 raise RelaxError("The %i elements of the molecules argument does not match the %i data pipes." % (len(molecules), num_pipes))
236
237
238 objects = []
239 object_names = []
240 for pipe_index in range(len(pipes)):
241 dp = get_pipe(pipes[pipe_index])
242 objects.append(dp.structure)
243 object_names.append(pipes[pipe_index])
244
245
246 return objects, object_names, pipes
247
248
249 -def atomic_fluctuations(pipes=None, models=None, molecules=None, atom_id=None, measure='distance', file=None, format='text', dir=None, force=False):
250 """Write out a correlation matrix of pairwise interatomic distance fluctuations between different structures.
251
252 @keyword pipes: The data pipes to generate the interatomic distance fluctuation correlation matrix for.
253 @type pipes: None or list of str
254 @keyword models: The list of models to generate the interatomic distance fluctuation correlation matrix for. The number of elements must match the pipes argument. If set to None, then all models will be used.
255 @type models: None or list of lists of int
256 @keyword molecules: The list of molecules to generate the interatomic distance fluctuation correlation matrix for. The number of elements must match the pipes argument.
257 @type molecules: None or list of lists of str
258 @keyword atom_id: The atom identification string of the coordinates of interest. This matches the spin ID string format.
259 @type atom_id: str or None
260 @keyword measure: The type of fluctuation to measure. This can be either 'distance' or 'angle'.
261 @type measure: str
262 @keyword file: The name of the file to write.
263 @type file: str
264 @keyword format: The output format. This can be set to "text" for text file output, or "gnuplot" for creating a gnuplot script.
265 @type format: str
266 @keyword dir: The directory where the file will be placed. If set to None, then the file will be placed in the current directory.
267 @type dir: str or None
268 @keyword force: The force flag which if True will cause the file to be overwritten.
269 @type force: bool
270 """
271
272
273 check_pipe()
274 check_structure()
275 allowed_measures = ['distance', 'angle', 'parallax shift']
276 if measure not in allowed_measures:
277 raise RelaxError("The measure '%s' must be one of %s." % (measure, allowed_measures))
278
279
280 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id)
281
282
283 n = len(atom_names)
284 m = len(coord)
285
286
287 if not m > 1:
288 raise RelaxError("Two or more structures are required.")
289
290
291 labels = []
292 for i in range(n):
293 labels.append(generate_spin_id_unique(mol_name=mol_names[i], res_num=res_nums[i], res_name=res_names[i], spin_name=atom_names[i]))
294
295
296 matrix = zeros((n, n), float64)
297 dist = zeros(m, float64)
298 vectors = zeros((m, 3), float64)
299 parallax_vectors = zeros((m, 3), float64)
300 angles = zeros(m, float64)
301
302
303 if measure == 'distance':
304 for i in range(n):
305 for j in range(n):
306
307 if j > i:
308 continue
309
310
311 for k in range(m):
312 dist[k] = norm(coord[k, i] - coord[k, j])
313
314
315 matrix[i, j] = matrix[j, i] = std(dist, ddof=1)
316
317
318 elif measure == 'angle':
319
320 for i in range(n):
321 for j in range(n):
322
323 if j > i:
324 continue
325
326
327 for k in range(m):
328 vectors[k] = coord[k, i] - coord[k, j]
329
330
331 ave_vect = average(vectors, axis=0)
332
333
334 for k in range(m):
335 angles[k] = vector_angle_atan2(ave_vect, vectors[k])
336
337
338 matrix[i, j] = matrix[j, i] = std(angles, ddof=1)
339
340
341 elif measure == 'parallax shift':
342
343 for i in range(n):
344 for j in range(n):
345
346 if j > i:
347 continue
348
349
350 for k in range(m):
351 vectors[k] = coord[k, i] - coord[k, j]
352
353
354 ave_vect = average(vectors, axis=0)
355
356
357 length = norm(ave_vect)
358 if length == 0.0:
359 matrix[i, j] = matrix[j, i] = 0.0
360 continue
361
362
363 unit = ave_vect / length
364
365
366 for k in range(m):
367
368 proj = dot(vectors[k], unit) * unit
369
370
371 dist[k] = norm(vectors[k] - proj)
372
373
374 matrix[i, j] = matrix[j, i] = std(dist, ddof=1)
375
376
377 correlation_matrix(format=format, matrix=matrix, labels=labels, file=file, dir=dir, force=force)
378
379
380 -def average_structure(pipes=None, models=None, molecules=None, atom_id=None, set_mol_name=None, set_model_num=None):
381 """Calculate a mean structure.
382
383 @keyword pipes: The data pipes containing structures to average.
384 @type pipes: None or list of str
385 @keyword models: The list of models for each data pipe. The number of elements must match the pipes argument. If set to None, then all models will be used.
386 @type models: None or list of lists of int
387 @keyword molecules: The list of molecules for each data pipe. The number of elements must match the pipes argument.
388 @type molecules: None or list of lists of str
389 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format.
390 @type atom_id: str or None
391 @keyword set_mol_name: The molecule name for the averaged molecule.
392 @type set_mol_name: None or str
393 @keyword set_model_num: The model number for the averaged molecule.
394 @type set_model_num: None or int
395 """
396
397
398 check_pipe()
399
400
401 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id)
402
403
404 struct = mean(coord, axis=0)
405
406
407 if not hasattr(cdp, 'structure'):
408 cdp.structure = Internal()
409
410
411 cdp.structure.add_coordinates(coord=struct, mol_names=mol_names, res_names=res_names, res_nums=res_nums, atom_names=atom_names, elements=elements, set_mol_name=set_mol_name, set_model_num=set_model_num)
412
413
415 """Connect two atoms.
416
417 @keyword index1: The global index of the first atom.
418 @type index1: str
419 @keyword index2: The global index of the first atom.
420 @type index2: str
421 """
422
423
424 check_pipe()
425
426
427 if not hasattr(cdp, 'structure'):
428 cdp.structure = Internal()
429
430
431 cdp.structure.connect_atom(index1=index1, index2=index2)
432
433
451
452
454 """Create the PDB representation of the diffusion tensor.
455
456 @keyword scale: The scaling factor for the diffusion tensor.
457 @type scale: float
458 @keyword file: The name of the PDB file to create.
459 @type file: str
460 @keyword dir: The name of the directory to place the PDB file into.
461 @type dir: str
462 @keyword force: Flag which if set to True will overwrite any pre-existing file.
463 @type force: bool
464 """
465
466
467 check_pipe()
468
469
470 if hasattr(cdp, 'structure') and not cdp.structure.empty():
471 com = pipe_centre_of_mass()
472 else:
473 com = zeros(3, float64)
474
475
476 structure = Internal()
477
478
479 if cdp.pipe_type == 'hybrid':
480 pipe_list = cdp.hybrid_pipes
481 else:
482 pipe_list = [pipes.cdp_name()]
483
484
485 if cdp.pipe_type == 'hybrid':
486 mol_names = []
487 for pipe in pipe_list:
488 mol_names.append('diff_tensor_' % pipe)
489 else:
490 mol_names = ['diff_tensor']
491
492
493 for pipe_index in range(len(pipe_list)):
494
495 pipe = pipes.get_pipe(pipe_list[pipe_index])
496
497
498 if not hasattr(pipe, 'diff_tensor'):
499 raise RelaxNoTensorError('diffusion')
500
501
502 structure.add_molecule(name=mol_names[pipe_index])
503
504
505 mol = structure.get_molecule(mol_names[pipe_index])
506
507
508 diff_type = pipe.diff_tensor.type
509 if diff_type == 'spheroid':
510 diff_type = pipe.diff_tensor.spheroid_type
511
512
513 sim_num = None
514 if hasattr(pipe.diff_tensor, 'tm_sim'):
515
516 sim_num = len(pipe.diff_tensor.tm_sim)
517
518
519 axes = []
520 sim_axes = []
521 if diff_type in ['oblate', 'prolate']:
522 axes.append(pipe.diff_tensor.Dpar * pipe.diff_tensor.Dpar_unit)
523 sim_axes.append([])
524 if sim_num != None:
525 for i in range(sim_num):
526 sim_axes[0].append(pipe.diff_tensor.Dpar_sim[i] * pipe.diff_tensor.Dpar_unit_sim[i])
527
528 if diff_type == 'ellipsoid':
529 axes.append(pipe.diff_tensor.Dx * pipe.diff_tensor.Dx_unit)
530 axes.append(pipe.diff_tensor.Dy * pipe.diff_tensor.Dy_unit)
531 axes.append(pipe.diff_tensor.Dz * pipe.diff_tensor.Dz_unit)
532 sim_axes.append([])
533 sim_axes.append([])
534 sim_axes.append([])
535 if sim_num != None:
536 for i in range(sim_num):
537 sim_axes[0].append(pipe.diff_tensor.Dx_sim[i] * pipe.diff_tensor.Dx_unit_sim[i])
538 sim_axes[1].append(pipe.diff_tensor.Dy_sim[i] * pipe.diff_tensor.Dy_unit_sim[i])
539 sim_axes[2].append(pipe.diff_tensor.Dz_sim[i] * pipe.diff_tensor.Dz_unit_sim[i])
540
541
542 diffusion_tensor(mol=mol, tensor=pipe.diff_tensor.tensor, tensor_diag=pipe.diff_tensor.tensor_diag, diff_type=diff_type, rotation=pipe.diff_tensor.rotation, axes=axes, sim_axes=sim_axes, com=com, scale=scale)
543
544
545
546
547
548
549 print("\nGenerating the PDB file.")
550
551
552 tensor_pdb_file = open_write_file(file, dir, force=force)
553 structure.write_pdb(tensor_pdb_file)
554 tensor_pdb_file.close()
555
556
557 if not hasattr(cdp, 'result_files'):
558 cdp.result_files = []
559 if dir == None:
560 dir = getcwd()
561 cdp.result_files.append(['diff_tensor_pdb', 'Diffusion tensor PDB', get_file_path(file, dir)])
562 status.observers.result_file.notify()
563
564
565 -def delete(atom_id=None, model=None, verbosity=1, spin_info=True):
566 """Delete structural data.
567
568 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. If not given, then all structural data will be deleted.
569 @type atom_id: str or None
570 @keyword model: Individual structural models from a loaded ensemble can be deleted by specifying the model number.
571 @type model: None or int
572 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1.
573 @type verbosity: int
574 @keyword spin_info: A flag which if True will cause all structural information in the spin containers and interatomic data containers to be deleted as well. If False, then only the 3D structural data will be deleted.
575 @type spin_info: bool
576 """
577
578
579 check_pipe()
580
581
582 if hasattr(cdp, 'structure'):
583 if verbosity:
584 print("Deleting structural data from the current pipe.")
585 selection = None
586 if atom_id != None:
587 selection = cdp.structure.selection(atom_id=atom_id)
588 cdp.structure.delete(model=model, selection=selection, verbosity=verbosity)
589 elif verbosity:
590 print("No structures are present.")
591
592
593 if not spin_info:
594 return
595
596
597 if verbosity:
598 print("Deleting all spin specific structural info.")
599 for spin in spin_loop(selection=atom_id):
600
601 if hasattr(spin, 'pos'):
602 del spin.pos
603
604
605 if verbosity:
606 print("Deleting all interatomic vectors.")
607 for interatom in interatomic_loop(selection1=atom_id):
608
609 if hasattr(interatom, 'vector'):
610 del interatom.vector
611
612
614 """Delete all secondary structure information.
615
616 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1.
617 @type verbosity: int
618 """
619
620
621 check_pipe()
622
623
624 if hasattr(cdp, 'structure'):
625 if verbosity:
626 print("Deleting secondary structure information from the current pipe.")
627 cdp.structure.delete_ss(verbosity=verbosity)
628 elif verbosity:
629 print("No structures are present.")
630
631
632
633 -def displacement(pipes=None, models=None, molecules=None, atom_id=None, centroid=None):
634 """Calculate the rotational and translational displacement between structures or models.
635
636 All results will be placed into the current data pipe cdp.structure.displacements data structure.
637
638
639 @keyword pipes: The data pipes to determine the displacements for.
640 @type pipes: None or list of str
641 @keyword models: The list of models to determine the displacements for. The number of elements must match the pipes argument. If set to None, then all models will be used.
642 @type models: None or list of lists of int
643 @keyword molecules: The list of molecules to determine the displacements for. The number of elements must match the pipes argument.
644 @type molecules: None or list of lists of str
645 @keyword atom_id: The atom identification string of the coordinates of interest. This matches the spin ID string format.
646 @type atom_id: str or None
647 @keyword centroid: An alternative position of the centroid, used for studying pivoted systems.
648 @type centroid: list of float or numpy rank-1, 3D array
649 """
650
651
652 check_pipe()
653
654
655 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id)
656
657
658 if not hasattr(cdp.structure, 'displacments'):
659 cdp.structure.displacements = Displacements()
660
661
662 for i in range(len(ids)):
663 for j in range(len(ids)):
664 cdp.structure.displacements._calculate(id_from=ids[i], id_to=ids[j], coord_from=coord[i], coord_to=coord[j], centroid=centroid)
665
666
667 -def find_pivot(pipes=None, models=None, molecules=None, atom_id=None, init_pos=None, func_tol=1e-5, box_limit=200):
668 """Find the pivoted motion of a set of structural models or structures.
669
670 The pivot will be placed into the current data pipe cdp.structure.pivot data structure.
671
672
673 @keyword pipes: The data pipes to use in the motional pivot algorithm.
674 @type pipes: None or list of str
675 @keyword models: The list of models to use. The number of elements must match the pipes argument. If set to None, then all models will be used.
676 @type models: None or list of lists of int
677 @keyword molecules: The list of molecules to find the pivoted motion for. The number of elements must match the pipes argument.
678 @type molecules: None or list of lists of str
679 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format.
680 @type atom_id: str or None
681 @keyword init_pos: The starting pivot position for the pivot point optimisation.
682 @type init_pos: list of float or numpy rank-1, 3D array
683 @keyword func_tol: The function tolerance which, when reached, terminates optimisation. Setting this to None turns of the check.
684 @type func_tol: None or float
685 @keyword box_limit: The simplex optimisation used in this function is constrained withing a box of +/- x Angstrom containing the pivot point using the logarithmic barrier function. This argument is the value of x.
686 @type box_limit: int
687 """
688
689
690 check_pipe()
691
692
693 if init_pos == None:
694 init_pos = zeros(3, float64)
695 init_pos = array(init_pos)
696
697
698 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id)
699
700
701 A = zeros((6, 3), float64)
702 b = zeros(6, float64)
703 for i in range(3):
704 A[2*i, i] = 1
705 A[2*i+1, i] = -1
706 b[2*i] = -box_limit
707 b[2*i+1] = -box_limit
708
709
710 finder = Pivot_finder(list(range(len(coord))), coord)
711 results = generic_minimise(func=finder.func, x0=init_pos, min_algor='Log barrier', min_options=('simplex',), A=A, b=b, func_tol=func_tol, print_flag=1)
712
713
714 if results is None:
715 return
716
717
718 cdp.structure.pivot = results
719
720
721 print("Motional pivot found at: %s" % results)
722
723
724 -def get_pos(spin_id=None, str_id=None, ave_pos=False):
725 """Load the spins from the structural object into the relax data store.
726
727 @keyword spin_id: The molecule, residue, and spin identifier string.
728 @type spin_id: str
729 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number.
730 @type str_id: int or str
731 @keyword ave_pos: A flag specifying if the average atom position or the atom position from all loaded structures is loaded into the SpinContainer.
732 @type ave_pos: bool
733 """
734
735
736 check_pipe()
737 check_structure()
738
739
740 selection = cdp.structure.selection(atom_id=spin_id)
741
742
743 data = []
744 for mol_name, res_num, res_name, atom_num, atom_name, element, pos in cdp.structure.atom_loop(selection=selection, str_id=str_id, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True, ave=ave_pos):
745
746 if mol_name and search('\+', mol_name):
747 mol_name = mol_name.replace('+', '')
748 if res_name and search('\+', res_name):
749 res_name = res_name.replace('+', '')
750 if atom_name and search('\+', atom_name):
751 atom_name = atom_name.replace('+', '')
752
753
754 id = generate_spin_id_unique(res_num=res_num, res_name=None, spin_num=atom_num, spin_name=atom_name)
755
756
757 spin_cont = return_spin(id)
758
759
760 if spin_cont == None:
761 continue
762
763
764 spin_cont.pos = pos
765
766
767 data.append([id, repr(pos)])
768
769
770 if not len(data):
771 raise RelaxError("No positional information matching the spin ID '%s' could be found." % spin_id)
772
773
774 for spin in spin_loop():
775 if hasattr(spin, 'members'):
776
777 positions = []
778 for atom in spin.members:
779
780 subspin = return_spin(atom)
781
782
783 if subspin == None:
784 raise RelaxNoSpinError(atom)
785
786
787 if not hasattr(subspin, 'pos') or subspin.pos is None or not len(subspin.pos):
788 raise RelaxError("Positional information is not available for the atom '%s'." % atom)
789
790
791 pos = subspin.pos
792
793
794 multi_model = True
795 if type(pos[0]) in [float, float64]:
796 multi_model = False
797 pos = [pos]
798
799
800 positions.append([])
801 for i in range(len(pos)):
802 positions[-1].append(pos[i].tolist())
803
804
805 if spin.averaging == 'linear':
806
807 ave = linear_ave(positions)
808
809
810 if multi_model:
811 spin.pos = ave
812 else:
813 spin.pos = ave[0]
814
815
816 write_data(out=sys.stdout, headings=["Spin_ID", "Position"], data=data)
817
818
819 -def load_spins(spin_id=None, str_id=None, from_mols=None, mol_name_target=None, ave_pos=False, spin_num=True):
820 """Load the spins from the structural object into the relax data store.
821
822 @keyword spin_id: The molecule, residue, and spin identifier string.
823 @type spin_id: str
824 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number.
825 @type str_id: int or str
826 @keyword from_mols: The list of similar, but not necessarily identical molecules to load spin information from.
827 @type from_mols: list of str or None
828 @keyword mol_name_target: The name of target molecule container, overriding the name of the loaded structures
829 @type mol_name_target: str or None
830 @keyword ave_pos: A flag specifying if the average atom position or the atom position from all loaded structures is loaded into the SpinContainer.
831 @type ave_pos: bool
832 @keyword spin_num: A flag specifying if the spin number should be loaded.
833 @type spin_num: bool
834 """
835
836
837 if from_mols != None:
838 load_spins_multi_mol(spin_id=spin_id, str_id=str_id, from_mols=from_mols, mol_name_target=mol_name_target, ave_pos=ave_pos, spin_num=spin_num)
839 return
840
841
842 check_pipe()
843 check_structure()
844
845
846 print("Adding the following spins to the relax data store.\n")
847
848
849 mol_names = []
850 res_nums = []
851 res_names = []
852 spin_nums = []
853 spin_names = []
854
855
856 selection = cdp.structure.selection(atom_id=spin_id)
857 for mol_name, res_num, res_name, atom_num, atom_name, element, pos in cdp.structure.atom_loop(selection=selection, str_id=str_id, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True, ave=ave_pos):
858
859 if mol_name_target:
860 mol_name = mol_name_target
861
862
863 if not spin_num:
864 atom_num = None
865
866
867 if mol_name and search('\+', mol_name):
868 mol_name = mol_name.replace('+', '')
869 if res_name and search('\+', res_name):
870 res_name = res_name.replace('+', '')
871 if atom_name and search('\+', atom_name):
872 atom_name = atom_name.replace('+', '')
873
874
875 id = generate_spin_id_unique(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=atom_num, spin_name=atom_name)
876
877
878 try:
879 spin_cont = create_spin(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=atom_num, spin_name=atom_name)
880
881
882 except RelaxError:
883 spin_cont = return_spin(id)
884
885
886 if mol_name_target:
887 mol_names.append(mol_name_target)
888 else:
889 mol_names.append(mol_name)
890 res_nums.append(res_num)
891 res_names.append(res_name)
892 spin_nums.append(atom_num)
893 spin_names.append(atom_name)
894
895
896 if hasattr(spin_cont, 'pos') and spin_cont.pos != None and (spin_cont.pos.shape != pos.shape or (spin_cont.pos != pos).any()):
897 warn(RelaxWarning("Positional information already exists for the spin %s, appending the new positions." % id))
898 spin_cont.pos = concatenate((spin_cont.pos, pos))
899 else:
900 spin_cont.pos = pos
901
902
903 spin_cont.element = element
904
905
906 if len(mol_names) == 0:
907 warn(RelaxWarning("No spins matching the '%s' ID string could be found." % spin_id))
908 return
909
910
911 write_spin_data(file=sys.stdout, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names)
912
913
914 cdp.N = cdp.structure.num_models()
915
916
917 -def load_spins_multi_mol(spin_id=None, str_id=None, from_mols=None, mol_name_target=None, ave_pos=False, spin_num=True):
918 """Load the spins from the structural object into the relax data store.
919
920 @keyword spin_id: The molecule, residue, and spin identifier string.
921 @type spin_id: str
922 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number.
923 @type str_id: int or str
924 @keyword from_mols: The list of similar, but not necessarily identical molecules to load spin information from.
925 @type from_mols: list of str or None
926 @keyword mol_name_target: The name of target molecule container, overriding the name of the loaded structures
927 @type mol_name_target: str or None
928 @keyword ave_pos: A flag specifying if the average atom position or the atom position from all loaded structures is loaded into the SpinContainer.
929 @type ave_pos: bool
930 @keyword spin_num: A flag specifying if the spin number should be loaded.
931 @type spin_num: bool
932 """
933
934
935 check_pipe()
936 check_structure()
937
938
939 if mol_name_target == None:
940 raise RelaxError("The target molecule name must be supplied when specifying multiple molecules to load spins from.")
941
942
943 if cdp.structure.num_models() != 1:
944 raise RelaxError("Only a single structural model is allowed when specifying multiple molecules to load spins from.")
945
946
947 if spin_id:
948 mol_token, res_token, spin_token = tokenise(spin_id)
949 if mol_token != None:
950 raise RelaxError("The spin ID string cannot contain molecular information when specifying multiple molecules to load spins from.")
951
952
953 print("Adding the following spins to the relax data store.\n")
954
955
956 ids = []
957 res_nums = {}
958 res_names = {}
959 spin_names = {}
960 positions = {}
961 elements = {}
962
963
964 for mol_name in from_mols:
965
966 positions[mol_name] = {}
967
968
969 new_id = '#' + mol_name
970 if spin_id != None:
971 new_id += spin_id
972
973
974 selection = cdp.structure.selection(atom_id=new_id)
975 for res_num, res_name, atom_num, atom_name, element, pos in cdp.structure.atom_loop(selection=selection, str_id=str_id, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True, ave=ave_pos):
976
977 if res_name and search('\+', res_name):
978 res_name = res_name.replace('+', '')
979 if atom_name and search('\+', atom_name):
980 atom_name = atom_name.replace('+', '')
981
982
983 if not spin_num:
984 atom_num = None
985
986
987 id = generate_spin_id_unique(mol_name=mol_name_target, res_num=res_num, res_name=res_name, spin_name=atom_name)
988
989
990 if is_float(pos[0]):
991 positions[mol_name][id] = pos
992 else:
993 positions[mol_name][id] = pos[0]
994
995
996 if id in ids:
997 continue
998
999
1000 ids.append(id)
1001 res_nums[id] = res_num
1002 res_names[id] = res_name
1003 spin_names[id] = atom_name
1004 elements[id] = element
1005
1006
1007 if len(ids) == 0:
1008 warn(RelaxWarning("No spins matching the '%s' ID string could be found." % spin_id))
1009 return
1010
1011
1012 mol_names2 = []
1013 res_nums2 = []
1014 res_names2 = []
1015 spin_names2 = []
1016 for id in ids:
1017
1018 spin_cont = return_spin(id)
1019
1020
1021 if spin_cont == None:
1022 spin_cont = create_spin(mol_name=mol_name_target, res_num=res_nums[id], res_name=res_names[id], spin_name=spin_names[id])
1023
1024
1025 spin_cont.pos = []
1026 for mol_name in from_mols:
1027 if id in positions[mol_name]:
1028 spin_cont.pos.append(positions[mol_name][id])
1029 else:
1030 spin_cont.pos.append(None)
1031
1032
1033 spin_cont.element = elements[id]
1034
1035
1036 mol_names2.append(mol_name_target)
1037 res_nums2.append(res_nums[id])
1038 res_names2.append(res_names[id])
1039 spin_names2.append(spin_names[id])
1040
1041
1042 write_spin_data(file=sys.stdout, mol_names=mol_names2, res_nums=res_nums2, res_names=res_names2, spin_names=spin_names2)
1043
1044
1045 cdp.N = len(from_mols)
1046
1047
1048 -def pca(pipes=None, models=None, molecules=None, obs_pipes=None, obs_models=None, obs_molecules=None, atom_id=None, algorithm=None, num_modes=4, format='grace', dir=None):
1049 """PC analysis of the motions between all the loaded models.
1050
1051 @keyword pipes: The data pipes to perform the PC analysis on.
1052 @type pipes: None or list of str
1053 @keyword models: The list of models to perform the PC analysis on. The number of elements must match the pipes argument. If set to None, then all models will be used.
1054 @type models: None or list of lists of int
1055 @keyword molecules: The list of molecules to perform the PC analysis on. The number of elements must match the pipes argument.
1056 @type molecules: None or list of lists of str
1057 @keyword obs_pipes: The data pipes in the PC analysis which will have zero weight. These structures are for comparison.
1058 @type obs_pipes: None or list of str
1059 @keyword obs_models: The list of models in the PC analysis which will have zero weight. These structures are for comparison. The number of elements must match the pipes argument. If set to None, then all models will be used.
1060 @type obs_models: None or list of lists of int
1061 @keyword obs_molecules: The list of molecules in the PC analysis which will have zero weight. These structures are for comparison. The number of elements must match the pipes argument.
1062 @type obs_molecules: None or list of lists of str
1063 @keyword atom_id: The atom identification string of the coordinates of interest. This matches the spin ID string format.
1064 @type atom_id: str or None
1065 @keyword algorithm: The PCA algorithm to use (either 'eigen' or 'svd').
1066 @type algorithm: str
1067 @keyword num_modes: The number of PCA modes to calculate.
1068 @type num_modes: int
1069 @keyword format: The graph format to use.
1070 @type format: str
1071 @keyword dir: The optional directory to place the graphs into.
1072 @type dir: str
1073 """
1074
1075
1076 check_pipe()
1077
1078
1079 coord, ids, object_id_list, model_list, molecule_list, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id, lists=True)
1080
1081
1082 M = len(coord)
1083 if obs_pipes == None:
1084 obs_pipes = []
1085 weights = ones(M, float64)
1086 for struct in range(M):
1087
1088 for i in range(len(obs_pipes)):
1089
1090 if object_id_list[struct] == obs_pipes[i]:
1091
1092 if obs_molecules == None or molecule_list[struct] == obs_molecules[i]:
1093
1094 if obs_models == None or model_list[struct] == obs_models[i]:
1095 weights[struct] = 0.0
1096
1097
1098 print("\n\nStarting the PCA analysis.\n")
1099 values, vectors, proj = pca_analysis(coord=coord, weights=weights, algorithm=algorithm, num_modes=num_modes)
1100
1101
1102 cdp.structure.pca_values = values
1103 cdp.structure.pca_vectors = vectors
1104 cdp.structure.pca_proj = proj
1105
1106
1107 for mode in range(num_modes - 1):
1108
1109 data = [[[]]]
1110 current = None
1111 labels = []
1112 for struct in range(M):
1113
1114 id = "%s - %s" % (object_id_list[struct], molecule_list[struct])
1115 if current == None:
1116 current = id
1117 labels.append(current)
1118
1119
1120 if current != id:
1121 data[-1].append([])
1122 current = id
1123 labels.append(current)
1124
1125
1126 data[-1][-1].append([proj[mode, struct], proj[mode+1, struct]])
1127
1128
1129 sets = len(labels)
1130
1131
1132 file = open_write_file("graph_pc%s_pc%s.agr" % (mode+1, mode+2), dir=dir, force=True)
1133
1134
1135 write_xy_header(format=format, file=file, title="Principle component projections", sets=[sets], set_names=[labels], axis_labels=[['PC mode %i (\cE\C)' % (mode+1), 'PC mode %i (\cE\C)' % (mode+2)]], linestyle=[[0]*sets])
1136
1137
1138 write_xy_data(format=format, data=data, file=file, graph_type='xy')
1139
1140
1141 file.close()
1142
1143
1144 -def read_gaussian(file=None, dir=None, set_mol_name=None, set_model_num=None, verbosity=1, fail=True):
1145 """Read structures from a Gaussian log file.
1146
1147
1148 @keyword file: The name of the Gaussian log file to read.
1149 @type file: str
1150 @keyword dir: The directory where the Gaussian log file is located. If set to None, then the file will be searched for in the current directory.
1151 @type dir: str or None
1152 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information.
1153 @type set_mol_name: None, str, or list of str
1154 @keyword set_model_num: Set the model number of the loaded molecule.
1155 @type set_model_num: None, int, or list of int
1156 @keyword fail: A flag which, if True, will cause a RelaxError to be raised if the Gaussian log file does not exist. If False, then a RelaxWarning will be trown instead.
1157 @type fail: bool
1158 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1.
1159 @type verbosity: int
1160 @raise RelaxFileError: If the fail flag is set, then a RelaxError is raised if the Gaussian log file does not exist.
1161 """
1162
1163
1164 check_pipe()
1165
1166
1167 file_path = get_file_path(file, dir)
1168
1169
1170 if not access(file_path, F_OK):
1171 file_path_orig = file_path
1172 file_path = file_path + '.log'
1173
1174
1175 if not access(file_path, F_OK):
1176 if fail:
1177 raise RelaxFileError('Gaussian log', file_path_orig)
1178 else:
1179 warn(RelaxNoPDBFileWarning(file_path))
1180 return
1181
1182
1183 if not hasattr(cdp, 'structure'):
1184 cdp.structure = Internal()
1185
1186
1187 cdp.structure.load_gaussian(file_path, set_mol_name=set_mol_name, set_model_num=set_model_num, verbosity=verbosity)
1188
1189
1190 -def read_pdb(file=None, dir=None, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, alt_loc=None, verbosity=1, merge=False, fail=True):
1191 """The PDB loading function.
1192
1193 @keyword file: The name of the PDB file to read.
1194 @type file: str
1195 @keyword dir: The directory where the PDB file is located. If set to None, then the file will be searched for in the current directory.
1196 @type dir: str or None
1197 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are numbered consecutively from 1. If set to None, then all molecules will be loaded.
1198 @type read_mol: None, int, or list of int
1199 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information.
1200 @type set_mol_name: None, str, or list of str
1201 @keyword read_model: The PDB model to extract from the file. If set to None, then all models will be loaded.
1202 @type read_model: None, int, or list of int
1203 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the PDB model numbers will be preserved, if they exist.
1204 @type set_model_num: None, int, or list of int
1205 @keyword alt_loc: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use.
1206 @type alt_loc: str or None
1207 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1.
1208 @type verbosity: int
1209 @keyword merge: A flag which if set to True will try to merge the PDB structure into the currently loaded structures.
1210 @type merge: bool
1211 @keyword fail: A flag which, if True, will cause a RelaxError to be raised if the PDB file does not exist. If False, then a RelaxWarning will be trown instead.
1212 @type fail: bool
1213 @raise RelaxFileError: If the fail flag is set, then a RelaxError is raised if the PDB file does not exist.
1214 """
1215
1216
1217 check_pipe()
1218
1219
1220 file_path = get_file_path(file, dir)
1221
1222
1223 file_path_orig = file_path
1224 if not access(file_path, F_OK):
1225
1226 for ext in ['.pdb', '.gz', '.pdb.gz', '.bz2', '.pdb.bz2']:
1227
1228 if access(file_path+ext, F_OK):
1229 file_path = file_path + ext
1230
1231
1232 if not access(file_path, F_OK):
1233 if fail:
1234 raise RelaxFileError('PDB', file_path_orig)
1235 else:
1236 warn(RelaxNoPDBFileWarning(file_path))
1237 return
1238
1239
1240 if not hasattr(cdp, 'structure'):
1241 cdp.structure = Internal()
1242
1243
1244 cdp.structure.load_pdb(file_path, read_mol=read_mol, set_mol_name=set_mol_name, read_model=read_model, set_model_num=set_model_num, alt_loc=alt_loc, verbosity=verbosity, merge=merge)
1245
1246
1247 molmol.molmol_obj.open_pdb()
1248
1249
1250 -def read_xyz(file=None, dir=None, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, verbosity=1, fail=True):
1251 """The XYZ loading function.
1252
1253
1254 @keyword file: The name of the XYZ file to read.
1255 @type file: str
1256 @keyword dir: The directory where the XYZ file is located. If set to None, then the
1257 file will be searched for in the current directory.
1258 @type dir: str or None
1259 @keyword read_mol: The molecule(s) to read from the file, independent of model.
1260 If set to None, then all molecules will be loaded.
1261 @type read_mol: None, int, or list of int
1262 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then
1263 the molecules will be automatically labelled based on the file name or
1264 other information.
1265 @type set_mol_name: None, str, or list of str
1266 @keyword read_model: The XYZ model to extract from the file. If set to None, then all models
1267 will be loaded.
1268 @type read_model: None, int, or list of int
1269 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the
1270 XYZ model numbers will be preserved, if they exist.
1271 @type set_model_num: None, int, or list of int
1272 @keyword fail: A flag which, if True, will cause a RelaxError to be raised if the XYZ
1273 file does not exist. If False, then a RelaxWarning will be trown
1274 instead.
1275 @type fail: bool
1276 @keyword verbosity: The amount of information to print to screen. Zero corresponds to
1277 minimal output while higher values increase the amount of output. The
1278 default value is 1.
1279 @type verbosity: int
1280 @raise RelaxFileError: If the fail flag is set, then a RelaxError is raised if the XYZ file
1281 does not exist.
1282 """
1283
1284
1285 check_pipe()
1286
1287
1288 file_path = get_file_path(file, dir)
1289
1290
1291 if not access(file_path, F_OK):
1292 file_path_orig = file_path
1293 file_path = file_path + '.xyz'
1294
1295
1296 if not access(file_path, F_OK):
1297 if fail:
1298 raise RelaxFileError('XYZ', file_path_orig)
1299 else:
1300 warn(RelaxNoPDBFileWarning(file_path))
1301 return
1302
1303
1304 if not hasattr(cdp, 'structure'):
1305 cdp.structure = Internal()
1306
1307
1308 cdp.structure.load_xyz(file_path, read_mol=read_mol, set_mol_name=set_mol_name, read_model=read_model, set_model_num=set_model_num, verbosity=verbosity)
1309
1310
1311 -def rmsd(pipes=None, models=None, molecules=None, atom_id=None, atomic=False):
1312 """Calculate the RMSD between the loaded models.
1313
1314 The RMSD value will be placed into the current data pipe cdp.structure.rmsd data structure.
1315
1316
1317 @keyword pipes: The data pipes to determine the RMSD for.
1318 @type pipes: None or list of str
1319 @keyword models: The list of models to determine the RMSD for. The number of elements must match the pipes argument. If set to None, then all models will be used.
1320 @type models: None or list of lists of int
1321 @keyword molecules: The list of molecules to determine the RMSD for. The number of elements must match the pipes argument.
1322 @type molecules: None or list of lists of str
1323 @keyword atom_id: The atom identification string of the coordinates of interest. This matches the spin ID string format.
1324 @type atom_id: str or None
1325 @keyword atomic: A flag which if True will allow for per-atom RMSDs to be additionally calculated.
1326 @type atomic: bool
1327 @return: The RMSD value.
1328 @rtype: float
1329 """
1330
1331
1332 check_pipe()
1333
1334
1335 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id)
1336
1337
1338 if atomic:
1339
1340 print("\nCalculating the atomic-level RMSDs.")
1341
1342
1343 rmsd = per_atom_rmsd(coord, verbosity=0)
1344
1345
1346 for i in range(len(res_nums)):
1347
1348 id = generate_spin_id_unique(mol_name=mol_names[i], res_num=res_nums[i], res_name=res_names[i], spin_num=i, spin_name=atom_names[i])
1349
1350
1351 spin_cont = return_spin(id)
1352
1353
1354 if spin_cont == None:
1355 continue
1356
1357
1358 spin_cont.pos_rmsd = rmsd[i]
1359
1360
1361 print("\nCalculating the global RMSD.")
1362 cdp.structure.rmsd = atomic_rmsd(coord, verbosity=1)
1363
1364
1365 return cdp.structure.rmsd
1366
1367
1368 -def rotate(R=None, origin=None, model=None, atom_id=None, pipe_name=None):
1369 """Rotate the structural data about the origin by the specified forwards rotation.
1370
1371 @keyword R: The forwards rotation matrix.
1372 @type R: numpy 3D, rank-2 array or a 3x3 list of floats
1373 @keyword origin: The origin of the rotation. If not supplied, the origin will be set to [0, 0, 0].
1374 @type origin: numpy 3D, rank-1 array or list of len 3 or None
1375 @keyword model: The model to rotate. If None, all models will be rotated.
1376 @type model: int
1377 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used.
1378 @type atom_id: str or None
1379 @keyword pipe_name: The name of the data pipe containing the structures to translate. This defaults to the current data pipe.
1380 @type pipe_name: None or str
1381 """
1382
1383
1384 if pipe_name == None:
1385 pipe_name = cdp_name()
1386
1387
1388 check_pipe(pipe_name)
1389 check_structure(pipe_name)
1390
1391
1392 dp = get_pipe(pipe_name)
1393
1394
1395 if origin is None:
1396 origin = [0, 0, 0]
1397
1398
1399 R = array(R, float64)
1400 origin = array(origin, float64)
1401
1402
1403 selection = dp.structure.selection(atom_id=atom_id)
1404 dp.structure.rotate(R=R, origin=origin, model=model, selection=selection)
1405
1406
1407 if model != None:
1408 print("Rotated %i atoms of model %i." % (selection.count_atoms(), model))
1409 else:
1410 print("Rotated %i atoms." % selection.count_atoms())
1411
1412
1413 -def sequence_alignment(pipes=None, models=None, molecules=None, msa_algorithm='Central Star', pairwise_algorithm='NW70', matrix='BLOSUM62', gap_open_penalty=1.0, gap_extend_penalty=1.0, end_gap_open_penalty=0.0, end_gap_extend_penalty=0.0):
1414 """Superimpose a set of related, but not identical structures.
1415
1416 @keyword pipes: The data pipes to include in the alignment and superimposition.
1417 @type pipes: None or list of str
1418 @keyword models: The list of models to for each data pipe superimpose. The number of elements must match the pipes argument. If set to None, then all models will be used.
1419 @type models: list of lists of int or None
1420 @keyword molecules: The molecule names to include in the alignment and superimposition. The number of elements must match the pipes argument.
1421 @type molecules: None or list of str
1422 @keyword msa_algorithm: The multiple sequence alignment (MSA) algorithm to use.
1423 @type msa_algorithm: str
1424 @keyword pairwise_algorithm: The pairwise sequence alignment algorithm to use.
1425 @type pairwise_algorithm: str
1426 @keyword matrix: The substitution matrix to use.
1427 @type matrix: str
1428 @keyword gap_open_penalty: The penalty for introducing gaps, as a positive number.
1429 @type gap_open_penalty: float
1430 @keyword gap_extend_penalty: The penalty for extending a gap, as a positive number.
1431 @type gap_extend_penalty: float
1432 @keyword end_gap_open_penalty: The optional penalty for opening a gap at the end of a sequence.
1433 @type end_gap_open_penalty: float
1434 @keyword end_gap_extend_penalty: The optional penalty for extending a gap at the end of a sequence.
1435 @type end_gap_extend_penalty: float
1436 """
1437
1438
1439 objects, object_names, pipes = assemble_structural_objects(pipes=pipes, models=models, molecules=molecules)
1440
1441
1442 ids, object_id_list, model_list, molecule_list, atom_pos, mol_names, res_names, res_nums, atom_names, elements, one_letter_codes, num_mols = assemble_atomic_coordinates(objects=objects, object_names=object_names, molecules=molecules, models=models)
1443
1444
1445 res_num_list = []
1446 for mol_index in range(num_mols):
1447 res_num_list.append([])
1448 for i in range(len(one_letter_codes[mol_index])):
1449 key = list(res_nums[mol_index][i].keys())[0]
1450 res_num_list[mol_index].append(res_nums[mol_index][i][key])
1451
1452
1453 strings, gaps = msa_general(one_letter_codes, residue_numbers=res_num_list, msa_algorithm=msa_algorithm, pairwise_algorithm=pairwise_algorithm, matrix=matrix, gap_open_penalty=gap_open_penalty, gap_extend_penalty=gap_extend_penalty, end_gap_open_penalty=end_gap_open_penalty, end_gap_extend_penalty=end_gap_extend_penalty)
1454
1455
1456 if not hasattr(ds, 'sequence_alignments'):
1457 ds.sequence_alignments = Sequence_alignments()
1458
1459
1460 if msa_algorithm == 'residue number':
1461 pairwise_algorithm = None
1462 matrix = None
1463 gap_open_penalty = None
1464 gap_extend_penalty = None
1465 end_gap_open_penalty = None
1466 end_gap_extend_penalty = None
1467
1468
1469 ds.sequence_alignments.add(object_ids=object_id_list, models=model_list, molecules=molecule_list, sequences=one_letter_codes, strings=strings, gaps=gaps, msa_algorithm=msa_algorithm, pairwise_algorithm=pairwise_algorithm, matrix=matrix, gap_open_penalty=gap_open_penalty, gap_extend_penalty=gap_extend_penalty, end_gap_open_penalty=end_gap_open_penalty, end_gap_extend_penalty=end_gap_extend_penalty)
1470
1471
1473 """Place the XH unit vector into the spin container object.
1474
1475 @keyword spin: The spin container object.
1476 @type spin: SpinContainer instance
1477 @keyword xh_vect: The unit vector parallel to the XH bond.
1478 @type xh_vect: array of len 3
1479 """
1480
1481
1482 spin.xh_vect = xh_vect
1483
1484
1485 -def structure_loop(pipes=None, molecules=None, models=None, atom_id=None):
1486 """Generator function for looping over all internal structural objects, models and molecules.
1487
1488 @keyword pipes: The data pipes to loop over.
1489 @type pipes: None or list of str
1490 @keyword models: The list of models for each data pipe. The number of elements must match the pipes argument. If set to None, then all models will be used.
1491 @type models: None or list of lists of int
1492 @keyword molecules: The list of molecules for each data pipe. The number of elements must match the pipes argument.
1493 @type molecules: None or list of lists of str
1494 @keyword atom_id: The molecule, residue, and atom identifier string of the coordinates of interest. This matches the spin ID string format.
1495 @type atom_id: None or str
1496 @return: The data pipe index, model number, and molecule name.
1497 @rtype: int, int or None, str
1498 """
1499
1500
1501 if pipes == None:
1502 pipes = [cdp_name()]
1503 num_pipes = len(pipes)
1504
1505
1506 for pipe in pipes:
1507 check_pipe(pipe)
1508
1509
1510 if models != None:
1511 if len(models) != num_pipes:
1512 raise RelaxError("The %i elements of the models argument does not match the %i data pipes." % (len(models), num_pipes))
1513 if molecules != None:
1514 if len(molecules) != num_pipes:
1515 raise RelaxError("The %i elements of the molecules argument does not match the %i data pipes." % (len(molecules), num_pipes))
1516
1517
1518 objects = []
1519 for pipe_index in range(len(pipes)):
1520 dp = get_pipe(pipes[pipe_index])
1521 objects.append(dp.structure)
1522
1523
1524 for pipe_index, model_num, mol_name in loop_coord_structures(objects=objects, models=models, molecules=molecules, atom_id=atom_id):
1525 yield pipe_index, model_num, mol_name
1526
1527
1528 -def superimpose(pipes=None, models=None, molecules=None, atom_id=None, displace_id=None, method='fit to mean', centre_type="centroid", centroid=None):
1529 """Superimpose a set of structures.
1530
1531 @keyword pipes: The data pipes to include in the alignment and superimposition.
1532 @type pipes: None or list of str
1533 @keyword models: The list of models to for each data pipe superimpose. The number of elements must match the pipes argument. If set to None, then all models will be used.
1534 @type models: list of lists of int or None
1535 @keyword molecules: The molecule names to include in the alignment and superimposition. The number of elements must match the pipes argument.
1536 @type molecules: None or list of str
1537 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format.
1538 @type atom_id: str or None
1539 @keyword displace_id: The atom ID string for restricting the displacement to a subset of all atoms. If not set, then all atoms will be translated and rotated. This can be a list of atom IDs with each element corresponding to one of the structures.
1540 @type displace_id: None, str, or list of str
1541 @keyword method: The superimposition method. It must be one of 'fit to mean' or 'fit to first'.
1542 @type method: str
1543 @keyword centre_type: The type of centre to superimpose over. This can either be the standard centroid superimposition or the CoM could be used instead.
1544 @type centre_type: str
1545 @keyword centroid: An alternative position of the centroid to allow for different superpositions, for example of pivot point motions.
1546 @type centroid: list of float or numpy rank-1, 3D array
1547 """
1548
1549
1550 allowed = ['fit to mean', 'fit to first']
1551 if method not in allowed:
1552 raise RelaxError("The superimposition method '%s' is unknown. It must be one of %s." % (method, allowed))
1553
1554
1555 allowed = ['centroid', 'CoM']
1556 if centre_type not in allowed:
1557 raise RelaxError("The superimposition centre type '%s' is unknown. It must be one of %s." % (centre_type, allowed))
1558
1559
1560 if pipes == None:
1561 pipes = [cdp_name()]
1562
1563
1564 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id)
1565
1566
1567 if len(coord[0]) == 0:
1568 raise RelaxError("No common atoms could be found between the structures.")
1569
1570
1571 if method == 'fit to mean':
1572 T, R, pivot = fit_to_mean(models=list(range(len(ids))), coord=coord, centre_type=centre_type, elements=elements, centroid=centroid)
1573 elif method == 'fit to first':
1574 T, R, pivot = fit_to_first(models=list(range(len(ids))), coord=coord, centre_type=centre_type, elements=elements, centroid=centroid)
1575
1576
1577 i = 0
1578 for pipe_index, model_num, mol_name in structure_loop(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id):
1579
1580 curr_displace_id = None
1581 if isinstance(displace_id, str):
1582 curr_displace_id = displace_id
1583 elif isinstance(displace_id, list):
1584 if len(displace_id) <= i:
1585 raise RelaxError("Not enough displacement ID strings have been provided.")
1586 curr_displace_id = displace_id[i]
1587
1588
1589 id = curr_displace_id
1590 if id == None or (mol_name and not search('#', id)):
1591 if curr_displace_id == None:
1592 id = '#%s' % mol_name
1593 elif search('#', curr_displace_id):
1594 id = curr_displace_id
1595 else:
1596 id = '#%s%s' % (mol_name, curr_displace_id)
1597
1598
1599 translate(T=T[i], model=model_num, pipe_name=pipes[pipe_index], atom_id=id)
1600
1601
1602 rotate(R=R[i], origin=pivot[i], model=model_num, pipe_name=pipes[pipe_index], atom_id=id)
1603
1604
1605 i += 1
1606
1607
1608 -def translate(T=None, model=None, atom_id=None, pipe_name=None):
1609 """Shift the structural data by the specified translation vector.
1610
1611 @keyword T: The translation vector.
1612 @type T: numpy rank-1, 3D array or list of float
1613 @keyword model: The model to translate. If None, all models will be rotated.
1614 @type model: int or None
1615 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used.
1616 @type atom_id: str or None
1617 @keyword pipe_name: The name of the data pipe containing the structures to translate. This defaults to the current data pipe.
1618 @type pipe_name: None or str
1619 """
1620
1621
1622 if pipe_name == None:
1623 pipe_name = cdp_name()
1624
1625
1626 check_pipe(pipe_name)
1627 check_structure(pipe_name)
1628
1629
1630 dp = get_pipe(pipe_name)
1631
1632
1633 T = array(T, float64)
1634
1635
1636 selection = dp.structure.selection(atom_id=atom_id)
1637 dp.structure.translate(T=T, model=model, selection=selection)
1638
1639
1640 if model != None:
1641 print("Translated %i atoms of model %i." % (selection.count_atoms(), model))
1642 else:
1643 print("Translated %i atoms." % selection.count_atoms())
1644
1645
1646 -def vectors(spin_id1=None, spin_id2=None, model=None, verbosity=1, ave=True, unit=True):
1647 """Extract the bond vectors from the loaded structures and store them in the spin container.
1648
1649 @keyword spin_id1: The spin identifier string of the first spin of the pair.
1650 @type spin_id1: str
1651 @keyword spin_id2: The spin identifier string of the second spin of the pair.
1652 @type spin_id2: str
1653 @keyword model: The model to extract the vector from. If None, all vectors will be extracted.
1654 @type model: str
1655 @keyword verbosity: The higher the value, the more information is printed to screen.
1656 @type verbosity: int
1657 @keyword ave: A flag which if True will cause the average of all vectors to be extracted.
1658 @type ave: bool
1659 @keyword unit: A flag which if True will cause the function to calculate the unit vectors.
1660 @type unit: bool
1661 """
1662
1663
1664 check_pipe()
1665 check_structure()
1666 check_mol_res_spin_data()
1667
1668
1669 if verbosity:
1670
1671 num_models = cdp.structure.num_models()
1672
1673
1674 if num_models > 1:
1675 if model:
1676 print("Extracting vectors for model '%s'." % model)
1677 else:
1678 print("Extracting vectors for all %s models." % num_models)
1679 if ave:
1680 print("Averaging all vectors.")
1681
1682
1683 else:
1684 print("Extracting vectors from the single model.")
1685
1686
1687 if unit:
1688 print("Calculating the unit vectors.")
1689
1690
1691 no_vectors = True
1692 for spin, mol_name, res_num, res_name in spin_loop(selection=spin_id, full_info=True):
1693
1694 if not spin.select:
1695 continue
1696
1697
1698 id = generate_spin_id_unique(res_num=res_num, res_name=None, spin_name=spin.name, spin_num=spin.num)
1699
1700
1701 if spin.num == None and spin.name == None:
1702 warn(RelaxWarning("Either the spin number or name must be set for the spin " + repr(id) + " to identify the corresponding atom in the molecule."))
1703 continue
1704
1705
1706 if hasattr(spin, 'vector'):
1707 obj = getattr(spin, 'vector')
1708 if obj != None:
1709 warn(RelaxWarning("The bond vector for the spin " + repr(id) + " already exists."))
1710 continue
1711
1712
1713 bond_vectors, attached_name, warnings = cdp.structure.bond_vectors(attached_atom=attached, model_num=model, res_num=res_num, spin_name=spin.name, spin_num=spin.num, return_name=True, return_warnings=True)
1714 id2 = generate_spin_id_unique(res_num=res_num, res_name=None, spin_name=spin.name)
1715
1716
1717 if not bond_vectors:
1718
1719 if warnings:
1720 warn(RelaxWarning(warnings + " (atom ID " + repr(id) + ")."))
1721
1722
1723 continue
1724
1725
1726 if not hasattr(spin, 'attached_atom'):
1727 spin.attached_atom = attached_name
1728 elif spin.attached_atom != attached_name:
1729 raise RelaxError("The " + repr(spin.attached_atom) + " atom already attached to the spin does not match the attached atom " + repr(attached_name) + ".")
1730
1731
1732 if ave:
1733 ave_vector = zeros(3, float64)
1734
1735
1736 for i in range(len(bond_vectors)):
1737
1738 if unit:
1739
1740 norm_factor = norm(bond_vectors[i])
1741
1742
1743 if norm_factor == 0.0:
1744 warn(RelaxZeroVectorWarning(spin_id1=id, spin_id2=id2))
1745
1746
1747 else:
1748 bond_vectors[i] = bond_vectors[i] / norm_factor
1749
1750
1751 if ave:
1752 ave_vector = ave_vector + bond_vectors[i]
1753
1754
1755 if ave:
1756 vector = ave_vector / float(len(bond_vectors))
1757 else:
1758 vector = bond_vectors
1759
1760
1761 if len(vector) == 1:
1762 vector = vector[0]
1763
1764
1765 setattr(spin, 'vector', vector)
1766
1767
1768 no_vectors = False
1769
1770
1771 if verbosity:
1772
1773 num = len(bond_vectors)
1774 plural = 's'
1775 if num == 1:
1776 plural = ''
1777
1778 if spin.name:
1779 print("Extracted %s %s-%s vector%s for the spin '%s'." % (num, spin.name, attached_name, plural, id))
1780 else:
1781 print("Extracted %s %s-%s vector%s for the spin '%s'." % (num, spin.num, attached_name, plural, id))
1782
1783
1784 if no_vectors:
1785 raise RelaxError("No vectors could be extracted.")
1786
1787
1788 -def web_of_motion(pipes=None, models=None, molecules=None, atom_id=None, file=None, dir=None, force=False):
1789 """Create a PDB representation of the motion between a set of models.
1790
1791 This will create a PDB file containing the atoms of all models, with identical atoms links using CONECT records. This function only supports the internal structural object.
1792
1793
1794 @keyword pipes: The data pipes to generate the web between.
1795 @type pipes: None or list of str
1796 @keyword models: The list of models to generate the web between. The number of elements must match the pipes argument. If set to None, then all models will be used.
1797 @type models: None or list of lists of int
1798 @keyword molecules: The list of molecules to generate the web between. The number of elements must match the pipes argument.
1799 @type molecules: None or list of lists of str
1800 @keyword atom_id: The atom identification string of the coordinates of interest. This matches the spin ID string format.
1801 @type atom_id: str or None
1802 @keyword file: The name of the PDB file to write.
1803 @type file: str
1804 @keyword dir: The directory where the PDB file will be placed. If set to None, then the file will be placed in the current directory.
1805 @type dir: str or None
1806 @keyword force: The force flag which if True will cause the file to be overwritten.
1807 @type force: bool
1808 """
1809
1810
1811 check_pipe()
1812 check_structure()
1813
1814
1815 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id)
1816
1817
1818 if not len(coord) > 1:
1819 raise RelaxError("Two or more structures are required.")
1820
1821
1822 web = Internal()
1823
1824
1825 for atom_index in range(len(atom_names)):
1826
1827 for struct_index in range(len(ids)):
1828
1829 web.add_atom(mol_name=mol_names[atom_index], atom_name=atom_names[atom_index], res_name=res_names[atom_index], res_num=res_nums[atom_index], pos=coord[struct_index, atom_index], element=elements[atom_index], sort=False)
1830
1831
1832 for k in range(len(ids)):
1833 for l in range(len(ids)):
1834
1835 if k == l:
1836 continue
1837
1838
1839 index1 = atom_index*len(ids) + k
1840 index2 = atom_index*len(ids) + l
1841
1842
1843 web.connect_atom(mol_name=mol_names[atom_index], index1=index1, index2=index2)
1844
1845
1846 if isinstance(file, str):
1847
1848 file = get_file_path(file, dir)
1849
1850
1851 if not search(".pdb$", file):
1852 file += '.pdb'
1853
1854
1855 file = open_write_file(file, force=force)
1856
1857
1858 web.write_pdb(file)
1859
1860
1861 -def write_pdb(file=None, dir=None, model_num=None, compress_type=0, force=False):
1862 """The PDB writing function.
1863
1864 @keyword file: The name of the PDB file to write. This can also be a file instance.
1865 @type file: str or file instance
1866 @keyword dir: The directory where the PDB file will be placed. If set to None, then the file will be placed in the current directory.
1867 @type dir: str or None
1868 @keyword model_num: The model to place into the PDB file. If not supplied, then all models will be placed into the file.
1869 @type model_num: None or int
1870 @keyword compress_type: The compression type. The integer values correspond to the compression type: 0, no compression; 1, Bzip2 compression; 2, Gzip compression.
1871 @type compress_type: int
1872 @keyword force: The force flag which if True will cause the file to be overwritten.
1873 @type force: bool
1874 """
1875
1876
1877 check_pipe()
1878
1879
1880 if not hasattr(cdp, 'structure'):
1881 raise RelaxError("No structural data is present in the current data pipe.")
1882
1883
1884 if isinstance(file, str):
1885
1886 file = get_file_path(file, dir)
1887
1888
1889 if not search(".pdb$", file):
1890 file = file + '.pdb'
1891
1892
1893 file = open_write_file(file, compress_type=compress_type, force=force)
1894
1895
1896 cdp.structure.write_pdb(file, model_num=model_num)
1897