1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 from minfx.generic import generic_minimise
27 from numpy import array, average, concatenate, dot, float64, mean, ones, std, zeros
28 from numpy.linalg import norm
29 from os import F_OK, access, getcwd
30 from re import search
31 import sys
32 from warnings import warn
33
34
35 from data_store import Relax_data_store; ds = Relax_data_store()
36 from data_store.seq_align import Sequence_alignments
37 from lib.check_types import is_float
38 from lib.errors import RelaxError, RelaxFileError
39 from lib.geometry.vectors import vector_angle_atan2
40 from lib.io import get_file_path, open_write_file, write_data
41 from lib.plotting.api import correlation_matrix, write_xy_data, write_xy_header
42 from lib.selection import tokenise
43 from lib.sequence import write_spin_data
44 from lib.sequence_alignment.msa import msa_general, msa_residue_numbers, msa_residue_skipping
45 from lib.structure.internal.coordinates import assemble_atomic_coordinates, assemble_coord_array, loop_coord_structures
46 from lib.structure.internal.displacements import Displacements
47 from lib.structure.internal.object import Internal
48 from lib.structure.pca import pca_analysis
49 from lib.structure.represent.diffusion_tensor import diffusion_tensor
50 from lib.structure.statistics import atomic_rmsd, per_atom_rmsd
51 from lib.structure.superimpose import fit_to_first, fit_to_mean
52 from lib.warnings import RelaxWarning, RelaxNoPDBFileWarning, RelaxZeroVectorWarning
53 from pipe_control import molmol, pipes
54 from pipe_control.interatomic import interatomic_loop
55 from pipe_control.mol_res_spin import check_mol_res_spin_data, create_spin, generate_spin_id_unique, linear_ave, return_spin, spin_loop
56 from pipe_control.pipes import cdp_name, check_pipe, get_pipe
57 from pipe_control.structure.checks import check_structure
58 from pipe_control.structure.mass import pipe_centre_of_mass
59 from status import Status; status = Status()
60 from target_functions.ens_pivot_finder import Pivot_finder
61
62
63 -def add_atom(mol_name=None, atom_name=None, res_name=None, res_num=None, pos=[None, None, None], element=None, atom_num=None, chain_id=None, segment_id=None, pdb_record=None):
64 """Add a new atom to the structural data object.
65
66 @keyword mol_name: The name of the molecule.
67 @type mol_name: str
68 @keyword atom_name: The atom name, e.g. 'H1'.
69 @type atom_name: str or None
70 @keyword res_name: The residue name.
71 @type res_name: str or None
72 @keyword res_num: The residue number.
73 @type res_num: int or None
74 @keyword pos: The position vector of coordinates. If a rank-2 array is supplied, the length of the first dimension must match the number of models.
75 @type pos: rank-1 or rank-2 array or list of float
76 @keyword element: The element symbol.
77 @type element: str or None
78 @keyword atom_num: The atom number.
79 @type atom_num: int or None
80 @keyword chain_id: The chain identifier.
81 @type chain_id: str or None
82 @keyword segment_id: The segment identifier.
83 @type segment_id: str or None
84 @keyword pdb_record: The optional PDB record name, e.g. 'ATOM' or 'HETATM'.
85 @type pdb_record: str or None
86 """
87
88
89 check_pipe()
90
91
92 if not hasattr(cdp, 'structure'):
93 cdp.structure = Internal()
94
95
96 cdp.structure.add_atom(mol_name=mol_name, atom_name=atom_name, res_name=res_name, res_num=res_num, pos=pos, element=element, atom_num=atom_num, chain_id=chain_id, segment_id=segment_id, pdb_record=pdb_record, sort=True)
97
98
99 -def add_helix(start=None, end=None, mol_name=None):
100 """Define alpha helical secondary structure for the structural data object.
101
102 @keyword start: The residue number for the start of the helix.
103 @type start: int
104 @keyword end: The residue number for the end of the helix.
105 @type end: int
106 @keyword mol_name: Define the secondary structure for a specific molecule.
107 @type mol_name: str or None
108 """
109
110
111 check_pipe()
112 check_structure()
113
114
115 cdp.structure.add_helix(res_start=start, res_end=end, mol_name=mol_name)
116
117
119 """Add a new model to the empty structural data object."""
120
121
122 check_pipe()
123
124
125 if not hasattr(cdp, 'structure'):
126 cdp.structure = Internal()
127
128
129 if cdp.structure.num_molecules() != 0:
130 raise RelaxError("The internal structural object is not empty.")
131
132
133 cdp.structure.structural_data.add_item(model_num=model_num)
134 print("Created the empty model number %s." % model_num)
135
136
137 -def add_sheet(strand=1, sheet_id='A', strand_count=2, strand_sense=0, start=None, end=None, mol_name=None, current_atom=None, prev_atom=None):
138 """Define beta sheet secondary structure for the structural data object.
139
140 @keyword strand: Strand number which starts at 1 for each strand within a sheet and increases by one.
141 @type strand: int
142 @keyword sheet_id: The sheet identifier. To match the PDB standard, sheet IDs should range from 'A' to 'Z'.
143 @type sheet_id: str
144 @keyword strand_count: The number of strands in the sheet.
145 @type strand_count: int
146 @keyword strand_sense: Sense of strand with respect to previous strand in the sheet. 0 if first strand, 1 if parallel, -1 if anti-parallel.
147 @type strand_sense: int
148 @keyword start: The residue number for the start of the sheet.
149 @type start: int
150 @keyword end: The residue number for the end of the sheet.
151 @type end: int
152 @keyword mol_name: Define the secondary structure for a specific molecule.
153 @type mol_name: str or None
154 @keyword current_atom: The name of the first atom in the current strand, to link the current back to the previous strand.
155 @type current_atom: str or None
156 @keyword prev_atom: The name of the last atom in the previous strand, to link the current back to the previous strand.
157 @type prev_atom: str or None
158 """
159
160
161 check_pipe()
162 check_structure()
163
164
165 cdp.structure.add_sheet(strand=strand, sheet_id=sheet_id, strand_count=strand_count, strand_sense=strand_sense, res_start=start, res_end=end, mol_name=mol_name, current_atom=current_atom, prev_atom=prev_atom)
166
167
169 """Assemble the common atomic coordinates taking sequence alignments into account.
170
171 @keyword pipes: The data pipes to assemble the coordinates from.
172 @type pipes: None or list of str
173 @keyword models: The list of models for each data pipe. The number of elements must match the pipes argument. If set to None, then all models will be used.
174 @type models: None or list of lists of int
175 @keyword molecules: The list of molecules for each data pipe. The number of elements must match the pipes argument.
176 @type molecules: None or list of lists of str
177 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format.
178 @type atom_id: str or None
179 @keyword lists: A flag which if true will cause the object ID list per molecule, the model number list per molecule, and the molecule name list per molecule to also be returned.
180 @type lists: bool
181 @return: The array of atomic coordinates (first dimension is the model and/or molecule, the second are the atoms, and the third are the coordinates); a list of unique IDs for each structural object, model, and molecule; the common list of molecule names; the common list of residue names; the common list of residue numbers; the common list of atom names; the common list of element names.
182 @rtype: numpy rank-3 float64 array, list of str, list of str, list of str, list of int, list of str, list of str
183 """
184
185
186 objects, object_names, pipes = assemble_structural_objects(pipes=pipes, models=models, molecules=molecules)
187
188
189 ids, object_id_list, model_list, molecule_list, atom_pos, mol_names, res_names, res_nums, atom_names, elements, one_letter_codes, num_mols = assemble_atomic_coordinates(objects=objects, object_names=object_names, molecules=molecules, models=models, atom_id=atom_id)
190
191
192 if mol_names == []:
193 if atom_id != None:
194 raise RelaxError("No structural data matching the atom ID string '%s' can be found." % atom_id)
195 else:
196 raise RelaxError("No structural data can be found.")
197
198
199 same_mol = True
200 for mol in molecule_list:
201 if mol != molecule_list[0]:
202 same_mol = False
203
204
205 align = None
206 if hasattr(ds, 'sequence_alignments'):
207 align = ds.sequence_alignments.find_alignment(object_ids=object_id_list, models=model_list, molecules=molecule_list, sequences=one_letter_codes)
208 if align != None:
209
210 print("\nSequence alignment found - common atoms will be determined based on this MSA:")
211 for i in range(len(align.object_ids)):
212 print(align.strings[i])
213
214
215 strings = align.strings
216 gaps = align.gaps
217
218
219 elif len(objects) == 1 and same_mol:
220
221 print("\nSequence alignment disabled as only models with identical molecule, residue and atomic sequences are being superimposed.")
222
223
224 strings = one_letter_codes
225
226
227 gaps = []
228 for mol_index in range(num_mols):
229 gaps.append([])
230 for i in range(len(one_letter_codes[mol_index])):
231 gaps[mol_index].append(0)
232
233
234 else:
235
236 print("\nSequence alignment cannot be found - falling back to a residue number based alignment.")
237
238
239 res_num_list = []
240 for mol_index in range(num_mols):
241 res_num_list.append([])
242 for i in range(len(one_letter_codes[mol_index])):
243 key = list(res_nums[mol_index][i].keys())[0]
244 res_num_list[mol_index].append(res_nums[mol_index][i][key])
245
246
247 strings, gaps = msa_residue_numbers(one_letter_codes, residue_numbers=res_num_list)
248
249
250 skip = msa_residue_skipping(strings=strings, gaps=gaps)
251
252
253 coord, mol_name_common, res_name_common, res_num_common, atom_name_common, element_common = assemble_coord_array(atom_pos=atom_pos, mol_names=mol_names, res_names=res_names, res_nums=res_nums, atom_names=atom_names, elements=elements, sequences=one_letter_codes, skip=skip)
254 if lists:
255 return coord, ids, object_id_list, model_list, molecule_list, mol_name_common, res_name_common, res_num_common, atom_name_common, element_common
256 else:
257 return coord, ids, mol_name_common, res_name_common, res_num_common, atom_name_common, element_common
258
259
261 """Assemble the atomic coordinates.
262
263 @keyword pipes: The data pipes to assemble the coordinates from.
264 @type pipes: None or list of str
265 @keyword models: The list of models for each data pipe. The number of elements must match the pipes argument. If set to None, then all models will be used.
266 @type models: None or list of lists of int
267 @keyword molecules: The list of molecules for each data pipe. The number of elements must match the pipes argument.
268 @type molecules: None or list of lists of str
269 @return: The structural objects, structural object names, and data pipes list.
270 @rtype: list of lib.structure.internal.object.Internal instances, list of str, list of str
271 """
272
273
274 if pipes == None:
275 pipes = [cdp_name()]
276 num_pipes = len(pipes)
277
278
279 for pipe in pipes:
280 check_pipe(pipe)
281
282
283 if models != None:
284 if len(models) != num_pipes:
285 raise RelaxError("The %i elements of the models argument does not match the %i data pipes." % (len(models), num_pipes))
286 if molecules != None:
287 if len(molecules) != num_pipes:
288 raise RelaxError("The %i elements of the molecules argument does not match the %i data pipes." % (len(molecules), num_pipes))
289
290
291 objects = []
292 object_names = []
293 for pipe_index in range(len(pipes)):
294 dp = get_pipe(pipes[pipe_index])
295 objects.append(dp.structure)
296 object_names.append(pipes[pipe_index])
297
298
299 return objects, object_names, pipes
300
301
302 -def atomic_fluctuations(pipes=None, models=None, molecules=None, atom_id=None, measure='distance', file=None, format='text', dir=None, force=False):
303 """Write out a correlation matrix of pairwise interatomic distance fluctuations between different structures.
304
305 @keyword pipes: The data pipes to generate the interatomic distance fluctuation correlation matrix for.
306 @type pipes: None or list of str
307 @keyword models: The list of models to generate the interatomic distance fluctuation correlation matrix for. The number of elements must match the pipes argument. If set to None, then all models will be used.
308 @type models: None or list of lists of int
309 @keyword molecules: The list of molecules to generate the interatomic distance fluctuation correlation matrix for. The number of elements must match the pipes argument.
310 @type molecules: None or list of lists of str
311 @keyword atom_id: The atom identification string of the coordinates of interest. This matches the spin ID string format.
312 @type atom_id: str or None
313 @keyword measure: The type of fluctuation to measure. This can be either 'distance' or 'angle'.
314 @type measure: str
315 @keyword file: The name of the file to write.
316 @type file: str
317 @keyword format: The output format. This can be set to "text" for text file output, or "gnuplot" for creating a gnuplot script.
318 @type format: str
319 @keyword dir: The directory where the file will be placed. If set to None, then the file will be placed in the current directory.
320 @type dir: str or None
321 @keyword force: The force flag which if True will cause the file to be overwritten.
322 @type force: bool
323 """
324
325
326 check_pipe()
327 check_structure()
328 allowed_measures = ['distance', 'angle', 'parallax shift']
329 if measure not in allowed_measures:
330 raise RelaxError("The measure '%s' must be one of %s." % (measure, allowed_measures))
331
332
333 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id)
334
335
336 n = len(atom_names)
337 m = len(coord)
338
339
340 if not m > 1:
341 raise RelaxError("Two or more structures are required.")
342
343
344 labels = []
345 for i in range(n):
346 labels.append(generate_spin_id_unique(mol_name=mol_names[i], res_num=res_nums[i], res_name=res_names[i], spin_name=atom_names[i]))
347
348
349 matrix = zeros((n, n), float64)
350 dist = zeros(m, float64)
351 vectors = zeros((m, 3), float64)
352 parallax_vectors = zeros((m, 3), float64)
353 angles = zeros(m, float64)
354
355
356 if measure == 'distance':
357 for i in range(n):
358 for j in range(n):
359
360 if j > i:
361 continue
362
363
364 for k in range(m):
365 dist[k] = norm(coord[k, i] - coord[k, j])
366
367
368 matrix[i, j] = matrix[j, i] = std(dist, ddof=1)
369
370
371 elif measure == 'angle':
372
373 for i in range(n):
374 for j in range(n):
375
376 if j > i:
377 continue
378
379
380 for k in range(m):
381 vectors[k] = coord[k, i] - coord[k, j]
382
383
384 ave_vect = average(vectors, axis=0)
385
386
387 for k in range(m):
388 angles[k] = vector_angle_atan2(ave_vect, vectors[k])
389
390
391 matrix[i, j] = matrix[j, i] = std(angles, ddof=1)
392
393
394 elif measure == 'parallax shift':
395
396 for i in range(n):
397 for j in range(n):
398
399 if j > i:
400 continue
401
402
403 for k in range(m):
404 vectors[k] = coord[k, i] - coord[k, j]
405
406
407 ave_vect = average(vectors, axis=0)
408
409
410 length = norm(ave_vect)
411 if length == 0.0:
412 matrix[i, j] = matrix[j, i] = 0.0
413 continue
414
415
416 unit = ave_vect / length
417
418
419 for k in range(m):
420
421 proj = dot(vectors[k], unit) * unit
422
423
424 dist[k] = norm(vectors[k] - proj)
425
426
427 matrix[i, j] = matrix[j, i] = std(dist, ddof=1)
428
429
430 correlation_matrix(format=format, matrix=matrix, labels=labels, file=file, dir=dir, force=force)
431
432
433 -def average_structure(pipes=None, models=None, molecules=None, atom_id=None, set_mol_name=None, set_model_num=None):
434 """Calculate a mean structure.
435
436 @keyword pipes: The data pipes containing structures to average.
437 @type pipes: None or list of str
438 @keyword models: The list of models for each data pipe. The number of elements must match the pipes argument. If set to None, then all models will be used.
439 @type models: None or list of lists of int
440 @keyword molecules: The list of molecules for each data pipe. The number of elements must match the pipes argument.
441 @type molecules: None or list of lists of str
442 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format.
443 @type atom_id: str or None
444 @keyword set_mol_name: The molecule name for the averaged molecule.
445 @type set_mol_name: None or str
446 @keyword set_model_num: The model number for the averaged molecule.
447 @type set_model_num: None or int
448 """
449
450
451 check_pipe()
452
453
454 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id)
455
456
457 struct = mean(coord, axis=0)
458
459
460 if not hasattr(cdp, 'structure'):
461 cdp.structure = Internal()
462
463
464 cdp.structure.add_coordinates(coord=struct, mol_names=mol_names, res_names=res_names, res_nums=res_nums, atom_names=atom_names, elements=elements, set_mol_name=set_mol_name, set_model_num=set_model_num)
465
466
468 """Connect two atoms.
469
470 @keyword index1: The global index of the first atom.
471 @type index1: str
472 @keyword index2: The global index of the first atom.
473 @type index2: str
474 """
475
476
477 check_pipe()
478
479
480 if not hasattr(cdp, 'structure'):
481 cdp.structure = Internal()
482
483
484 cdp.structure.connect_atom(index1=index1, index2=index2)
485
486
504
505
507 """Create the PDB representation of the diffusion tensor.
508
509 @keyword scale: The scaling factor for the diffusion tensor.
510 @type scale: float
511 @keyword file: The name of the PDB file to create.
512 @type file: str
513 @keyword dir: The name of the directory to place the PDB file into.
514 @type dir: str
515 @keyword force: Flag which if set to True will overwrite any pre-existing file.
516 @type force: bool
517 """
518
519
520 check_pipe()
521
522
523 if hasattr(cdp, 'structure') and not cdp.structure.empty():
524 com = pipe_centre_of_mass()
525 else:
526 com = zeros(3, float64)
527
528
529 structure = Internal()
530
531
532 if cdp.pipe_type == 'hybrid':
533 pipe_list = cdp.hybrid_pipes
534 else:
535 pipe_list = [pipes.cdp_name()]
536
537
538 if cdp.pipe_type == 'hybrid':
539 mol_names = []
540 for pipe in pipe_list:
541 mol_names.append('diff_tensor_' % pipe)
542 else:
543 mol_names = ['diff_tensor']
544
545
546 for pipe_index in range(len(pipe_list)):
547
548 pipe = pipes.get_pipe(pipe_list[pipe_index])
549
550
551 if not hasattr(pipe, 'diff_tensor'):
552 raise RelaxNoTensorError('diffusion')
553
554
555 structure.add_molecule(name=mol_names[pipe_index])
556
557
558 mol = structure.get_molecule(mol_names[pipe_index])
559
560
561 diff_type = pipe.diff_tensor.type
562 if diff_type == 'spheroid':
563 diff_type = pipe.diff_tensor.spheroid_type
564
565
566 sim_num = None
567 if hasattr(pipe.diff_tensor, 'tm_sim'):
568
569 sim_num = len(pipe.diff_tensor.tm_sim)
570
571
572 axes = []
573 sim_axes = []
574 if diff_type in ['oblate', 'prolate']:
575 axes.append(pipe.diff_tensor.Dpar * pipe.diff_tensor.Dpar_unit)
576 sim_axes.append([])
577 if sim_num != None:
578 for i in range(sim_num):
579 sim_axes[0].append(pipe.diff_tensor.Dpar_sim[i] * pipe.diff_tensor.Dpar_unit_sim[i])
580
581 if diff_type == 'ellipsoid':
582 axes.append(pipe.diff_tensor.Dx * pipe.diff_tensor.Dx_unit)
583 axes.append(pipe.diff_tensor.Dy * pipe.diff_tensor.Dy_unit)
584 axes.append(pipe.diff_tensor.Dz * pipe.diff_tensor.Dz_unit)
585 sim_axes.append([])
586 sim_axes.append([])
587 sim_axes.append([])
588 if sim_num != None:
589 for i in range(sim_num):
590 sim_axes[0].append(pipe.diff_tensor.Dx_sim[i] * pipe.diff_tensor.Dx_unit_sim[i])
591 sim_axes[1].append(pipe.diff_tensor.Dy_sim[i] * pipe.diff_tensor.Dy_unit_sim[i])
592 sim_axes[2].append(pipe.diff_tensor.Dz_sim[i] * pipe.diff_tensor.Dz_unit_sim[i])
593
594
595 diffusion_tensor(mol=mol, tensor=pipe.diff_tensor.tensor, tensor_diag=pipe.diff_tensor.tensor_diag, diff_type=diff_type, rotation=pipe.diff_tensor.rotation, axes=axes, sim_axes=sim_axes, com=com, scale=scale)
596
597
598
599
600
601
602 print("\nGenerating the PDB file.")
603
604
605 tensor_pdb_file = open_write_file(file, dir, force=force)
606 structure.write_pdb(tensor_pdb_file)
607 tensor_pdb_file.close()
608
609
610 if not hasattr(cdp, 'result_files'):
611 cdp.result_files = []
612 if dir == None:
613 dir = getcwd()
614 cdp.result_files.append(['diff_tensor_pdb', 'Diffusion tensor PDB', get_file_path(file, dir)])
615 status.observers.result_file.notify()
616
617
618 -def delete(atom_id=None, model=None, verbosity=1, spin_info=True):
619 """Delete structural data.
620
621 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format. If not given, then all structural data will be deleted.
622 @type atom_id: str or None
623 @keyword model: Individual structural models from a loaded ensemble can be deleted by specifying the model number.
624 @type model: None or int
625 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1.
626 @type verbosity: int
627 @keyword spin_info: A flag which if True will cause all structural information in the spin containers and interatomic data containers to be deleted as well. If False, then only the 3D structural data will be deleted.
628 @type spin_info: bool
629 """
630
631
632 check_pipe()
633
634
635 if hasattr(cdp, 'structure'):
636 if verbosity:
637 print("Deleting structural data from the current pipe.")
638 selection = None
639 if atom_id != None:
640 selection = cdp.structure.selection(atom_id=atom_id)
641 cdp.structure.delete(model=model, selection=selection, verbosity=verbosity)
642 elif verbosity:
643 print("No structures are present.")
644
645
646 if not spin_info:
647 return
648
649
650 if verbosity:
651 print("Deleting all spin specific structural info.")
652 for spin in spin_loop(selection=atom_id):
653
654 if hasattr(spin, 'pos'):
655 del spin.pos
656
657
658 if verbosity:
659 print("Deleting all interatomic vectors.")
660 for interatom in interatomic_loop(selection1=atom_id):
661
662 if hasattr(interatom, 'vector'):
663 del interatom.vector
664
665
667 """Delete all secondary structure information.
668
669 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1.
670 @type verbosity: int
671 """
672
673
674 check_pipe()
675
676
677 if hasattr(cdp, 'structure'):
678 if verbosity:
679 print("Deleting secondary structure information from the current pipe.")
680 cdp.structure.delete_ss(verbosity=verbosity)
681 elif verbosity:
682 print("No structures are present.")
683
684
685
686 -def displacement(pipes=None, models=None, molecules=None, atom_id=None, centroid=None):
687 """Calculate the rotational and translational displacement between structures or models.
688
689 All results will be placed into the current data pipe cdp.structure.displacements data structure.
690
691
692 @keyword pipes: The data pipes to determine the displacements for.
693 @type pipes: None or list of str
694 @keyword models: The list of models to determine the displacements for. The number of elements must match the pipes argument. If set to None, then all models will be used.
695 @type models: None or list of lists of int
696 @keyword molecules: The list of molecules to determine the displacements for. The number of elements must match the pipes argument.
697 @type molecules: None or list of lists of str
698 @keyword atom_id: The atom identification string of the coordinates of interest. This matches the spin ID string format.
699 @type atom_id: str or None
700 @keyword centroid: An alternative position of the centroid, used for studying pivoted systems.
701 @type centroid: list of float or numpy rank-1, 3D array
702 """
703
704
705 check_pipe()
706
707
708 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id)
709
710
711 if not hasattr(cdp.structure, 'displacments'):
712 cdp.structure.displacements = Displacements()
713
714
715 for i in range(len(ids)):
716 for j in range(len(ids)):
717 cdp.structure.displacements._calculate(id_from=ids[i], id_to=ids[j], coord_from=coord[i], coord_to=coord[j], centroid=centroid)
718
719
720 -def find_pivot(pipes=None, models=None, molecules=None, atom_id=None, init_pos=None, func_tol=1e-5, box_limit=200):
721 """Find the pivoted motion of a set of structural models or structures.
722
723 The pivot will be placed into the current data pipe cdp.structure.pivot data structure.
724
725
726 @keyword pipes: The data pipes to use in the motional pivot algorithm.
727 @type pipes: None or list of str
728 @keyword models: The list of models to use. The number of elements must match the pipes argument. If set to None, then all models will be used.
729 @type models: None or list of lists of int
730 @keyword molecules: The list of molecules to find the pivoted motion for. The number of elements must match the pipes argument.
731 @type molecules: None or list of lists of str
732 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format.
733 @type atom_id: str or None
734 @keyword init_pos: The starting pivot position for the pivot point optimisation.
735 @type init_pos: list of float or numpy rank-1, 3D array
736 @keyword func_tol: The function tolerance which, when reached, terminates optimisation. Setting this to None turns of the check.
737 @type func_tol: None or float
738 @keyword box_limit: The simplex optimisation used in this function is constrained withing a box of +/- x Angstrom containing the pivot point using the logarithmic barrier function. This argument is the value of x.
739 @type box_limit: int
740 """
741
742
743 check_pipe()
744
745
746 if init_pos == None:
747 init_pos = zeros(3, float64)
748 init_pos = array(init_pos)
749
750
751 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id)
752
753
754 A = zeros((6, 3), float64)
755 b = zeros(6, float64)
756 for i in range(3):
757 A[2*i, i] = 1
758 A[2*i+1, i] = -1
759 b[2*i] = -box_limit
760 b[2*i+1] = -box_limit
761
762
763 finder = Pivot_finder(list(range(len(coord))), coord)
764 results = generic_minimise(func=finder.func, x0=init_pos, min_algor='Log barrier', min_options=('simplex',), A=A, b=b, func_tol=func_tol, print_flag=1)
765
766
767 if results is None:
768 return
769
770
771 cdp.structure.pivot = results
772
773
774 print("Motional pivot found at: %s" % results)
775
776
777 -def get_pos(spin_id=None, str_id=None, ave_pos=False):
778 """Load the spins from the structural object into the relax data store.
779
780 @keyword spin_id: The molecule, residue, and spin identifier string.
781 @type spin_id: str
782 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number.
783 @type str_id: int or str
784 @keyword ave_pos: A flag specifying if the average atom position or the atom position from all loaded structures is loaded into the SpinContainer.
785 @type ave_pos: bool
786 """
787
788
789 check_pipe()
790 check_structure()
791
792
793 selection = cdp.structure.selection(atom_id=spin_id)
794
795
796 data = []
797 for mol_name, res_num, res_name, atom_num, atom_name, element, pos in cdp.structure.atom_loop(selection=selection, str_id=str_id, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True, ave=ave_pos):
798
799 if mol_name and search(r'\+', mol_name):
800 mol_name = mol_name.replace('+', '')
801 if res_name and search(r'\+', res_name):
802 res_name = res_name.replace('+', '')
803 if atom_name and search(r'\+', atom_name):
804 atom_name = atom_name.replace('+', '')
805
806
807 id = generate_spin_id_unique(res_num=res_num, res_name=None, spin_num=atom_num, spin_name=atom_name)
808
809
810 spin_cont = return_spin(spin_id=id)
811
812
813 if spin_cont == None:
814 continue
815
816
817 spin_cont.pos = pos
818
819
820 data.append([id, repr(pos)])
821
822
823 if not len(data):
824 raise RelaxError("No positional information matching the spin ID '%s' could be found." % spin_id)
825
826
827 for spin in spin_loop():
828 if hasattr(spin, 'members'):
829
830 positions = []
831 for atom in spin.members:
832
833 subspin = return_spin(spin_id=atom)
834
835
836 if subspin == None:
837 raise RelaxNoSpinError(atom)
838
839
840 if not hasattr(subspin, 'pos') or subspin.pos is None or not len(subspin.pos):
841 raise RelaxError("Positional information is not available for the atom '%s'." % atom)
842
843
844 pos = subspin.pos
845
846
847 multi_model = True
848 if type(pos[0]) in [float, float64]:
849 multi_model = False
850 pos = [pos]
851
852
853 positions.append([])
854 for i in range(len(pos)):
855 positions[-1].append(pos[i].tolist())
856
857
858 if spin.averaging == 'linear':
859
860 ave = linear_ave(positions)
861
862
863 if multi_model:
864 spin.pos = ave
865 else:
866 spin.pos = ave[0]
867
868
869 write_data(out=sys.stdout, headings=["Spin_ID", "Position"], data=data)
870
871
872 -def load_spins(spin_id=None, str_id=None, from_mols=None, mol_name_target=None, ave_pos=False, spin_num=True):
873 """Load the spins from the structural object into the relax data store.
874
875 @keyword spin_id: The molecule, residue, and spin identifier string.
876 @type spin_id: str
877 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number.
878 @type str_id: int or str
879 @keyword from_mols: The list of similar, but not necessarily identical molecules to load spin information from.
880 @type from_mols: list of str or None
881 @keyword mol_name_target: The name of target molecule container, overriding the name of the loaded structures
882 @type mol_name_target: str or None
883 @keyword ave_pos: A flag specifying if the average atom position or the atom position from all loaded structures is loaded into the SpinContainer.
884 @type ave_pos: bool
885 @keyword spin_num: A flag specifying if the spin number should be loaded.
886 @type spin_num: bool
887 """
888
889
890 if from_mols != None:
891 load_spins_multi_mol(spin_id=spin_id, str_id=str_id, from_mols=from_mols, mol_name_target=mol_name_target, ave_pos=ave_pos, spin_num=spin_num)
892 return
893
894
895 check_pipe()
896 check_structure()
897
898
899 print("Adding the following spins to the relax data store.\n")
900
901
902 mol_names = []
903 res_nums = []
904 res_names = []
905 spin_nums = []
906 spin_names = []
907
908
909 selection = cdp.structure.selection(atom_id=spin_id)
910 for mol_name, res_num, res_name, atom_num, atom_name, element, pos in cdp.structure.atom_loop(selection=selection, str_id=str_id, mol_name_flag=True, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True, ave=ave_pos):
911
912 if mol_name_target:
913 mol_name = mol_name_target
914
915
916 if not spin_num:
917 atom_num = None
918
919
920 if mol_name and search(r'\+', mol_name):
921 mol_name = mol_name.replace('+', '')
922 if res_name and search(r'\+', res_name):
923 res_name = res_name.replace('+', '')
924 if atom_name and search(r'\+', atom_name):
925 atom_name = atom_name.replace('+', '')
926
927
928 id = generate_spin_id_unique(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=atom_num, spin_name=atom_name)
929
930
931 try:
932 spin_cont = create_spin(mol_name=mol_name, res_num=res_num, res_name=res_name, spin_num=atom_num, spin_name=atom_name)[0]
933
934
935 except RelaxError:
936 spin_cont = return_spin(spin_id=id)
937
938
939 if mol_name_target:
940 mol_names.append(mol_name_target)
941 else:
942 mol_names.append(mol_name)
943 res_nums.append(res_num)
944 res_names.append(res_name)
945 spin_nums.append(atom_num)
946 spin_names.append(atom_name)
947
948
949 if hasattr(spin_cont, 'pos') and spin_cont.pos is not None and (spin_cont.pos.shape != pos.shape or (spin_cont.pos != pos).any()):
950 warn(RelaxWarning("Positional information already exists for the spin %s, appending the new positions." % id))
951 spin_cont.pos = concatenate((spin_cont.pos, pos))
952 else:
953 spin_cont.pos = pos
954
955
956 spin_cont.element = element
957
958
959 if len(mol_names) == 0:
960 warn(RelaxWarning("No spins matching the '%s' ID string could be found." % spin_id))
961 return
962
963
964 write_spin_data(file=sys.stdout, mol_names=mol_names, res_nums=res_nums, res_names=res_names, spin_nums=spin_nums, spin_names=spin_names)
965
966
967 cdp.N = cdp.structure.num_models()
968
969
970 -def load_spins_multi_mol(spin_id=None, str_id=None, from_mols=None, mol_name_target=None, ave_pos=False, spin_num=True):
971 """Load the spins from the structural object into the relax data store.
972
973 @keyword spin_id: The molecule, residue, and spin identifier string.
974 @type spin_id: str
975 @keyword str_id: The structure identifier. This can be the file name, model number, or structure number.
976 @type str_id: int or str
977 @keyword from_mols: The list of similar, but not necessarily identical molecules to load spin information from.
978 @type from_mols: list of str or None
979 @keyword mol_name_target: The name of target molecule container, overriding the name of the loaded structures
980 @type mol_name_target: str or None
981 @keyword ave_pos: A flag specifying if the average atom position or the atom position from all loaded structures is loaded into the SpinContainer.
982 @type ave_pos: bool
983 @keyword spin_num: A flag specifying if the spin number should be loaded.
984 @type spin_num: bool
985 """
986
987
988 check_pipe()
989 check_structure()
990
991
992 if mol_name_target == None:
993 raise RelaxError("The target molecule name must be supplied when specifying multiple molecules to load spins from.")
994
995
996 if cdp.structure.num_models() != 1:
997 raise RelaxError("Only a single structural model is allowed when specifying multiple molecules to load spins from.")
998
999
1000 if spin_id:
1001 mol_token, res_token, spin_token = tokenise(spin_id)
1002 if mol_token != None:
1003 raise RelaxError("The spin ID string cannot contain molecular information when specifying multiple molecules to load spins from.")
1004
1005
1006 print("Adding the following spins to the relax data store.\n")
1007
1008
1009 ids = []
1010 res_nums = {}
1011 res_names = {}
1012 spin_names = {}
1013 positions = {}
1014 elements = {}
1015
1016
1017 for mol_name in from_mols:
1018
1019 positions[mol_name] = {}
1020
1021
1022 new_id = '#' + mol_name
1023 if spin_id != None:
1024 new_id += spin_id
1025
1026
1027 selection = cdp.structure.selection(atom_id=new_id)
1028 for res_num, res_name, atom_num, atom_name, element, pos in cdp.structure.atom_loop(selection=selection, str_id=str_id, res_num_flag=True, res_name_flag=True, atom_num_flag=True, atom_name_flag=True, element_flag=True, pos_flag=True, ave=ave_pos):
1029
1030 if res_name and search(r'\+', res_name):
1031 res_name = res_name.replace('+', '')
1032 if atom_name and search(r'\+', atom_name):
1033 atom_name = atom_name.replace('+', '')
1034
1035
1036 if not spin_num:
1037 atom_num = None
1038
1039
1040 id = generate_spin_id_unique(mol_name=mol_name_target, res_num=res_num, res_name=res_name, spin_name=atom_name)
1041
1042
1043 if is_float(pos[0]):
1044 positions[mol_name][id] = pos
1045 else:
1046 positions[mol_name][id] = pos[0]
1047
1048
1049 if id in ids:
1050 continue
1051
1052
1053 ids.append(id)
1054 res_nums[id] = res_num
1055 res_names[id] = res_name
1056 spin_names[id] = atom_name
1057 elements[id] = element
1058
1059
1060 if len(ids) == 0:
1061 warn(RelaxWarning("No spins matching the '%s' ID string could be found." % spin_id))
1062 return
1063
1064
1065 mol_names2 = []
1066 res_nums2 = []
1067 res_names2 = []
1068 spin_names2 = []
1069 for id in ids:
1070
1071 spin_cont = return_spin(spin_id=id)
1072
1073
1074 if spin_cont == None:
1075 spin_cont = create_spin(mol_name=mol_name_target, res_num=res_nums[id], res_name=res_names[id], spin_name=spin_names[id])[0]
1076
1077
1078 spin_cont.pos = []
1079 for mol_name in from_mols:
1080 if id in positions[mol_name]:
1081 spin_cont.pos.append(positions[mol_name][id])
1082 else:
1083 spin_cont.pos.append(None)
1084
1085
1086 spin_cont.element = elements[id]
1087
1088
1089 mol_names2.append(mol_name_target)
1090 res_nums2.append(res_nums[id])
1091 res_names2.append(res_names[id])
1092 spin_names2.append(spin_names[id])
1093
1094
1095 write_spin_data(file=sys.stdout, mol_names=mol_names2, res_nums=res_nums2, res_names=res_names2, spin_names=spin_names2)
1096
1097
1098 cdp.N = len(from_mols)
1099
1100
1101 -def pca(pipes=None, models=None, molecules=None, obs_pipes=None, obs_models=None, obs_molecules=None, atom_id=None, algorithm=None, num_modes=4, format='grace', dir=None):
1102 """PC analysis of the motions between all the loaded models.
1103
1104 @keyword pipes: The data pipes to perform the PC analysis on.
1105 @type pipes: None or list of str
1106 @keyword models: The list of models to perform the PC analysis on. The number of elements must match the pipes argument. If set to None, then all models will be used.
1107 @type models: None or list of lists of int
1108 @keyword molecules: The list of molecules to perform the PC analysis on. The number of elements must match the pipes argument.
1109 @type molecules: None or list of lists of str
1110 @keyword obs_pipes: The data pipes in the PC analysis which will have zero weight. These structures are for comparison.
1111 @type obs_pipes: None or list of str
1112 @keyword obs_models: The list of models in the PC analysis which will have zero weight. These structures are for comparison. The number of elements must match the pipes argument. If set to None, then all models will be used.
1113 @type obs_models: None or list of lists of int
1114 @keyword obs_molecules: The list of molecules in the PC analysis which will have zero weight. These structures are for comparison. The number of elements must match the pipes argument.
1115 @type obs_molecules: None or list of lists of str
1116 @keyword atom_id: The atom identification string of the coordinates of interest. This matches the spin ID string format.
1117 @type atom_id: str or None
1118 @keyword algorithm: The PCA algorithm to use (either 'eigen' or 'svd').
1119 @type algorithm: str
1120 @keyword num_modes: The number of PCA modes to calculate.
1121 @type num_modes: int
1122 @keyword format: The graph format to use.
1123 @type format: str
1124 @keyword dir: The optional directory to place the graphs into.
1125 @type dir: str
1126 """
1127
1128
1129 check_pipe()
1130
1131
1132 coord, ids, object_id_list, model_list, molecule_list, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id, lists=True)
1133
1134
1135 M = len(coord)
1136 if obs_pipes == None:
1137 obs_pipes = []
1138 weights = ones(M, float64)
1139 for struct in range(M):
1140
1141 for i in range(len(obs_pipes)):
1142
1143 if object_id_list[struct] == obs_pipes[i]:
1144
1145 if obs_molecules == None or molecule_list[struct] == obs_molecules[i]:
1146
1147 if obs_models == None or model_list[struct] == obs_models[i]:
1148 weights[struct] = 0.0
1149
1150
1151 print("\n\nStarting the PCA analysis.\n")
1152 values, vectors, proj = pca_analysis(coord=coord, weights=weights, algorithm=algorithm, num_modes=num_modes)
1153
1154
1155 cdp.structure.pca_values = values
1156 cdp.structure.pca_vectors = vectors
1157 cdp.structure.pca_proj = proj
1158
1159
1160 for mode in range(num_modes - 1):
1161
1162 data = [[[]]]
1163 current = None
1164 labels = []
1165 for struct in range(M):
1166
1167 id = "%s - %s" % (object_id_list[struct], molecule_list[struct])
1168 if current == None:
1169 current = id
1170 labels.append(current)
1171
1172
1173 if current != id:
1174 data[-1].append([])
1175 current = id
1176 labels.append(current)
1177
1178
1179 data[-1][-1].append([proj[mode, struct], proj[mode+1, struct]])
1180
1181
1182 sets = len(labels)
1183
1184
1185 file = open_write_file("graph_pc%s_pc%s.agr" % (mode+1, mode+2), dir=dir, force=True)
1186
1187
1188 write_xy_header(format=format, file=file, title="Principle component projections", sets=[sets], set_names=[labels], axis_labels=[[r'PC mode %i (\cE\C)' % (mode+1), r'PC mode %i (\cE\C)' % (mode+2)]], linestyle=[[0]*sets])
1189
1190
1191 write_xy_data(format=format, data=data, file=file, graph_type='xy')
1192
1193
1194 file.close()
1195
1196
1197 -def read_gaussian(file=None, dir=None, set_mol_name=None, set_model_num=None, verbosity=1, fail=True):
1198 """Read structures from a Gaussian log file.
1199
1200
1201 @keyword file: The name of the Gaussian log file to read.
1202 @type file: str
1203 @keyword dir: The directory where the Gaussian log file is located. If set to None, then the file will be searched for in the current directory.
1204 @type dir: str or None
1205 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information.
1206 @type set_mol_name: None, str, or list of str
1207 @keyword set_model_num: Set the model number of the loaded molecule.
1208 @type set_model_num: None, int, or list of int
1209 @keyword fail: A flag which, if True, will cause a RelaxError to be raised if the Gaussian log file does not exist. If False, then a RelaxWarning will be trown instead.
1210 @type fail: bool
1211 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1.
1212 @type verbosity: int
1213 @raise RelaxFileError: If the fail flag is set, then a RelaxError is raised if the Gaussian log file does not exist.
1214 """
1215
1216
1217 check_pipe()
1218
1219
1220 file_path = get_file_path(file, dir)
1221
1222
1223 if not access(file_path, F_OK):
1224 file_path_orig = file_path
1225 file_path = file_path + '.log'
1226
1227
1228 if not access(file_path, F_OK):
1229 if fail:
1230 raise RelaxFileError('Gaussian log', file_path_orig)
1231 else:
1232 warn(RelaxNoPDBFileWarning(file_path))
1233 return
1234
1235
1236 if not hasattr(cdp, 'structure'):
1237 cdp.structure = Internal()
1238
1239
1240 cdp.structure.load_gaussian(file_path, set_mol_name=set_mol_name, set_model_num=set_model_num, verbosity=verbosity)
1241
1242
1243 -def read_pdb(file=None, dir=None, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, alt_loc=None, verbosity=1, merge=False, fail=True):
1244 """The PDB loading function.
1245
1246 @keyword file: The name of the PDB file to read.
1247 @type file: str
1248 @keyword dir: The directory where the PDB file is located. If set to None, then the file will be searched for in the current directory.
1249 @type dir: str or None
1250 @keyword read_mol: The molecule(s) to read from the file, independent of model. The molecules are numbered consecutively from 1. If set to None, then all molecules will be loaded.
1251 @type read_mol: None, int, or list of int
1252 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then the molecules will be automatically labelled based on the file name or other information.
1253 @type set_mol_name: None, str, or list of str
1254 @keyword read_model: The PDB model to extract from the file. If set to None, then all models will be loaded.
1255 @type read_model: None, int, or list of int
1256 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the PDB model numbers will be preserved, if they exist.
1257 @type set_model_num: None, int, or list of int
1258 @keyword alt_loc: The PDB ATOM record 'Alternate location indicator' field value to select which coordinates to use.
1259 @type alt_loc: str or None
1260 @keyword verbosity: The amount of information to print to screen. Zero corresponds to minimal output while higher values increase the amount of output. The default value is 1.
1261 @type verbosity: int
1262 @keyword merge: A flag which if set to True will try to merge the PDB structure into the currently loaded structures.
1263 @type merge: bool
1264 @keyword fail: A flag which, if True, will cause a RelaxError to be raised if the PDB file does not exist. If False, then a RelaxWarning will be trown instead.
1265 @type fail: bool
1266 @raise RelaxFileError: If the fail flag is set, then a RelaxError is raised if the PDB file does not exist.
1267 """
1268
1269
1270 check_pipe()
1271
1272
1273 file_path = get_file_path(file, dir)
1274
1275
1276 file_path_orig = file_path
1277 if not access(file_path, F_OK):
1278
1279 for ext in ['.pdb', '.gz', '.pdb.gz', '.bz2', '.pdb.bz2']:
1280
1281 if access(file_path+ext, F_OK):
1282 file_path = file_path + ext
1283
1284
1285 if not access(file_path, F_OK):
1286 if fail:
1287 raise RelaxFileError('PDB', file_path_orig)
1288 else:
1289 warn(RelaxNoPDBFileWarning(file_path))
1290 return
1291
1292
1293 if not hasattr(cdp, 'structure'):
1294 cdp.structure = Internal()
1295
1296
1297 cdp.structure.load_pdb(file_path, read_mol=read_mol, set_mol_name=set_mol_name, read_model=read_model, set_model_num=set_model_num, alt_loc=alt_loc, verbosity=verbosity, merge=merge)
1298
1299
1300 molmol.molmol_obj.open_pdb()
1301
1302
1303 -def read_xyz(file=None, dir=None, read_mol=None, set_mol_name=None, read_model=None, set_model_num=None, verbosity=1, fail=True):
1304 """The XYZ loading function.
1305
1306
1307 @keyword file: The name of the XYZ file to read.
1308 @type file: str
1309 @keyword dir: The directory where the XYZ file is located. If set to None, then the
1310 file will be searched for in the current directory.
1311 @type dir: str or None
1312 @keyword read_mol: The molecule(s) to read from the file, independent of model.
1313 If set to None, then all molecules will be loaded.
1314 @type read_mol: None, int, or list of int
1315 @keyword set_mol_name: Set the names of the molecules which are loaded. If set to None, then
1316 the molecules will be automatically labelled based on the file name or
1317 other information.
1318 @type set_mol_name: None, str, or list of str
1319 @keyword read_model: The XYZ model to extract from the file. If set to None, then all models
1320 will be loaded.
1321 @type read_model: None, int, or list of int
1322 @keyword set_model_num: Set the model number of the loaded molecule. If set to None, then the
1323 XYZ model numbers will be preserved, if they exist.
1324 @type set_model_num: None, int, or list of int
1325 @keyword fail: A flag which, if True, will cause a RelaxError to be raised if the XYZ
1326 file does not exist. If False, then a RelaxWarning will be trown
1327 instead.
1328 @type fail: bool
1329 @keyword verbosity: The amount of information to print to screen. Zero corresponds to
1330 minimal output while higher values increase the amount of output. The
1331 default value is 1.
1332 @type verbosity: int
1333 @raise RelaxFileError: If the fail flag is set, then a RelaxError is raised if the XYZ file
1334 does not exist.
1335 """
1336
1337
1338 check_pipe()
1339
1340
1341 file_path = get_file_path(file, dir)
1342
1343
1344 if not access(file_path, F_OK):
1345 file_path_orig = file_path
1346 file_path = file_path + '.xyz'
1347
1348
1349 if not access(file_path, F_OK):
1350 if fail:
1351 raise RelaxFileError('XYZ', file_path_orig)
1352 else:
1353 warn(RelaxNoPDBFileWarning(file_path))
1354 return
1355
1356
1357 if not hasattr(cdp, 'structure'):
1358 cdp.structure = Internal()
1359
1360
1361 cdp.structure.load_xyz(file_path, read_mol=read_mol, set_mol_name=set_mol_name, read_model=read_model, set_model_num=set_model_num, verbosity=verbosity)
1362
1363
1364 -def rmsd(pipes=None, models=None, molecules=None, atom_id=None, atomic=False):
1365 """Calculate the RMSD between the loaded models.
1366
1367 The RMSD value will be placed into the current data pipe cdp.structure.rmsd data structure.
1368
1369
1370 @keyword pipes: The data pipes to determine the RMSD for.
1371 @type pipes: None or list of str
1372 @keyword models: The list of models to determine the RMSD for. The number of elements must match the pipes argument. If set to None, then all models will be used.
1373 @type models: None or list of lists of int
1374 @keyword molecules: The list of molecules to determine the RMSD for. The number of elements must match the pipes argument.
1375 @type molecules: None or list of lists of str
1376 @keyword atom_id: The atom identification string of the coordinates of interest. This matches the spin ID string format.
1377 @type atom_id: str or None
1378 @keyword atomic: A flag which if True will allow for per-atom RMSDs to be additionally calculated.
1379 @type atomic: bool
1380 @return: The RMSD value.
1381 @rtype: float
1382 """
1383
1384
1385 check_pipe()
1386
1387
1388 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id)
1389
1390
1391 if atomic:
1392
1393 print("\nCalculating the atomic-level RMSDs.")
1394
1395
1396 rmsd = per_atom_rmsd(coord, verbosity=0)
1397
1398
1399 for i in range(len(res_nums)):
1400
1401 id = generate_spin_id_unique(mol_name=mol_names[i], res_num=res_nums[i], res_name=res_names[i], spin_num=i, spin_name=atom_names[i])
1402
1403
1404 spin_cont = return_spin(spin_id=id)
1405
1406
1407 if spin_cont == None:
1408 continue
1409
1410
1411 spin_cont.pos_rmsd = rmsd[i]
1412
1413
1414 print("\nCalculating the global RMSD.")
1415 cdp.structure.rmsd = atomic_rmsd(coord, verbosity=1)
1416
1417
1418 return cdp.structure.rmsd
1419
1420
1421 -def rotate(R=None, origin=None, model=None, atom_id=None, pipe_name=None):
1422 """Rotate the structural data about the origin by the specified forwards rotation.
1423
1424 @keyword R: The forwards rotation matrix.
1425 @type R: numpy 3D, rank-2 array or a 3x3 list of floats
1426 @keyword origin: The origin of the rotation. If not supplied, the origin will be set to [0, 0, 0].
1427 @type origin: numpy 3D, rank-1 array or list of len 3 or None
1428 @keyword model: The model to rotate. If None, all models will be rotated.
1429 @type model: int
1430 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used.
1431 @type atom_id: str or None
1432 @keyword pipe_name: The name of the data pipe containing the structures to translate. This defaults to the current data pipe.
1433 @type pipe_name: None or str
1434 """
1435
1436
1437 if pipe_name == None:
1438 pipe_name = cdp_name()
1439
1440
1441 check_pipe(pipe_name)
1442 check_structure(pipe_name)
1443
1444
1445 dp = get_pipe(pipe_name)
1446
1447
1448 if origin is None:
1449 origin = [0, 0, 0]
1450
1451
1452 R = array(R, float64)
1453 origin = array(origin, float64)
1454
1455
1456 selection = dp.structure.selection(atom_id=atom_id)
1457 dp.structure.rotate(R=R, origin=origin, model=model, selection=selection)
1458
1459
1460 if model != None:
1461 print("Rotated %i atoms of model %i." % (selection.count_atoms(), model))
1462 else:
1463 print("Rotated %i atoms." % selection.count_atoms())
1464
1465
1466 -def sequence_alignment(pipes=None, models=None, molecules=None, msa_algorithm='Central Star', pairwise_algorithm='NW70', matrix='BLOSUM62', gap_open_penalty=1.0, gap_extend_penalty=1.0, end_gap_open_penalty=0.0, end_gap_extend_penalty=0.0):
1467 """Superimpose a set of related, but not identical structures.
1468
1469 @keyword pipes: The data pipes to include in the alignment and superimposition.
1470 @type pipes: None or list of str
1471 @keyword models: The list of models to for each data pipe superimpose. The number of elements must match the pipes argument. If set to None, then all models will be used.
1472 @type models: list of lists of int or None
1473 @keyword molecules: The molecule names to include in the alignment and superimposition. The number of elements must match the pipes argument.
1474 @type molecules: None or list of str
1475 @keyword msa_algorithm: The multiple sequence alignment (MSA) algorithm to use.
1476 @type msa_algorithm: str
1477 @keyword pairwise_algorithm: The pairwise sequence alignment algorithm to use.
1478 @type pairwise_algorithm: str
1479 @keyword matrix: The substitution matrix to use.
1480 @type matrix: str
1481 @keyword gap_open_penalty: The penalty for introducing gaps, as a positive number.
1482 @type gap_open_penalty: float
1483 @keyword gap_extend_penalty: The penalty for extending a gap, as a positive number.
1484 @type gap_extend_penalty: float
1485 @keyword end_gap_open_penalty: The optional penalty for opening a gap at the end of a sequence.
1486 @type end_gap_open_penalty: float
1487 @keyword end_gap_extend_penalty: The optional penalty for extending a gap at the end of a sequence.
1488 @type end_gap_extend_penalty: float
1489 """
1490
1491
1492 objects, object_names, pipes = assemble_structural_objects(pipes=pipes, models=models, molecules=molecules)
1493
1494
1495 ids, object_id_list, model_list, molecule_list, atom_pos, mol_names, res_names, res_nums, atom_names, elements, one_letter_codes, num_mols = assemble_atomic_coordinates(objects=objects, object_names=object_names, molecules=molecules, models=models)
1496
1497
1498 res_num_list = []
1499 for mol_index in range(num_mols):
1500 res_num_list.append([])
1501 for i in range(len(one_letter_codes[mol_index])):
1502 key = list(res_nums[mol_index][i].keys())[0]
1503 res_num_list[mol_index].append(res_nums[mol_index][i][key])
1504
1505
1506 strings, gaps = msa_general(one_letter_codes, residue_numbers=res_num_list, msa_algorithm=msa_algorithm, pairwise_algorithm=pairwise_algorithm, matrix=matrix, gap_open_penalty=gap_open_penalty, gap_extend_penalty=gap_extend_penalty, end_gap_open_penalty=end_gap_open_penalty, end_gap_extend_penalty=end_gap_extend_penalty)
1507
1508
1509 if not hasattr(ds, 'sequence_alignments'):
1510 ds.sequence_alignments = Sequence_alignments()
1511
1512
1513 if msa_algorithm == 'residue number':
1514 pairwise_algorithm = None
1515 matrix = None
1516 gap_open_penalty = None
1517 gap_extend_penalty = None
1518 end_gap_open_penalty = None
1519 end_gap_extend_penalty = None
1520
1521
1522 ds.sequence_alignments.add(object_ids=object_id_list, models=model_list, molecules=molecule_list, sequences=one_letter_codes, strings=strings, gaps=gaps, msa_algorithm=msa_algorithm, pairwise_algorithm=pairwise_algorithm, matrix=matrix, gap_open_penalty=gap_open_penalty, gap_extend_penalty=gap_extend_penalty, end_gap_open_penalty=end_gap_open_penalty, end_gap_extend_penalty=end_gap_extend_penalty)
1523
1524
1526 """Place the XH unit vector into the spin container object.
1527
1528 @keyword spin: The spin container object.
1529 @type spin: SpinContainer instance
1530 @keyword xh_vect: The unit vector parallel to the XH bond.
1531 @type xh_vect: array of len 3
1532 """
1533
1534
1535 spin.xh_vect = xh_vect
1536
1537
1538 -def structure_loop(pipes=None, molecules=None, models=None, atom_id=None):
1539 """Generator function for looping over all internal structural objects, models and molecules.
1540
1541 @keyword pipes: The data pipes to loop over.
1542 @type pipes: None or list of str
1543 @keyword models: The list of models for each data pipe. The number of elements must match the pipes argument. If set to None, then all models will be used.
1544 @type models: None or list of lists of int
1545 @keyword molecules: The list of molecules for each data pipe. The number of elements must match the pipes argument.
1546 @type molecules: None or list of lists of str
1547 @keyword atom_id: The molecule, residue, and atom identifier string of the coordinates of interest. This matches the spin ID string format.
1548 @type atom_id: None or str
1549 @return: The data pipe index, model number, and molecule name.
1550 @rtype: int, int or None, str
1551 """
1552
1553
1554 if pipes == None:
1555 pipes = [cdp_name()]
1556 num_pipes = len(pipes)
1557
1558
1559 for pipe in pipes:
1560 check_pipe(pipe)
1561
1562
1563 if models != None:
1564 if len(models) != num_pipes:
1565 raise RelaxError("The %i elements of the models argument does not match the %i data pipes." % (len(models), num_pipes))
1566 if molecules != None:
1567 if len(molecules) != num_pipes:
1568 raise RelaxError("The %i elements of the molecules argument does not match the %i data pipes." % (len(molecules), num_pipes))
1569
1570
1571 objects = []
1572 for pipe_index in range(len(pipes)):
1573 dp = get_pipe(pipes[pipe_index])
1574 objects.append(dp.structure)
1575
1576
1577 for pipe_index, model_num, mol_name in loop_coord_structures(objects=objects, models=models, molecules=molecules, atom_id=atom_id):
1578 yield pipe_index, model_num, mol_name
1579
1580
1581 -def superimpose(pipes=None, models=None, molecules=None, atom_id=None, displace_id=None, method='fit to mean', centre_type="centroid", centroid=None):
1582 """Superimpose a set of structures.
1583
1584 @keyword pipes: The data pipes to include in the alignment and superimposition.
1585 @type pipes: None or list of str
1586 @keyword models: The list of models to for each data pipe superimpose. The number of elements must match the pipes argument. If set to None, then all models will be used.
1587 @type models: list of lists of int or None
1588 @keyword molecules: The molecule names to include in the alignment and superimposition. The number of elements must match the pipes argument.
1589 @type molecules: None or list of str
1590 @keyword atom_id: The molecule, residue, and atom identifier string. This matches the spin ID string format.
1591 @type atom_id: str or None
1592 @keyword displace_id: The atom ID string for restricting the displacement to a subset of all atoms. If not set, then all atoms will be translated and rotated. This can be a list of atom IDs with each element corresponding to one of the structures.
1593 @type displace_id: None, str, or list of str
1594 @keyword method: The superimposition method. It must be one of 'fit to mean' or 'fit to first'.
1595 @type method: str
1596 @keyword centre_type: The type of centre to superimpose over. This can either be the standard centroid superimposition or the CoM could be used instead.
1597 @type centre_type: str
1598 @keyword centroid: An alternative position of the centroid to allow for different superpositions, for example of pivot point motions.
1599 @type centroid: list of float or numpy rank-1, 3D array
1600 """
1601
1602
1603 allowed = ['fit to mean', 'fit to first']
1604 if method not in allowed:
1605 raise RelaxError("The superimposition method '%s' is unknown. It must be one of %s." % (method, allowed))
1606
1607
1608 allowed = ['centroid', 'CoM']
1609 if centre_type not in allowed:
1610 raise RelaxError("The superimposition centre type '%s' is unknown. It must be one of %s." % (centre_type, allowed))
1611
1612
1613 if pipes == None:
1614 pipes = [cdp_name()]
1615
1616
1617 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id)
1618
1619
1620 if len(coord[0]) == 0:
1621 raise RelaxError("No common atoms could be found between the structures.")
1622
1623
1624 if method == 'fit to mean':
1625 T, R, pivot = fit_to_mean(models=list(range(len(ids))), coord=coord, centre_type=centre_type, elements=elements, centroid=centroid)
1626 elif method == 'fit to first':
1627 T, R, pivot = fit_to_first(models=list(range(len(ids))), coord=coord, centre_type=centre_type, elements=elements, centroid=centroid)
1628
1629
1630 i = 0
1631 for pipe_index, model_num, mol_name in structure_loop(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id):
1632
1633 if i == 0 and method == 'fit to first':
1634 i += 1
1635 continue
1636
1637
1638 curr_displace_id = None
1639 if isinstance(displace_id, str):
1640 curr_displace_id = displace_id
1641 elif isinstance(displace_id, list):
1642 if len(displace_id) <= i:
1643 raise RelaxError("Not enough displacement ID strings have been provided.")
1644 curr_displace_id = displace_id[i]
1645
1646
1647 id = curr_displace_id
1648 if id == None or (mol_name and not search('#', id)):
1649 if curr_displace_id == None:
1650 id = '#%s' % mol_name
1651 elif search('#', curr_displace_id):
1652 id = curr_displace_id
1653 else:
1654 id = '#%s%s' % (mol_name, curr_displace_id)
1655
1656
1657 translate(T=T[i], model=model_num, pipe_name=pipes[pipe_index], atom_id=id)
1658
1659
1660 rotate(R=R[i], origin=pivot[i], model=model_num, pipe_name=pipes[pipe_index], atom_id=id)
1661
1662
1663 i += 1
1664
1665
1666 -def translate(T=None, model=None, atom_id=None, pipe_name=None):
1667 """Shift the structural data by the specified translation vector.
1668
1669 @keyword T: The translation vector.
1670 @type T: numpy rank-1, 3D array or list of float
1671 @keyword model: The model to translate. If None, all models will be rotated.
1672 @type model: int or None
1673 @keyword atom_id: The molecule, residue, and atom identifier string. Only atoms matching this selection will be used.
1674 @type atom_id: str or None
1675 @keyword pipe_name: The name of the data pipe containing the structures to translate. This defaults to the current data pipe.
1676 @type pipe_name: None or str
1677 """
1678
1679
1680 if pipe_name == None:
1681 pipe_name = cdp_name()
1682
1683
1684 check_pipe(pipe_name)
1685 check_structure(pipe_name)
1686
1687
1688 dp = get_pipe(pipe_name)
1689
1690
1691 T = array(T, float64)
1692
1693
1694 selection = dp.structure.selection(atom_id=atom_id)
1695 dp.structure.translate(T=T, model=model, selection=selection)
1696
1697
1698 if model != None:
1699 print("Translated %i atoms of model %i." % (selection.count_atoms(), model))
1700 else:
1701 print("Translated %i atoms." % selection.count_atoms())
1702
1703
1704 -def vectors(spin_id1=None, spin_id2=None, model=None, verbosity=1, ave=True, unit=True):
1705 """Extract the bond vectors from the loaded structures and store them in the spin container.
1706
1707 @keyword spin_id1: The spin identifier string of the first spin of the pair.
1708 @type spin_id1: str
1709 @keyword spin_id2: The spin identifier string of the second spin of the pair.
1710 @type spin_id2: str
1711 @keyword model: The model to extract the vector from. If None, all vectors will be extracted.
1712 @type model: str
1713 @keyword verbosity: The higher the value, the more information is printed to screen.
1714 @type verbosity: int
1715 @keyword ave: A flag which if True will cause the average of all vectors to be extracted.
1716 @type ave: bool
1717 @keyword unit: A flag which if True will cause the function to calculate the unit vectors.
1718 @type unit: bool
1719 """
1720
1721
1722 check_pipe()
1723 check_structure()
1724 check_mol_res_spin_data()
1725
1726
1727 if verbosity:
1728
1729 num_models = cdp.structure.num_models()
1730
1731
1732 if num_models > 1:
1733 if model:
1734 print("Extracting vectors for model '%s'." % model)
1735 else:
1736 print("Extracting vectors for all %s models." % num_models)
1737 if ave:
1738 print("Averaging all vectors.")
1739
1740
1741 else:
1742 print("Extracting vectors from the single model.")
1743
1744
1745 if unit:
1746 print("Calculating the unit vectors.")
1747
1748
1749 no_vectors = True
1750 for spin, mol_name, res_num, res_name in spin_loop(selection=spin_id, full_info=True):
1751
1752 if not spin.select:
1753 continue
1754
1755
1756 id = generate_spin_id_unique(res_num=res_num, res_name=None, spin_name=spin.name, spin_num=spin.num)
1757
1758
1759 if spin.num == None and spin.name == None:
1760 warn(RelaxWarning("Either the spin number or name must be set for the spin " + repr(id) + " to identify the corresponding atom in the molecule."))
1761 continue
1762
1763
1764 if hasattr(spin, 'vector'):
1765 obj = getattr(spin, 'vector')
1766 if obj != None:
1767 warn(RelaxWarning("The bond vector for the spin " + repr(id) + " already exists."))
1768 continue
1769
1770
1771 bond_vectors, attached_name, warnings = cdp.structure.bond_vectors(attached_atom=attached, model_num=model, res_num=res_num, spin_name=spin.name, spin_num=spin.num, return_name=True, return_warnings=True)
1772 id2 = generate_spin_id_unique(res_num=res_num, res_name=None, spin_name=spin.name)
1773
1774
1775 if not bond_vectors:
1776
1777 if warnings:
1778 warn(RelaxWarning(warnings + " (atom ID " + repr(id) + ")."))
1779
1780
1781 continue
1782
1783
1784 if not hasattr(spin, 'attached_atom'):
1785 spin.attached_atom = attached_name
1786 elif spin.attached_atom != attached_name:
1787 raise RelaxError("The " + repr(spin.attached_atom) + " atom already attached to the spin does not match the attached atom " + repr(attached_name) + ".")
1788
1789
1790 if ave:
1791 ave_vector = zeros(3, float64)
1792
1793
1794 for i in range(len(bond_vectors)):
1795
1796 if unit:
1797
1798 norm_factor = norm(bond_vectors[i])
1799
1800
1801 if norm_factor == 0.0:
1802 warn(RelaxZeroVectorWarning(spin_id1=id, spin_id2=id2))
1803
1804
1805 else:
1806 bond_vectors[i] = bond_vectors[i] / norm_factor
1807
1808
1809 if ave:
1810 ave_vector = ave_vector + bond_vectors[i]
1811
1812
1813 if ave:
1814 vector = ave_vector / float(len(bond_vectors))
1815 else:
1816 vector = bond_vectors
1817
1818
1819 if len(vector) == 1:
1820 vector = vector[0]
1821
1822
1823 setattr(spin, 'vector', vector)
1824
1825
1826 no_vectors = False
1827
1828
1829 if verbosity:
1830
1831 num = len(bond_vectors)
1832 plural = 's'
1833 if num == 1:
1834 plural = ''
1835
1836 if spin.name:
1837 print("Extracted %s %s-%s vector%s for the spin '%s'." % (num, spin.name, attached_name, plural, id))
1838 else:
1839 print("Extracted %s %s-%s vector%s for the spin '%s'." % (num, spin.num, attached_name, plural, id))
1840
1841
1842 if no_vectors:
1843 raise RelaxError("No vectors could be extracted.")
1844
1845
1846 -def web_of_motion(pipes=None, models=None, molecules=None, atom_id=None, file=None, dir=None, force=False):
1847 """Create a PDB representation of the motion between a set of models.
1848
1849 This will create a PDB file containing the atoms of all models, with identical atoms links using CONECT records. This function only supports the internal structural object.
1850
1851
1852 @keyword pipes: The data pipes to generate the web between.
1853 @type pipes: None or list of str
1854 @keyword models: The list of models to generate the web between. The number of elements must match the pipes argument. If set to None, then all models will be used.
1855 @type models: None or list of lists of int
1856 @keyword molecules: The list of molecules to generate the web between. The number of elements must match the pipes argument.
1857 @type molecules: None or list of lists of str
1858 @keyword atom_id: The atom identification string of the coordinates of interest. This matches the spin ID string format.
1859 @type atom_id: str or None
1860 @keyword file: The name of the PDB file to write.
1861 @type file: str
1862 @keyword dir: The directory where the PDB file will be placed. If set to None, then the file will be placed in the current directory.
1863 @type dir: str or None
1864 @keyword force: The force flag which if True will cause the file to be overwritten.
1865 @type force: bool
1866 """
1867
1868
1869 check_pipe()
1870 check_structure()
1871
1872
1873 coord, ids, mol_names, res_names, res_nums, atom_names, elements = assemble_structural_coordinates(pipes=pipes, models=models, molecules=molecules, atom_id=atom_id)
1874
1875
1876 if not len(coord) > 1:
1877 raise RelaxError("Two or more structures are required.")
1878
1879
1880 web = Internal()
1881
1882
1883 for atom_index in range(len(atom_names)):
1884
1885 for struct_index in range(len(ids)):
1886
1887 web.add_atom(mol_name=mol_names[atom_index], atom_name=atom_names[atom_index], res_name=res_names[atom_index], res_num=res_nums[atom_index], pos=coord[struct_index, atom_index], element=elements[atom_index], sort=False)
1888
1889
1890 for k in range(len(ids)):
1891 for l in range(len(ids)):
1892
1893 if k == l:
1894 continue
1895
1896
1897 index1 = atom_index*len(ids) + k
1898 index2 = atom_index*len(ids) + l
1899
1900
1901 web.connect_atom(mol_name=mol_names[atom_index], index1=index1, index2=index2)
1902
1903
1904 if isinstance(file, str):
1905
1906 file = get_file_path(file, dir)
1907
1908
1909 if not search(".pdb$", file):
1910 file += '.pdb'
1911
1912
1913 file = open_write_file(file, force=force)
1914
1915
1916 web.write_pdb(file)
1917
1918
1919 -def write_pdb(file=None, dir=None, model_num=None, compress_type=0, force=False):
1920 """The PDB writing function.
1921
1922 @keyword file: The name of the PDB file to write. This can also be a file instance.
1923 @type file: str or file instance
1924 @keyword dir: The directory where the PDB file will be placed. If set to None, then the file will be placed in the current directory.
1925 @type dir: str or None
1926 @keyword model_num: The model to place into the PDB file. If not supplied, then all models will be placed into the file.
1927 @type model_num: None or int
1928 @keyword compress_type: The compression type. The integer values correspond to the compression type: 0, no compression; 1, Bzip2 compression; 2, Gzip compression.
1929 @type compress_type: int
1930 @keyword force: The force flag which if True will cause the file to be overwritten.
1931 @type force: bool
1932 """
1933
1934
1935 check_pipe()
1936
1937
1938 if not hasattr(cdp, 'structure'):
1939 raise RelaxError("No structural data is present in the current data pipe.")
1940
1941
1942 if isinstance(file, str):
1943
1944 file = get_file_path(file, dir)
1945
1946
1947 if not search(".pdb$", file):
1948 file = file + '.pdb'
1949
1950
1951 file = open_write_file(file, compress_type=compress_type, force=force)
1952
1953
1954 cdp.structure.write_pdb(file, model_num=model_num)
1955