1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """The N-state model or structural ensemble analysis."""
24
25
26 from copy import deepcopy
27 from math import pi
28 from minfx.generic import generic_minimise
29 from minfx.grid import grid
30 from numpy import dot, float64, zeros
31 from re import search
32 from warnings import warn
33
34
35 import lib.arg_check
36 from lib.errors import RelaxError, RelaxInfError, RelaxNaNError, RelaxNoModelError
37 from lib.float import isNaN, isInf
38 from lib.optimisation import test_grid_ops
39 from lib.warnings import RelaxWarning
40 from pipe_control import align_tensor, pcs, rdc
41 from pipe_control.align_tensor import opt_uses_tensor
42 from pipe_control.interatomic import interatomic_loop
43 from pipe_control.mol_res_spin import spin_loop
44 from specific_analyses.api_base import API_base
45 from specific_analyses.api_common import API_common
46 from specific_analyses.n_state_model.data import base_data_types, calc_ave_dist, num_data_points
47 from specific_analyses.n_state_model.optimisation import minimise_bc_data, target_fn_setup
48 from specific_analyses.n_state_model.parameter_object import N_state_params
49 from specific_analyses.n_state_model.parameters import disassemble_param_vector, linear_constraints, param_num
50 from target_functions.potential import quad_pot
51
52
54 """Class containing functions for the N-state model."""
55
56
57 instance = None
58
70
71
73 """Loop over the base data of the spins - RDCs, PCSs, and NOESY data.
74
75 This loop iterates for each data point (RDC, PCS, NOESY) for each spin or interatomic data container, returning the identification information.
76
77 @return: A list of the spin or interatomic data container, the data type ('rdc', 'pcs', 'noesy'), and the alignment ID if required.
78 @rtype: list of [SpinContainer instance, str, str] or [InteratomContainer instance, str, str]
79 """
80
81
82 for interatom in interatomic_loop():
83
84 if not interatom.select:
85 continue
86
87
88 data = [interatom, None, None]
89
90
91 if hasattr(interatom, 'rdc'):
92 data[1] = 'rdc'
93
94
95 for id in cdp.rdc_ids:
96
97 data[2] = id
98
99
100 yield data
101
102
103 if hasattr(interatom, 'noesy'):
104 data[1] = 'noesy'
105
106
107 for id in cdp.noesy_ids:
108
109 data[2] = id
110
111
112 yield data
113
114
115 for spin in spin_loop():
116
117 if not spin.select:
118 continue
119
120
121 data = [spin, None, None]
122
123
124 if hasattr(spin, 'pcs'):
125 data[1] = 'pcs'
126
127
128 for id in cdp.pcs_ids:
129
130 data[2] = id
131
132
133 yield data
134
135
    def calculate(self, spin_id=None, verbosity=1, sim_index=None):
        """Calculation function.

        Currently this function simply calculates the NOESY flat-bottom quadratic energy potential, if NOE restraints are available.

        @keyword spin_id:   The spin identification string (unused).
        @type spin_id:      None or str
        @keyword verbosity: The amount of information to print.  The higher the value, the greater the verbosity.
        @type verbosity:    int
        @keyword sim_index: The MC simulation index (unused).
        @type sim_index:    None
        """

        # Set up the target function (model is None when no RDC/PCS data is present).
        model, param_vector, data_types, scaling_matrix = target_fn_setup()

        # Calculate the chi-squared value for the current parameter values.
        if model:
            # A single function call at the current parameter values.
            chi2 = model.func(param_vector)

            # Store the global chi-squared value.
            cdp.chi2 = chi2

            # Store the back-calculated RDC and PCS data in their containers.
            minimise_bc_data(model)

            # Calculate the RDC Q-factors.
            if 'rdc' in data_types:
                rdc.q_factors()

            # Calculate the PCS Q-factors.
            if 'pcs' in data_types:
                pcs.q_factors()

        # NOE potential, only when NOE restraints have been loaded.
        if hasattr(cdp, 'noe_restraints'):
            # Init some numpy arrays, one element per restraint.
            num_restraints = len(cdp.noe_restraints)
            dist = zeros(num_restraints, float64)
            pot = zeros(num_restraints, float64)
            lower = zeros(num_restraints, float64)
            upper = zeros(num_restraints, float64)

            # Loop over the NOE restraints.
            for i in range(num_restraints):
                # The lower and upper distance bounds of the flat-bottom potential.
                lower[i] = cdp.noe_restraints[i][2]
                upper[i] = cdp.noe_restraints[i][3]

                # The ensemble-averaged distance, using r^-6 averaging.
                dist[i] = calc_ave_dist(cdp.noe_restraints[i][0], cdp.noe_restraints[i][1], exp=-6)

            # Calculate the flat-bottom quadratic energy potential (fills pot in-place).
            quad_pot(dist, pot, lower, upper)

            # Store the average distances and potential values per restraint.
            cdp.ave_dist = []
            cdp.quad_pot = []
            for i in range(num_restraints):
                cdp.ave_dist.append([cdp.noe_restraints[i][0], cdp.noe_restraints[i][1], dist[i]])
                cdp.quad_pot.append([cdp.noe_restraints[i][0], cdp.noe_restraints[i][1], pot[i]])
200
    def create_mc_data(self, data_id=None):
        """Create the Monte Carlo data by back-calculation.

        @keyword data_id:   The list of spin ID, data type, and alignment ID, as yielded by the base_data_loop() generator method.
        @type data_id:      str
        @return:            The Monte Carlo Ri data.
        @rtype:             list of floats
        """

        # Initialise the MC data structure.
        mc_data = []

        # The spin or interatomic data container (first element of the data_id triple).
        container = data_id[0]

        # RDC data.
        if data_id[1] == 'rdc' and hasattr(container, 'rdc'):
            # Back-calculate if the data is not yet present.
            if not hasattr(container, 'rdc_bc'):
                self.calculate()

            # The back-calculated value for this alignment, or None if still unavailable.
            if not hasattr(container, 'rdc_bc') or not data_id[2] in container.rdc_bc:
                data = None
            else:
                data = container.rdc_bc[data_id[2]]

            # Append the data.
            mc_data.append(data)

        # NOESY data.
        elif data_id[1] == 'noesy' and hasattr(container, 'noesy'):
            # Back-calculate if the data is not yet present.
            if not hasattr(container, 'noesy_bc'):
                self.calculate()

            # Append the data (no alignment ID key for NOESY data).
            mc_data.append(container.noesy_bc)

        # PCS data.
        elif data_id[1] == 'pcs' and hasattr(container, 'pcs'):
            # Back-calculate if the data is not yet present.
            if not hasattr(container, 'pcs_bc'):
                self.calculate()

            # The back-calculated value for this alignment, or None if still unavailable.
            if not hasattr(container, 'pcs_bc') or not data_id[2] in container.pcs_bc:
                data = None
            else:
                data = container.pcs_bc[data_id[2]]

            # Append the data.
            mc_data.append(data)

        # Return the single-element MC data list.
        return mc_data
258
    def grid_search(self, lower=None, upper=None, inc=None, constraints=False, verbosity=0, sim_index=None):
        """The grid search function.

        @param lower:       The lower bounds of the grid search which must be equal to the number of parameters in the model.
        @type lower:        array of numbers
        @param upper:       The upper bounds of the grid search which must be equal to the number of parameters in the model.
        @type upper:        array of numbers
        @param inc:         The increments for each dimension of the space for the grid search.  The number of elements in the array must equal to the number of parameters in the model.
        @type inc:          array of int
        @param constraints: If True, constraints are applied during the grid search (eliminating parts of the grid).  If False, no constraints are used.
        @type constraints:  bool
        @param verbosity:   A flag specifying the amount of information to print.  The higher the value, the greater the verbosity.
        @type verbosity:    int
        @keyword sim_index: The MC simulation index (passed straight through to minimise()).
        @type sim_index:    None or int
        """

        # Test if the N-state model has been set up.
        if not hasattr(cdp, 'model'):
            raise RelaxNoModelError('N-state')

        # The number of parameters.
        n = param_num()

        # A zero-parameter model cannot be grid searched.
        if n == 0:
            print("Cannot run a grid search on a model with zero parameters, skipping the grid search.")
            return

        # Test the grid search options.
        test_grid_ops(lower=lower, upper=upper, inc=inc, n=n)

        # If inc is a single int, convert it into an array of n ints.
        if isinstance(inc, int):
            inc = [inc]*n

        # Set up the default bounds when none are supplied.
        if not lower:
            # Init.
            lower = []
            upper = []

            # Loop over the parameters.
            for i in range(n):
                # i is an index into the cdp.params list.
                if i < len(cdp.params):
                    # Probabilities are bounded to [0, 1].
                    if search('^p', cdp.params[i]):
                        lower.append(0.0)
                        upper.append(1.0)

                    # Euler angles: alpha and gamma span [0, 2pi], beta spans [0, pi].
                    if search('^alpha', cdp.params[i]) or search('^gamma', cdp.params[i]):
                        lower.append(0.0)
                        upper.append(2*pi)
                    elif search('^beta', cdp.params[i]):
                        lower.append(0.0)
                        upper.append(pi)

                # The paramagnetic centre coordinates occupy the final 3 parameters when unfixed.
                elif hasattr(cdp, 'paramag_centre_fixed') and not cdp.paramag_centre_fixed and (n - i) <= 3:
                    lower.append(-100)
                    upper.append(100)

                # Otherwise this is an alignment tensor element.
                else:
                    lower.append(-1e-3)
                    upper.append(1e-3)

        # Determine the data type (RDC and/or PCS).
        data_types = base_data_types()

        # The number of tensors to be optimised.
        tensor_num = align_tensor.num_tensors(skip_fixed=True)

        # For the 'fixed' model with multiple tensors and a fixed paramagnetic centre, each tensor's 5 parameters are independent, so optimise the tensors one at a time.
        if cdp.model == 'fixed' and tensor_num > 1 and ('rdc' in data_types or 'pcs' in data_types) and not align_tensor.all_tensors_fixed() and hasattr(cdp, 'paramag_centre_fixed') and cdp.paramag_centre_fixed:
            # Feedback.
            print("Optimising each alignment tensor separately.")

            # Store the original fixed flags, then fix all tensors.
            fixed_flags = []
            for i in range(len(cdp.align_ids)):
                # Get the tensor object.
                tensor = align_tensor.return_tensor(index=i, skip_fixed=False)

                # Store the flag.
                fixed_flags.append(tensor.fixed)

                # Fix the tensor.
                tensor.set('fixed', True)

            # Loop over each tensor, unfixing, optimising, and refixing it.
            for i in range(len(cdp.align_ids)):
                # Skip tensors which were originally fixed.
                if fixed_flags[i]:
                    continue

                # Get the tensor object.
                tensor = align_tensor.return_tensor(index=i, skip_fixed=False)

                # Unfix the current tensor.
                tensor.set('fixed', False)

                # The 5-parameter sub-grid for this tensor.
                lower_sub = lower[i*5:i*5+5]
                upper_sub = upper[i*5:i*5+5]
                inc_sub = inc[i*5:i*5+5]

                # Minimisation of the sub-grid.
                self.minimise(min_algor='grid', lower=lower_sub, upper=upper_sub, inc=inc_sub, constraints=constraints, verbosity=verbosity, sim_index=sim_index)

                # Fix the tensor again.
                tensor.set('fixed', True)

            # Restore the original fixed flags.
            for i in range(len(cdp.align_ids)):
                # Get the tensor object.
                tensor = align_tensor.return_tensor(index=i, skip_fixed=False)

                # Restore the flag.
                tensor.set('fixed', fixed_flags[i])

        # All other models: a single grid search over the full parameter space.
        else:
            self.minimise(min_algor='grid', lower=lower, upper=upper, inc=inc, constraints=constraints, verbosity=verbosity, sim_index=sim_index)
384
386 """Determine whether the given parameter is spin specific.
387
388 @param name: The name of the parameter.
389 @type name: str
390 @return: False
391 @rtype: bool
392 """
393
394
395 if name in ['r', 'heteronuc_type', 'proton_type']:
396 return True
397
398
399 return False
400
401
403 """Create bounds for the OpenDX mapping function.
404
405 @param param: The name of the parameter to return the lower and upper bounds of.
406 @type param: str
407 @param spin_id: The spin identification string (unused).
408 @type spin_id: None
409 @return: The upper and lower bounds of the parameter.
410 @rtype: list of float
411 """
412
413
414 if search('^paramag_[xyz]$', param):
415 return [-100.0, 100.0]
416
417
    def minimise(self, min_algor=None, min_options=None, func_tol=None, grad_tol=None, max_iterations=None, constraints=False, scaling=True, verbosity=0, sim_index=None, lower=None, upper=None, inc=None):
        """Minimisation function.

        @param min_algor:       The minimisation algorithm to use.
        @type min_algor:        str
        @param min_options:     An array of options to be used by the minimisation algorithm.
        @type min_options:      array of str
        @param func_tol:        The function tolerance which, when reached, terminates optimisation.  Setting this to None turns off the check.
        @type func_tol:         None or float
        @param grad_tol:        The gradient tolerance which, when reached, terminates optimisation.  Setting this to None turns off the check.
        @type grad_tol:         None or float
        @param max_iterations:  The maximum number of iterations for the algorithm.
        @type max_iterations:   int
        @param constraints:     If True, constraints are used during optimisation.
        @type constraints:      bool
        @param scaling:         If True, diagonal scaling is enabled during optimisation to allow the problem to be better conditioned.
        @type scaling:          bool
        @param verbosity:       A flag specifying the amount of information to print.  The higher the value, the greater the verbosity.
        @type verbosity:        int
        @param sim_index:       The index of the simulation to optimise.  This should be None if normal optimisation is desired.
        @type sim_index:        None or int
        @keyword lower:         The lower bounds of the grid search which must be equal to the number of parameters in the model.  This optional argument is only used when doing a grid search.
        @type lower:            array of numbers
        @keyword upper:         The upper bounds of the grid search which must be equal to the number of parameters in the model.  This optional argument is only used when doing a grid search.
        @type upper:            array of numbers
        @keyword inc:           The increments for each dimension of the space for the grid search.  The number of elements in the array must equal to the number of parameters in the model.  This argument is only used when doing a grid search.
        @type inc:              array of int
        """

        # Set up the target function, parameter vector, and diagonal scaling matrix.
        model, param_vector, data_types, scaling_matrix = target_fn_setup(sim_index=sim_index, scaling=scaling)

        # Nothing to optimise.
        if not len(param_vector):
            warn(RelaxWarning("The model has no parameters, minimisation cannot be performed."))
            return

        # Constraints cannot be used for the 'fixed' model.
        if constraints and cdp.model == 'fixed':
            warn(RelaxWarning("Turning constraints off.  These cannot be used for the 'fixed' model."))
            constraints = False

            # Pop out the Method of Multipliers algorithm, reverting to the unconstrained sub-algorithm.
            if min_algor == 'Method of Multipliers':
                min_algor = min_options[0]
                min_options = min_options[1:]

        # Constraints absolutely must be used for the 'population' model.
        if not constraints and cdp.model == 'population':
            warn(RelaxWarning("Turning constraints on.  These absolutely must be used for the 'population' model."))
            constraints = True

            # Add the Method of Multipliers algorithm, wrapping the requested sub-algorithm.
            min_options = (min_algor,) + min_options
            min_algor = 'Method of Multipliers'

        # Hessians are not yet implemented for the paramagnetic centre position, so Newton optimisation is disallowed.
        if hasattr(cdp, 'paramag_centre_fixed') and not cdp.paramag_centre_fixed:
            if min_algor in ['newton']:
                raise RelaxError("For the paramagnetic centre position, as the Hessians are not yet implemented Newton optimisation cannot be performed.")

        # Linear constraints.
        if constraints:
            A, b = linear_constraints(data_types=data_types, scaling_matrix=scaling_matrix)
        else:
            A, b = None, None

        # Grid search.
        if search('^[Gg]rid', min_algor):
            # Scale the grid bounds into the diagonally scaled parameter space.
            if scaling:
                for i in range(len(param_vector)):
                    lower[i] = lower[i] / scaling_matrix[i, i]
                    upper[i] = upper[i] / scaling_matrix[i, i]

            # The search.
            results = grid(func=model.func, args=(), num_incs=inc, lower=lower, upper=upper, A=A, b=b, verbosity=verbosity)

            # Unpack the results - the grid search counts only function calls.
            param_vector, func, iter_count, warning = results
            f_count = iter_count
            g_count = 0.0
            h_count = 0.0

        # Minimisation.
        else:
            results = generic_minimise(func=model.func, dfunc=model.dfunc, d2func=model.d2func, args=(), x0=param_vector, min_algor=min_algor, min_options=min_options, func_tol=func_tol, grad_tol=grad_tol, maxiter=max_iterations, A=A, b=b, full_output=1, print_flag=verbosity)

            # Unpack the results.
            if results == None:
                return
            param_vector, func, iter_count, f_count, g_count, h_count, warning = results

        # Catch infinite chi-squared values.
        if isInf(func):
            raise RelaxInfError('chi-squared')

        # Catch chi-squared values of NaN.
        if isNaN(func):
            raise RelaxNaNError('chi-squared')

        # Make a last function call to update the model's internal back-calculated data structures with the optimised parameter values.
        chi2 = model.func(param_vector)

        # Scale the parameter vector back into the unscaled space.
        if scaling:
            param_vector = dot(scaling_matrix, param_vector)

        # Disassemble the parameter vector into the data pipe structures.
        disassemble_param_vector(param_vector=param_vector, data_types=data_types, sim_index=sim_index)

        # Monte Carlo minimisation statistics.
        if sim_index != None:
            # Chi-squared statistic.
            cdp.chi2_sim[sim_index] = func

            # Iterations.
            cdp.iter_sim[sim_index] = iter_count

            # Function evaluations.
            cdp.f_count_sim[sim_index] = f_count

            # Gradient evaluations.
            cdp.g_count_sim[sim_index] = g_count

            # Hessian evaluations.
            cdp.h_count_sim[sim_index] = h_count

            # Warning.
            cdp.warning_sim[sim_index] = warning

        # Normal statistics.
        else:
            # Chi-squared statistic.
            cdp.chi2 = func

            # Iterations.
            cdp.iter = iter_count

            # Function evaluations.
            cdp.f_count = f_count

            # Gradient evaluations.
            cdp.g_count = g_count

            # Hessian evaluations.
            cdp.h_count = h_count

            # Warning.
            cdp.warning = warning

        # Statistical analysis (skipped for Monte Carlo simulations).
        if sim_index == None and ('rdc' in data_types or 'pcs' in data_types):
            # Store the back-calculated data.
            minimise_bc_data(model)

            # Calculate the RDC Q-factors.
            if 'rdc' in data_types:
                rdc.q_factors()

            # Calculate the PCS Q-factors.
            if 'pcs' in data_types:
                pcs.q_factors()
582
    def model_statistics(self, model_info=None, spin_id=None, global_stats=None):
        """Return the k, n, and chi2 model statistics.

        k - number of parameters.
        n - number of data points.
        chi2 - the chi-squared value.


        @keyword model_info:    The data returned from model_loop() (unused).
        @type model_info:       None
        @keyword spin_id:       The spin identification string.  This is ignored in the N-state model.
        @type spin_id:          None or str
        @keyword global_stats:  A parameter which determines if global or local statistics are returned.  For the N-state model, this argument is ignored.
        @type global_stats:     None or bool
        @return:                The optimisation statistics, in tuple format, of the number of parameters (k), the number of data points (n), and the chi-squared value (chi2).
        @rtype:                 tuple of (int, int, float)
        """

        # The N-state model is a single global model, so the statistics are global too.
        return param_num(), num_data_points(), cdp.chi2
603
604
646
647
    def return_error(self, data_id=None):
        """Create and return the spin specific Monte Carlo Ri error structure.

        @keyword data_id:   The list of spin ID, data type, and alignment ID, as yielded by the base_data_loop() generator method.
        @type data_id:      str
        @return:            The Monte Carlo simulation data errors.
        @rtype:             list of float
        """

        # Initialise the MC error structure.
        mc_errors = []

        # The spin or interatomic data container (first element of the data_id triple).
        container = data_id[0]

        # Deselected spins have no PCS errors to return.
        if data_id[1] == 'pcs' and not container.select:
            return

        # RDC data.
        if data_id[1] == 'rdc' and hasattr(container, 'rdc'):
            # Error checking.
            if not hasattr(container, 'rdc_err'):
                raise RelaxError("The RDC errors are missing for the spin pair '%s' and '%s'." % (container.spin_id1, container.spin_id2))

            # The error for this alignment, or None when absent.
            if data_id[2] not in container.rdc_err:
                err = None
            else:
                err = container.rdc_err[data_id[2]]

            # Append the data.
            mc_errors.append(err)

        # NOESY data.
        elif data_id[1] == 'noesy' and hasattr(container, 'noesy'):
            # Error checking.
            if not hasattr(container, 'noesy_err'):
                raise RelaxError("The NOESY errors are missing for the spin pair '%s' and '%s'." % (container.spin_id1, container.spin_id2))

            # Append the data (no alignment ID key for NOESY data).
            mc_errors.append(container.noesy_err)

        # PCS data.
        elif data_id[1] == 'pcs' and hasattr(container, 'pcs'):
            # Error checking.
            if not hasattr(container, 'pcs_err'):
                raise RelaxError("The PCS errors are missing for spin '%s'." % data_id[0])

            # The error for this alignment, or None when absent.
            if data_id[2] not in container.pcs_err:
                err = None
            else:
                err = container.pcs_err[data_id[2]]

            # Append the data.
            mc_errors.append(err)

        # Return the single-element MC error list.
        return mc_errors
708
709
710 - def set_error(self, model_info, index, error):
711 """Set the parameter errors.
712
713 @param model_info: The global model index originating from model_loop().
714 @type model_info: int
715 @param index: The index of the parameter to set the errors for.
716 @type index: int
717 @param error: The error value.
718 @type error: float
719 """
720
721
722 names = ['Axx', 'Ayy', 'Axy', 'Axz', 'Ayz']
723
724
725 if index < len(cdp.align_ids)*5:
726
727 param_index = index % 5
728 tensor_index = (index - index % 5) / 5
729
730
731 tensor = align_tensor.return_tensor(index=tensor_index, skip_fixed=True)
732 tensor.set(param=names[param_index], value=error, category='err')
733
734
735 return getattr(tensor, names[param_index]+'_err')
736
737
    def set_param_values(self, param=None, value=None, index=None, spin_id=None, error=False, force=True):
        """Set the N-state model parameter values.

        @keyword param:     The parameter name list.
        @type param:        list of str
        @keyword value:     The parameter value list.
        @type value:        list
        @keyword index:     The index for parameters which are of the list-type (probs, alpha, beta, and gamma).  This is ignored for all other types.
        @type index:        None or int
        @keyword spin_id:   The spin identification string (unused).
        @type spin_id:      None
        @keyword error:     A flag which if True will allow the parameter errors to be set instead of the values.
        @type error:        bool
        @keyword force:     A flag which if True will cause current values to be overwritten.  If False, a RelaxError will raised if the parameter value is already set.
        @type force:        bool
        """

        # Checks - both arguments must be lists of matching purpose.
        lib.arg_check.is_str_list(param, 'parameter name')
        lib.arg_check.is_list(value, 'parameter value')

        # Loop over the parameters.
        for i in range(len(param)):
            # Is the parameter valid?
            if not param[i]:
                raise RelaxError("The parameter '%s' is not valid for this data pipe type." % param[i])

        # Error object - redirect to the '_err' variants of the parameters.
            if error:
                param[i] += '_err'

            # List-type parameters: set the single element selected by the index argument.
            if param[i] in ['probs', 'alpha', 'beta', 'gamma']:
                obj = getattr(cdp, param[i])
                obj[index] = value[i]

            # Spin-specific parameters: set the value on every matching spin.
            else:
                for spin in spin_loop(spin_id):
                    setattr(spin, param[i], value[i])
779
    def sim_init_values(self):
        """Initialise the Monte Carlo parameter values."""

        # Get the minimisation statistic object names.
        sim_names = self.data_names(set='min')

        # Add the paramagnetic centre, if it is being optimised.
        if hasattr(cdp, 'paramag_centre_fixed') and not cdp.paramag_centre_fixed:
            sim_names += ['paramagnetic_centre']

        # Alignment tensors.
        if hasattr(cdp, 'align_tensors'):
            # The 5 alignment tensor parameter names.
            names = ['Axx', 'Ayy', 'Axy', 'Axz', 'Ayz']

            # Loop over the alignment tensors.
            for i in range(len(cdp.align_tensors)):
                # Skip tensors which are not part of the optimisation.
                if not opt_uses_tensor(cdp.align_tensors[i]):
                    continue

                # Set up the tensor's simulation structures.
                cdp.align_tensors[i].set_sim_num(cdp.sim_number)

                # Seed every simulation with a deep copy of the optimised parameter value.
                for object_name in names:
                    for j in range(cdp.sim_number):
                        cdp.align_tensors[i].set(param=object_name, value=deepcopy(getattr(cdp.align_tensors[i], object_name)), category='sim', sim_index=j)

        # Loop over the minimisation statistic names.
        for object_name in sim_names:
            # The name of the simulation structure.
            sim_object_name = object_name + '_sim'

            # Create the empty simulation structure on the data pipe.
            setattr(cdp, sim_object_name, [])

            # Get the simulation object.
            sim_object = getattr(cdp, sim_object_name)

            # Initialise one None element per simulation.
            for j in range(cdp.sim_number):
                sim_object.append(None)

        # Seed the simulation paramagnetic centre positions with the optimised position.
        if hasattr(cdp, 'paramag_centre_fixed') and not cdp.paramag_centre_fixed:
            for j in range(cdp.sim_number):
                cdp.paramagnetic_centre_sim[j] = deepcopy(cdp.paramagnetic_centre)
830
832 """Pack the Monte Carlo simulation data.
833
834 @keyword data_id: The list of spin ID, data type, and alignment ID, as yielded by the base_data_loop() generator method.
835 @type data_id: list of str
836 @param sim_data: The Monte Carlo simulation data.
837 @type sim_data: list of float
838 """
839
840
841 container = data_id[0]
842
843
844 if data_id[1] == 'rdc' and hasattr(container, 'rdc'):
845
846 if not hasattr(container, 'rdc_sim'):
847 container.rdc_sim = {}
848
849
850 container.rdc_sim[data_id[2]] = []
851 for i in range(cdp.sim_number):
852 container.rdc_sim[data_id[2]].append(sim_data[i][0])
853
854
855 elif data_id[1] == 'noesy' and hasattr(container, 'noesy'):
856
857 container.noesy_sim = []
858 for i in range(cdp.sim_number):
859 container.noesy_sim[data_id[2]].append(sim_data[i][0])
860
861
862 elif data_id[1] == 'pcs' and hasattr(container, 'pcs'):
863
864 if not hasattr(container, 'pcs_sim'):
865 container.pcs_sim = {}
866
867
868 container.pcs_sim[data_id[2]] = []
869 for i in range(cdp.sim_number):
870 container.pcs_sim[data_id[2]].append(sim_data[i][0])
871
872
874 """Return the array of simulation parameter values.
875
876 @param model_info: The global model index originating from model_loop().
877 @type model_info: int
878 @param index: The index of the parameter to return the array of values for.
879 @type index: int
880 @return: The array of simulation parameter values.
881 @rtype: list of float
882 """
883
884
885 names = ['Axx', 'Ayy', 'Axy', 'Axz', 'Ayz']
886
887
888 if index < align_tensor.num_tensors(skip_fixed=True)*5:
889
890 param_index = index % 5
891 tensor_index = (index - index % 5) / 5
892
893
894 tensor = align_tensor.return_tensor(index=tensor_index, skip_fixed=True)
895 return getattr(tensor, names[param_index]+'_sim')
896