specific_analyses.n_state_model.parameters

1 ############################################################################### 2 # # 3 # Copyright (C) 2007-2013 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax (http://www.nmr-relax.com). # 6 # # 7 # This program is free software: you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation, either version 3 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # This program is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. # 19 # # 20 ############################################################################### 21 22 # Package docstring. 23 """Module for handling the parameters of the N-state model or structural ensemble analysis.""" 24 25 # Python module imports. 26 from numpy import array, float64, identity, zeros 27 from re import search 28 from warnings import warn 29 30 # relax module imports. 31 from lib.errors import RelaxNoModelError 32 from lib.warnings import RelaxWarning 33 from pipe_control import align_tensor, pipes 34 from pipe_control.align_tensor import opt_uses_align_data, opt_uses_tensor 35 from specific_analyses.n_state_model.data import base_data_types 36 37

38 -def assemble_param_vector(sim_index=None):

39 """Assemble all the parameters of the model into a single array. 40 41 @param sim_index: The index of the simulation to optimise. This should be None if normal optimisation is desired. 42 @type sim_index: None or int 43 @return: The parameter vector used for optimisation. 44 @rtype: numpy array 45 """ 46 47 # Test if the model is selected. 48 if not hasattr(cdp, 'model') or not isinstance(cdp.model, str): 49 raise RelaxNoModelError 50 51 # Determine the data type. 52 data_types = base_data_types() 53 54 # Initialise the parameter vector. 55 param_vector = [] 56 57 # A RDC or PCS data type requires the alignment tensors to be at the start of the parameter vector (unless the tensors are fixed). 58 if opt_uses_align_data(): 59 for i in range(len(cdp.align_tensors)): 60 # Skip non-optimised tensors. 61 if not opt_uses_tensor(cdp.align_tensors[i]): 62 continue 63 64 # Add the parameters. 65 param_vector = param_vector + list(cdp.align_tensors[i].A_5D) 66 67 # Monte Carlo simulation data structures. 68 if sim_index != None: 69 # Populations. 70 if cdp.model in ['2-domain', 'population']: 71 probs = cdp.probs_sim[sim_index] 72 73 # Euler angles. 74 if cdp.model == '2-domain': 75 alpha = cdp.alpha_sim[sim_index] 76 beta = cdp.beta_sim[sim_index] 77 gamma = cdp.gamma_sim[sim_index] 78 79 # Normal data structures. 80 else: 81 # Populations. 82 if cdp.model in ['2-domain', 'population']: 83 probs = cdp.probs 84 85 # Euler angles. 86 if cdp.model == '2-domain': 87 alpha = cdp.alpha 88 beta = cdp.beta 89 gamma = cdp.gamma 90 91 # The probabilities (exclude that of state N). 92 if cdp.model in ['2-domain', 'population']: 93 param_vector = param_vector + probs[0:-1] 94 95 # The Euler angles. 96 if cdp.model == '2-domain': 97 for i in range(cdp.N): 98 param_vector.append(alpha[i]) 99 param_vector.append(beta[i]) 100 param_vector.append(gamma[i]) 101 102 # The paramagnetic centre. 103 if hasattr(cdp, 'paramag_centre_fixed') and not cdp.paramag_centre_fixed: 104 if not hasattr(cdp, 'paramagnetic_centre'): 105 for i in range(3): 106 param_vector.append(0.0) 107 elif sim_index != None: 108 if cdp.paramagnetic_centre_sim[sim_index] == None: 109 for i in range(3): 110 param_vector.append(0.0) 111 else: 112 for i in range(3): 113 param_vector.append(cdp.paramagnetic_centre_sim[sim_index][i]) 114 else: 115 for i in range(3): 116 param_vector.append(cdp.paramagnetic_centre[i]) 117 118 # Convert all None values to zero (to avoid conversion to NaN). 119 for i in range(len(param_vector)): 120 if param_vector[i] == None: 121 param_vector[i] = 0.0 122 123 # Return a numpy arrary. 124 return array(param_vector, float64)

125 126

127 -def assemble_scaling_matrix(data_types=None, scaling=True):

128 """Create and return the scaling matrix. 129 130 @keyword data_types: The base data types used in the optimisation. This list can contain 131 the elements 'rdc', 'pcs' or 'tensor'. 132 @type data_types: list of str 133 @keyword scaling: If False, then the identity matrix will be returned. 134 @type scaling: bool 135 @return: The square and diagonal scaling matrix. 136 @rtype: numpy rank-2 array 137 """ 138 139 # Initialise. 140 scaling_matrix = identity(param_num(), float64) 141 142 # Return the identity matrix. 143 if not scaling: 144 return scaling_matrix 145 146 # Starting point of the populations. 147 pop_start = 0 148 149 # Loop over the alignments. 150 tensor_num = 0 151 for i in range(len(cdp.align_tensors)): 152 # Skip non-optimised tensors. 153 if not opt_uses_tensor(cdp.align_tensors[i]): 154 continue 155 156 # Add the 5 alignment parameters. 157 pop_start = pop_start + 5 158 159 # The alignment parameters. 160 for j in range(5): 161 scaling_matrix[5*tensor_num + j, 5*tensor_num + j] = 1.0 162 163 # Increase the tensor number. 164 tensor_num += 1 165 166 # Loop over the populations, and set the scaling factor. 167 if cdp.model in ['2-domain', 'population']: 168 factor = 0.1 169 for i in range(pop_start, pop_start + (cdp.N-1)): 170 scaling_matrix[i, i] = factor 171 172 # The paramagnetic centre. 173 if hasattr(cdp, 'paramag_centre_fixed') and not cdp.paramag_centre_fixed: 174 for i in range(-3, 0): 175 scaling_matrix[i, i] = 1e2 176 177 # Return the matrix. 178 return scaling_matrix

179 180

181 -def disassemble_param_vector(param_vector=None, data_types=None, sim_index=None):

182 """Disassemble the parameter vector and place the values into the relevant variables. 183 184 For the 2-domain N-state model, the parameters are stored in the probability and Euler angle data structures. For the population N-state model, only the probabilities are stored. If RDCs are present and alignment tensors are optimised, then these are stored as well. 185 186 @keyword data_types: The base data types used in the optimisation. This list can contain the elements 'rdc', 'pcs' or 'tensor'. 187 @type data_types: list of str 188 @keyword param_vector: The parameter vector returned from optimisation. 189 @type param_vector: numpy array 190 @keyword sim_index: The index of the simulation to optimise. This should be None if normal optimisation is desired. 191 @type sim_index: None or int 192 """ 193 194 # Test if the model is selected. 195 if not hasattr(cdp, 'model') or not isinstance(cdp.model, str): 196 raise RelaxNoModelError 197 198 # Unpack and strip off the alignment tensor parameters. 199 if ('rdc' in data_types or 'pcs' in data_types) and not align_tensor.all_tensors_fixed(): 200 # Loop over the alignments, adding the alignment tensor parameters to the tensor data container. 201 tensor_num = 0 202 for i in range(len(cdp.align_tensors)): 203 # Skip non-optimised tensors. 204 if not opt_uses_tensor(cdp.align_tensors[i]): 205 continue 206 207 # Normal tensors. 208 if sim_index == None: 209 cdp.align_tensors[i].set(param='Axx', value=param_vector[5*tensor_num]) 210 cdp.align_tensors[i].set(param='Ayy', value=param_vector[5*tensor_num+1]) 211 cdp.align_tensors[i].set(param='Axy', value=param_vector[5*tensor_num+2]) 212 cdp.align_tensors[i].set(param='Axz', value=param_vector[5*tensor_num+3]) 213 cdp.align_tensors[i].set(param='Ayz', value=param_vector[5*tensor_num+4]) 214 215 # Monte Carlo simulated tensors. 216 else: 217 cdp.align_tensors[i].set(param='Axx', value=param_vector[5*tensor_num], category='sim', sim_index=sim_index) 218 cdp.align_tensors[i].set(param='Ayy', value=param_vector[5*tensor_num+1], category='sim', sim_index=sim_index) 219 cdp.align_tensors[i].set(param='Axy', value=param_vector[5*tensor_num+2], category='sim', sim_index=sim_index) 220 cdp.align_tensors[i].set(param='Axz', value=param_vector[5*tensor_num+3], category='sim', sim_index=sim_index) 221 cdp.align_tensors[i].set(param='Ayz', value=param_vector[5*tensor_num+4], category='sim', sim_index=sim_index) 222 223 # Increase the tensor number. 224 tensor_num += 1 225 226 # Create a new parameter vector without the tensors. 227 param_vector = param_vector[5*tensor_num:] 228 229 # Alias the Monte Carlo simulation data structures. 230 if sim_index != None: 231 # Populations. 232 if cdp.model in ['2-domain', 'population']: 233 probs = cdp.probs_sim[sim_index] 234 235 # Euler angles. 236 if cdp.model == '2-domain': 237 alpha = cdp.alpha_sim[sim_index] 238 beta = cdp.beta_sim[sim_index] 239 gamma = cdp.gamma_sim[sim_index] 240 241 # Alias the normal data structures. 242 else: 243 # Populations. 244 if cdp.model in ['2-domain', 'population']: 245 probs = cdp.probs 246 247 # Euler angles. 248 if cdp.model == '2-domain': 249 alpha = cdp.alpha 250 beta = cdp.beta 251 gamma = cdp.gamma 252 253 # Set the probabilities for states 0 to N-1 in the aliased structures. 254 if cdp.model in ['2-domain', 'population']: 255 for i in range(cdp.N-1): 256 probs[i] = param_vector[i] 257 258 # The probability for state N. 259 probs[-1] = 1 - sum(probs[0:-1]) 260 261 # Set the Euler angles in the aliased structures. 262 if cdp.model == '2-domain': 263 for i in range(cdp.N): 264 alpha[i] = param_vector[cdp.N-1 + 3*i] 265 beta[i] = param_vector[cdp.N-1 + 3*i + 1] 266 gamma[i] = param_vector[cdp.N-1 + 3*i + 2] 267 268 # The paramagnetic centre. 269 if hasattr(cdp, 'paramag_centre_fixed') and not cdp.paramag_centre_fixed: 270 # Create the structure if needed. 271 if not hasattr(cdp, 'paramagnetic_centre'): 272 cdp.paramagnetic_centre = zeros(3, float64) 273 274 # The position. 275 if sim_index == None: 276 cdp.paramagnetic_centre[0] = param_vector[-3] 277 cdp.paramagnetic_centre[1] = param_vector[-2] 278 cdp.paramagnetic_centre[2] = param_vector[-1] 279 280 # Monte Carlo simulated positions. 281 else: 282 if cdp.paramagnetic_centre_sim[sim_index] == None: 283 cdp.paramagnetic_centre_sim[sim_index] = [None, None, None] 284 cdp.paramagnetic_centre_sim[sim_index][0] = param_vector[-3] 285 cdp.paramagnetic_centre_sim[sim_index][1] = param_vector[-2] 286 cdp.paramagnetic_centre_sim[sim_index][2] = param_vector[-1]

287 288

289 -def elim_no_prob():

290 """Remove all structures or states which have no probability.""" 291 292 # Test if the current data pipe exists. 293 pipes.test() 294 295 # Test if the model is setup. 296 if not hasattr(cdp, 'model'): 297 raise RelaxNoModelError('N-state') 298 299 # Test if there are populations. 300 if not hasattr(cdp, 'probs'): 301 raise RelaxError("The N-state model populations do not exist.") 302 303 # Create the data structure if needed. 304 if not hasattr(cdp, 'select_models'): 305 cdp.state_select = [True] * cdp.N 306 307 # Loop over the structures. 308 for i in range(len(cdp.N)): 309 # No probability. 310 if cdp.probs[i] < 1e-5: 311 cdp.state_select[i] = False

312 313

314 -def linear_constraints(data_types=None, scaling_matrix=None):

315 """Function for setting up the linear constraint matrices A and b. 316 317 Standard notation 318 ================= 319 320 The N-state model constraints are:: 321 322 0 <= pc <= 1, 323 324 where p is the probability and c corresponds to state c. 325 326 327 Matrix notation 328 =============== 329 330 In the notation A.x >= b, where A is an matrix of coefficients, x is an array of parameter 331 values, and b is a vector of scalars, these inequality constraints are:: 332 333 | 1 0 0 | | 0 | 334 | | | | 335 |-1 0 0 | | -1 | 336 | | | | 337 | 0 1 0 | | 0 | 338 | | | p0 | | | 339 | 0 -1 0 | | | | -1 | 340 | | . | p1 | >= | | 341 | 0 0 1 | | | | 0 | 342 | | | p2 | | | 343 | 0 0 -1 | | -1 | 344 | | | | 345 |-1 -1 -1 | | -1 | 346 | | | | 347 | 1 1 1 | | 0 | 348 349 This example is for a 4-state model, the last probability pn is not included as this 350 parameter does not exist (because the sum of pc is equal to 1). The Euler angle parameters 351 have been excluded here but will be included in the returned A and b objects. These 352 parameters simply add columns of zero to the A matrix and have no effect on b. The last two 353 rows correspond to the inequality:: 354 355 0 <= pN <= 1. 356 357 As:: 358 N-1 359 \ 360 pN = 1 - > pc, 361 /__ 362 c=1 363 364 then:: 365 366 -p1 - p2 - ... - p(N-1) >= -1, 367 368 p1 + p2 + ... + p(N-1) >= 0. 369 370 371 @keyword data_types: The base data types used in the optimisation. This list can 372 contain the elements 'rdc', 'pcs' or 'tensor'. 373 @type data_types: list of str 374 @keyword scaling_matrix: The diagonal scaling matrix. 375 @type scaling_matrix: numpy rank-2 square matrix 376 @return: The matrices A and b. 377 @rtype: tuple of len 2 of a numpy rank-2, size NxM matrix and numpy 378 rank-1, size N array 379 """ 380 381 # Starting point of the populations. 382 pop_start = 0 383 if ('rdc' in data_types or 'pcs' in data_types) and not align_tensor.all_tensors_fixed(): 384 # Loop over the alignments. 385 for i in range(len(cdp.align_tensors)): 386 # Skip non-optimised tensors. 387 if not opt_uses_tensor(cdp.align_tensors[i]): 388 continue 389 390 # Add 5 parameters. 391 pop_start += 5 392 393 # Initialisation (0..j..m). 394 A = [] 395 b = [] 396 zero_array = zeros(param_num(), float64) 397 i = pop_start 398 j = 0 399 400 # Probability parameters. 401 if cdp.model in ['2-domain', 'population']: 402 # Loop over the prob parameters (N - 1, because the sum of pc is 1). 403 for k in range(cdp.N - 1): 404 # 0 <= pc <= 1. 405 A.append(zero_array * 0.0) 406 A.append(zero_array * 0.0) 407 A[j][i] = 1.0 408 A[j+1][i] = -1.0 409 b.append(0.0) 410 b.append(-1.0 / scaling_matrix[i, i]) 411 j = j + 2 412 413 # Increment i. 414 i = i + 1 415 416 # Add the inequalities for pN. 417 A.append(zero_array * 0.0) 418 A.append(zero_array * 0.0) 419 for i in range(pop_start, pop_start+cdp.N-1): 420 A[-2][i] = -1.0 421 A[-1][i] = 1.0 422 b.append(-1.0 / scaling_matrix[i, i]) 423 b.append(0.0) 424 425 # Convert to numpy data structures. 426 A = array(A, float64) 427 b = array(b, float64) 428 429 # Return the constraint objects. 430 return A, b

431 432

433 -def number_of_states(N=None):

434 """Set the number of states in the N-state model. 435 436 @param N: The number of states. 437 @type N: int 438 """ 439 440 # Test if the current data pipe exists. 441 pipes.test() 442 443 # Set the value of N. 444 cdp.N = N 445 446 # Update the model, if set. 447 if hasattr(cdp, 'model'): 448 update_model()

449 450

451 -def param_model_index(param):

452 """Return the N-state model index for the given parameter. 453 454 @param param: The N-state model parameter. 455 @type param: str 456 @return: The N-state model index. 457 @rtype: str 458 """ 459 460 # Probability. 461 if search('^p[0-9]*$', param): 462 return int(param[1:]) 463 464 # Alpha Euler angle. 465 if search('^alpha', param): 466 return int(param[5:]) 467 468 # Beta Euler angle. 469 if search('^beta', param): 470 return int(param[4:]) 471 472 # Gamma Euler angle. 473 if search('^gamma', param): 474 return int(param[5:]) 475 476 # Model independent parameter. 477 return None

478 479

480 -def param_num():

481 """Determine the number of parameters in the model. 482 483 @return: The number of model parameters. 484 @rtype: int 485 """ 486 487 # Determine the data type. 488 data_types = base_data_types() 489 490 # Init. 491 num = 0 492 493 # Alignment tensor params. 494 if ('rdc' in data_types or 'pcs' in data_types) and not align_tensor.all_tensors_fixed(): 495 # Loop over the alignments. 496 for i in range(len(cdp.align_tensors)): 497 # Skip non-optimised tensors. 498 if not opt_uses_tensor(cdp.align_tensors[i]): 499 continue 500 501 # Add 5 tensor parameters. 502 num += 5 503 504 # Populations. 505 if cdp.model in ['2-domain', 'population']: 506 num = num + (cdp.N - 1) 507 508 # Euler angles. 509 if cdp.model == '2-domain': 510 num = num + 3*cdp.N 511 512 # The paramagnetic centre. 513 if hasattr(cdp, 'paramag_centre_fixed') and not cdp.paramag_centre_fixed: 514 num = num + 3 515 516 # Return the param number. 517 return num

518 519

520 -def ref_domain(ref=None):

521 """Set the reference domain for the '2-domain' N-state model. 522 523 @param ref: The reference domain. 524 @type ref: str 525 """ 526 527 # Test if the current data pipe exists. 528 pipes.test() 529 530 # Test if the model is setup. 531 if not hasattr(cdp, 'model'): 532 raise RelaxNoModelError('N-state') 533 534 # Test that the correct model is set. 535 if cdp.model != '2-domain': 536 raise RelaxError("Setting the reference domain is only possible for the '2-domain' N-state model.") 537 538 # Test if the reference domain exists. 539 exists = False 540 for tensor_cont in cdp.align_tensors: 541 if tensor_cont.domain == ref: 542 exists = True 543 if not exists: 544 raise RelaxError("The reference domain cannot be found within any of the loaded tensors.") 545 546 # Set the reference domain. 547 cdp.ref_domain = ref 548 549 # Update the model. 550 update_model()

551 552

553 -def select_model(model=None):

554 """Select the N-state model type. 555 556 @param model: The N-state model type. Can be one of '2-domain', 'population', or 'fixed'. 557 @type model: str 558 """ 559 560 # Test if the current data pipe exists. 561 pipes.test() 562 563 # Test if the model name exists. 564 if not model in ['2-domain', 'population', 'fixed']: 565 raise RelaxError("The model name " + repr(model) + " is invalid.") 566 567 # Test if the model is setup. 568 if hasattr(cdp, 'model'): 569 warn(RelaxWarning("The N-state model has already been set up. Switching from model '%s' to '%s'." % (cdp.model, model))) 570 571 # Set the model 572 cdp.model = model 573 574 # Initialise the list of model parameters. 575 cdp.params = [] 576 577 # Update the model. 578 update_model()

579 580

581 -def update_model():

582 """Update the model parameters as necessary.""" 583 584 # Initialise the list of model parameters. 585 if not hasattr(cdp, 'params'): 586 cdp.params = [] 587 588 # Determine the number of states (loaded as structural models), if not already set. 589 if not hasattr(cdp, 'N'): 590 # Set the number. 591 if hasattr(cdp, 'structure'): 592 cdp.N = cdp.structure.num_models() 593 594 # Otherwise return as the rest cannot be updated without N. 595 else: 596 return 597 598 # Set up the parameter arrays. 599 if not cdp.params: 600 # Add the probability or population weight parameters. 601 if cdp.model in ['2-domain', 'population']: 602 for i in range(cdp.N-1): 603 cdp.params.append('p' + repr(i)) 604 605 # Add the Euler angle parameters. 606 if cdp.model == '2-domain': 607 for i in range(cdp.N): 608 cdp.params.append('alpha' + repr(i)) 609 cdp.params.append('beta' + repr(i)) 610 cdp.params.append('gamma' + repr(i)) 611 612 # Initialise the probability and Euler angle arrays. 613 if cdp.model in ['2-domain', 'population']: 614 if not hasattr(cdp, 'probs'): 615 cdp.probs = [None] * cdp.N 616 if cdp.model == '2-domain': 617 if not hasattr(cdp, 'alpha'): 618 cdp.alpha = [None] * cdp.N 619 if not hasattr(cdp, 'beta'): 620 cdp.beta = [None] * cdp.N 621 if not hasattr(cdp, 'gamma'): 622 cdp.gamma = [None] * cdp.N 623 624 # Determine the data type. 625 data_types = base_data_types() 626 627 # Set up tensors for each alignment. 628 if hasattr(cdp, 'align_ids'): 629 for id in cdp.align_ids: 630 # No tensors initialised. 631 if not hasattr(cdp, 'align_tensors'): 632 align_tensor.init(tensor=id, align_id=id, params=[0.0, 0.0, 0.0, 0.0, 0.0]) 633 634 # Find if the tensor corresponding to the id exists. 635 exists = False 636 for tensor in cdp.align_tensors: 637 if id == tensor.align_id: 638 exists = True 639 640 # Initialise the tensor. 641 if not exists: 642 align_tensor.init(tensor=id, align_id=id, params=[0.0, 0.0, 0.0, 0.0, 0.0])

643

Source Code for Module specific_analyses.n_state_model.parameters