Package lib :: Package structure :: Module pca
[hide private]
[frames] | no frames]

Source Code for Module lib.structure.pca

  1  ############################################################################### 
  2  #                                                                             # 
  3  # Copyright (C) 2015 Edward d'Auvergne                                        # 
  4  #                                                                             # 
  5  # This file is part of the program relax (http://www.nmr-relax.com).          # 
  6  #                                                                             # 
  7  # This program is free software: you can redistribute it and/or modify        # 
  8  # it under the terms of the GNU General Public License as published by        # 
  9  # the Free Software Foundation, either version 3 of the License, or           # 
 10  # (at your option) any later version.                                         # 
 11  #                                                                             # 
 12  # This program is distributed in the hope that it will be useful,             # 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of              # 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               # 
 15  # GNU General Public License for more details.                                # 
 16  #                                                                             # 
 17  # You should have received a copy of the GNU General Public License           # 
 18  # along with this program.  If not, see <http://www.gnu.org/licenses/>.       # 
 19  #                                                                             # 
 20  ############################################################################### 
 21   
 22  # Module docstring. 
 23  """Module for performing a principle component analysis (PCA).""" 
 24   
 25  # Python module imports. 
 26  from numpy import array, dot, float64, outer, sqrt, zeros 
 27  from numpy.linalg import eigh, svd 
 28   
 29  # relax library module imports. 
 30  from lib.errors import RelaxError 
 31  from lib.structure.statistics import calc_mean_structure 
 32   
 33   
34 -def calc_covariance_matrix(coord=None, weights=None):
35 """Calculate the covariance matrix for the structures. 36 37 @keyword coord: The list of coordinates of all models to superimpose. The first index is the models, the second is the atomic positions, and the third is the xyz coordinates. 38 @type coord: list of numpy rank-2, Nx3 arrays 39 @keyword weights: The weights for each structure. 40 @type weights: list of float 41 @return: The covariance matrix and the deviation matrix. 42 @rtype: numpy rank-2 3Nx3N array, numpy rank-2 MxNx3 array 43 """ 44 45 # Init. 46 M = len(coord) 47 N = len(coord[0]) 48 if weights is None: 49 weights = ones(M, float64) 50 else: 51 weights = array(weights, float64) 52 covariance_matrix = zeros((N*3, N*3), float64) 53 deviations = zeros((M, N, 3), float64) 54 mean_struct = zeros((N, 3), float64) 55 56 # Calculate the mean structure. 57 calc_mean_structure(coord, mean_struct, weights=weights) 58 59 # Loop over the models. 60 for i in range(M): 61 # The deviations from the mean. 62 deviations[i] = coord[i] - mean_struct 63 64 # Sum the covariance element. 65 covariance_matrix += weights[i] * (outer(deviations[i], deviations[i])) 66 67 # Average. 68 covariance_matrix /= weights.sum() 69 70 # Return the matrix. 71 return covariance_matrix, deviations
72 73
74 -def calc_projections(coord=None, num_modes=4):
75 """Calculate the PCA projections. 76 77 @keyword num_modes: The number of PCA modes to calculate. 78 @type num_modes: int 79 """
80 81
82 -def pca_analysis(coord=None, weights=None, algorithm='eigen', num_modes=4):
83 """Perform the PCA analysis. 84 85 @keyword coord: The list of coordinates of all models to superimpose. The first index is the models, the second is the atomic positions, and the third is the xyz coordinates. 86 @type coord: list of numpy rank-2, Nx3 arrays 87 @keyword weights: The weights for each structure. 88 @type weights: list of float 89 @keyword algorithm: The PCA algorithm to use (either 'eigen' or 'svd'). 90 @type algorithm: str 91 @keyword num_modes: The number of PCA modes to calculate. 92 @type num_modes: int 93 @return: The PCA values and vectors, and the per structure projections. 94 @rtype: numpy rank-1 array, numpy rank-3 array, numpy rank2 array 95 """ 96 97 # Init. 98 M = len(coord) 99 N = len(coord[0]) 100 101 # Calculate the covariance matrix for the structures. 102 covariance_matrix, deviations = calc_covariance_matrix(coord, weights=weights) 103 104 # Perform an eigenvalue decomposition of the covariance matrix. 105 if algorithm == 'eigen': 106 text = 'eigenvalues' 107 values, vectors = eigh(covariance_matrix) 108 109 # Sort the values and vectors. 110 indices = values.argsort()[::-1] 111 values = values[indices] 112 vectors = vectors[:, indices] 113 114 # Perform a singular value decomposition of the covariance matrix. 115 elif algorithm == 'svd': 116 text = 'singular values' 117 vectors, values, V = svd(covariance_matrix) 118 119 # Invalid algorithm. 120 else: 121 raise RelaxError("The '%s' algorithm is unknown. It should be either 'eigen' or 'svd'." % algorithm) 122 123 # Printout. 124 print("\nThe %s in Angstrom are:" % text) 125 for i in range(num_modes): 126 print("Mode %i: %15.5f" % (i+1, values[i])) 127 128 # Calculate the projection for each structure. 129 proj = zeros((num_modes, M), float64) 130 for s in range(M): 131 for mode in range(num_modes): 132 proj[mode, s] = dot(vectors[:, mode], deviations[s].reshape(N*3)) 133 134 # Truncation to the desired number of modes. 135 values = values[:num_modes] 136 vectors = vectors[:,:num_modes].reshape((3, N, num_modes)) 137 138 # Return the results. 139 return values, vectors, proj
140