1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """Module for performing a principle component analysis (PCA)."""
24
25
26 from numpy import array, dot, float64, outer, sqrt, zeros
27 from numpy.linalg import eigh, svd
28
29
30 from lib.errors import RelaxError
31 from lib.structure.statistics import calc_mean_structure
32
33
35 """Calculate the covariance matrix for the structures.
36
37 @keyword coord: The list of coordinates of all models to superimpose. The first index is the models, the second is the atomic positions, and the third is the xyz coordinates.
38 @type coord: list of numpy rank-2, Nx3 arrays
39 @keyword weights: The weights for each structure.
40 @type weights: list of float
41 @return: The covariance matrix and the deviation matrix.
42 @rtype: numpy rank-2 3Nx3N array, numpy rank-2 MxNx3 array
43 """
44
45
46 M = len(coord)
47 N = len(coord[0])
48 if weights is None:
49 weights = ones(M, float64)
50 else:
51 weights = array(weights, float64)
52 covariance_matrix = zeros((N*3, N*3), float64)
53 deviations = zeros((M, N, 3), float64)
54 mean_struct = zeros((N, 3), float64)
55
56
57 calc_mean_structure(coord, mean_struct, weights=weights)
58
59
60 for i in range(M):
61
62 deviations[i] = coord[i] - mean_struct
63
64
65 covariance_matrix += weights[i] * (outer(deviations[i], deviations[i]))
66
67
68 covariance_matrix /= weights.sum()
69
70
71 return covariance_matrix, deviations
72
73
75 """Calculate the PCA projections.
76
77 @keyword num_modes: The number of PCA modes to calculate.
78 @type num_modes: int
79 """
80
81
82 -def pca_analysis(coord=None, weights=None, algorithm='eigen', num_modes=4):
83 """Perform the PCA analysis.
84
85 @keyword coord: The list of coordinates of all models to superimpose. The first index is the models, the second is the atomic positions, and the third is the xyz coordinates.
86 @type coord: list of numpy rank-2, Nx3 arrays
87 @keyword weights: The weights for each structure.
88 @type weights: list of float
89 @keyword algorithm: The PCA algorithm to use (either 'eigen' or 'svd').
90 @type algorithm: str
91 @keyword num_modes: The number of PCA modes to calculate.
92 @type num_modes: int
93 @return: The PCA values and vectors, and the per structure projections.
94 @rtype: numpy rank-1 array, numpy rank-3 array, numpy rank2 array
95 """
96
97
98 M = len(coord)
99 N = len(coord[0])
100
101
102 covariance_matrix, deviations = calc_covariance_matrix(coord, weights=weights)
103
104
105 if algorithm == 'eigen':
106 text = 'eigenvalues'
107 values, vectors = eigh(covariance_matrix)
108
109
110 indices = values.argsort()[::-1]
111 values = values[indices]
112 vectors = vectors[:, indices]
113
114
115 elif algorithm == 'svd':
116 text = 'singular values'
117 vectors, values, V = svd(covariance_matrix)
118
119
120 else:
121 raise RelaxError("The '%s' algorithm is unknown. It should be either 'eigen' or 'svd'." % algorithm)
122
123
124 print("\nThe %s in Angstrom are:" % text)
125 for i in range(num_modes):
126 print("Mode %i: %15.5f" % (i+1, values[i]))
127
128
129 proj = zeros((num_modes, M), float64)
130 for s in range(M):
131 for mode in range(num_modes):
132 proj[mode, s] = dot(vectors[:, mode], deviations[s].reshape(N*3))
133
134
135 values = values[:num_modes]
136 vectors = vectors[:,:num_modes].reshape((3, N, num_modes))
137
138
139 return values, vectors, proj
140