1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 from Numeric import Float64, dot, outerproduct, sqrt, zeros
25 from re import match
26
27
28 from constraint_linear import Constraint_linear
29 from base_classes import Min
30
31
def method_of_multipliers(func=None, dfunc=None, d2func=None, args=(), x0=None, min_options=(), A=None, b=None, l=None, u=None, c=None, dc=None, d2c=None, lambda0=None, init_lambda=1e4, mu0=1e-5, epsilon0=1e-2, gamma0=1e-2, scale_mu=0.5, scale_epsilon=1e-2, scale_gamma=1e-2, func_tol=1e-25, grad_tol=None, maxiter=1e6, inner_maxiter=500, full_output=0, print_flag=0):
    """The method of multipliers, also known as the augmented Lagrangian method.

    Page 515 from 'Numerical Optimization' by Jorge Nocedal and Stephen J. Wright, 1999, 2nd ed.

    The algorithm is:

        Given u0 > 0, tolerance t0 > 0, starting points x0s and lambda0
        while 1:
            Find an approximate minimiser xk of LA(.,lambdak; uk), starting at xks, and
                terminating when the augmented Lagrangian gradient <= tk
            Final convergence test
            Update Lagrange multipliers using formula 17.58
            Choose new penalty parameter uk+1 within (0, uk)
            Set starting point for the next iteration to xk+1s = xk
            k = k + 1


    Three types of inequality constraint are supported.  These are linear, bound, and general
    constraints and must be setup as follows.  The vector x is the vector of model parameters.
    Don't use bound constraints yet as this code is incomplete!

    Equality constraints are currently unimplemented.


    Linear constraints
    ~~~~~~~~~~~~~~~~~~

    These are defined as:

        A.x >= b

    where:
        A is an m*n matrix where the rows are the transposed vectors, ai, of length n.  The
            elements of ai are the coefficients of the model parameters.

        x is the vector of model parameters of dimension n.

        b is the vector of scalars of dimension m.

        m is the number of constraints.

        n is the number of model parameters.

    eg if 0 <= q <= 1, q >= 1 - 2r, and 0 <= r, then:

        | 1  0 |            |  0 |
        |      |            |    |
        |-1  0 |   | q |    | -1 |
        |      | . |   | >= |    |
        | 1  2 |   | r |    |  1 |
        |      |            |    |
        | 0  1 |            |  2 |

    To use linear constraints both the matrix A and vector b need to be supplied.


    Bound constraints
    ~~~~~~~~~~~~~~~~~

    Bound constraints are defined as:

        l <= x <= u

    where l and u are the vectors of lower and upper bounds respectively.

    eg if 0 <= q <= 1, r >= 0, s <= 3, then:

        |  0   |    | q |    |  1  |
        |  0   | <= | r | <= | inf |
        | -inf |    | s |    |  3  |

    To use bound constraints both vectors l and u need to be supplied.


    General constraints
    ~~~~~~~~~~~~~~~~~~~

    These are defined as:

        ci(x) >= 0

    where ci(x) are the constraint functions.

    To use general constraints the functions c, dc, and d2c need to be supplied.  The function c
    is the constraint function which should return the vector of constraint values.  The
    function dc is the constraint gradient function which should return the matrix of constraint
    gradients.  The function d2c is the constraint Hessian function which should return the 3D
    matrix of constraint Hessians.


    Initial values
    ~~~~~~~~~~~~~~

    These are the default initial values:

        mu0 = 1e-5
        epsilon0 = 1e-2
        gamma0 = 1e-2
        scale_mu = 0.5
        scale_epsilon = 1e-2
        scale_gamma = 1e-2
    """

    # Initial printout.
    if print_flag:
        print("\n")
        print("Method of Multipliers")
        print("~~~~~~~~~~~~~~~~~~~~~")

    # Set up the minimiser instance.  (Renamed from 'min', which shadowed the builtin.)
    minimiser = Method_of_multipliers(func, dfunc, d2func, args, x0, min_options, A, b, l, u, c, dc, d2c, lambda0, init_lambda, mu0, epsilon0, gamma0, scale_mu, scale_epsilon, scale_gamma, func_tol, grad_tol, maxiter, inner_maxiter, full_output, print_flag)

    # Abort if the constraint or sub-algorithm setup failed.
    if minimiser.init_failure:
        return None

    # Run the constrained minimisation and return its results.
    results = minimiser.minimise()
    return results
145
146
147
def __init__(self, func, dfunc, d2func, args, x0, min_options, A, b, l, u, c, dc, d2c, lambda0, init_lambda, mu0, epsilon0, gamma0, scale_mu, scale_epsilon, scale_gamma, func_tol, grad_tol, maxiter, inner_maxiter, full_output, print_flag):
    """Class for the method of multipliers (augmented Lagrangian) minimisation.

    Unless you know what you are doing, you should call the function
    'method_of_multipliers' rather than using this class directly.
    """

    # Import the generic minimisation code here rather than at module level —
    # presumably to avoid a circular import at load time (TODO confirm).
    from generic import generic_minimise
    self.generic_minimise = generic_minimise

    # Linear constraints (A.x >= b): wrap A and b in a Constraint_linear
    # instance which supplies the constraint function and gradient.
    if A is not None and b is not None:
        self.A = A
        self.b = b
        self.constraint_linear = Constraint_linear(self.A, self.b)
        self.c = self.constraint_linear.func
        self.dc = self.constraint_linear.dfunc
        self.d2c = None
        # Linear constraints have zero Hessians, so the simplified augmented
        # Lagrangian Hessian can be used.
        self.func_d2LA = self.func_d2LA_simple
        self.m = len(self.b)
        if print_flag >= 2:
            print("Linear constraint matrices.")
            print("A:\n" + repr(self.A))
            print("b:\n" + repr(self.b))

    # Bound constraints (l <= x <= u): not implemented, abort the setup.
    elif l is not None and u is not None:
        print("Bound constraints are not implemented yet.")
        self.init_failure = 1
        return

    # General constraints (ci(x) >= 0) supplied as callables.
    elif c is not None and dc is not None and d2c is not None:
        self.c = c
        self.dc = dc
        self.d2c = d2c
        # Bug fix: the number of constraints m was never set for general
        # constraints, yet it is used below and by every algorithm method.
        # Determine it from the constraint values at the starting point.
        self.m = len(self.c(*((x0,) + args)))

    # Incorrectly supplied constraints: abort the setup.
    else:
        print("The constraints have been incorrectly supplied.")
        self.init_failure = 1
        return

    # The unconstrained minimisation algorithm and its options.
    if len(min_options) == 0:
        print("The unconstrained minimisation algorithm has not been specified.")
        self.init_failure = 1
        return
    self.min_algor = min_options[0]
    self.min_options = min_options[1:]

    # Store the function arguments and algorithm parameters.
    self.args = args
    self.func = func
    self.dfunc = dfunc
    self.d2func = d2func
    self.xk = x0
    self.mu = mu0
    self.epsilon = epsilon0
    self.gamma = gamma0
    self.scale_mu = scale_mu
    self.scale_epsilon = scale_epsilon
    self.scale_gamma = scale_gamma
    self.func_tol = func_tol
    self.grad_tol = grad_tol
    self.maxiter = maxiter
    self.inner_maxiter = inner_maxiter
    self.full_output = full_output
    self.print_flag = print_flag

    # Prefix prepended to all printout lines.
    self.print_prefix = ""

    # Setup completed successfully.
    self.init_failure = 0

    # Function, gradient, and Hessian evaluation counters.
    self.f_count = 0
    self.g_count = 0
    self.h_count = 0

    # Warning flag, set on abnormal termination.
    self.warning = None

    # Initial Lagrange multipliers: either taken from lambda0, or set to
    # init_lambda for each constraint violated (or active) at x0.
    if lambda0 is None:
        self.lambda_k = zeros(self.m, Float64)
        self.ck = self.c(*((self.xk,) + args))
        for i in range(self.m):
            if self.ck[i] <= 0.0:
                self.lambda_k[i] = init_lambda
    else:
        self.lambda_k = lambda0

    # Per-constraint flags: 1 when the quadratic branch of psi is active.
    self.test_str = zeros(self.m)

    # Initial augmented Lagrangian value.
    self.L = self.func_LA(*((self.xk,) + self.args))

    # Set up the convergence tests (provided by the Min base class — presumed).
    self.setup_conv_tests()
261
262
def func_LA(self, *args):
    """The augmented Lagrangian function.

    The equation is:

        L(x, lambda_k; muk) = f(x) + sum(psi(ci(x), lambdai_k; muk))

    where:

                       / -s.t + t^2/(2m)    if t - m.s <= 0,
        psi(t, s; m) = <
                       \\ -m.s^2/2           otherwise.

    args[0] is the parameter vector x; any further args are passed to f.
    Side effects: stores f(x) in self.fk, the constraint values in self.ck,
    and records in self.test_str which branch of psi was taken per constraint.
    """

    # Calculate the function and constraint values at x.
    self.fk = L = self.func(*args)
    self.ck = self.c(args[0])

    # Add the penalty psi for each constraint.
    for i in range(self.m):
        # Quadratic branch: t <= m.s, i.e. the constraint is violated or close to active.
        if self.ck[i] <= self.mu * self.lambda_k[i]:
            L = L - self.lambda_k[i] * self.ck[i] + 0.5 * self.ck[i]**2 / self.mu
            self.test_str[i] = 1
        # Constant branch: the constraint is safely satisfied.
        else:
            L = L - 0.5 * self.mu * self.lambda_k[i]**2
            self.test_str[i] = 0

    # Debugging printout.
    if self.print_flag >= 4:
        print("")
        print("\taug Lagr value:       " + repr(L))
        print("\tfunction value:       " + repr(self.fk))
        print("\tck:                   " + repr(self.ck))
        print("\tMu:                   " + repr(self.mu))
        print("\tck - mu.lambda_k:     " + repr(self.ck - self.mu * self.lambda_k))
        print("\tlambda_k - ck/mu:     " + repr(self.lambda_k - self.ck / self.mu))
        print("\tepsilon:              " + repr(self.epsilon))
        print("\tgamma:                " + repr(self.gamma))
        print("\tLagrange multipliers: " + repr(self.lambda_k))
        print("\tTest structure:       " + repr(self.test_str))

    return L
304
305
def func_dLA(self, *args):
    """The augmented Lagrangian gradient.

    args[0] is the parameter vector x; any further args are passed to df.
    Only constraints flagged active in self.test_str (set by func_LA)
    contribute their gradient term.  Side effect: stores the constraint
    gradients in self.dck.
    """

    # Calculate the function and constraint gradients at x.
    dfk = dL = self.dfunc(*args)
    self.dck = self.dc(args[0])

    # Add the gradient of psi for each active constraint.
    for i in range(self.m):
        if self.test_str[i]:
            dL = dL - (self.lambda_k[i] - self.ck[i] / self.mu) * self.dck[i]

    # Debugging printout.
    if self.print_flag >= 4:
        print("")
        print("\taug Lagr grad:        " + repr(dL))
        print("\tfunction grad:        " + repr(dfk))
        print("\tdck:                  " + repr(self.dck))
        print("\tTest structure:       " + repr(self.test_str))

    return dL
326
327
def func_d2LA(self, *args):
    """The quadratic augmented Lagrangian Hessian.

    args[0] is the parameter vector x; any further args are passed to d2f.
    Uses the constraint gradients stored in self.dck by func_dLA, and
    stores the constraint Hessians in self.d2ck.
    """

    # Calculate the function and constraint Hessians at x.
    d2L = self.d2func(*args)
    self.d2ck = self.d2c(args[0])

    # Add the Hessian of psi for each active constraint.
    for i in range(self.m):
        if self.test_str[i]:
            d2L = d2L + outerproduct(self.dck[i], self.dck[i]) / self.mu - (self.lambda_k[i] - self.ck[i] / self.mu) * self.d2ck[i]

    return d2L
341
342
def func_d2LA_simple(self, *args):
    """The augmented Lagrangian Hessian.

    This function has been simplified by assuming that the constraint Hessian
    is zero (exactly true for linear constraints).  Uses the constraint
    gradients stored in self.dck by func_dLA.
    """

    # Calculate the function Hessian at x (args[0], plus any extra args).
    d2L = d2fk = self.d2func(*args)

    # Add the rank-one term of psi's Hessian for each active constraint.
    for i in range(self.m):
        if self.test_str[i]:
            d2L = d2L + outerproduct(self.dck[i], self.dck[i]) / self.mu

    # Debugging printout.
    if self.print_flag >= 4:
        print("")
        print("\taug Lagr Hess:        " + repr(d2L))
        print("\tfunction Hess:        " + repr(d2fk))
        print("\tTest structure:       " + repr(self.test_str))

    return d2L
364
365
def minimise(self):
    """Method of multipliers algorithm.

    Outer loop of the augmented Lagrangian method: repeatedly minimise the
    augmented Lagrangian with the generic unconstrained minimiser, test for
    convergence, update the Lagrange multipliers (formula 17.58), and shrink
    the penalty parameter mu and the tolerances epsilon and gamma.
    """

    # Start the iteration counters: k outer iterations, j total inner iterations.
    self.k = 0
    self.j = 0

    # Sub-algorithm printout uses one level less verbosity.
    sub_print_flag = self.print_flag
    if sub_print_flag >= 2:
        sub_print_flag = sub_print_flag - 1

    # Iterate until convergence or failure.
    while 1:
        # Print out.
        if self.print_flag:
            print("\n%-3s%-8i%-4s%-65s%-4s%-20s" % ("k:", self.k, "xk:", repr(self.xk), "fk:", repr(self.fk)))
            if self.print_flag >= 2:
                self.printout()
                print("Entering sub-algorithm.")

        # Inner-loop gradient tolerance, tightened as ||c(xk)|| shrinks.
        self.tk = min(self.epsilon, self.gamma * sqrt(dot(self.ck, self.ck)))

        # Cap the inner iterations so the grand total never exceeds maxiter.
        if self.maxiter - self.j < self.inner_maxiter:
            maxiter = self.maxiter - self.j
        else:
            maxiter = self.inner_maxiter

        # Unconstrained minimisation of the augmented Lagrangian.
        results = self.generic_minimise(func=self.func_LA, dfunc=self.func_dLA, d2func=self.func_d2LA, args=self.args, x0=self.xk, min_algor=self.min_algor, min_options=self.min_options, func_tol=None, grad_tol=self.tk, maxiter=maxiter, full_output=1, print_flag=sub_print_flag, print_prefix="\t")
        if results is None:
            return

        # Unpack the results and accumulate the counters.
        self.xk_new, self.L_new, j, f, g, h, self.temp_warning = results
        self.j, self.f_count, self.g_count, self.h_count = self.j + j, self.f_count + f, self.g_count + g, self.h_count + h

        # Maximum number of iterations test.
        if self.j >= self.maxiter - 1:
            self.warning = "Maximum number of iterations reached"
            break

        # Convergence test.  NOTE(review): the gradient dL is only ever
        # computed once (first pass) due to the hasattr guard — confirm this
        # is intentional before changing it.
        if not hasattr(self, 'dL'):
            self.dL = self.func_dLA(*((self.xk_new,) + self.args))
        if self.conv_test(self.L_new, self.L, self.dL):
            break

        # Lagrange multiplier update function (formula 17.58).
        self.ck = self.c(*((self.xk_new,) + self.args))
        for i in range(self.m):
            self.lambda_k[i] = max(self.lambda_k[i] - self.ck[i] / self.mu, 0.0)

        # Shrink the penalty parameter and tolerances.
        self.mu = self.scale_mu * self.mu
        self.epsilon = self.scale_epsilon * self.epsilon
        self.gamma = self.scale_gamma * self.gamma
        if self.mu < 1e-99:
            self.warning = "Mu too small."
            break

        # Iterate: carry the inner minimiser's result over as the new start.
        self.xk = self.xk_new * 1.0
        self.L = self.L_new
        self.k = self.k + 1

    if self.print_flag >= 2:
        self.printout()

    # Return the results; fall back to xk when the first inner minimisation
    # never completed (xk_new unset raises AttributeError).
    if self.full_output:
        try:
            self.fk = self.func(*((self.xk_new,) + self.args))
            return self.xk_new, self.fk, self.j, self.f_count, self.g_count, self.h_count, self.warning
        except AttributeError:
            self.fk = self.func(*((self.xk,) + self.args))
            return self.xk, self.fk, self.j, self.f_count, self.g_count, self.h_count, self.warning
    else:
        try:
            return self.xk_new
        except AttributeError:
            return self.xk
454
455
def printout(self):
    """Print out the current values of the main algorithm data structures."""

    print("aug Lagr value:       " + repr(self.L))
    print("function value:       " + repr(self.fk))
    print("ck:                   " + repr(self.ck))
    print("Mu:                   " + repr(self.mu))
    print("ck - mu.lambda_k:     " + repr(self.ck - self.mu * self.lambda_k))
    print("epsilon:              " + repr(self.epsilon))
    print("gamma:                " + repr(self.gamma))
    print("Lagrange multipliers: " + repr(self.lambda_k))
    print("Test structure:       " + repr(self.test_str))
468