1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 from Numeric import Float64, dot, outerproduct, sqrt, zeros
25 from re import match
26
27
28 from constraint_linear import Constraint_linear
29 from base_classes import Min
30
31
def method_of_multipliers(func=None, dfunc=None, d2func=None, args=(), x0=None, min_options=(), A=None, b=None, l=None, u=None, c=None, dc=None, d2c=None, lambda0=None, init_lambda=1e4, mu0=1e-5, epsilon0=1e-2, gamma0=1e-2, scale_mu=0.5, scale_epsilon=1e-2, scale_gamma=1e-2, func_tol=1e-25, grad_tol=None, maxiter=1e6, inner_maxiter=500, full_output=0, print_flag=0):
    """The method of multipliers, also known as the augmented Lagrangian method.

    Page 515 from 'Numerical Optimization' by Jorge Nocedal and Stephen J. Wright, 1999, 2nd ed.

    The algorithm is:

        Given u0 > 0, tolerance t0 > 0, starting points x0s and lambda0
        while 1:
            Find an approximate minimiser xk of LA(.,lambdak; uk), starting at xks, and
                terminating when the augmented Lagrangian gradient <= tk
            Final convergence test
            Update Lagrange multipliers using formula 17.58
            Choose new penalty parameter uk+1 within (0, uk)
            Set starting point for the next iteration to xk+1s = xk
            k = k + 1


    Three types of inequality constraint are supported.  These are linear, bound, and general
    constraints and must be setup as follows.  The vector x is the vector of model parameters.
    Don't use bound constraints yet as this code is incomplete!

    Equality constraints are currently unimplemented.


    Linear constraints
    ~~~~~~~~~~~~~~~~~~

    These are defined as:

        A.x >= b

    where:
        A is an m*n matrix where the rows are the transposed vectors, ai, of length n.  The
            elements of ai are the coefficients of the model parameters.

        x is the vector of model parameters of dimension n.

        b is the vector of scalars of dimension m.

        m is the number of constraints.

        n is the number of model parameters.

    eg if 0 <= q <= 1, q >= 1 - 2r, and 0 <= r, then:

        | 1  0 |            |  0 |
        |      |            |    |
        |-1  0 |   | q |    | -1 |
        |      | . |   | >= |    |
        | 1  2 |   | r |    |  1 |
        |      |            |    |
        | 0  1 |            |  2 |

    To use linear constraints both the matrix A and vector b need to be supplied.


    Bound constraints
    ~~~~~~~~~~~~~~~~~

    Bound constraints are defined as:

        l <= x <= u

    where l and u are the vectors of lower and upper bounds respectively.

    eg if 0 <= q <= 1, r >= 0, s <= 3, then:

        |  0   |    | q |    |  1  |
        |  0   | <= | r | <= | inf |
        | -inf |    | s |    |  3  |

    To use bound constraints both vectors l and u need to be supplied.


    General constraints
    ~~~~~~~~~~~~~~~~~~~

    These are defined as:

        ci(x) >= 0

    where ci(x) are the constraint functions.

    To use general constraints the functions c, dc, and d2c need to be supplied.  The function c
    is the constraint function which should return the vector of constraint values.  The
    function dc is the constraint gradient function which should return the matrix of constraint
    gradients.  The function d2c is the constraint Hessian function which should return the 3D
    matrix of constraint Hessians.


    Initial values
    ~~~~~~~~~~~~~~

    These are the default initial values:

        mu0 = 1e-5
        epsilon0 = 1e-2
        gamma0 = 1e-2
        scale_mu = 0.5
        scale_epsilon = 1e-2
        scale_gamma = 1e-2
    """

    # Initial printout.
    if print_flag:
        print("\n")
        print("Method of Multipliers")
        print("~~~~~~~~~~~~~~~~~~~~~")

    # Set up the minimiser instance.  (Renamed from 'min', which shadowed the builtin.)
    minimiser = Method_of_multipliers(func, dfunc, d2func, args, x0, min_options, A, b, l, u, c, dc, d2c, lambda0, init_lambda, mu0, epsilon0, gamma0, scale_mu, scale_epsilon, scale_gamma, func_tol, grad_tol, maxiter, inner_maxiter, full_output, print_flag)

    # Abort if the constraint or sub-algorithm setup failed.
    if minimiser.init_failure:
        return None

    # Run the constrained minimisation and return its results.
    results = minimiser.minimise()
    return results
145
146
147
def __init__(self, func, dfunc, d2func, args, x0, min_options, A, b, l, u, c, dc, d2c, lambda0, init_lambda, mu0, epsilon0, gamma0, scale_mu, scale_epsilon, scale_gamma, func_tol, grad_tol, maxiter, inner_maxiter, full_output, print_flag):
    """Class for the method of multipliers (augmented Lagrangian) minimisation.

    Unless you know what you are doing, you should call the function
    'method_of_multipliers' rather than using this class directly.
    """

    # Import the generic minimisation code here rather than at module level —
    # presumably to avoid a circular import at load time (TODO confirm).
    from generic import generic_minimise
    self.generic_minimise = generic_minimise

    # Linear constraints (A.x >= b): wrap A and b in a Constraint_linear
    # instance which supplies the constraint function and gradient.
    if A is not None and b is not None:
        self.A = A
        self.b = b
        self.constraint_linear = Constraint_linear(self.A, self.b)
        self.c = self.constraint_linear.func
        self.dc = self.constraint_linear.dfunc
        self.d2c = None
        # Linear constraints have zero Hessians, so the simplified augmented
        # Lagrangian Hessian can be used.
        self.func_d2LA = self.func_d2LA_simple
        self.m = len(self.b)
        if print_flag >= 2:
            print("Linear constraint matrices.")
            print("A:\n" + repr(self.A))
            print("b:\n" + repr(self.b))

    # Bound constraints (l <= x <= u): not implemented, abort the setup.
    elif l is not None and u is not None:
        print("Bound constraints are not implemented yet.")
        self.init_failure = 1
        return

    # General constraints (ci(x) >= 0) supplied as callables.
    elif c is not None and dc is not None and d2c is not None:
        self.c = c
        self.dc = dc
        self.d2c = d2c
        # Bug fix: the number of constraints m was never set for general
        # constraints, yet it is used below and by every algorithm method.
        # Determine it from the constraint values at the starting point.
        self.m = len(self.c(*((x0,) + args)))

    # Incorrectly supplied constraints: abort the setup.
    else:
        print("The constraints have been incorrectly supplied.")
        self.init_failure = 1
        return

    # The unconstrained minimisation algorithm and its options.
    if len(min_options) == 0:
        print("The unconstrained minimisation algorithm has not been specified.")
        self.init_failure = 1
        return
    self.min_algor = min_options[0]
    self.min_options = min_options[1:]

    # Store the function arguments and algorithm parameters.
    self.args = args
    self.func = func
    self.dfunc = dfunc
    self.d2func = d2func
    self.xk = x0
    self.mu = mu0
    self.epsilon = epsilon0
    self.gamma = gamma0
    self.scale_mu = scale_mu
    self.scale_epsilon = scale_epsilon
    self.scale_gamma = scale_gamma
    self.func_tol = func_tol
    self.grad_tol = grad_tol
    self.maxiter = maxiter
    self.inner_maxiter = inner_maxiter
    self.full_output = full_output
    self.print_flag = print_flag

    # Prefix prepended to all printout lines.
    self.print_prefix = ""

    # Setup completed successfully.
    self.init_failure = 0

    # Function, gradient, and Hessian evaluation counters.
    self.f_count = 0
    self.g_count = 0
    self.h_count = 0

    # Warning flag, set on abnormal termination.
    self.warning = None

    # Initial Lagrange multipliers: either taken from lambda0, or set to
    # init_lambda for each constraint violated (or active) at x0.
    if lambda0 is None:
        self.lambda_k = zeros(self.m, Float64)
        self.ck = self.c(*((self.xk,) + args))
        for i in range(self.m):
            if self.ck[i] <= 0.0:
                self.lambda_k[i] = init_lambda
    else:
        self.lambda_k = lambda0

    # Per-constraint flags: 1 when the quadratic branch of psi is active.
    self.test_str = zeros(self.m)

    # Initial augmented Lagrangian value.
    self.L = self.func_LA(*((self.xk,) + self.args))

    # Set up the convergence tests (provided by the Min base class — presumed).
    self.setup_conv_tests()
261
262
def func_LA(self, *args):
    """The augmented Lagrangian function.

    The equation is:

        L(x, lambda_k; muk) = f(x) + sum(psi(ci(x), lambdai_k; muk))

    where:

                       / -s.t + t^2/(2m)    if t - m.s <= 0,
        psi(t, s; m) = <
                       \\ -m.s^2/2           otherwise.

    args[0] is the parameter vector x; any further args are passed to f.
    Side effects: stores f(x) in self.fk, the constraint values in self.ck,
    and records in self.test_str which branch of psi was taken per constraint.
    """

    # Calculate the function and constraint values at x.
    self.fk = L = self.func(*args)
    self.ck = self.c(args[0])

    # Add the penalty psi for each constraint.
    for i in range(self.m):
        # Quadratic branch: t <= m.s, i.e. the constraint is violated or close to active.
        if self.ck[i] <= self.mu * self.lambda_k[i]:
            L = L - self.lambda_k[i] * self.ck[i] + 0.5 * self.ck[i]**2 / self.mu
            self.test_str[i] = 1
        # Constant branch: the constraint is safely satisfied.
        else:
            L = L - 0.5 * self.mu * self.lambda_k[i]**2
            self.test_str[i] = 0

    # Debugging printout.
    if self.print_flag >= 4:
        print("")
        print("\taug Lagr value:       " + repr(L))
        print("\tfunction value:       " + repr(self.fk))
        print("\tck:                   " + repr(self.ck))
        print("\tMu:                   " + repr(self.mu))
        print("\tck - mu.lambda_k:     " + repr(self.ck - self.mu * self.lambda_k))
        print("\tlambda_k - ck/mu:     " + repr(self.lambda_k - self.ck / self.mu))
        print("\tepsilon:              " + repr(self.epsilon))
        print("\tgamma:                " + repr(self.gamma))
        print("\tLagrange multipliers: " + repr(self.lambda_k))
        print("\tTest structure:       " + repr(self.test_str))

    return L
304
305
def func_dLA(self, *args):
    """The augmented Lagrangian gradient.

    args[0] is the parameter vector x; any further args are passed to df.
    Only constraints flagged active in self.test_str (set by func_LA)
    contribute their gradient term.  Side effect: stores the constraint
    gradients in self.dck.
    """

    # Calculate the function and constraint gradients at x.
    dfk = dL = self.dfunc(*args)
    self.dck = self.dc(args[0])

    # Add the gradient of psi for each active constraint.
    for i in range(self.m):
        if self.test_str[i]:
            dL = dL - (self.lambda_k[i] - self.ck[i] / self.mu) * self.dck[i]

    # Debugging printout.
    if self.print_flag >= 4:
        print("")
        print("\taug Lagr grad:        " + repr(dL))
        print("\tfunction grad:        " + repr(dfk))
        print("\tdck:                  " + repr(self.dck))
        print("\tTest structure:       " + repr(self.test_str))

    return dL
326
327
def func_d2LA(self, *args):
    """The quadratic augmented Lagrangian Hessian.

    args[0] is the parameter vector x; any further args are passed to d2f.
    Uses the constraint gradients stored in self.dck by func_dLA, and
    stores the constraint Hessians in self.d2ck.
    """

    # Calculate the function and constraint Hessians at x.
    d2L = self.d2func(*args)
    self.d2ck = self.d2c(args[0])

    # Add the Hessian of psi for each active constraint.
    for i in range(self.m):
        if self.test_str[i]:
            d2L = d2L + outerproduct(self.dck[i], self.dck[i]) / self.mu - (self.lambda_k[i] - self.ck[i] / self.mu) * self.d2ck[i]

    return d2L
341
342
def func_d2LA_simple(self, *args):
    """The augmented Lagrangian Hessian.

    This function has been simplified by assuming that the constraint Hessian
    is zero (exactly true for linear constraints).  Uses the constraint
    gradients stored in self.dck by func_dLA.
    """

    # Calculate the function Hessian at x (args[0], plus any extra args).
    d2L = d2fk = self.d2func(*args)

    # Add the rank-one term of psi's Hessian for each active constraint.
    for i in range(self.m):
        if self.test_str[i]:
            d2L = d2L + outerproduct(self.dck[i], self.dck[i]) / self.mu

    # Debugging printout.
    if self.print_flag >= 4:
        print("")
        print("\taug Lagr Hess:        " + repr(d2L))
        print("\tfunction Hess:        " + repr(d2fk))
        print("\tTest structure:       " + repr(self.test_str))

    return d2L
364
365
def minimise(self):
    """Method of multipliers algorithm.

    Outer loop of the augmented Lagrangian method: repeatedly minimise the
    augmented Lagrangian with the generic unconstrained minimiser, test for
    convergence, update the Lagrange multipliers (formula 17.58), and shrink
    the penalty parameter mu and the tolerances epsilon and gamma.
    """

    # Start the iteration counters: k outer iterations, j total inner iterations.
    self.k = 0
    self.j = 0

    # Sub-algorithm printout uses one level less verbosity.
    sub_print_flag = self.print_flag
    if sub_print_flag >= 2:
        sub_print_flag = sub_print_flag - 1

    # Iterate until convergence or failure.
    while 1:
        # Print out.
        if self.print_flag:
            print("\n%-3s%-8i%-4s%-65s%-4s%-20s" % ("k:", self.k, "xk:", repr(self.xk), "fk:", repr(self.fk)))
            if self.print_flag >= 2:
                self.printout()
                print("Entering sub-algorithm.")

        # Inner-loop gradient tolerance, tightened as ||c(xk)|| shrinks.
        self.tk = min(self.epsilon, self.gamma * sqrt(dot(self.ck, self.ck)))

        # Cap the inner iterations so the grand total never exceeds maxiter.
        if self.maxiter - self.j < self.inner_maxiter:
            maxiter = self.maxiter - self.j
        else:
            maxiter = self.inner_maxiter

        # Unconstrained minimisation of the augmented Lagrangian.
        results = self.generic_minimise(func=self.func_LA, dfunc=self.func_dLA, d2func=self.func_d2LA, args=self.args, x0=self.xk, min_algor=self.min_algor, min_options=self.min_options, func_tol=None, grad_tol=self.tk, maxiter=maxiter, full_output=1, print_flag=sub_print_flag, print_prefix="\t")
        if results is None:
            return

        # Unpack the results and accumulate the counters.
        self.xk_new, self.L_new, j, f, g, h, self.temp_warning = results
        self.j, self.f_count, self.g_count, self.h_count = self.j + j, self.f_count + f, self.g_count + g, self.h_count + h

        # Maximum number of iterations test.
        if self.j >= self.maxiter - 1:
            self.warning = "Maximum number of iterations reached"
            break

        # Convergence test.  NOTE(review): the gradient dL is only ever
        # computed once (first pass) due to the hasattr guard — confirm this
        # is intentional before changing it.
        if not hasattr(self, 'dL'):
            self.dL = self.func_dLA(*((self.xk_new,) + self.args))
        if self.conv_test(self.L_new, self.L, self.dL):
            break

        # Lagrange multiplier update function (formula 17.58).
        self.ck = self.c(*((self.xk_new,) + self.args))
        for i in range(self.m):
            self.lambda_k[i] = max(self.lambda_k[i] - self.ck[i] / self.mu, 0.0)

        # Shrink the penalty parameter and tolerances.
        self.mu = self.scale_mu * self.mu
        self.epsilon = self.scale_epsilon * self.epsilon
        self.gamma = self.scale_gamma * self.gamma
        if self.mu < 1e-99:
            self.warning = "Mu too small."
            break

        # Iterate: carry the inner minimiser's result over as the new start.
        self.xk = self.xk_new * 1.0
        self.L = self.L_new
        self.k = self.k + 1

    if self.print_flag >= 2:
        self.printout()

    # Return the results; fall back to xk when the first inner minimisation
    # never completed (xk_new unset raises AttributeError).
    if self.full_output:
        try:
            self.fk = self.func(*((self.xk_new,) + self.args))
            return self.xk_new, self.fk, self.j, self.f_count, self.g_count, self.h_count, self.warning
        except AttributeError:
            self.fk = self.func(*((self.xk,) + self.args))
            return self.xk, self.fk, self.j, self.f_count, self.g_count, self.h_count, self.warning
    else:
        try:
            return self.xk_new
        except AttributeError:
            return self.xk
454
455
def printout(self):
    """Print out the current values of the main algorithm data structures."""

    print("aug Lagr value:       " + repr(self.L))
    print("function value:       " + repr(self.fk))
    print("ck:                   " + repr(self.ck))
    print("Mu:                   " + repr(self.mu))
    print("ck - mu.lambda_k:     " + repr(self.ck - self.mu * self.lambda_k))
    print("epsilon:              " + repr(self.epsilon))
    print("gamma:                " + repr(self.gamma))
    print("Lagrange multipliers: " + repr(self.lambda_k))
    print("Test structure:       " + repr(self.test_str))
468