1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """Method of multipliers or augmented Lagrangian method optimization constraint algorithm.
25
26 This file is part of the U{minfx optimisation library<https://sourceforge.net/projects/minfx>}.
27 """
28
29
# Third-party imports.
from numpy import dot, float64, outer, sqrt, zeros
from re import match

# minfx package imports.
from minfx.base_classes import print_iter, Min
from minfx.constraint_linear import Constraint_linear
37
38
39 -def method_of_multipliers(func=None, dfunc=None, d2func=None, args=(), x0=None, min_options=(), A=None, b=None, l=None, u=None, c=None, dc=None, d2c=None, lambda0=None, init_lambda=1e4, mu0=1e-5, epsilon0=1e-2, gamma0=1e-2, scale_mu=0.5, scale_epsilon=1e-2, scale_gamma=1e-2, func_tol=1e-25, grad_tol=None, maxiter=1e6, inner_maxiter=500, full_output=0, print_flag=0):
40 """The method of multipliers, also known as the augmented Lagrangian method.
41
42 Page 515 from 'Numerical Optimization' by Jorge Nocedal and Stephen J. Wright, 1999, 2nd ed. The algorithm is:
43
44 - Given u0 > 0, tolerance t0 > 0, starting points x0s and lambda0
45 - while 1:
46 - Find an approximate minimiser xk of LA(.,lambdak; uk), starting at xks, and terminating when the augmented Lagrangian gradient <= tk
47 - Final convergence test
48 - Update Lagrange multipliers using formula 17.58
49 - Choose new penalty parameter uk+1 within (0, uk)
50 - Set starting point for the next iteration to xk+1s = xk
51 - k = k + 1
52
53
54 Three types of inequality constraint are supported. These are linear, bound, and general constraints and must be setup as follows. The vector x is the vector of model parameters. Don't use bound constraints yet as this code is incomplete!
55
56 Equality constraints are currently unimplemented.
57
58
59 Linear constraints
60 ==================
61
62 These are defined as::
63
64 A.x >= b
65
66 where:
67
68 - A is an m*n matrix where the rows are the transposed vectors, ai, of length n. The elements of ai are the coefficients of the model parameters.
69 - x is the vector of model parameters of dimension n.
70 - b is the vector of scalars of dimension m.
71 - m is the number of constraints.
72 - n is the number of model parameters.
73
74 E.g. if 0 <= q <= 1, q >= 1 - 2r, and 0 <= r, then::
75
76 | 1 0 | | 0 |
77 | | | |
78 |-1 0 | | q | | -1 |
79 | | . | | >= | |
80 | 1 2 | | r | | 1 |
81 | | | |
82 | 0 1 | | 2 |
83
84 To use linear constraints both the matrix A and vector b need to be supplied.
85
86
87 Bound constraints
88 =================
89
90 Bound constraints are defined as::
91
92 l <= x <= u,
93
94 where l and u are the vectors of lower and upper bounds respectively. E.g. if 0 <= q <= 1, r >= 0, s <= 3, then::
95
96 | 0 | | q | | 1 |
97 | 0 | <= | r | <= | inf |
98 |-inf | | s | | 3 |
99
100 To use bound constraints both vectors l and u need to be supplied.
101
102
103 General constraints
104 ===================
105
106 These are defined as::
107
108 ci(x) >= 0,
109
110 where ci(x) are the constraint functions.
111
112 To use general constrains the functions c, dc, and d2c need to be supplied. The function c is the constraint function which should return the vector of constraint values. The function dc is the constraint gradient function which should return the matrix of constraint gradients. The function d2c is the constraint Hessian function which should return the 3D matrix of constraint Hessians.
113
114
115 Initial values
116 ==============
117
118 These are the default initial values::
119
120 mu0 = 1e-5
121 epsilon0 = 1e-2
122 gamma0 = 1e-2
123 scale_mu = 0.5
124 scale_epsilon = 1e-2
125 scale_gamma = 1e-2
126 """
127
128 if print_flag:
129 print("\n")
130 print("Method of Multipliers")
131 print("~~~~~~~~~~~~~~~~~~~~~")
132 min = Method_of_multipliers(func, dfunc, d2func, args, x0, min_options, A, b, l, u, c, dc, d2c, lambda0, init_lambda, mu0, epsilon0, gamma0, scale_mu, scale_epsilon, scale_gamma, func_tol, grad_tol, maxiter, inner_maxiter, full_output, print_flag)
133 if min.init_failure:
134 return None
135 results = min.minimise()
136 return results
137
138
139
141 - def __init__(self, func, dfunc, d2func, args, x0, min_options, A, b, l, u, c, dc, d2c, lambda0, init_lambda, mu0, epsilon0, gamma0, scale_mu, scale_epsilon, scale_gamma, func_tol, grad_tol, maxiter, inner_maxiter, full_output, print_flag):
142 """Class for Newton minimisation specific functions.
143
144 Unless you know what you are doing, you should call the function 'method_of_multipliers' rather than using this class.
145 """
146
147
148
149
150
151 from minfx.generic import generic_minimise
152 self.generic_minimise = generic_minimise
153
154
155 if A != None and b != None:
156 self.A = A
157 self.b = b
158 self.constraint_linear = Constraint_linear(self.A, self.b)
159 self.c = self.constraint_linear.func
160 self.dc = self.constraint_linear.dfunc
161 self.d2c = None
162 self.func_d2LA = self.func_d2LA_simple
163 self.m = len(self.b)
164 if print_flag >= 2:
165 print("Linear constraint matrices.")
166 print("A:\n" + repr(self.A))
167 print("b:\n" + repr(self.b))
168
169
170 if dfunc == None:
171 print("The essential gradient function has not been supplied.")
172 self.init_failure = 1
173 return
174
175
176 elif l != None and u != None:
177 print("Bound constraints are not implemented yet.")
178 self.init_failure = 1
179 return
180 self.l = l
181 self.u = u
182
183
184
185
186 self.m = 2.0*len(self.l)
187
188
189 elif c != None and dc != None and d2c != None:
190 self.c = c
191 self.dc = dc
192 self.d2c = d2c
193
194
195 else:
196 print("The constraints have been incorrectly supplied.")
197 self.init_failure = 1
198 return
199
200
201 if len(min_options) == 0:
202 print("The unconstrained minimisation algorithm has not been specified.")
203 self.init_failure = 1
204 return
205 self.min_algor = min_options[0]
206 self.min_options = min_options[1:]
207
208
209 self.args = args
210 self.func = func
211 self.dfunc = dfunc
212 self.d2func = d2func
213 self.xk = x0
214 self.mu = mu0
215 self.epsilon = epsilon0
216 self.gamma = gamma0
217 self.scale_mu = scale_mu
218 self.scale_epsilon = scale_epsilon
219 self.scale_gamma = scale_gamma
220 self.func_tol = func_tol
221 self.grad_tol = grad_tol
222 self.maxiter = maxiter
223 self.inner_maxiter = inner_maxiter
224 self.full_output = full_output
225 self.print_flag = print_flag
226
227
228 self.print_prefix = ""
229
230
231 self.init_failure = 0
232
233
234 self.f_count = 0
235 self.g_count = 0
236 self.h_count = 0
237
238
239 self.warning = None
240
241
242 if lambda0 == None:
243 self.lambda_k = zeros(self.m, float64)
244 self.ck = self.c(*(self.xk,)+args)
245 for i in range(self.m):
246
247 if self.ck[i] <= 0.0:
248 self.lambda_k[i] = init_lambda
249 else:
250 self.lambda_k = lambda0
251
252
253 self.test_str = zeros(self.m)
254 self.L = self.func_LA(*(self.xk,)+self.args)
255
256
257 self.setup_conv_tests()
258
259
261 """The augmented Lagrangian function.
262
263 The equation is::
264
265 L(x, lambda_k; muk) = f(x) + sum(psi(ci(x), lambdai_k; muk))
266
267 where::
268
269 / -s.t + t^2/(2m) if t - ms <= 0,
270 psi(t, s; m) = <
271 \ -ms^2/2 otherwise.
272 """
273
274
275 self.fk = L = self.func(*(args[0],)+args[1:])
276 self.ck = self.c(*(args[0],))
277
278
279 for i in range(self.m):
280 if self.ck[i] <= self.mu * self.lambda_k[i]:
281 L = L - self.lambda_k[i] * self.ck[i] + 0.5 * self.ck[i]**2 / self.mu
282 self.test_str[i] = 1
283 else:
284 L = L - 0.5 * self.mu * self.lambda_k[i]**2
285 self.test_str[i] = 0
286
287 if self.print_flag >= 4:
288 print("")
289 print("\taug Lagr value: " + repr(L))
290 print("\tfunction value: " + repr(self.fk))
291 print("\tck: " + repr(self.ck))
292 print("\tMu: " + repr(self.mu))
293 print("\tck - mu.lambda_k: " + repr(self.ck - self.mu * self.lambda_k))
294 print("\tlambda_k - ck/mu: " + repr(self.lambda_k - self.ck / self.mu))
295 print("\tepsilon: " + repr(self.epsilon))
296 print("\tgamma: " + repr(self.gamma))
297 print("\tLagrange multipliers: " + repr(self.lambda_k))
298 print("\tTest structure: " + repr(self.test_str))
299
300 return L
301
302
304 """The augmented Lagrangian gradient."""
305
306
307 dfk = dL = self.dfunc(*(args[0],)+args[1:])
308 self.dck = self.dc(*(args[0],))
309
310
311 for i in range(self.m):
312 if self.test_str[i]:
313 dL = dL - (self.lambda_k[i] - self.ck[i] / self.mu) * self.dck[i]
314
315 if self.print_flag >= 4:
316 print("")
317 print("\taug Lagr grad: " + repr(dL))
318 print("\tfunction grad: " + repr(dfk))
319 print("\tdck: " + repr(self.dck))
320 print("\tTest structure: " + repr(self.test_str))
321
322 return dL
323
324
326 """The quadratic augmented Lagrangian Hessian."""
327
328
329 d2L = self.d2func(*(args[0],)+args[1:])
330 self.d2ck = self.d2c(*(args[0],))
331
332
333 for i in range(self.m):
334 if self.test_str[i]:
335 d2L = d2L + outer(self.dck[i], self.dck[i]) / self.mu - (self.lambda_k[i] - self.ck[i] / self.mu) * self.d2ck[i]
336
337 return d2L
338
339
341 """The augmented Lagrangian Hessian.
342
343 This function has been simplified by assuming that the constraint Hessian is zero.
344 """
345
346
347 d2L = d2fk = self.d2func(*(args[0],)+args[1:])
348
349
350 for i in range(self.m):
351 if self.test_str[i]:
352 d2L = d2L + outer(self.dck[i], self.dck[i]) / self.mu
353
354 if self.print_flag >= 4:
355 print("")
356 print("\taug Lagr Hess: " + repr(d2L))
357 print("\tfunction Hess: " + repr(d2fk))
358 print("\tTest structure: " + repr(self.test_str))
359
360 return d2L
361
362
364 """Method of multipliers algorithm."""
365
366
367 self.k = 0
368 self.j = 0
369
370
371 sub_print_flag = self.print_flag
372 if sub_print_flag >= 2:
373 sub_print_flag = sub_print_flag - 1
374
375
376 while True:
377
378 if self.print_flag:
379 print_iter(self.k, self.xk, self.fk)
380 if self.print_flag >= 2:
381 self.printout()
382 print("Entering sub-algorithm.")
383
384
385 self.tk = min(self.epsilon, self.gamma*sqrt(dot(self.ck, self.ck)))
386
387
388 if self.maxiter - self.j < self.inner_maxiter:
389 maxiter = self.maxiter - self.j
390 else:
391 maxiter = self.inner_maxiter
392
393
394 results = self.generic_minimise(func=self.func_LA, dfunc=self.func_dLA, d2func=self.func_d2LA, args=self.args, x0=self.xk, min_algor=self.min_algor, min_options=self.min_options, func_tol=None, grad_tol=self.tk, maxiter=maxiter, full_output=1, print_flag=sub_print_flag, print_prefix="\t")
395 if results == None:
396 return
397
398
399 self.xk_new, self.L_new, j, f, g, h, self.temp_warning = results
400 self.j, self.f_count, self.g_count, self.h_count = self.j + j, self.f_count + f, self.g_count + g, self.h_count + h
401
402
403
404
405 if self.j >= self.maxiter - 1:
406 self.warning = "Maximum number of iterations reached"
407 break
408
409
410 if not hasattr(self, 'dL'):
411 self.dL = self.func_dLA(*(self.xk_new,)+self.args)
412 if self.conv_test(self.L_new, self.L, self.dL):
413 break
414
415
416
417
418 self.ck = self.c(*(self.xk_new,)+self.args)
419 for i in range(self.m):
420 self.lambda_k[i] = max(self.lambda_k[i] - self.ck[i] / self.mu, 0.0)
421
422
423 self.mu = self.scale_mu * self.mu
424 self.epsilon = self.scale_epsilon * self.epsilon
425 self.gamma = self.scale_gamma * self.gamma
426 if self.mu < 1e-99:
427 self.warning = "Mu too small."
428 break
429
430
431 self.xk = self.xk_new * 1.0
432 self.L = self.L_new
433 self.k = self.k + 1
434
435 if self.print_flag >= 2:
436 self.printout()
437
438
439 if self.full_output:
440 try:
441 self.fk = self.func(*(self.xk_new,)+self.args)
442 return self.xk_new, self.fk, self.j, self.f_count, self.g_count, self.h_count, self.warning
443 except AttributeError:
444 self.fk = self.func(*(self.xk,)+self.args)
445 return self.xk, self.fk, self.j, self.f_count, self.g_count, self.h_count, self.warning
446 else:
447 try:
448 return self.xk_new
449 except AttributeError:
450 return self.xk
451
452
454 """Function to print out various data structures."""
455
456 print("aug Lagr value: " + repr(self.L))
457 print("function value: " + repr(self.fk))
458 print("ck: " + repr(self.ck))
459 print("Mu: " + repr(self.mu))
460 print("ck - mu.lambda_k: " + repr(self.ck - self.mu * self.lambda_k))
461 print("epsilon: " + repr(self.epsilon))
462 print("gamma: " + repr(self.gamma))
463 print("Lagrange multipliers: " + repr(self.lambda_k))
464 print("Test structure: " + repr(self.test_str))
465