minimise.line_search.more

1 ############################################################################### 2 # # 3 # Copyright (C) 2003 Edward d'Auvergne # 4 # # 5 # This file is part of the program relax. # 6 # # 7 # relax is free software; you can redistribute it and/or modify # 8 # it under the terms of the GNU General Public License as published by # 9 # the Free Software Foundation; either version 2 of the License, or # 10 # (at your option) any later version. # 11 # # 12 # relax is distributed in the hope that it will be useful, # 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 # GNU General Public License for more details. # 16 # # 17 # You should have received a copy of the GNU General Public License # 18 # along with relax; if not, write to the Free Software # 19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # 20 # # 21 ############################################################################### 22 23 24 import sys 25 from math import sqrt 26 from Numeric import copy, dot 27 28 from interpolate import cubic_int, cubic_ext, quadratic_fafbga, quadratic_gagb 29 30 cubic = cubic_int 31 quadratic = quadratic_fafbga 32 secant = quadratic_gagb 33 34

35 -def more_thuente(func, func_prime, args, x, f, g, p, a_init=1.0, a_min=1e-25, a_max=None, a_tol=1e-10, phi_min=-1e3, mu=0.001, eta=0.1, print_flag=0):

36 """A line search algorithm from More and Thuente. 37 38 More, J. J., and Thuente, D. J. 1994, Line search algorithms with guaranteed sufficient 39 decrease. ACM Trans. Math. Softw. 20, 286-307. 40 41 42 Internal variables 43 ~~~~~~~~~~~~~~~~~~ 44 45 a0, the null sequence data structure containing the following keys: 46 'a' - 0 47 'phi' - phi(0) 48 'phi_prime' - phi'(0) 49 50 a, the sequence data structure containing the following keys: 51 'a' - alpha 52 'phi' - phi(alpha) 53 'phi_prime' - phi'(alpha) 54 55 Ik, the interval data structure containing the following keys: 56 'a' - The current interval Ik = [al, au] 57 'phi' - The interval [phi(al), phi(au)] 58 'phi_prime' - The interval [phi'(al), phi'(au)] 59 60 Instead of using the modified function: 61 psi(a) = phi(a) - phi(0) - a.phi'(0), 62 the function: 63 psi(a) = phi(a) - a.phi'(0), 64 was used as the phi(0) component has no effect on the results. 65 """ 66 67 # Initialise values. 68 k = 0 69 f_count = 0 70 g_count = 0 71 mod_flag = 1 72 bracketed = 0 73 a0 = {} 74 a0['a'] = 0.0 75 a0['phi'] = f 76 a0['phi_prime'] = dot(g, p) 77 if not a_min: 78 a_min = 0.0 79 if not a_max: 80 a_max = 4.0*max(1.0,a_init) 81 Ik_lim = [0.0, 5.0*a_init] 82 width = a_max - a_min 83 width2 = 2.0*width 84 85 # Initialise sequence data. 86 a = {} 87 a['a'] = a_init 88 a['phi'] = apply(func, (x + a['a']*p,)+args) 89 a['phi_prime'] = dot(apply(func_prime, (x + a['a']*p,)+args), p) 90 f_count = f_count + 1 91 g_count = g_count + 1 92 93 # Initialise interval data. 94 Ik = {} 95 Ik['a'] = [0.0, 0.0] 96 Ik['phi'] = [a0['phi'], a0['phi']] 97 Ik['phi_prime'] = [a0['phi_prime'], a0['phi_prime']] 98 99 if print_flag: 100 print "\n<Line search initial values>" 101 print_data("Pre", -1, a0, Ik, Ik_lim) 102 103 # Test for errors. 104 if a0['phi_prime'] > 0.0: 105 if print_flag: 106 print "xk = " + `x` 107 print "fk = " + `f` 108 print "dfk = " + `g` 109 print "pk = " + `p` 110 print "dot(dfk, pk) = " + `dot(g, p)` 111 print "a0['phi_prime'] = " + `a0['phi_prime']` 112 raise NameError, "The gradient at point 0 of this line search is positive, ie p is not a descent direction and the line search will not work." 113 if a['a'] < a_min: 114 raise NameError, "Alpha is less than alpha_min, " + `a['a']` + " > " + `a_min` 115 if a['a'] > a_max: 116 raise NameError, "Alpha is greater than alpha_max, " + `a['a']` + " > " + `a_max` 117 118 while 1: 119 if print_flag: 120 print "\n<Line search iteration k = " + `k+1` + " >" 121 print "Bracketed: " + `bracketed` 122 print_data("Initial", k, a, Ik, Ik_lim) 123 124 # Test values. 125 curv = mu * a0['phi_prime'] 126 suff_dec = a0['phi'] + a['a'] * curv 127 128 # Modification flag, 0 - phi, 1 - psi. 129 if mod_flag: 130 if a['phi'] <= suff_dec and a['phi_prime'] >= 0.0: 131 mod_flag = 0 132 133 # Test for convergence using the strong Wolfe conditions. 134 if print_flag: 135 print "Testing for convergence using the strong Wolfe conditions." 136 if a['phi'] <= suff_dec and abs(a['phi_prime']) <= eta * abs(a0['phi_prime']): 137 if print_flag: 138 print "\tYes." 139 print "<Line search has converged>\n" 140 return a['a'], f_count, g_count 141 if print_flag: 142 print "\tNo." 143 144 # Test if limits have been reached. 145 if print_flag: 146 print "Testing if limits have been reached." 147 if a['a'] == a_min: 148 if a['phi'] > suff_dec or a['phi_prime'] >= curv: 149 if print_flag: 150 print "\tYes." 151 print "<Min alpha has been reached>\n" 152 return a['a'], f_count, g_count 153 if a['a'] == a_max: 154 if a['phi'] <= suff_dec and a['phi_prime'] <= curv: 155 if print_flag: 156 print "\tYes." 157 print "<Max alpha has been reached>\n" 158 return a['a'], f_count, g_count 159 if print_flag: 160 print "\tNo." 161 162 if bracketed: 163 # Test for roundoff error. 164 if print_flag: 165 print "Testing for roundoff error." 166 if a['a'] <= Ik_lim[0] or a['a'] >= Ik_lim[1]: 167 if print_flag: 168 print "\tYes." 169 print "<Stopping due to roundoff error>\n" 170 return a['a'], f_count, g_count 171 if print_flag: 172 print "\tNo." 173 174 # Test to see if a_tol has been reached. 175 if print_flag: 176 print "Testing tol." 177 if Ik_lim[1] - Ik_lim[0] <= a_tol * Ik_lim[1]: 178 if print_flag: 179 print "\tYes." 180 print "<Stopping tol>\n" 181 return a['a'], f_count, g_count 182 if print_flag: 183 print "\tNo." 184 185 # Choose a safeguarded ak in set Ik which is a subset of [a_min, a_max], and update the interval Ik. 186 a_new = {} 187 if mod_flag and a['phi'] <= Ik['phi'][0] and a['phi'] > suff_dec: 188 if print_flag: 189 print "Choosing ak and updating the interval Ik using the modified function psi." 190 191 # Calculate the modified function values and gradients at at, al, and au. 192 psi = a['phi'] - curv * a['a'] 193 psi_l = Ik['phi'][0] - curv * Ik['a'][0] 194 psi_u = Ik['phi'][1] - curv * Ik['a'][1] 195 psi_prime = a['phi_prime'] - curv 196 psi_l_prime = Ik['phi_prime'][0] - curv 197 psi_u_prime = Ik['phi_prime'][1] - curv 198 199 a_new['a'], Ik_new, bracketed = update(a, Ik, a['a'], Ik['a'][0], Ik['a'][1], psi, psi_l, psi_u, psi_prime, psi_l_prime, psi_u_prime, bracketed, Ik_lim, print_flag=print_flag) 200 else: 201 if print_flag: 202 print "Choosing ak and updating the interval Ik using the function phi." 203 a_new['a'], Ik_new, bracketed = update(a, Ik, a['a'], Ik['a'][0], Ik['a'][1], a['phi'], Ik['phi'][0], Ik['phi'][1], a['phi_prime'], Ik['phi_prime'][0], Ik['phi_prime'][1], bracketed, Ik_lim, print_flag=print_flag) 204 205 # Bisection step. 206 if bracketed: 207 size = abs(Ik_new['a'][0] - Ik_new['a'][1]) 208 if size >= 0.66 * width2: 209 if print_flag: 210 print "Bisection step." 211 a_new['a'] = 0.5 * (Ik_new['a'][0] + Ik_new['a'][1]) 212 width2 = width 213 width = size 214 215 # Limit. 216 if print_flag: 217 print "Limiting" 218 print " Ik_lim: " + `Ik_lim` 219 if bracketed: 220 if print_flag: 221 print " Bracketed." 222 Ik_lim[0] = min(Ik_new['a'][0], Ik_new['a'][1]) 223 Ik_lim[1] = max(Ik_new['a'][0], Ik_new['a'][1]) 224 else: 225 if print_flag: 226 print " Not bracketed." 227 print " a_new['a']: " + `a_new['a']` 228 print " xtrapl: " + `1.1` 229 Ik_lim[0] = a_new['a'] + 1.1 * (a_new['a'] - Ik_new['a'][0]) 230 Ik_lim[1] = a_new['a'] + 4.0 * (a_new['a'] - Ik_new['a'][0]) 231 if print_flag: 232 print " Ik_lim: " + `Ik_lim` 233 234 if bracketed: 235 if a_new['a'] <= Ik_lim[0] or a_new['a'] >= Ik_lim[1] or Ik_lim[1] - Ik_lim[0] <= a_tol * Ik_lim[1]: 236 if print_flag: 237 print "aaa" 238 a_new['a'] = Ik['a'][0] 239 240 # The step must be between a_min and a_max. 241 if a_new['a'] < a_min: 242 if print_flag: 243 print "The step is below a_min, therefore setting the step length to a_min." 244 a_new['a'] = a_min 245 if a_new['a'] > a_max: 246 if print_flag: 247 print "The step is above a_max, therefore setting the step length to a_max." 248 a_new['a'] = a_max 249 250 # Calculate new values. 251 if print_flag: 252 print "Calculating new values." 253 a_new['phi'] = apply(func, (x + a_new['a']*p,)+args) 254 a_new['phi_prime'] = dot(apply(func_prime, (x + a_new['a']*p,)+args), p) 255 f_count = f_count + 1 256 g_count = g_count + 1 257 258 # Shift data from k+1 to k. 259 k = k + 1 260 if print_flag: 261 print "Bracketed: " + `bracketed` 262 print_data("Final", k, a_new, Ik_new, Ik_lim) 263 a = copy.deepcopy(a_new) 264 Ik = copy.deepcopy(Ik_new)

265 266 267

268 -def print_data(text, k, a, Ik, Ik_lim):

269 """Temp func for debugging.""" 270 271 print text + " data printout:" 272 print " Iteration: " + `k+1` 273 print " a: " + `a['a']` 274 print " phi: " + `a['phi']` 275 print " phi_prime: " + `a['phi_prime']` 276 print " Ik: " + `Ik['a']` 277 print " phi_I: " + `Ik['phi']` 278 print " phi_I_prime: " + `Ik['phi_prime']` 279 print " Ik_lim: " + `Ik_lim`

280 281

282 -def update(a, Ik, at, al, au, ft, fl, fu, gt, gl, gu, bracketed, Ik_lim, d=0.66, print_flag=0):

283 """Trial value selection and interval updating. 284 285 Trial value selection 286 ~~~~~~~~~~~~~~~~~~~~~ 287 288 fl, fu, ft, gl, gu, and gt are the function and gradient values at the interval end points al 289 and au, and at the trial point at. 290 ac is the minimiser of the cubic that interpolates fl, ft, gl, and gt. 291 aq is the minimiser of the quadratic that interpolates fl, ft, and gl. 292 as is the minimiser of the quadratic that interpolates fl, gl, and gt. 293 294 The trial value selection is divided into four cases. 295 296 Case 1: ft > fl. In this case compute ac and aq, and set 297 298 / ac, if |ac - al| < |aq - al|, 299 at+ = < 300 \ 1/2(aq + ac), otherwise. 301 302 303 Case 2: ft <= fl and gt.gl < 0. In this case compute ac and as, and set 304 305 / ac, if |ac - at| >= |as - at|, 306 at+ = < 307 \ as, otherwise. 308 309 310 Case 3: ft <= fl and gt.gl >= 0, and |gt| <= |gl|. In this case at+ is chosen by extrapolating 311 the function values at al and at, so the trial value at+ lies outside th interval with at and al 312 as endpoints. Compute ac and as. 313 314 If the cubic tends to infinity in the direction of the step and the minimum of the cubic is 315 beyound at, set 316 317 / ac, if |ac - at| < |as - at|, 318 at+ = < 319 \ as, otherwise. 320 321 Otherwise set at+ = as. 322 323 324 Redefine at+ by setting 325 326 / min{at + d(au - at), at+}, if at > al. 327 at+ = < 328 \ max{at + d(au - at), at+}, otherwise, 329 330 for some d < 1. 331 332 333 Case 4: ft <= fl and gt.gl >= 0, and |gt| > |gl|. In this case choose at+ as the minimiser of 334 the cubic that interpolates fu, ft, gu, and gt. 335 336 337 Interval updating 338 ~~~~~~~~~~~~~~~~~ 339 340 Given a trial value at in I, the endpoints al+ and au+ of the updated interval I+ are determined 341 as follows: 342 Case U1: If f(at) > f(al), then al+ = al and au+ = at. 343 Case U2: If f(at) <= f(al) and f'(at)(al - at) > 0, then al+ = at and au+ = au. 344 Case U3: If f(at) <= f(al) and f'(at)(al - at) < 0, then al+ = at and au+ = al. 345 """ 346 347 # Trial value selection. 348 349 # Case 1. 350 if ft > fl: 351 if print_flag: 352 print "\tat selection, case 1." 353 # The minimum is bracketed. 354 bracketed = 1 355 356 # Interpolation. 357 ac = cubic(al, at, fl, ft, gl, gt) 358 aq = quadratic(al, at, fl, ft, gl) 359 if print_flag: 360 print "\t\tac: " + `ac` 361 print "\t\taq: " + `aq` 362 363 # Return at+. 364 if abs(ac - al) < abs(aq - al): 365 if print_flag: 366 print "\t\tabs(ac - al) < abs(aq - al), " + `abs(ac - al)` + " < " + `abs(aq - al)` 367 print "\t\tat_new = ac = " + `ac` 368 at_new = ac 369 else: 370 if print_flag: 371 print "\t\tabs(ac - al) >= abs(aq - al), " + `abs(ac - al)` + " >= " + `abs(aq - al)` 372 print "\t\tat_new = 1/2(aq + ac) = " + `0.5*(aq + ac)` 373 at_new = 0.5*(aq + ac) 374 375 376 # Case 2. 377 elif gt * gl < 0.0: 378 if print_flag: 379 print "\tat selection, case 2." 380 # The minimum is bracketed. 381 bracketed = 1 382 383 # Interpolation. 384 ac = cubic(al, at, fl, ft, gl, gt) 385 as = secant(al, at, gl, gt) 386 if print_flag: 387 print "\t\tac: " + `ac` 388 print "\t\tas: " + `as` 389 390 # Return at+. 391 if abs(ac - at) >= abs(as - at): 392 if print_flag: 393 print "\t\tabs(ac - at) >= abs(as - at), " + `abs(ac - at)` + " >= " + `abs(as - at)` 394 print "\t\tat_new = ac = " + `ac` 395 at_new = ac 396 else: 397 if print_flag: 398 print "\t\tabs(ac - at) < abs(as - at), " + `abs(ac - at)` + " < " + `abs(as - at)` 399 print "\t\tat_new = as = " + `as` 400 at_new = as 401 402 403 # Case 3. 404 elif abs(gt) <= abs(gl): 405 if print_flag: 406 print "\tat selection, case 3." 407 408 # Interpolation. 409 ac, beta1, beta2 = cubic_ext(al, at, fl, ft, gl, gt, full_output=1) 410 411 if ac > at and beta2 != 0.0: 412 # Leave ac as ac. 413 if print_flag: 414 print "\t\tac > at and beta2 != 0.0" 415 elif at > al: 416 # Set ac to the upper limit. 417 if print_flag: 418 print "\t\tat > al, " + `at` + " > " + `al` 419 ac = Ik_lim[1] 420 else: 421 # Set ac to the lower limit. 422 ac = Ik_lim[0] 423 424 as = secant(al, at, gl, gt) 425 426 if print_flag: 427 print "\t\tac: " + `ac` 428 print "\t\tas: " + `as` 429 430 # Test if bracketed. 431 if bracketed: 432 if print_flag: 433 print "\t\tBracketed" 434 if abs(ac - at) < abs(as - at): 435 if print_flag: 436 print "\t\t\tabs(ac - at) < abs(as - at), " + `abs(ac - at)` + " < " + `abs(as - at)` 437 print "\t\t\tat_new = ac = " + `ac` 438 at_new = ac 439 else: 440 if print_flag: 441 print "\t\t\tabs(ac - at) >= abs(as - at), " + `abs(ac - at)` + " >= " + `abs(as - at)` 442 print "\t\t\tat_new = as = " + `as` 443 at_new = as 444 445 # Redefine at+. 446 if print_flag: 447 print "\t\tRedefining at+" 448 if at > al: 449 at_new = min(at + d*(au - at), at_new) 450 if print_flag: 451 print "\t\t\tat > al, " + `at` + " > " + `al` 452 print "\t\t\tat_new = " + `at_new` 453 else: 454 at_new = max(at + d*(au - at), at_new) 455 if print_flag: 456 print "\t\t\tat <= al, " + `at` + " <= " + `al` 457 print "\t\t\tat_new = " + `at_new` 458 else: 459 if print_flag: 460 print "\t\tNot bracketed" 461 if abs(ac - at) > abs(as - at): 462 if print_flag: 463 print "\t\t\tabs(ac - at) > abs(as - at), " + `abs(ac - at)` + " > " + `abs(as - at)` 464 print "\t\t\tat_new = ac = " + `ac` 465 at_new = ac 466 else: 467 if print_flag: 468 print "\t\t\tabs(ac - at) <= abs(as - at), " + `abs(ac - at)` + " <= " + `abs(as - at)` 469 print "\t\t\tat_new = as = " + `as` 470 at_new = as 471 472 # Check limits. 473 if print_flag: 474 print "\t\tChecking limits." 475 if at_new < Ik_lim[0]: 476 if print_flag: 477 print "\t\t\tat_new < Ik_lim[0], " + `at_new` + " < " + `Ik_lim[0]` 478 print "\t\t\tat_new = " + `Ik_lim[0]` 479 at_new = Ik_lim[0] 480 if at_new > Ik_lim[1]: 481 if print_flag: 482 print "\t\t\tat_new > Ik_lim[1], " + `at_new` + " > " + `Ik_lim[1]` 483 print "\t\t\tat_new = " + `Ik_lim[1]` 484 at_new = Ik_lim[1] 485 486 487 # Case 4. 488 else: 489 if print_flag: 490 print "\tat selection, case 4." 491 if bracketed: 492 if print_flag: 493 print "\t\tbracketed." 494 at_new = cubic(au, at, fu, ft, gu, gt) 495 if print_flag: 496 print "\t\tat_new = " + `at_new` 497 elif at > al: 498 if print_flag: 499 print "\t\tnot bracketed but at > al, " + `at` + " > " + `al` 500 print "\t\tat_new = " + `Ik_lim[1]` 501 at_new = Ik_lim[1] 502 else: 503 if print_flag: 504 print "\t\tnot bracketed but at <= al, " + `at` + " <= " + `al` 505 print "\t\tat_new = " + `Ik_lim[0]` 506 at_new = Ik_lim[0] 507 508 509 # Interval updating algorithm. 510 Ik_new = copy.deepcopy(Ik) 511 512 if ft > fl: 513 if print_flag: 514 print "\tIk update, case a, ft > fl." 515 Ik_new['a'][1] = at 516 Ik_new['phi'][1] = a['phi'] 517 Ik_new['phi_prime'][1] = a['phi_prime'] 518 elif gt*(al - at) > 0.0: 519 if print_flag: 520 print "\tIk update, case b, gt*(al - at) > 0.0." 521 Ik_new['a'][0] = at 522 Ik_new['phi'][0] = a['phi'] 523 Ik_new['phi_prime'][0] = a['phi_prime'] 524 else: 525 Ik_new['a'][0] = at 526 Ik_new['phi'][0] = a['phi'] 527 Ik_new['phi_prime'][0] = a['phi_prime'] 528 Ik_new['a'][1] = al 529 Ik_new['phi'][1] = Ik['phi'][0] 530 Ik_new['phi_prime'][1] = Ik['phi_prime'][0] 531 532 if print_flag: 533 print "\tIk update, case c." 534 print "\t\tat: " + `at` 535 print "\t\ta['phi']: " + `a['phi']` 536 print "\t\ta['phi_prime']: " + `a['phi_prime']` 537 print "\t\tIk_new['a']: " + `Ik_new['a']` 538 print "\t\tIk_new['phi']: " + `Ik_new['phi']` 539 print "\t\tIk_new['phi_prime']: " + `Ik_new['phi_prime']` 540 541 return at_new, Ik_new, bracketed

542

Source Code for Module minimise.line_search.more_thuente