1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 import sys
25 from math import sqrt
26 from Numeric import copy, dot
27
28 from interpolate import cubic_int, cubic_ext, quadratic_fafbga, quadratic_gagb
29
30 cubic = cubic_int
31 quadratic = quadratic_fafbga
32 secant = quadratic_gagb
33
34
35 -def more_thuente(func, func_prime, args, x, f, g, p, a_init=1.0, a_min=1e-25, a_max=None, a_tol=1e-10, phi_min=-1e3, mu=0.001, eta=0.1, print_flag=0):
36 """A line search algorithm from More and Thuente.
37
38 More, J. J., and Thuente, D. J. 1994, Line search algorithms with guaranteed sufficient
39 decrease. ACM Trans. Math. Softw. 20, 286-307.
40
41
42 Internal variables
43 ~~~~~~~~~~~~~~~~~~
44
45 a0, the null sequence data structure containing the following keys:
46 'a' - 0
47 'phi' - phi(0)
48 'phi_prime' - phi'(0)
49
50 a, the sequence data structure containing the following keys:
51 'a' - alpha
52 'phi' - phi(alpha)
53 'phi_prime' - phi'(alpha)
54
55 Ik, the interval data structure containing the following keys:
56 'a' - The current interval Ik = [al, au]
57 'phi' - The interval [phi(al), phi(au)]
58 'phi_prime' - The interval [phi'(al), phi'(au)]
59
60 Instead of using the modified function:
61 psi(a) = phi(a) - phi(0) - a.phi'(0),
62 the function:
63 psi(a) = phi(a) - a.phi'(0),
64 was used as the phi(0) component has no effect on the results.
65 """
66
67
68 k = 0
69 f_count = 0
70 g_count = 0
71 mod_flag = 1
72 bracketed = 0
73 a0 = {}
74 a0['a'] = 0.0
75 a0['phi'] = f
76 a0['phi_prime'] = dot(g, p)
77 if not a_min:
78 a_min = 0.0
79 if not a_max:
80 a_max = 4.0*max(1.0,a_init)
81 Ik_lim = [0.0, 5.0*a_init]
82 width = a_max - a_min
83 width2 = 2.0*width
84
85
86 a = {}
87 a['a'] = a_init
88 a['phi'] = apply(func, (x + a['a']*p,)+args)
89 a['phi_prime'] = dot(apply(func_prime, (x + a['a']*p,)+args), p)
90 f_count = f_count + 1
91 g_count = g_count + 1
92
93
94 Ik = {}
95 Ik['a'] = [0.0, 0.0]
96 Ik['phi'] = [a0['phi'], a0['phi']]
97 Ik['phi_prime'] = [a0['phi_prime'], a0['phi_prime']]
98
99 if print_flag:
100 print "\n<Line search initial values>"
101 print_data("Pre", -1, a0, Ik, Ik_lim)
102
103
104 if a0['phi_prime'] > 0.0:
105 if print_flag:
106 print "xk = " + `x`
107 print "fk = " + `f`
108 print "dfk = " + `g`
109 print "pk = " + `p`
110 print "dot(dfk, pk) = " + `dot(g, p)`
111 print "a0['phi_prime'] = " + `a0['phi_prime']`
112 raise NameError, "The gradient at point 0 of this line search is positive, ie p is not a descent direction and the line search will not work."
113 if a['a'] < a_min:
114 raise NameError, "Alpha is less than alpha_min, " + `a['a']` + " > " + `a_min`
115 if a['a'] > a_max:
116 raise NameError, "Alpha is greater than alpha_max, " + `a['a']` + " > " + `a_max`
117
118 while 1:
119 if print_flag:
120 print "\n<Line search iteration k = " + `k+1` + " >"
121 print "Bracketed: " + `bracketed`
122 print_data("Initial", k, a, Ik, Ik_lim)
123
124
125 curv = mu * a0['phi_prime']
126 suff_dec = a0['phi'] + a['a'] * curv
127
128
129 if mod_flag:
130 if a['phi'] <= suff_dec and a['phi_prime'] >= 0.0:
131 mod_flag = 0
132
133
134 if print_flag:
135 print "Testing for convergence using the strong Wolfe conditions."
136 if a['phi'] <= suff_dec and abs(a['phi_prime']) <= eta * abs(a0['phi_prime']):
137 if print_flag:
138 print "\tYes."
139 print "<Line search has converged>\n"
140 return a['a'], f_count, g_count
141 if print_flag:
142 print "\tNo."
143
144
145 if print_flag:
146 print "Testing if limits have been reached."
147 if a['a'] == a_min:
148 if a['phi'] > suff_dec or a['phi_prime'] >= curv:
149 if print_flag:
150 print "\tYes."
151 print "<Min alpha has been reached>\n"
152 return a['a'], f_count, g_count
153 if a['a'] == a_max:
154 if a['phi'] <= suff_dec and a['phi_prime'] <= curv:
155 if print_flag:
156 print "\tYes."
157 print "<Max alpha has been reached>\n"
158 return a['a'], f_count, g_count
159 if print_flag:
160 print "\tNo."
161
162 if bracketed:
163
164 if print_flag:
165 print "Testing for roundoff error."
166 if a['a'] <= Ik_lim[0] or a['a'] >= Ik_lim[1]:
167 if print_flag:
168 print "\tYes."
169 print "<Stopping due to roundoff error>\n"
170 return a['a'], f_count, g_count
171 if print_flag:
172 print "\tNo."
173
174
175 if print_flag:
176 print "Testing tol."
177 if Ik_lim[1] - Ik_lim[0] <= a_tol * Ik_lim[1]:
178 if print_flag:
179 print "\tYes."
180 print "<Stopping tol>\n"
181 return a['a'], f_count, g_count
182 if print_flag:
183 print "\tNo."
184
185
186 a_new = {}
187 if mod_flag and a['phi'] <= Ik['phi'][0] and a['phi'] > suff_dec:
188 if print_flag:
189 print "Choosing ak and updating the interval Ik using the modified function psi."
190
191
192 psi = a['phi'] - curv * a['a']
193 psi_l = Ik['phi'][0] - curv * Ik['a'][0]
194 psi_u = Ik['phi'][1] - curv * Ik['a'][1]
195 psi_prime = a['phi_prime'] - curv
196 psi_l_prime = Ik['phi_prime'][0] - curv
197 psi_u_prime = Ik['phi_prime'][1] - curv
198
199 a_new['a'], Ik_new, bracketed = update(a, Ik, a['a'], Ik['a'][0], Ik['a'][1], psi, psi_l, psi_u, psi_prime, psi_l_prime, psi_u_prime, bracketed, Ik_lim, print_flag=print_flag)
200 else:
201 if print_flag:
202 print "Choosing ak and updating the interval Ik using the function phi."
203 a_new['a'], Ik_new, bracketed = update(a, Ik, a['a'], Ik['a'][0], Ik['a'][1], a['phi'], Ik['phi'][0], Ik['phi'][1], a['phi_prime'], Ik['phi_prime'][0], Ik['phi_prime'][1], bracketed, Ik_lim, print_flag=print_flag)
204
205
206 if bracketed:
207 size = abs(Ik_new['a'][0] - Ik_new['a'][1])
208 if size >= 0.66 * width2:
209 if print_flag:
210 print "Bisection step."
211 a_new['a'] = 0.5 * (Ik_new['a'][0] + Ik_new['a'][1])
212 width2 = width
213 width = size
214
215
216 if print_flag:
217 print "Limiting"
218 print " Ik_lim: " + `Ik_lim`
219 if bracketed:
220 if print_flag:
221 print " Bracketed."
222 Ik_lim[0] = min(Ik_new['a'][0], Ik_new['a'][1])
223 Ik_lim[1] = max(Ik_new['a'][0], Ik_new['a'][1])
224 else:
225 if print_flag:
226 print " Not bracketed."
227 print " a_new['a']: " + `a_new['a']`
228 print " xtrapl: " + `1.1`
229 Ik_lim[0] = a_new['a'] + 1.1 * (a_new['a'] - Ik_new['a'][0])
230 Ik_lim[1] = a_new['a'] + 4.0 * (a_new['a'] - Ik_new['a'][0])
231 if print_flag:
232 print " Ik_lim: " + `Ik_lim`
233
234 if bracketed:
235 if a_new['a'] <= Ik_lim[0] or a_new['a'] >= Ik_lim[1] or Ik_lim[1] - Ik_lim[0] <= a_tol * Ik_lim[1]:
236 if print_flag:
237 print "aaa"
238 a_new['a'] = Ik['a'][0]
239
240
241 if a_new['a'] < a_min:
242 if print_flag:
243 print "The step is below a_min, therefore setting the step length to a_min."
244 a_new['a'] = a_min
245 if a_new['a'] > a_max:
246 if print_flag:
247 print "The step is above a_max, therefore setting the step length to a_max."
248 a_new['a'] = a_max
249
250
251 if print_flag:
252 print "Calculating new values."
253 a_new['phi'] = apply(func, (x + a_new['a']*p,)+args)
254 a_new['phi_prime'] = dot(apply(func_prime, (x + a_new['a']*p,)+args), p)
255 f_count = f_count + 1
256 g_count = g_count + 1
257
258
259 k = k + 1
260 if print_flag:
261 print "Bracketed: " + `bracketed`
262 print_data("Final", k, a_new, Ik_new, Ik_lim)
263 a = copy.deepcopy(a_new)
264 Ik = copy.deepcopy(Ik_new)
265
266
267
269 """Temp func for debugging."""
270
271 print text + " data printout:"
272 print " Iteration: " + `k+1`
273 print " a: " + `a['a']`
274 print " phi: " + `a['phi']`
275 print " phi_prime: " + `a['phi_prime']`
276 print " Ik: " + `Ik['a']`
277 print " phi_I: " + `Ik['phi']`
278 print " phi_I_prime: " + `Ik['phi_prime']`
279 print " Ik_lim: " + `Ik_lim`
280
281
282 -def update(a, Ik, at, al, au, ft, fl, fu, gt, gl, gu, bracketed, Ik_lim, d=0.66, print_flag=0):
283 """Trial value selection and interval updating.
284
285 Trial value selection
286 ~~~~~~~~~~~~~~~~~~~~~
287
288 fl, fu, ft, gl, gu, and gt are the function and gradient values at the interval end points al
289 and au, and at the trial point at.
290 ac is the minimiser of the cubic that interpolates fl, ft, gl, and gt.
291 aq is the minimiser of the quadratic that interpolates fl, ft, and gl.
292 as is the minimiser of the quadratic that interpolates fl, gl, and gt.
293
294 The trial value selection is divided into four cases.
295
296 Case 1: ft > fl. In this case compute ac and aq, and set
297
298 / ac, if |ac - al| < |aq - al|,
299 at+ = <
300 \ 1/2(aq + ac), otherwise.
301
302
303 Case 2: ft <= fl and gt.gl < 0. In this case compute ac and as, and set
304
305 / ac, if |ac - at| >= |as - at|,
306 at+ = <
307 \ as, otherwise.
308
309
310 Case 3: ft <= fl and gt.gl >= 0, and |gt| <= |gl|. In this case at+ is chosen by extrapolating
311 the function values at al and at, so the trial value at+ lies outside th interval with at and al
312 as endpoints. Compute ac and as.
313
314 If the cubic tends to infinity in the direction of the step and the minimum of the cubic is
315 beyound at, set
316
317 / ac, if |ac - at| < |as - at|,
318 at+ = <
319 \ as, otherwise.
320
321 Otherwise set at+ = as.
322
323
324 Redefine at+ by setting
325
326 / min{at + d(au - at), at+}, if at > al.
327 at+ = <
328 \ max{at + d(au - at), at+}, otherwise,
329
330 for some d < 1.
331
332
333 Case 4: ft <= fl and gt.gl >= 0, and |gt| > |gl|. In this case choose at+ as the minimiser of
334 the cubic that interpolates fu, ft, gu, and gt.
335
336
337 Interval updating
338 ~~~~~~~~~~~~~~~~~
339
340 Given a trial value at in I, the endpoints al+ and au+ of the updated interval I+ are determined
341 as follows:
342 Case U1: If f(at) > f(al), then al+ = al and au+ = at.
343 Case U2: If f(at) <= f(al) and f'(at)(al - at) > 0, then al+ = at and au+ = au.
344 Case U3: If f(at) <= f(al) and f'(at)(al - at) < 0, then al+ = at and au+ = al.
345 """
346
347
348
349
350 if ft > fl:
351 if print_flag:
352 print "\tat selection, case 1."
353
354 bracketed = 1
355
356
357 ac = cubic(al, at, fl, ft, gl, gt)
358 aq = quadratic(al, at, fl, ft, gl)
359 if print_flag:
360 print "\t\tac: " + `ac`
361 print "\t\taq: " + `aq`
362
363
364 if abs(ac - al) < abs(aq - al):
365 if print_flag:
366 print "\t\tabs(ac - al) < abs(aq - al), " + `abs(ac - al)` + " < " + `abs(aq - al)`
367 print "\t\tat_new = ac = " + `ac`
368 at_new = ac
369 else:
370 if print_flag:
371 print "\t\tabs(ac - al) >= abs(aq - al), " + `abs(ac - al)` + " >= " + `abs(aq - al)`
372 print "\t\tat_new = 1/2(aq + ac) = " + `0.5*(aq + ac)`
373 at_new = 0.5*(aq + ac)
374
375
376
377 elif gt * gl < 0.0:
378 if print_flag:
379 print "\tat selection, case 2."
380
381 bracketed = 1
382
383
384 ac = cubic(al, at, fl, ft, gl, gt)
385 as = secant(al, at, gl, gt)
386 if print_flag:
387 print "\t\tac: " + `ac`
388 print "\t\tas: " + `as`
389
390
391 if abs(ac - at) >= abs(as - at):
392 if print_flag:
393 print "\t\tabs(ac - at) >= abs(as - at), " + `abs(ac - at)` + " >= " + `abs(as - at)`
394 print "\t\tat_new = ac = " + `ac`
395 at_new = ac
396 else:
397 if print_flag:
398 print "\t\tabs(ac - at) < abs(as - at), " + `abs(ac - at)` + " < " + `abs(as - at)`
399 print "\t\tat_new = as = " + `as`
400 at_new = as
401
402
403
404 elif abs(gt) <= abs(gl):
405 if print_flag:
406 print "\tat selection, case 3."
407
408
409 ac, beta1, beta2 = cubic_ext(al, at, fl, ft, gl, gt, full_output=1)
410
411 if ac > at and beta2 != 0.0:
412
413 if print_flag:
414 print "\t\tac > at and beta2 != 0.0"
415 elif at > al:
416
417 if print_flag:
418 print "\t\tat > al, " + `at` + " > " + `al`
419 ac = Ik_lim[1]
420 else:
421
422 ac = Ik_lim[0]
423
424 as = secant(al, at, gl, gt)
425
426 if print_flag:
427 print "\t\tac: " + `ac`
428 print "\t\tas: " + `as`
429
430
431 if bracketed:
432 if print_flag:
433 print "\t\tBracketed"
434 if abs(ac - at) < abs(as - at):
435 if print_flag:
436 print "\t\t\tabs(ac - at) < abs(as - at), " + `abs(ac - at)` + " < " + `abs(as - at)`
437 print "\t\t\tat_new = ac = " + `ac`
438 at_new = ac
439 else:
440 if print_flag:
441 print "\t\t\tabs(ac - at) >= abs(as - at), " + `abs(ac - at)` + " >= " + `abs(as - at)`
442 print "\t\t\tat_new = as = " + `as`
443 at_new = as
444
445
446 if print_flag:
447 print "\t\tRedefining at+"
448 if at > al:
449 at_new = min(at + d*(au - at), at_new)
450 if print_flag:
451 print "\t\t\tat > al, " + `at` + " > " + `al`
452 print "\t\t\tat_new = " + `at_new`
453 else:
454 at_new = max(at + d*(au - at), at_new)
455 if print_flag:
456 print "\t\t\tat <= al, " + `at` + " <= " + `al`
457 print "\t\t\tat_new = " + `at_new`
458 else:
459 if print_flag:
460 print "\t\tNot bracketed"
461 if abs(ac - at) > abs(as - at):
462 if print_flag:
463 print "\t\t\tabs(ac - at) > abs(as - at), " + `abs(ac - at)` + " > " + `abs(as - at)`
464 print "\t\t\tat_new = ac = " + `ac`
465 at_new = ac
466 else:
467 if print_flag:
468 print "\t\t\tabs(ac - at) <= abs(as - at), " + `abs(ac - at)` + " <= " + `abs(as - at)`
469 print "\t\t\tat_new = as = " + `as`
470 at_new = as
471
472
473 if print_flag:
474 print "\t\tChecking limits."
475 if at_new < Ik_lim[0]:
476 if print_flag:
477 print "\t\t\tat_new < Ik_lim[0], " + `at_new` + " < " + `Ik_lim[0]`
478 print "\t\t\tat_new = " + `Ik_lim[0]`
479 at_new = Ik_lim[0]
480 if at_new > Ik_lim[1]:
481 if print_flag:
482 print "\t\t\tat_new > Ik_lim[1], " + `at_new` + " > " + `Ik_lim[1]`
483 print "\t\t\tat_new = " + `Ik_lim[1]`
484 at_new = Ik_lim[1]
485
486
487
488 else:
489 if print_flag:
490 print "\tat selection, case 4."
491 if bracketed:
492 if print_flag:
493 print "\t\tbracketed."
494 at_new = cubic(au, at, fu, ft, gu, gt)
495 if print_flag:
496 print "\t\tat_new = " + `at_new`
497 elif at > al:
498 if print_flag:
499 print "\t\tnot bracketed but at > al, " + `at` + " > " + `al`
500 print "\t\tat_new = " + `Ik_lim[1]`
501 at_new = Ik_lim[1]
502 else:
503 if print_flag:
504 print "\t\tnot bracketed but at <= al, " + `at` + " <= " + `al`
505 print "\t\tat_new = " + `Ik_lim[0]`
506 at_new = Ik_lim[0]
507
508
509
510 Ik_new = copy.deepcopy(Ik)
511
512 if ft > fl:
513 if print_flag:
514 print "\tIk update, case a, ft > fl."
515 Ik_new['a'][1] = at
516 Ik_new['phi'][1] = a['phi']
517 Ik_new['phi_prime'][1] = a['phi_prime']
518 elif gt*(al - at) > 0.0:
519 if print_flag:
520 print "\tIk update, case b, gt*(al - at) > 0.0."
521 Ik_new['a'][0] = at
522 Ik_new['phi'][0] = a['phi']
523 Ik_new['phi_prime'][0] = a['phi_prime']
524 else:
525 Ik_new['a'][0] = at
526 Ik_new['phi'][0] = a['phi']
527 Ik_new['phi_prime'][0] = a['phi_prime']
528 Ik_new['a'][1] = al
529 Ik_new['phi'][1] = Ik['phi'][0]
530 Ik_new['phi_prime'][1] = Ik['phi_prime'][0]
531
532 if print_flag:
533 print "\tIk update, case c."
534 print "\t\tat: " + `at`
535 print "\t\ta['phi']: " + `a['phi']`
536 print "\t\ta['phi_prime']: " + `a['phi_prime']`
537 print "\t\tIk_new['a']: " + `Ik_new['a']`
538 print "\t\tIk_new['phi']: " + `Ik_new['phi']`
539 print "\t\tIk_new['phi_prime']: " + `Ik_new['phi_prime']`
540
541 return at_new, Ik_new, bracketed
542