1 from copy import deepcopy
2 from Numeric import copy, dot, sqrt
3
4 from interpolate import cubic_ext, quadratic_fafbga
5 quadratic = quadratic_fafbga
6
def nocedal_wright_interpol(func, func_prime, args, x, f, g, p, a_init=1.0, mu=0.001, print_flag=0):
    """A line search algorithm based on interpolation.

    Pages 56-57, from 'Numerical Optimization' by Jorge Nocedal and Stephen J. Wright, 1999.
    Requires the gradient function.

    Function options
    ~~~~~~~~~~~~~~~~

    func       - The function to minimise.
    func_prime - The function which returns the gradient vector.
    args       - The tuple of arguments to supply to the functions func and dfunc.
    x          - The parameter vector at minimisation step k.
    f          - The function value at the point x.
    g          - The function gradient vector at the point x.
    p          - The descent direction.
    a_init     - Initial step length.
    mu         - Constant determining the slope for the sufficient decrease condition (0 < mu < 1).
    print_flag - When non-zero, debugging data is printed at each iteration.

    Returns the tuple (a, f_count): a step length satisfying the sufficient
    decrease (Armijo) condition, and the number of function evaluations used.
    Raises NameError when p is not a descent direction.
    """

    # Iteration and function-call counters.
    i = 1
    f_count = 0

    # Data for the zero step length: phi(0) = f(x) and phi'(0) = g.p.
    a0 = {}
    a0['a'] = 0.0
    a0['phi'] = f
    a0['phi_prime'] = dot(g, p)

    # Data for the initial trial step length a_init.
    a = {}
    a['a'] = a_init
    a['phi'] = func(*((x + a['a']*p,) + args))
    f_count = f_count + 1

    if print_flag:
        print("\n<Line search initial values>")
        print_data("Pre (a0)", i-1, a0)

    # The algorithm is only valid along descent directions.
    if a0['phi_prime'] >= 0.0:
        raise NameError("The gradient at point 0 of this line search is positive, ie p is not a descent direction and the line search will not work.")

    # Sufficient decrease already satisfied by the initial step length.
    if a['phi'] <= a0['phi'] + mu * a['a'] * a0['phi_prime']:
        return a['a'], f_count

    # Preserve the initial trial point before overwriting it.
    a_last = deepcopy(a)

    # Minimiser of the quadratic model through phi(0), phi'(0) and phi(a_init)
    # (Nocedal & Wright eq. 3.57).
    a_new = - 0.5 * a0['phi_prime'] * a['a']**2 / (a['phi'] - a0['phi'] - a0['phi_prime']*a['a'])
    a['a'] = a_new
    a['phi'] = func(*((x + a['a']*p,) + args))
    f_count = f_count + 1

    # Sufficient decrease satisfied by the quadratic minimiser.
    if a['phi'] <= a0['phi'] + mu * a['a'] * a0['phi_prime']:
        return a['a'], f_count

    # Repeated cubic interpolation using the last two trial points.
    while True:
        if print_flag:
            print("<Line search iteration i = " + repr(i) + " >")
            print_data("Initial (a)", i, a)
            print_data("Initial (a_last)", i, a_last)

        # Cubic model coefficients (eq. 3.58):
        # phi_c(a) = fact_a*a^3 + fact_b*a^2 + phi'(0)*a + phi(0).
        beta1 = 1.0 / (a_last['a']**2 * a['a']**2 * (a['a'] - a_last['a']))
        beta2 = a['phi'] - a0['phi'] - a0['phi_prime'] * a['a']
        beta3 = a_last['phi'] - a0['phi'] - a0['phi_prime'] * a_last['a']

        fact_a = beta1 * (a_last['a']**2 * beta2 - a['a']**2 * beta3)
        fact_b = beta1 * (-a_last['a']**3 * beta2 + a['a']**3 * beta3)

        # Minimiser of the cubic model (eq. 3.59).
        a_new = {}
        a_new['a'] = (-fact_b + sqrt(fact_b**2 - 3.0 * fact_a * a0['phi_prime'])) / (3.0 * fact_a)
        a_new['phi'] = func(*((x + a_new['a']*p,) + args))
        f_count = f_count + 1

        # Test the sufficient decrease condition at the cubic minimiser.
        if a_new['phi'] <= a0['phi'] + mu * a_new['a'] * a0['phi_prime']:
            return a_new['a'], f_count

        # Safeguard: when the new step is too close to, or too much smaller
        # than, its predecessor, fall back to bisection (a_new = a/2).
        if a['a'] - a_new['a'] > 0.5 * a['a'] or 1.0 - a_new['a']/a['a'] < 0.9:
            a_new['a'] = 0.5 * a['a']
            a_new['phi'] = func(*((x + a_new['a']*p,) + args))
            f_count = f_count + 1

        # Shift the data for the next interpolation round.
        a_last = deepcopy(a)
        a = deepcopy(a_new)

        # Bug fix: the iteration counter was never incremented, so the
        # debugging printout always reported i = 1.
        i = i + 1
100
def print_data(text, k, a):
    """Temp func for debugging.

    Print the step length data point 'a' for iteration k.  The dict 'a'
    always carries the keys 'a' and 'phi'; only the zero point a0 also
    carries 'phi_prime', so that key is read with .get() — the previous
    unconditional a['phi_prime'] lookup raised KeyError for the trial
    points built inside the interpolation loop.
    """
    print(text + " data printout:")
    print("   Iteration:   " + repr(k))
    print("   a:           " + repr(a['a']))
    print("   phi:         " + repr(a['phi']))
    print("   phi_prime:   " + repr(a.get('phi_prime')))
109