1 from copy import deepcopy
2 from Numeric import copy, dot, sqrt
3
4 from interpolate import cubic_ext, quadratic_fafbga
5 quadratic = quadratic_fafbga
6
7 -def nocedal_wright_wolfe(func, func_prime, args, x, f, g, p, a_init=1.0, max_a=1e5, mu=0.001, eta=0.9, tol=1e-10, print_flag=0):
8 """A line search algorithm implemented using the strong Wolfe condittions.
9
10 Algorithm 3.2, page 59, from 'Numerical Optimization' by Jorge Nocedal and Stephen J. Wright, 1999
11 Requires the gradient function.
12
13 These functions require serious debugging and recoding to work properly (especially the safeguarding).
14
15 Function options
16 ~~~~~~~~~~~~~~~~
17
18 func - The function to minimise.
19 func_prime - The function which returns the gradient vector.
20 args - The tuple of arguments to supply to the functions func and dfunc.
21 x - The parameter vector at minimisation step k.
22 f - The function value at the point x.
23 g - The function gradient vector at the point x.
24 p - The descent direction.
25 a_init - Initial step length.
26 a_max - The maximum value for the step length.
27 mu - Constant determining the slope for the sufficient decrease condition (0 < mu < eta < 1).
28 eta - Constant used for the Wolfe curvature condition (0 < mu < eta < 1).
29
30 """
31
32
33 i = 1
34 f_count = 0
35 g_count = 0
36 a0 = {}
37 a0['a'] = 0.0
38 a0['phi'] = f
39 a0['phi_prime'] = dot(g, p)
40 a_last = deepcopy(a0)
41 a_max = {}
42 a_max['a'] = max_a
43 a_max['phi'] = apply(func, (x + a_max['a']*p,)+args)
44 a_max['phi_prime'] = dot(apply(func_prime, (x + a_max['a']*p,)+args), p)
45 f_count = f_count + 1
46 g_count = g_count + 1
47
48
49 a = {}
50 a['a'] = a_init
51 a['phi'] = apply(func, (x + a['a']*p,)+args)
52 a['phi_prime'] = dot(apply(func_prime, (x + a['a']*p,)+args), p)
53 f_count = f_count + 1
54 g_count = g_count + 1
55
56 if print_flag:
57 print "\n<Line search initial values>"
58 print_data("Pre (a0)", i-1, a0)
59 print_data("Pre (a_max)", i-1, a_max)
60
61 while 1:
62 if print_flag:
63 print "<Line search iteration i = " + `i` + " >"
64 print_data("Initial (a)", i, a)
65 print_data("Initial (a_last)", i, a_last)
66
67
68
69 if not a['phi'] <= a0['phi'] + mu * a['a'] * a0['phi_prime']:
70 if print_flag:
71 print "\tSufficient decrease condition is violated - zooming"
72 return zoom(func, func_prime, args, f_count, g_count, x, f, g, p, mu, eta, i, a0, a_last, a, tol, print_flag=print_flag)
73 if print_flag:
74 print "\tSufficient decrease condition is OK"
75
76
77 if abs(a['phi_prime']) <= -eta * a0['phi_prime']:
78 if print_flag:
79 print "\tCurvature condition OK, returning a"
80 return a['a'], f_count, g_count
81 if print_flag:
82 print "\tCurvature condition is violated"
83
84
85
86 if a['phi_prime'] >= 0.0:
87 if print_flag:
88 print "\tGradient at a['a'] is positive - zooming"
89
90 return zoom(func, func_prime, args, f_count, g_count, x, f, g, p, mu, eta, i, a0, a, a_last, tol, print_flag=print_flag)
91 if print_flag:
92 print "\tGradient is negative"
93
94
95
96
97
98 a_new = a['a'] + 0.25 * (a_max['a'] - a['a'])
99
100
101 a_last = deepcopy(a)
102 a['a'] = a_new
103 a['phi'] = apply(func, (x + a['a']*p,)+args)
104 a['phi_prime'] = dot(apply(func_prime, (x + a['a']*p,)+args), p)
105 f_count = f_count + 1
106 g_count = g_count + 1
107 i = i + 1
108 if print_flag:
109 print_data("Final (a)", i, a)
110 print_data("Final (a_last)", i, a_last)
111
112
113 if abs(a_last['phi'] - a['phi']) <= tol:
114 if print_flag:
115 print "abs(a_last['phi'] - a['phi']) <= tol"
116 return a['a'], f_count, g_count
117
118
120 "Temp func for debugging."
121
122 print text + " data printout:"
123 print " Iteration: " + `k`
124 print " a: " + `a['a']`
125 print " phi: " + `a['phi']`
126 print " phi_prime: " + `a['phi_prime']`
127
128
129 -def zoom(func, func_prime, args, f_count, g_count, x, f, g, p, mu, eta, i, a0, a_lo, a_hi, tol, print_flag=0):
130 """Find the minimum function value in the open interval (a_lo, a_hi)
131
132 Algorithm 3.3, page 60, from 'Numerical Optimization' by Jorge Nocedal and Stephen J. Wright, 1999
133 """
134
135
136 aj = {}
137 j = 0
138 aj_last = deepcopy(a_lo)
139
140 while 1:
141 if print_flag:
142 print "\n<Zooming iterate j = " + `j` + " >"
143
144
145 aj_new = quadratic(a_lo['a'], a_hi['a'], a_lo['phi'], a_hi['phi'], a_lo['phi_prime'])
146
147
148 aj['a'] = max(aj_last['a'] + 0.66*(a_hi['a'] - aj_last['a']), aj_new)
149
150
151 aj['phi'] = apply(func, (x + aj['a']*p,)+args)
152 aj['phi_prime'] = dot(apply(func_prime, (x + aj['a']*p,)+args), p)
153 f_count = f_count + 1
154 g_count = g_count + 1
155
156 if print_flag:
157 print_data("a_lo", i, a_lo)
158 print_data("a_hi", i, a_hi)
159 print_data("aj", i, aj)
160
161
162 if not aj['phi'] <= a0['phi'] + mu * aj['a'] * a0['phi_prime']:
163 a_hi = deepcopy(aj)
164 else:
165
166 if abs(aj['phi_prime']) <= -eta * a0['phi_prime']:
167 if print_flag:
168 print "aj: " + `aj`
169 print "<Finished zooming>"
170 return aj['a'], f_count, g_count
171
172
173 if aj['phi_prime'] * (a_hi['a'] - a_lo['a']) >= 0.0:
174 a_hi = deepcopy(a_lo)
175
176 a_lo = deepcopy(aj)
177
178
179 if abs(aj_last['phi'] - aj['phi']) <= tol:
180 if print_flag:
181 print "abs(aj_last['phi'] - aj['phi']) <= tol"
182 print "<Finished zooming>"
183 return aj['a'], f_count, g_count
184
185
186 aj_last = deepcopy(aj)
187 j = j + 1
188