Package minimise :: Package line_search :: Module nocedal_wright_wolfe
[hide private]
[frames] | no frames]

Source Code for Module minimise.line_search.nocedal_wright_wolfe

  1  from copy import deepcopy 
  2  from Numeric import copy, dot, sqrt 
  3   
  4  from interpolate import cubic_ext, quadratic_fafbga 
  5  quadratic = quadratic_fafbga 
  6   
7 -def nocedal_wright_wolfe(func, func_prime, args, x, f, g, p, a_init=1.0, max_a=1e5, mu=0.001, eta=0.9, tol=1e-10, print_flag=0):
8 """A line search algorithm implemented using the strong Wolfe condittions. 9 10 Algorithm 3.2, page 59, from 'Numerical Optimization' by Jorge Nocedal and Stephen J. Wright, 1999 11 Requires the gradient function. 12 13 These functions require serious debugging and recoding to work properly (especially the safeguarding). 14 15 Function options 16 ~~~~~~~~~~~~~~~~ 17 18 func - The function to minimise. 19 func_prime - The function which returns the gradient vector. 20 args - The tuple of arguments to supply to the functions func and dfunc. 21 x - The parameter vector at minimisation step k. 22 f - The function value at the point x. 23 g - The function gradient vector at the point x. 24 p - The descent direction. 25 a_init - Initial step length. 26 a_max - The maximum value for the step length. 27 mu - Constant determining the slope for the sufficient decrease condition (0 < mu < eta < 1). 28 eta - Constant used for the Wolfe curvature condition (0 < mu < eta < 1). 29 30 """ 31 32 # Initialise values. 33 i = 1 34 f_count = 0 35 g_count = 0 36 a0 = {} 37 a0['a'] = 0.0 38 a0['phi'] = f 39 a0['phi_prime'] = dot(g, p) 40 a_last = deepcopy(a0) 41 a_max = {} 42 a_max['a'] = max_a 43 a_max['phi'] = apply(func, (x + a_max['a']*p,)+args) 44 a_max['phi_prime'] = dot(apply(func_prime, (x + a_max['a']*p,)+args), p) 45 f_count = f_count + 1 46 g_count = g_count + 1 47 48 # Initialise sequence data. 49 a = {} 50 a['a'] = a_init 51 a['phi'] = apply(func, (x + a['a']*p,)+args) 52 a['phi_prime'] = dot(apply(func_prime, (x + a['a']*p,)+args), p) 53 f_count = f_count + 1 54 g_count = g_count + 1 55 56 if print_flag: 57 print "\n<Line search initial values>" 58 print_data("Pre (a0)", i-1, a0) 59 print_data("Pre (a_max)", i-1, a_max) 60 61 while 1: 62 if print_flag: 63 print "<Line search iteration i = " + `i` + " >" 64 print_data("Initial (a)", i, a) 65 print_data("Initial (a_last)", i, a_last) 66 67 # Check if the sufficient decrease condition is violated. 68 # If so, the interval (a(i-1), a) will contain step lengths satisfying the strong Wolfe conditions. 69 if not a['phi'] <= a0['phi'] + mu * a['a'] * a0['phi_prime']: 70 if print_flag: 71 print "\tSufficient decrease condition is violated - zooming" 72 return zoom(func, func_prime, args, f_count, g_count, x, f, g, p, mu, eta, i, a0, a_last, a, tol, print_flag=print_flag) 73 if print_flag: 74 print "\tSufficient decrease condition is OK" 75 76 # Check if the curvature condition is met and if so, return the step length a which satisfies the strong Wolfe conditions. 77 if abs(a['phi_prime']) <= -eta * a0['phi_prime']: 78 if print_flag: 79 print "\tCurvature condition OK, returning a" 80 return a['a'], f_count, g_count 81 if print_flag: 82 print "\tCurvature condition is violated" 83 84 # Check if the gradient is positive. 85 # If so, the interval (a(i-1), a) will contain step lengths satisfying the strong Wolfe conditions. 86 if a['phi_prime'] >= 0.0: 87 if print_flag: 88 print "\tGradient at a['a'] is positive - zooming" 89 # The arguments to zoom are a followed by a_last, because the function value at a_last will be higher than at a. 90 return zoom(func, func_prime, args, f_count, g_count, x, f, g, p, mu, eta, i, a0, a, a_last, tol, print_flag=print_flag) 91 if print_flag: 92 print "\tGradient is negative" 93 94 # The strong Wolfe conditions are not met, and therefore interpolation between a and a_max will be used to find a value for a(i+1). 95 #if print_flag: 96 # print "\tStrong Wolfe conditions are not met, doing quadratic interpolation" 97 #a_new = cubic_ext(a['a'], a_max['a'], a['phi'], a_max['phi'], a['phi_prime'], a_max['phi_prime'], full_output=0) 98 a_new = a['a'] + 0.25 * (a_max['a'] - a['a']) 99 100 # Update. 101 a_last = deepcopy(a) 102 a['a'] = a_new 103 a['phi'] = apply(func, (x + a['a']*p,)+args) 104 a['phi_prime'] = dot(apply(func_prime, (x + a['a']*p,)+args), p) 105 f_count = f_count + 1 106 g_count = g_count + 1 107 i = i + 1 108 if print_flag: 109 print_data("Final (a)", i, a) 110 print_data("Final (a_last)", i, a_last) 111 112 # Test if the difference in function values is less than the tolerance. 113 if abs(a_last['phi'] - a['phi']) <= tol: 114 if print_flag: 115 print "abs(a_last['phi'] - a['phi']) <= tol" 116 return a['a'], f_count, g_count
117 118 127 128
129 -def zoom(func, func_prime, args, f_count, g_count, x, f, g, p, mu, eta, i, a0, a_lo, a_hi, tol, print_flag=0):
130 """Find the minimum function value in the open interval (a_lo, a_hi) 131 132 Algorithm 3.3, page 60, from 'Numerical Optimization' by Jorge Nocedal and Stephen J. Wright, 1999 133 """ 134 135 # Initialize aj. 136 aj = {} 137 j = 0 138 aj_last = deepcopy(a_lo) 139 140 while 1: 141 if print_flag: 142 print "\n<Zooming iterate j = " + `j` + " >" 143 144 # Interpolate to find a trial step length aj between a_lo and a_hi. 145 aj_new = quadratic(a_lo['a'], a_hi['a'], a_lo['phi'], a_hi['phi'], a_lo['phi_prime']) 146 147 # Safeguarding aj['a'] 148 aj['a'] = max(aj_last['a'] + 0.66*(a_hi['a'] - aj_last['a']), aj_new) 149 150 # Calculate the function and gradient value at aj['a']. 151 aj['phi'] = apply(func, (x + aj['a']*p,)+args) 152 aj['phi_prime'] = dot(apply(func_prime, (x + aj['a']*p,)+args), p) 153 f_count = f_count + 1 154 g_count = g_count + 1 155 156 if print_flag: 157 print_data("a_lo", i, a_lo) 158 print_data("a_hi", i, a_hi) 159 print_data("aj", i, aj) 160 161 # Check if the sufficient decrease condition is violated. 162 if not aj['phi'] <= a0['phi'] + mu * aj['a'] * a0['phi_prime']: 163 a_hi = deepcopy(aj) 164 else: 165 # Check if the curvature condition is met and if so, return the step length ai which satisfies the strong Wolfe conditions. 166 if abs(aj['phi_prime']) <= -eta * a0['phi_prime']: 167 if print_flag: 168 print "aj: " + `aj` 169 print "<Finished zooming>" 170 return aj['a'], f_count, g_count 171 172 # Determine if a_hi needs to be reset. 173 if aj['phi_prime'] * (a_hi['a'] - a_lo['a']) >= 0.0: 174 a_hi = deepcopy(a_lo) 175 176 a_lo = deepcopy(aj) 177 178 # Test if the difference in function values is less than the tolerance. 179 if abs(aj_last['phi'] - aj['phi']) <= tol: 180 if print_flag: 181 print "abs(aj_last['phi'] - aj['phi']) <= tol" 182 print "<Finished zooming>" 183 return aj['a'], f_count, g_count 184 185 # Update. 186 aj_last = deepcopy(aj) 187 j = j + 1
188