
Source Code for Module minimise.line_search.nocedal_wright_wolfe

###############################################################################
#                                                                             #
# Copyright (C) 2003 Edward d'Auvergne                                        #
#                                                                             #
# This file is part of the program relax.                                     #
#                                                                             #
# relax is free software; you can redistribute it and/or modify               #
# it under the terms of the GNU General Public License as published by        #
# the Free Software Foundation; either version 2 of the License, or           #
# (at your option) any later version.                                         #
#                                                                             #
# relax is distributed in the hope that it will be useful,                    #
# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
# GNU General Public License for more details.                                #
#                                                                             #
# You should have received a copy of the GNU General Public License           #
# along with relax; if not, write to the Free Software                        #
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA   #
#                                                                             #
###############################################################################

import copy

from Numeric import dot, sqrt

from interpolate import cubic_ext, quadratic_fafbga
quadratic = quadratic_fafbga

def nocedal_wright_wolfe(func, func_prime, args, x, f, g, p, a_init=1.0, max_a=1e5, mu=0.001, eta=0.9, tol=1e-10, print_flag=0):
    """A line search algorithm implemented using the strong Wolfe conditions.

    Algorithm 3.2, page 59, from 'Numerical Optimization' by Jorge Nocedal and Stephen J. Wright,
    1999, 1st ed.

    Requires the gradient function.

    #######################################################################################
    These functions require serious debugging and recoding to work properly (especially the
    safeguarding).
    #######################################################################################

    Function options
    ~~~~~~~~~~~~~~~~

    func        - The function to minimise.
    func_prime  - The function which returns the gradient vector.
    args        - The tuple of arguments to supply to the functions func and func_prime.
    x           - The parameter vector at minimisation step k.
    f           - The function value at the point x.
    g           - The function gradient vector at the point x.
    p           - The descent direction.
    a_init      - Initial step length.
    max_a       - The maximum value for the step length.
    mu          - Constant determining the slope for the sufficient decrease condition
                  (0 < mu < eta < 1).
    eta         - Constant used for the Wolfe curvature condition (0 < mu < eta < 1).
    tol         - The function value difference tolerance used to terminate the search.
    print_flag  - A flag which, if non-zero, causes debugging information to be printed.
    """

    # Initialise values.
    i = 1
    f_count = 0
    g_count = 0
    a0 = {}
    a0['a'] = 0.0
    a0['phi'] = f
    a0['phi_prime'] = dot(g, p)
    a_last = copy.deepcopy(a0)
    a_max = {}
    a_max['a'] = max_a
    a_max['phi'] = apply(func, (x + a_max['a']*p,)+args)
    a_max['phi_prime'] = dot(apply(func_prime, (x + a_max['a']*p,)+args), p)
    f_count = f_count + 1
    g_count = g_count + 1

    # Initialise sequence data.
    a = {}
    a['a'] = a_init
    a['phi'] = apply(func, (x + a['a']*p,)+args)
    a['phi_prime'] = dot(apply(func_prime, (x + a['a']*p,)+args), p)
    f_count = f_count + 1
    g_count = g_count + 1

    if print_flag:
        print "\n<Line search initial values>"
        print_data("Pre (a0)", i-1, a0)
        print_data("Pre (a_max)", i-1, a_max)

    while 1:
        if print_flag:
            print "<Line search iteration i = " + `i` + " >"
            print_data("Initial (a)", i, a)
            print_data("Initial (a_last)", i, a_last)

        # Check if the sufficient decrease condition is violated.
        # If so, the interval (a(i-1), a) will contain step lengths satisfying the strong Wolfe conditions.
        if not a['phi'] <= a0['phi'] + mu * a['a'] * a0['phi_prime']:
            if print_flag:
                print "\tSufficient decrease condition is violated - zooming"
            return zoom(func, func_prime, args, f_count, g_count, x, f, g, p, mu, eta, i, a0, a_last, a, tol, print_flag=print_flag)
        if print_flag:
            print "\tSufficient decrease condition is OK"

        # Check if the curvature condition is met and, if so, return the step length a which satisfies the strong Wolfe conditions.
        if abs(a['phi_prime']) <= -eta * a0['phi_prime']:
            if print_flag:
                print "\tCurvature condition OK, returning a"
            return a['a'], f_count, g_count
        if print_flag:
            print "\tCurvature condition is violated"

        # Check if the gradient is positive.
        # If so, the interval (a(i-1), a) will contain step lengths satisfying the strong Wolfe conditions.
        if a['phi_prime'] >= 0.0:
            if print_flag:
                print "\tGradient at a['a'] is positive - zooming"
            # The arguments to zoom are a followed by a_last, because the function value at a_last will be higher than at a.
            return zoom(func, func_prime, args, f_count, g_count, x, f, g, p, mu, eta, i, a0, a, a_last, tol, print_flag=print_flag)
        if print_flag:
            print "\tGradient is negative"

        # The strong Wolfe conditions are not met, and therefore interpolation between a and a_max will be used to find a value for a(i+1).
        #if print_flag:
        #    print "\tStrong Wolfe conditions are not met, doing quadratic interpolation"
        #a_new = cubic_ext(a['a'], a_max['a'], a['phi'], a_max['phi'], a['phi_prime'], a_max['phi_prime'], full_output=0)
        a_new = a['a'] + 0.25 * (a_max['a'] - a['a'])

        # Update.
        a_last = copy.deepcopy(a)
        a['a'] = a_new
        a['phi'] = apply(func, (x + a['a']*p,)+args)
        a['phi_prime'] = dot(apply(func_prime, (x + a['a']*p,)+args), p)
        f_count = f_count + 1
        g_count = g_count + 1
        i = i + 1
        if print_flag:
            print_data("Final (a)", i, a)
            print_data("Final (a_last)", i, a_last)

        # Test if the difference in function values is less than the tolerance.
        if abs(a_last['phi'] - a['phi']) <= tol:
            if print_flag:
                print "abs(a_last['phi'] - a['phi']) <= tol"
            return a['a'], f_count, g_count
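

# The two tests in the loop above are the strong Wolfe conditions, with
# phi(a) = func(x + a*p) and phi'(a) = dot(func_prime(x + a*p), p):
#
#     phi(a)    <=  phi(0) + mu * a * phi'(0)    (sufficient decrease)
#     |phi'(a)| <=  -eta * phi'(0)               (curvature)
#
# The helper below simply restates both tests for one of the step length
# dictionaries used above.  It is an illustrative sketch only: the name
# strong_wolfe_satisfied is hypothetical and is not part of the relax module.
def strong_wolfe_satisfied(a, a0, mu, eta):
    """Return 1 if the step length dictionary a satisfies both strong Wolfe conditions."""
    sufficient_decrease = a['phi'] <= a0['phi'] + mu * a['a'] * a0['phi_prime']
    curvature = abs(a['phi_prime']) <= -eta * a0['phi_prime']
    return sufficient_decrease and curvature

# Note: print_data() is a debugging helper used throughout which is not shown in this listing.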

def zoom(func, func_prime, args, f_count, g_count, x, f, g, p, mu, eta, i, a0, a_lo, a_hi, tol, print_flag=0):
    """Find the minimum function value in the open interval (a_lo, a_hi).

    Algorithm 3.3, page 60, from 'Numerical Optimization' by Jorge Nocedal and Stephen J. Wright,
    1999, 1st ed.
    """

    # Initialise aj.
    aj = {}
    j = 0
    aj_last = copy.deepcopy(a_lo)

    while 1:
        if print_flag:
            print "\n<Zooming iterate j = " + `j` + " >"

        # Interpolate to find a trial step length aj between a_lo and a_hi.
        aj_new = quadratic(a_lo['a'], a_hi['a'], a_lo['phi'], a_hi['phi'], a_lo['phi_prime'])

        # Safeguarding aj['a'].
        aj['a'] = max(aj_last['a'] + 0.66*(a_hi['a'] - aj_last['a']), aj_new)

        # Calculate the function and gradient value at aj['a'].
        aj['phi'] = apply(func, (x + aj['a']*p,)+args)
        aj['phi_prime'] = dot(apply(func_prime, (x + aj['a']*p,)+args), p)
        f_count = f_count + 1
        g_count = g_count + 1

        if print_flag:
            print_data("a_lo", i, a_lo)
            print_data("a_hi", i, a_hi)
            print_data("aj", i, aj)

        # Check if the sufficient decrease condition is violated.
        if not aj['phi'] <= a0['phi'] + mu * aj['a'] * a0['phi_prime']:
            a_hi = copy.deepcopy(aj)
        else:
            # Check if the curvature condition is met and, if so, return the step length aj which satisfies the strong Wolfe conditions.
            if abs(aj['phi_prime']) <= -eta * a0['phi_prime']:
                if print_flag:
                    print "aj: " + `aj`
                    print "<Finished zooming>"
                return aj['a'], f_count, g_count

            # Determine if a_hi needs to be reset.
            if aj['phi_prime'] * (a_hi['a'] - a_lo['a']) >= 0.0:
                a_hi = copy.deepcopy(a_lo)

            a_lo = copy.deepcopy(aj)

        # Test if the difference in function values is less than the tolerance.
        if abs(aj_last['phi'] - aj['phi']) <= tol:
            if print_flag:
                print "abs(aj_last['phi'] - aj['phi']) <= tol"
                print "<Finished zooming>"
            return aj['a'], f_count, g_count

        # Update.
        aj_last = copy.deepcopy(aj)
        j = j + 1
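

As a usage sketch, the following shows how the line search might be driven on a simple quadratic objective. It is not part of the module above: it assumes the historic Numeric package is installed, that this module and its interpolate helper are importable from the current directory (with relax's package layout the import would instead go through minimise.line_search), and the objective quad and its gradient dquad are invented purely for illustration.

from Numeric import array, dot

from nocedal_wright_wolfe import nocedal_wright_wolfe

# Illustrative quadratic objective f(x) = x.x and its gradient.
def quad(x):
    return dot(x, x)

def dquad(x):
    return 2.0 * x

x = array([2.0, -1.0])    # Current parameter vector.
f = quad(x)               # Function value at x.
g = dquad(x)              # Gradient vector at x.
p = -g                    # Steepest descent direction.

# Search along p for a step length satisfying the strong Wolfe conditions.
a, f_count, g_count = nocedal_wright_wolfe(quad, dquad, (), x, f, g, p, a_init=1.0)

print "Step length:    " + `a`
print "Function calls: " + `f_count`
print "Gradient calls: " + `g_count`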