使用拟牛顿法(BFGS和DFP),分别使用Armijo准则和Wolfe准则来求步长
求解方程
\(f(x_1,x_2)=(x_1^2-2)^4+(x_1-2x_2)^2\)的极小值
import numpy as np # import tensorflow as tf def gfun(x): # 梯度 # x = tf.Variable(x, dtype=tf.float32) # with tf.GradientTape() as tape: # tape.watch(x) # z = fun(x) # return tape.gradient(z, x).numpy() # 这里使用TensorFlow来求梯度,直接手算梯度返回也行 return np.array([4 * (x[0] - 2) ** 3 + 2 * (x[0] - 2 * x[1]), -4 * (x[0] - 2 * x[1])]).reshape(len(x), 1) def fun(x): # 函数 return (x[0] - 2) ** 4 + (x[0] - 2 * x[1]) ** 2 def bfgs_armijo(x0): '''秩1的基于armijo搜索的拟牛顿算法''' maxk = 5000 rho = .55 sigma = .4 epsilon = 1e-5 k = 0 n = len(x0) Hk = np.eye(n) while k < maxk: gk = gfun(x0) if np.linalg.norm(gk) < epsilon: break dk = -Hk @ gk m = 0 mk = 0 while m < 20: # 使用Armijo搜索(非精确线搜索) if fun(x0 + rho ** m * dk) < fun(x0) + sigma * rho ** m * gk.T @ dk: mk = m break m += 1 x = x0 + rho ** mk * dk sk = x - x0 yk = gfun(x) - gk Hk = Hk + (sk - Hk @ yk) @ (sk - Hk @ yk).T / ((sk - Hk @ yk).T @ yk) k += 1 x0 = x return x0, fun(x0), k def bfgs_wolfe(x0): '''基于wolfe搜索的秩1的拟牛顿算法''' maxk = 5000 epsilon = 1e-5 k = 0 n = len(x0) Hk = np.eye(n) while k < maxk: gk = gfun(x0) if np.linalg.norm(gk) < epsilon: break dk = -Hk @ gk rho = 0.4 sigma = 0.5 a = 0 b = np.inf alpha = 1 while True: # 使用Wolfe搜索 if not ((fun(x0) - fun(x0 + alpha * dk)) >= (-rho * alpha * gfun(x0).T @ dk)): b = alpha alpha = (a + alpha) / 2 continue if not (gfun(x0 + alpha * dk).T @ dk >= sigma * gfun(x0).T @ dk): a = alpha alpha = np.min([2 * alpha, (alpha + b) / 2]) continue break x = x0 + alpha * dk sk = x - x0 yk = gfun(x) - gk Hk = Hk + (sk - Hk @ yk) @ (sk - Hk @ yk).T / ((sk - Hk @ yk).T @ yk) k += 1 x0 = x return x0, fun(x0), k def dfp_wolfe(x0): '''基于armijo搜索的秩2的拟牛顿法 当采用精确线搜索时,矩阵序列Hk的正定性条件sk.T@yk>0可以被满足 一般来说,armijo准则不能满足这一条件需要修正一个条件:yk.T@sk>0 ''' maxk = 5000 epsilon = 1e-5 k = 0 n = len(x0) Hk = np.eye(n) # 初始化Hk为单位阵 while k < maxk: gk = gfun(x0) if np.linalg.norm(gk) < epsilon: # 迭代结束条件 break dk = -Hk @ gk rho = 0.4 sigma = 0.5 a = 0 b = np.inf alpha = 1 while True: if not ((fun(x0) - fun(x0 + alpha * dk)) >= (-rho * alpha * gfun(x0).T @ dk)): b = alpha alpha = (a + alpha) / 2 continue if not (gfun(x0 + alpha * dk).T @ dk >= sigma * gfun(x0).T @ dk): a = alpha alpha = np.min([2 * alpha, (alpha + b) / 2]) continue break x = x0 + alpha * dk sk = x - x0 yk = gfun(x) - gk Hk = Hk - (Hk @ yk @ yk.T @ Hk) / (yk.T @ Hk @ yk) + (sk @ sk.T) / (sk.T @ yk) k += 1 x0 = x return x0, fun(x0), k def dfp_armijo(x0): '''基于armijo搜索的秩2的拟牛顿法 当采用精确线搜索时,矩阵序列Hk的正定性条件sk.T@yk>0可以被满足 一般来说,armijo准则不能满足这一条件需要修正一个条件:yk.T@sk>0 ''' maxk = 5000 rho = .55 sigma = .4 epsilon = 1e-5 k = 0 n = len(x0) Hk = np.eye(n) while k < maxk: gk = gfun(x0) if np.linalg.norm(gk) < epsilon: break dk = -Hk @ gk m = 0 mk = 0 while m < 20: # 使用Armijo搜索(非精确线搜索) if fun(x0 + rho ** m * dk) < fun(x0) + sigma * rho ** m * gk.T @ dk: mk = m break m += 1 x = x0 + rho ** mk * dk sk = x - x0 yk = gfun(x) - gk if sk.T @ yk > 0: Hk = Hk - (Hk @ yk @ yk.T @ Hk) / (yk.T @ Hk @ yk) + (sk @ sk.T) / (sk.T @ yk) k += 1 x0 = x return x0, fun(x0), k if __name__ == '__main__': x0 = np.array([[0], [0]]) x0, val, k = bfgs_armijo(x0) # BFGS使用armijo准则 print(f'近似最优点:\n{x0}\n迭代次数:{k}\n目标函数值:{val.item()}') x0 = np.array([[0], [0]]) x0, val, k = bfgs_wolfe(x0) # BFGS使用wolfe准则 print(f'近似最优点:\n{x0}\n迭代次数:{k}\n目标函数值:{val.item()}') x0 = np.array([[0], [0]]) x0, val, k = dfp_armijo(x0) # DFP使用armijo准则 print(f'近似最优点:\n{x0}\n迭代次数:{k}\n目标函数值:{val.item()}') x0 = np.array([[0], [0]]) x0, val, k = dfp_wolfe(x0) # DFP使用wolfe准则 print(f'近似最优点:\n{x0}\n迭代次数:{k}\n目标函数值:{val.item()}')