# optimization/old_code/optimisers.py
# Add old code from gradio version (commit 9357e05, joel-woodfield)
import numexpr
import numpy as np
import sympy
def get_gradient_1d(function):
    """Return the symbolic first derivative of ``function`` with respect to ``x``.

    Args:
        function: Anything ``sympy.sympify`` accepts, e.g. an expression
            string written in terms of ``x``.

    Returns:
        A SymPy expression for d(function)/dx (symbolic, not a number).
    """
    variable = sympy.symbols('x')
    return sympy.diff(sympy.sympify(function), variable)
def get_hessian_1d(function):
    """Return the symbolic second derivative of ``function`` with respect to ``x``.

    Args:
        function: Anything ``sympy.sympify`` accepts, e.g. an expression
            string written in terms of ``x``.

    Returns:
        A SymPy expression for d^2(function)/dx^2.
    """
    variable = sympy.symbols('x')
    parsed = sympy.sympify(function)
    # Naming the symbol twice differentiates twice with respect to it.
    return sympy.diff(parsed, variable, variable)
def get_gradient_2d(function):
    """Return the symbolic partial derivatives of ``function`` in ``x`` and ``y``.

    Args:
        function: Anything ``sympy.sympify`` accepts, e.g. an expression
            string written in terms of ``x`` and ``y``.

    Returns:
        A 2-tuple ``(d/dx, d/dy)`` of SymPy expressions.
    """
    symbol_x, symbol_y = sympy.symbols('x y')
    parsed = sympy.sympify(function)
    return tuple(sympy.diff(parsed, symbol) for symbol in (symbol_x, symbol_y))
def get_hessian_2d(function):
    """Return the four symbolic second partial derivatives of ``function``.

    Args:
        function: Anything ``sympy.sympify`` accepts, e.g. an expression
            string written in terms of ``x`` and ``y``.

    Returns:
        A 4-tuple ``(d2/dx2, d2/dxdy, d2/dydx, d2/dy2)`` of SymPy expressions,
        i.e. the Hessian entries in row-major order.
    """
    symbol_x, symbol_y = sympy.symbols('x y')
    parsed = sympy.sympify(function)
    # Each pair lists the symbols to differentiate by, in order.
    differentiation_orders = (
        (symbol_x, symbol_x),
        (symbol_x, symbol_y),
        (symbol_y, symbol_x),
        (symbol_y, symbol_y),
    )
    return tuple(sympy.diff(parsed, *order) for order in differentiation_orders)
def get_optimizer_trajectory_1d(function, initial_x, optimiser_type, learning_rate, momentum, num_steps):
    """Dispatch to the 1-D trajectory routine selected by ``optimiser_type``.

    Args:
        function: Expression forwarded unchanged to the chosen routine.
        initial_x: Starting point of the trajectory.
        optimiser_type: ``"Gradient Descent"`` or ``"Newton"``.
        learning_rate: Step size; only used by gradient descent.
        momentum: Momentum coefficient; only used by gradient descent.
        num_steps: Number of optimisation steps to take.

    Returns:
        Whatever the selected routine returns.

    Raises:
        ValueError: If ``optimiser_type`` is not one of the supported names.
    """
    if optimiser_type == "Newton":
        return get_newton_trajectory_1d(function, initial_x, num_steps)
    if optimiser_type != "Gradient Descent":
        raise ValueError(f"Unsupported optimiser type: {optimiser_type}")
    return get_gd_trajectory_1d(function, initial_x, learning_rate, momentum, num_steps)
def get_gd_trajectory_1d(function, initial_x, learning_rate, momentum, num_steps):
    """Trace gradient descent with momentum on a 1-D function.

    The update applied at every step is

        x_{i+1} = x_i - learning_rate * f'(x_i) + momentum * (x_i - x_{i-1})

    where the momentum term is zero on the first step.

    Args:
        function: Expression in ``x``; it is differentiated with SymPy and
            evaluated with ``numexpr``, so it must be valid for both.
        initial_x: Starting point.
        learning_rate: Multiplier applied to the derivative.
        momentum: Multiplier applied to the previous displacement.
        num_steps: Number of updates to perform.

    Returns:
        ``(xs, ys)``: two arrays of length ``num_steps + 1`` holding the
        visited points and the function value at each of them.
    """
    derivative = get_gradient_1d(function)

    n_points = num_steps + 1
    xs = np.zeros(n_points)
    ys = np.zeros(n_points)
    xs[0] = initial_x
    ys[0] = numexpr.evaluate(function, local_dict={'x': initial_x})

    for step in range(num_steps):
        here = xs[step]
        slope = float(derivative.evalf(subs={'x': here}))
        # There is no previous displacement to reuse on the very first step.
        push = momentum * (here - xs[step - 1]) if step > 0 else 0
        xs[step + 1] = here - learning_rate * slope + push
        ys[step + 1] = numexpr.evaluate(function, local_dict={'x': xs[step + 1]})

    return xs, ys
def get_newton_trajectory_1d(function, initial_x, num_steps):
    """Trace Newton's method on a 1-D function.

    Each step applies ``x_{i+1} = x_i - f'(x_i) / f''(x_i)``.

    Args:
        function: Expression in ``x``; it is differentiated with SymPy and
            evaluated with ``numexpr``, so it must be valid for both.
        initial_x: Starting point.
        num_steps: Maximum number of Newton updates to perform.

    Returns:
        ``(trajectory_x, trajectory_y)``: two arrays of length
        ``num_steps + 1`` holding the visited points and the function value
        at each of them.  If the second derivative evaluates to exactly zero
        the iteration stops and every remaining entry repeats the last valid
        point.
    """
    grad_x = get_gradient_1d(function)
    hess_x = get_hessian_1d(function)

    trajectory_x = np.zeros(num_steps + 1)
    trajectory_y = np.zeros(num_steps + 1)
    trajectory_x[0] = initial_x
    trajectory_y[0] = numexpr.evaluate(function, local_dict={'x': initial_x})

    for i in range(num_steps):
        grad_x_val = float(grad_x.evalf(subs={'x': trajectory_x[i]}))
        hess_x_val = float(hess_x.evalf(subs={'x': trajectory_x[i]}))

        if hess_x_val == 0:
            # The Newton step is undefined here.  Hold the last iterate in the
            # unused slots; leaving them at their zero initial value would
            # make the trajectory appear to jump to (0, 0).
            trajectory_x[i + 1:] = trajectory_x[i]
            trajectory_y[i + 1:] = trajectory_y[i]
            break

        trajectory_x[i + 1] = trajectory_x[i] - grad_x_val / hess_x_val
        trajectory_y[i + 1] = numexpr.evaluate(function, local_dict={'x': trajectory_x[i + 1]})

    return trajectory_x, trajectory_y
def get_gd_trajectory_2d(function, initial_x, initial_y, learning_rate, momentum, num_steps):
    """Trace gradient descent with momentum on a 2-D function.

    Per coordinate, the update applied at every step is

        p_{i+1} = p_i - learning_rate * grad(p_i) + momentum * (p_i - p_{i-1})

    where the momentum term is zero on the first step.

    Args:
        function: Expression in ``x`` and ``y``; it is differentiated with
            SymPy and evaluated with ``numexpr``, so it must be valid for both.
        initial_x: Starting x coordinate.
        initial_y: Starting y coordinate.
        learning_rate: Multiplier applied to the gradient.
        momentum: Multiplier applied to the previous displacement.
        num_steps: Number of updates to perform.

    Returns:
        ``(xs, ys, zs)``: three arrays of length ``num_steps + 1`` holding the
        visited coordinates and the function value at each point.
    """
    partial_x, partial_y = get_gradient_2d(function)

    n_points = num_steps + 1
    xs = np.zeros(n_points)
    ys = np.zeros(n_points)
    zs = np.zeros(n_points)
    xs[0], ys[0] = initial_x, initial_y
    zs[0] = numexpr.evaluate(function, local_dict={'x': initial_x, 'y': initial_y})

    for step in range(num_steps):
        px, py = xs[step], ys[step]
        slope_x, slope_y = (
            float(partial.evalf(subs={'x': px, 'y': py}))
            for partial in (partial_x, partial_y)
        )

        if step > 0:
            push_x = momentum * (px - xs[step - 1])
            push_y = momentum * (py - ys[step - 1])
        else:
            # No previous displacement exists on the first step.
            push_x = push_y = 0

        xs[step + 1] = px - learning_rate * slope_x + push_x
        ys[step + 1] = py - learning_rate * slope_y + push_y
        zs[step + 1] = numexpr.evaluate(function, local_dict={'x': xs[step + 1], 'y': ys[step + 1]})

    return xs, ys, zs
def get_nesterov_trajectory_2d(function, initial_x, initial_y, learning_rate, momentum, num_steps):
    """Trace Nesterov accelerated gradient descent on a 2-D function.

    Every step first forms a look-ahead point by adding the momentum-scaled
    previous displacement to the current point, evaluates the gradient at that
    look-ahead point, and then takes the gradient step from the look-ahead
    point:

        look_i  = p_i + momentum * (p_i - p_{i-1})
        p_{i+1} = look_i - learning_rate * grad(look_i)

    The momentum displacement is zero on the first step.

    Args:
        function: Expression in ``x`` and ``y``; it is differentiated with
            SymPy and evaluated with ``numexpr``, so it must be valid for both.
        initial_x: Starting x coordinate.
        initial_y: Starting y coordinate.
        learning_rate: Multiplier applied to the gradient.
        momentum: Multiplier applied to the previous displacement.
        num_steps: Number of updates to perform.

    Returns:
        ``(trajectory_x, trajectory_y, trajectory_z)``: three arrays of length
        ``num_steps + 1`` holding the visited coordinates and the function
        value at each point.
    """
    grad_x, grad_y = get_gradient_2d(function)

    trajectory_x = np.zeros(num_steps + 1)
    trajectory_y = np.zeros(num_steps + 1)
    trajectory_z = np.zeros(num_steps + 1)
    trajectory_x[0] = initial_x
    trajectory_y[0] = initial_y
    trajectory_z[0] = numexpr.evaluate(function, local_dict={'x': initial_x, 'y': initial_y})

    for i in range(num_steps):
        if i == 0:
            momentum_x = 0
            momentum_y = 0
        else:
            momentum_x = momentum * (trajectory_x[i] - trajectory_x[i - 1])
            momentum_y = momentum * (trajectory_y[i] - trajectory_y[i - 1])

        lookahead_x = trajectory_x[i] + momentum_x
        lookahead_y = trajectory_y[i] + momentum_y
        lookahead = {'x': lookahead_x, 'y': lookahead_y}

        grad_x_val = float(grad_x.evalf(subs=lookahead))
        grad_y_val = float(grad_y.evalf(subs=lookahead))

        # Step from the look-ahead point, not from trajectory[i]: otherwise the
        # momentum displacement is never applied and no velocity builds up.
        trajectory_x[i + 1] = lookahead_x - learning_rate * grad_x_val
        trajectory_y[i + 1] = lookahead_y - learning_rate * grad_y_val
        trajectory_z[i + 1] = numexpr.evaluate(function, local_dict={'x': trajectory_x[i + 1], 'y': trajectory_y[i + 1]})

    return trajectory_x, trajectory_y, trajectory_z
def get_adam_trajectory_2d(function, initial_x, initial_y, learning_rate, rho1, rho2, epsilon, num_steps):
    """Trace the Adam optimiser on a 2-D function.

    Per coordinate, with gradient ``g`` at the current point and 1-based step
    counter ``t``:

        m     = rho1 * m + (1 - rho1) * g
        v     = rho2 * v + (1 - rho2) * g**2
        m_hat = m / (1 - rho1**t)
        v_hat = v / (1 - rho2**t)
        p     = p - learning_rate * m_hat / sqrt(v_hat + epsilon)

    Args:
        function: Expression in ``x`` and ``y``; it is differentiated with
            SymPy and evaluated with ``numexpr``, so it must be valid for both.
        initial_x: Starting x coordinate.
        initial_y: Starting y coordinate.
        learning_rate: Step-size multiplier.
        rho1: Decay rate of the first-moment (mean) estimate.
        rho2: Decay rate of the second-moment (squared gradient) estimate.
        epsilon: Stabiliser added under the square root.  The value supplied
            by the caller is used as given.
        num_steps: Number of updates to perform.

    Returns:
        ``(trajectory_x, trajectory_y, trajectory_z)``: three arrays of length
        ``num_steps + 1`` holding the visited coordinates and the function
        value at each point.
    """
    grad_x, grad_y = get_gradient_2d(function)

    trajectory_x = np.zeros(num_steps + 1)
    trajectory_y = np.zeros(num_steps + 1)
    trajectory_z = np.zeros(num_steps + 1)
    trajectory_x[0] = initial_x
    trajectory_y[0] = initial_y
    trajectory_z[0] = numexpr.evaluate(function, local_dict={'x': initial_x, 'y': initial_y})

    m_x, m_y = 0, 0
    v_x, v_y = 0, 0

    for i in range(num_steps):
        point = {'x': trajectory_x[i], 'y': trajectory_y[i]}
        grad_x_val = float(grad_x.evalf(subs=point))
        grad_y_val = float(grad_y.evalf(subs=point))

        # Exponential moving averages of the gradient and its square.
        m_x = rho1 * m_x + (1 - rho1) * grad_x_val
        m_y = rho1 * m_y + (1 - rho1) * grad_y_val
        v_x = rho2 * v_x + (1 - rho2) * (grad_x_val ** 2)
        v_y = rho2 * v_y + (1 - rho2) * (grad_y_val ** 2)

        # Bias correction: the averages start at zero, so early values are
        # scaled up by 1 / (1 - rho**t) with t = i + 1.
        m_correction = 1 - rho1 ** (i + 1)
        v_correction = 1 - rho2 ** (i + 1)
        m_hat_x = m_x / m_correction
        m_hat_y = m_y / m_correction
        v_hat_x = v_x / v_correction
        v_hat_y = v_y / v_correction

        trajectory_x[i + 1] = trajectory_x[i] - learning_rate * m_hat_x / np.sqrt(v_hat_x + epsilon)
        trajectory_y[i + 1] = trajectory_y[i] - learning_rate * m_hat_y / np.sqrt(v_hat_y + epsilon)
        trajectory_z[i + 1] = numexpr.evaluate(function, local_dict={'x': trajectory_x[i + 1], 'y': trajectory_y[i + 1]})

    return trajectory_x, trajectory_y, trajectory_z
def get_newton_trajectory_2d(function, initial_x, initial_y, num_steps):
    """Trace Newton's method on a 2-D function.

    Each step solves ``H(p_i) @ step = grad(p_i)`` and applies
    ``p_{i+1} = p_i - step``, where ``H`` is the 2x2 Hessian.

    Args:
        function: Expression in ``x`` and ``y``; it is differentiated with
            SymPy and evaluated with ``numexpr``, so it must be valid for both.
        initial_x: Starting x coordinate.
        initial_y: Starting y coordinate.
        num_steps: Maximum number of Newton updates to perform.

    Returns:
        ``(trajectory_x, trajectory_y, trajectory_z)``: three arrays of length
        ``num_steps + 1`` holding the visited coordinates and the function
        value at each point.  If NumPy reports the Hessian as singular the
        iteration stops and every remaining entry repeats the last valid
        point.
    """
    grad_x, grad_y = get_gradient_2d(function)
    hess_xx, hess_xy, hess_yx, hess_yy = get_hessian_2d(function)

    trajectory_x = np.zeros(num_steps + 1)
    trajectory_y = np.zeros(num_steps + 1)
    trajectory_z = np.zeros(num_steps + 1)
    trajectory_x[0] = initial_x
    trajectory_y[0] = initial_y
    trajectory_z[0] = numexpr.evaluate(function, local_dict={'x': initial_x, 'y': initial_y})

    for i in range(num_steps):
        point = {'x': trajectory_x[i], 'y': trajectory_y[i]}

        gradient_vector = np.array([
            float(grad_x.evalf(subs=point)),
            float(grad_y.evalf(subs=point)),
        ])
        hessian_matrix = np.array([
            [float(hess_xx.evalf(subs=point)), float(hess_xy.evalf(subs=point))],
            [float(hess_yx.evalf(subs=point)), float(hess_yy.evalf(subs=point))],
        ])

        try:
            # Solve the linear system directly instead of forming the inverse.
            step = np.linalg.solve(hessian_matrix, gradient_vector)
        except np.linalg.LinAlgError:
            # Singular Hessian: the Newton step is undefined.  Hold the last
            # iterate in the unused slots; leaving them at their zero initial
            # value would make the trajectory appear to jump to the origin.
            trajectory_x[i + 1:] = trajectory_x[i]
            trajectory_y[i + 1:] = trajectory_y[i]
            trajectory_z[i + 1:] = trajectory_z[i]
            break

        trajectory_x[i + 1] = trajectory_x[i] - step[0]
        trajectory_y[i + 1] = trajectory_y[i] - step[1]
        trajectory_z[i + 1] = numexpr.evaluate(function, local_dict={'x': trajectory_x[i + 1], 'y': trajectory_y[i + 1]})

    return trajectory_x, trajectory_y, trajectory_z
def get_adagrad_trajectory_2d(function, initial_x, initial_y, learning_rate, epsilon, num_steps):
    """Trace the AdaGrad optimiser on a 2-D function.

    Per coordinate, the squared gradients are summed over all steps so far and
    the update is

        p = p - learning_rate / sqrt(sum_of_squares + epsilon) * g

    Args:
        function: Expression in ``x`` and ``y``; it is differentiated with
            SymPy and evaluated with ``numexpr``, so it must be valid for both.
        initial_x: Starting x coordinate.
        initial_y: Starting y coordinate.
        learning_rate: Step-size multiplier.
        epsilon: Stabiliser added under the square root.
        num_steps: Number of updates to perform.

    Returns:
        ``(xs, ys, zs)``: three arrays of length ``num_steps + 1`` holding the
        visited coordinates and the function value at each point.
    """
    partial_x, partial_y = get_gradient_2d(function)

    n_points = num_steps + 1
    xs = np.zeros(n_points)
    ys = np.zeros(n_points)
    zs = np.zeros(n_points)
    xs[0], ys[0] = initial_x, initial_y
    zs[0] = numexpr.evaluate(function, local_dict={'x': initial_x, 'y': initial_y})

    # Running totals of squared gradients, one per coordinate.
    sq_sum_x = sq_sum_y = 0

    for step in range(num_steps):
        px, py = xs[step], ys[step]
        slope_x, slope_y = (
            float(partial.evalf(subs={'x': px, 'y': py}))
            for partial in (partial_x, partial_y)
        )

        sq_sum_x = sq_sum_x + slope_x ** 2
        sq_sum_y = sq_sum_y + slope_y ** 2

        scale_x = learning_rate / np.sqrt(sq_sum_x + epsilon)
        scale_y = learning_rate / np.sqrt(sq_sum_y + epsilon)

        xs[step + 1] = px - scale_x * slope_x
        ys[step + 1] = py - scale_y * slope_y
        zs[step + 1] = numexpr.evaluate(function, local_dict={'x': xs[step + 1], 'y': ys[step + 1]})

    return xs, ys, zs
def get_rmsprop_trajectory_2d(function, initial_x, initial_y, learning_rate, rho, epsilon, num_steps):
    """Trace the RMSProp optimiser on a 2-D function.

    Per coordinate, an exponential moving average of the squared gradient is
    kept and the update is

        v = rho * v + (1 - rho) * g**2
        p = p - learning_rate / sqrt(v + epsilon) * g

    Args:
        function: Expression in ``x`` and ``y``; it is differentiated with
            SymPy and evaluated with ``numexpr``, so it must be valid for both.
        initial_x: Starting x coordinate.
        initial_y: Starting y coordinate.
        learning_rate: Step-size multiplier.
        rho: Decay rate of the squared-gradient average.
        epsilon: Stabiliser added under the square root.
        num_steps: Number of updates to perform.

    Returns:
        ``(xs, ys, zs)``: three arrays of length ``num_steps + 1`` holding the
        visited coordinates and the function value at each point.
    """
    partial_x, partial_y = get_gradient_2d(function)

    n_points = num_steps + 1
    xs = np.zeros(n_points)
    ys = np.zeros(n_points)
    zs = np.zeros(n_points)
    xs[0], ys[0] = initial_x, initial_y
    zs[0] = numexpr.evaluate(function, local_dict={'x': initial_x, 'y': initial_y})

    # Moving averages of the squared gradient, one per coordinate.
    avg_sq_x = avg_sq_y = 0

    for step in range(num_steps):
        px, py = xs[step], ys[step]
        slope_x, slope_y = (
            float(partial.evalf(subs={'x': px, 'y': py}))
            for partial in (partial_x, partial_y)
        )

        avg_sq_x = rho * avg_sq_x + (1 - rho) * (slope_x ** 2)
        avg_sq_y = rho * avg_sq_y + (1 - rho) * (slope_y ** 2)

        scale_x = learning_rate / np.sqrt(avg_sq_x + epsilon)
        scale_y = learning_rate / np.sqrt(avg_sq_y + epsilon)

        xs[step + 1] = px - scale_x * slope_x
        ys[step + 1] = py - scale_y * slope_y
        zs[step + 1] = numexpr.evaluate(function, local_dict={'x': xs[step + 1], 'y': ys[step + 1]})

    return xs, ys, zs
def get_adadelta_trajectory_2d(function, initial_x, initial_y, learning_rate, rho, epsilon, num_steps):
    """Trace the AdaDelta optimiser on a 2-D function.

    Per coordinate, two exponential moving averages are kept: ``v`` of the
    squared gradient and ``s`` of the squared update.  Each step computes

        v     = rho * v + (1 - rho) * g**2
        delta = sqrt(s + epsilon) / sqrt(v + epsilon) * g
        s     = rho * s + (1 - rho) * delta**2
        p     = p - learning_rate * delta

    Args:
        function: Expression in ``x`` and ``y``; it is differentiated with
            SymPy and evaluated with ``numexpr``, so it must be valid for both.
        initial_x: Starting x coordinate.
        initial_y: Starting y coordinate.
        learning_rate: Multiplier applied to the computed update.
        rho: Decay rate shared by both moving averages.
        epsilon: Stabiliser added under both square roots.
        num_steps: Number of updates to perform.

    Returns:
        ``(xs, ys, zs)``: three arrays of length ``num_steps + 1`` holding the
        visited coordinates and the function value at each point.
    """
    partial_x, partial_y = get_gradient_2d(function)

    n_points = num_steps + 1
    xs = np.zeros(n_points)
    ys = np.zeros(n_points)
    zs = np.zeros(n_points)
    xs[0], ys[0] = initial_x, initial_y
    zs[0] = numexpr.evaluate(function, local_dict={'x': initial_x, 'y': initial_y})

    # Moving averages of squared gradients and of squared updates.
    grad_avg_x = grad_avg_y = 0
    update_avg_x = update_avg_y = 0

    for step in range(num_steps):
        px, py = xs[step], ys[step]
        slope_x, slope_y = (
            float(partial.evalf(subs={'x': px, 'y': py}))
            for partial in (partial_x, partial_y)
        )

        grad_avg_x = rho * grad_avg_x + (1 - rho) * (slope_x ** 2)
        grad_avg_y = rho * grad_avg_y + (1 - rho) * (slope_y ** 2)

        # The update uses the update-average from before this step.
        delta_x = np.sqrt(update_avg_x + epsilon) / np.sqrt(grad_avg_x + epsilon) * slope_x
        delta_y = np.sqrt(update_avg_y + epsilon) / np.sqrt(grad_avg_y + epsilon) * slope_y

        update_avg_x = rho * update_avg_x + (1 - rho) * delta_x ** 2
        update_avg_y = rho * update_avg_y + (1 - rho) * delta_y ** 2

        xs[step + 1] = px - learning_rate * delta_x
        ys[step + 1] = py - learning_rate * delta_y
        zs[step + 1] = numexpr.evaluate(function, local_dict={'x': xs[step + 1], 'y': ys[step + 1]})

    return xs, ys, zs