import numexpr
import numpy as np
import sympy


def _evaluate(function, **variables):
    """Evaluate the expression string ``function`` with numexpr.

    ``variables`` maps the names used inside the expression (``x``, ``y``) to
    the values they should take.
    """
    return numexpr.evaluate(function, local_dict=variables)


def _value_at(expression, **point):
    """Numerically evaluate a sympy expression at ``point`` as a Python float."""
    return float(expression.evalf(subs=point))


def _momentum_term(momentum, values, i):
    """Return ``momentum * (values[i] - values[i - 1])``, or 0 on the first step."""
    if i == 0:
        return 0
    return momentum * (values[i] - values[i - 1])


def _start_trajectory_1d(function, initial_x, num_steps):
    """Allocate 1-D trajectory arrays of length ``num_steps + 1`` and fill entry 0."""
    trajectory_x = np.zeros(num_steps + 1)
    trajectory_y = np.zeros(num_steps + 1)
    trajectory_x[0] = initial_x
    trajectory_y[0] = _evaluate(function, x=initial_x)
    return trajectory_x, trajectory_y


def _start_trajectory_2d(function, initial_x, initial_y, num_steps):
    """Allocate 2-D trajectory arrays of length ``num_steps + 1`` and fill entry 0."""
    trajectory_x = np.zeros(num_steps + 1)
    trajectory_y = np.zeros(num_steps + 1)
    trajectory_z = np.zeros(num_steps + 1)
    trajectory_x[0] = initial_x
    trajectory_y[0] = initial_y
    trajectory_z[0] = _evaluate(function, x=initial_x, y=initial_y)
    return trajectory_x, trajectory_y, trajectory_z


def get_gradient_1d(function):
    """Return the symbolic first derivative of ``function`` with respect to ``x``.

    ``function`` is anything ``sympy.sympify`` accepts (typically a string).
    """
    x = sympy.symbols('x')
    expr = sympy.sympify(function)
    return sympy.diff(expr, x)


def get_hessian_1d(function):
    """Return the symbolic second derivative of ``function`` with respect to ``x``."""
    x = sympy.symbols('x')
    expr = sympy.sympify(function)
    return sympy.diff(expr, x, 2)


def get_gradient_2d(function):
    """Return the symbolic partial derivatives ``(d/dx, d/dy)`` of ``function``."""
    x, y = sympy.symbols('x y')
    expr = sympy.sympify(function)
    return sympy.diff(expr, x), sympy.diff(expr, y)


def get_hessian_2d(function):
    """Return the symbolic second partials of ``function``.

    The result is the tuple ``(hess_xx, hess_xy, hess_yx, hess_yy)``.
    """
    x, y = sympy.symbols('x y')
    expr = sympy.sympify(function)
    hess_xx = sympy.diff(expr, x, 2)
    hess_yy = sympy.diff(expr, y, 2)
    hess_xy = sympy.diff(expr, x, y)
    hess_yx = sympy.diff(expr, y, x)
    return hess_xx, hess_xy, hess_yx, hess_yy


def get_optimizer_trajectory_1d(function, initial_x, optimiser_type, learning_rate,
                                momentum, num_steps):
    """Dispatch to the 1-D trajectory routine named by ``optimiser_type``.

    Supported values are ``"Gradient Descent"`` and ``"Newton"``; Newton ignores
    ``learning_rate`` and ``momentum``.

    Raises:
        ValueError: if ``optimiser_type`` is not one of the supported names.
    """
    if optimiser_type == "Gradient Descent":
        return get_gd_trajectory_1d(function, initial_x, learning_rate, momentum, num_steps)
    if optimiser_type == "Newton":
        return get_newton_trajectory_1d(function, initial_x, num_steps)
    raise ValueError(f"Unsupported optimiser type: {optimiser_type}")


def get_gd_trajectory_1d(function, initial_x, learning_rate, momentum, num_steps):
    """Run gradient descent with momentum on a 1-D function.

    Each step is ``x - learning_rate * f'(x) + momentum * (x - x_prev)``; the
    momentum term is 0 on the first step.

    Returns:
        ``(trajectory_x, trajectory_y)``, arrays of length ``num_steps + 1``
        holding the visited points and the function values there.
    """
    grad_x = get_gradient_1d(function)
    trajectory_x, trajectory_y = _start_trajectory_1d(function, initial_x, num_steps)

    for i in range(num_steps):
        grad_x_val = _value_at(grad_x, x=trajectory_x[i])
        momentum_x = _momentum_term(momentum, trajectory_x, i)
        trajectory_x[i + 1] = trajectory_x[i] - learning_rate * grad_x_val + momentum_x
        trajectory_y[i + 1] = _evaluate(function, x=trajectory_x[i + 1])

    return trajectory_x, trajectory_y


def get_newton_trajectory_1d(function, initial_x, num_steps):
    """Run Newton's method ``x - f'(x) / f''(x)`` on a 1-D function.

    If the second derivative evaluates to exactly 0 the iteration stops and the
    remaining entries repeat the last valid point, so the returned arrays never
    contain placeholder zeros that were not actually visited.

    Returns:
        ``(trajectory_x, trajectory_y)``, arrays of length ``num_steps + 1``.
    """
    grad_x = get_gradient_1d(function)
    hess_x = get_hessian_1d(function)
    trajectory_x, trajectory_y = _start_trajectory_1d(function, initial_x, num_steps)

    for i in range(num_steps):
        grad_x_val = _value_at(grad_x, x=trajectory_x[i])
        hess_x_val = _value_at(hess_x, x=trajectory_x[i])
        if hess_x_val == 0:
            # The Newton step is undefined here: hold position for the rest.
            trajectory_x[i + 1:] = trajectory_x[i]
            trajectory_y[i + 1:] = trajectory_y[i]
            break
        trajectory_x[i + 1] = trajectory_x[i] - grad_x_val / hess_x_val
        trajectory_y[i + 1] = _evaluate(function, x=trajectory_x[i + 1])

    return trajectory_x, trajectory_y


def get_gd_trajectory_2d(function, initial_x, initial_y, learning_rate, momentum, num_steps):
    """Run gradient descent with momentum on a function of ``x`` and ``y``.

    Each coordinate is updated as
    ``p - learning_rate * grad + momentum * (p - p_prev)``; the momentum term is
    0 on the first step.

    Returns:
        ``(trajectory_x, trajectory_y, trajectory_z)``, arrays of length
        ``num_steps + 1``; ``trajectory_z`` holds the function values.
    """
    grad_x, grad_y = get_gradient_2d(function)
    trajectory_x, trajectory_y, trajectory_z = _start_trajectory_2d(
        function, initial_x, initial_y, num_steps
    )

    for i in range(num_steps):
        point = {'x': trajectory_x[i], 'y': trajectory_y[i]}
        grad_x_val = _value_at(grad_x, **point)
        grad_y_val = _value_at(grad_y, **point)
        momentum_x = _momentum_term(momentum, trajectory_x, i)
        momentum_y = _momentum_term(momentum, trajectory_y, i)

        trajectory_x[i + 1] = trajectory_x[i] - learning_rate * grad_x_val + momentum_x
        trajectory_y[i + 1] = trajectory_y[i] - learning_rate * grad_y_val + momentum_y
        trajectory_z[i + 1] = _evaluate(function, x=trajectory_x[i + 1], y=trajectory_y[i + 1])

    return trajectory_x, trajectory_y, trajectory_z


def get_nesterov_trajectory_2d(function, initial_x, initial_y, learning_rate, momentum,
                               num_steps):
    """Run Nesterov accelerated gradient descent on a function of ``x`` and ``y``.

    The gradient is evaluated at the look-ahead point
    ``p + momentum * (p - p_prev)`` and the gradient step is taken *from that
    look-ahead point*, so the momentum displacement is part of the update.

    Returns:
        ``(trajectory_x, trajectory_y, trajectory_z)``, arrays of length
        ``num_steps + 1``.
    """
    grad_x, grad_y = get_gradient_2d(function)
    trajectory_x, trajectory_y, trajectory_z = _start_trajectory_2d(
        function, initial_x, initial_y, num_steps
    )

    for i in range(num_steps):
        lookahead_x = trajectory_x[i] + _momentum_term(momentum, trajectory_x, i)
        lookahead_y = trajectory_y[i] + _momentum_term(momentum, trajectory_y, i)

        grad_x_val = _value_at(grad_x, x=lookahead_x, y=lookahead_y)
        grad_y_val = _value_at(grad_y, x=lookahead_x, y=lookahead_y)

        # Step from the look-ahead point; stepping from trajectory[i] would
        # discard the velocity and never apply any momentum.
        trajectory_x[i + 1] = lookahead_x - learning_rate * grad_x_val
        trajectory_y[i + 1] = lookahead_y - learning_rate * grad_y_val
        trajectory_z[i + 1] = _evaluate(function, x=trajectory_x[i + 1], y=trajectory_y[i + 1])

    return trajectory_x, trajectory_y, trajectory_z


def get_adam_trajectory_2d(function, initial_x, initial_y, learning_rate, rho1, rho2,
                           epsilon, num_steps):
    """Run Adam on a function of ``x`` and ``y``.

    ``rho1`` and ``rho2`` are the decay rates of the first and second moment
    estimates. Both estimates are bias-corrected, and the step is
    ``learning_rate * m_hat / sqrt(v_hat + epsilon)`` (``epsilon`` sits inside
    the square root). The caller-supplied ``epsilon`` is used as given.

    Returns:
        ``(trajectory_x, trajectory_y, trajectory_z)``, arrays of length
        ``num_steps + 1``.
    """
    grad_x, grad_y = get_gradient_2d(function)
    trajectory_x, trajectory_y, trajectory_z = _start_trajectory_2d(
        function, initial_x, initial_y, num_steps
    )

    m_x, m_y = 0, 0
    v_x, v_y = 0, 0

    for i in range(num_steps):
        point = {'x': trajectory_x[i], 'y': trajectory_y[i]}
        grad_x_val = _value_at(grad_x, **point)
        grad_y_val = _value_at(grad_y, **point)

        m_x = rho1 * m_x + (1 - rho1) * grad_x_val
        m_y = rho1 * m_y + (1 - rho1) * grad_y_val
        v_x = rho2 * v_x + (1 - rho2) * (grad_x_val ** 2)
        v_y = rho2 * v_y + (1 - rho2) * (grad_y_val ** 2)

        # Bias correction for the zero-initialised moment estimates.
        m_correction = 1 - rho1 ** (i + 1)
        v_correction = 1 - rho2 ** (i + 1)
        m_hat_x = m_x / m_correction
        m_hat_y = m_y / m_correction
        v_hat_x = v_x / v_correction
        v_hat_y = v_y / v_correction

        trajectory_x[i + 1] = (
            trajectory_x[i] - learning_rate * m_hat_x / np.sqrt(v_hat_x + epsilon)
        )
        trajectory_y[i + 1] = (
            trajectory_y[i] - learning_rate * m_hat_y / np.sqrt(v_hat_y + epsilon)
        )
        trajectory_z[i + 1] = _evaluate(function, x=trajectory_x[i + 1], y=trajectory_y[i + 1])

    return trajectory_x, trajectory_y, trajectory_z


def get_newton_trajectory_2d(function, initial_x, initial_y, num_steps):
    """Run Newton's method on a function of ``x`` and ``y``.

    Each step solves ``H @ step = grad`` for the 2x2 Hessian ``H`` and moves to
    ``p - step``. If the Hessian is singular the iteration stops and the
    remaining entries repeat the last valid point.

    Returns:
        ``(trajectory_x, trajectory_y, trajectory_z)``, arrays of length
        ``num_steps + 1``.
    """
    grad_x, grad_y = get_gradient_2d(function)
    hess_xx, hess_xy, hess_yx, hess_yy = get_hessian_2d(function)
    trajectory_x, trajectory_y, trajectory_z = _start_trajectory_2d(
        function, initial_x, initial_y, num_steps
    )

    for i in range(num_steps):
        point = {'x': trajectory_x[i], 'y': trajectory_y[i]}
        gradient_vector = np.array([_value_at(grad_x, **point), _value_at(grad_y, **point)])
        hessian_matrix = np.array(
            [
                [_value_at(hess_xx, **point), _value_at(hess_xy, **point)],
                [_value_at(hess_yx, **point), _value_at(hess_yy, **point)],
            ],
        )

        try:
            # Solving the linear system avoids forming an explicit inverse.
            step = np.linalg.solve(hessian_matrix, gradient_vector)
        except np.linalg.LinAlgError:
            # Singular Hessian: hold position for the rest of the trajectory.
            trajectory_x[i + 1:] = trajectory_x[i]
            trajectory_y[i + 1:] = trajectory_y[i]
            trajectory_z[i + 1:] = trajectory_z[i]
            break

        trajectory_x[i + 1] = trajectory_x[i] - step[0]
        trajectory_y[i + 1] = trajectory_y[i] - step[1]
        trajectory_z[i + 1] = _evaluate(function, x=trajectory_x[i + 1], y=trajectory_y[i + 1])

    return trajectory_x, trajectory_y, trajectory_z


def get_adagrad_trajectory_2d(function, initial_x, initial_y, learning_rate, epsilon,
                              num_steps):
    """Run AdaGrad on a function of ``x`` and ``y``.

    Squared gradients are accumulated per coordinate and each step is
    ``learning_rate / sqrt(accumulated + epsilon) * grad``.

    Returns:
        ``(trajectory_x, trajectory_y, trajectory_z)``, arrays of length
        ``num_steps + 1``.
    """
    grad_x, grad_y = get_gradient_2d(function)
    trajectory_x, trajectory_y, trajectory_z = _start_trajectory_2d(
        function, initial_x, initial_y, num_steps
    )

    v_x = 0
    v_y = 0

    for i in range(num_steps):
        point = {'x': trajectory_x[i], 'y': trajectory_y[i]}
        grad_x_val = _value_at(grad_x, **point)
        grad_y_val = _value_at(grad_y, **point)

        v_x += grad_x_val ** 2
        v_y += grad_y_val ** 2

        trajectory_x[i + 1] = trajectory_x[i] - learning_rate / np.sqrt(v_x + epsilon) * grad_x_val
        trajectory_y[i + 1] = trajectory_y[i] - learning_rate / np.sqrt(v_y + epsilon) * grad_y_val
        trajectory_z[i + 1] = _evaluate(function, x=trajectory_x[i + 1], y=trajectory_y[i + 1])

    return trajectory_x, trajectory_y, trajectory_z


def get_rmsprop_trajectory_2d(function, initial_x, initial_y, learning_rate, rho, epsilon,
                              num_steps):
    """Run RMSProp on a function of ``x`` and ``y``.

    An exponential moving average (decay ``rho``) of the squared gradient is
    kept per coordinate and each step is
    ``learning_rate / sqrt(average + epsilon) * grad``.

    Returns:
        ``(trajectory_x, trajectory_y, trajectory_z)``, arrays of length
        ``num_steps + 1``.
    """
    grad_x, grad_y = get_gradient_2d(function)
    trajectory_x, trajectory_y, trajectory_z = _start_trajectory_2d(
        function, initial_x, initial_y, num_steps
    )

    v_x = 0
    v_y = 0

    for i in range(num_steps):
        point = {'x': trajectory_x[i], 'y': trajectory_y[i]}
        grad_x_val = _value_at(grad_x, **point)
        grad_y_val = _value_at(grad_y, **point)

        v_x = rho * v_x + (1 - rho) * (grad_x_val ** 2)
        v_y = rho * v_y + (1 - rho) * (grad_y_val ** 2)

        trajectory_x[i + 1] = trajectory_x[i] - learning_rate / np.sqrt(v_x + epsilon) * grad_x_val
        trajectory_y[i + 1] = trajectory_y[i] - learning_rate / np.sqrt(v_y + epsilon) * grad_y_val
        trajectory_z[i + 1] = _evaluate(function, x=trajectory_x[i + 1], y=trajectory_y[i + 1])

    return trajectory_x, trajectory_y, trajectory_z


def get_adadelta_trajectory_2d(function, initial_x, initial_y, learning_rate, rho, epsilon,
                               num_steps):
    """Run AdaDelta (scaled by ``learning_rate``) on a function of ``x`` and ``y``.

    Per coordinate, ``v`` is a moving average of squared gradients and ``s`` a
    moving average of squared updates, both with decay ``rho``. The update is
    ``sqrt(s + epsilon) / sqrt(v + epsilon) * grad`` and the position moves by
    ``learning_rate`` times that update.

    Returns:
        ``(trajectory_x, trajectory_y, trajectory_z)``, arrays of length
        ``num_steps + 1``.
    """
    grad_x, grad_y = get_gradient_2d(function)
    trajectory_x, trajectory_y, trajectory_z = _start_trajectory_2d(
        function, initial_x, initial_y, num_steps
    )

    v_x = 0
    v_y = 0
    s_x = 0
    s_y = 0

    for i in range(num_steps):
        point = {'x': trajectory_x[i], 'y': trajectory_y[i]}
        grad_x_val = _value_at(grad_x, **point)
        grad_y_val = _value_at(grad_y, **point)

        v_x = rho * v_x + (1 - rho) * (grad_x_val ** 2)
        v_y = rho * v_y + (1 - rho) * (grad_y_val ** 2)

        # The update uses the previous step's ``s``; ``s`` is refreshed afterwards.
        del_x = np.sqrt(s_x + epsilon) / np.sqrt(v_x + epsilon) * grad_x_val
        del_y = np.sqrt(s_y + epsilon) / np.sqrt(v_y + epsilon) * grad_y_val

        s_x = rho * s_x + (1 - rho) * del_x ** 2
        s_y = rho * s_y + (1 - rho) * del_y ** 2

        trajectory_x[i + 1] = trajectory_x[i] - learning_rate * del_x
        trajectory_y[i + 1] = trajectory_y[i] - learning_rate * del_y
        trajectory_z[i + 1] = _evaluate(function, x=trajectory_x[i + 1], y=trajectory_y[i + 1])

    return trajectory_x, trajectory_y, trajectory_z