Spaces:
Running
Running
| import numexpr | |
| import numpy as np | |
| import sympy | |
def get_gradient_1d(function):
    """Return the symbolic first derivative d/dx of *function* (a string in x)."""
    sym_x = sympy.symbols('x')
    return sympy.diff(sympy.sympify(function), sym_x)
def get_hessian_1d(function):
    """Return the symbolic second derivative d^2/dx^2 of *function* (a string in x)."""
    sym_x = sympy.symbols('x')
    return sympy.diff(sympy.sympify(function), sym_x, 2)
def get_gradient_2d(function):
    """Return the symbolic partial derivatives (df/dx, df/dy) of *function*.

    *function* is an expression string in the variables x and y.
    """
    sym_x, sym_y = sympy.symbols('x y')
    expr = sympy.sympify(function)
    return sympy.diff(expr, sym_x), sympy.diff(expr, sym_y)
def get_hessian_2d(function):
    """Return the symbolic Hessian entries (f_xx, f_xy, f_yx, f_yy) of *function*.

    *function* is an expression string in the variables x and y. All four
    entries are returned even though f_xy == f_yx for smooth expressions,
    so callers can build the full 2x2 matrix directly.
    """
    sym_x, sym_y = sympy.symbols('x y')
    expr = sympy.sympify(function)
    return (
        sympy.diff(expr, sym_x, 2),
        sympy.diff(expr, sym_x, sym_y),
        sympy.diff(expr, sym_y, sym_x),
        sympy.diff(expr, sym_y, 2),
    )
def get_optimizer_trajectory_1d(function, initial_x, optimiser_type, learning_rate, momentum, num_steps):
    """Dispatch to the requested 1-D optimiser and return its trajectory.

    Raises:
        ValueError: if *optimiser_type* is not one of the supported names.
    """
    if optimiser_type == "Gradient Descent":
        return get_gd_trajectory_1d(function, initial_x, learning_rate, momentum, num_steps)
    if optimiser_type == "Newton":
        return get_newton_trajectory_1d(function, initial_x, num_steps)
    raise ValueError(f"Unsupported optimiser type: {optimiser_type}")
def get_gd_trajectory_1d(function, initial_x, learning_rate, momentum, num_steps):
    """Heavy-ball gradient descent on a 1-D function given as a string in x.

    Returns (xs, ys): arrays of length num_steps + 1 holding the iterates
    and the function values along the path.
    """
    grad_expr = get_gradient_1d(function)
    xs = np.zeros(num_steps + 1)
    ys = np.zeros(num_steps + 1)
    xs[0] = initial_x
    ys[0] = numexpr.evaluate(function, local_dict={'x': initial_x})
    for step in range(num_steps):
        slope = float(grad_expr.evalf(subs={'x': xs[step]}))
        # Momentum term approximated by the previous displacement.
        velocity = 0.0 if step == 0 else momentum * (xs[step] - xs[step - 1])
        xs[step + 1] = xs[step] - learning_rate * slope + velocity
        ys[step + 1] = numexpr.evaluate(function, local_dict={'x': xs[step + 1]})
    return xs, ys
def get_newton_trajectory_1d(function, initial_x, num_steps):
    """Newton's method on a 1-D function given as a string in x.

    Returns (trajectory_x, trajectory_y): arrays of length num_steps + 1 with
    the iterates and the function values. If the second derivative vanishes
    at some iterate, the method cannot continue; the remaining entries are
    held at the last valid point (previously they were left as zeros, which
    misrepresented the trajectory when plotted).
    """
    grad_x = get_gradient_1d(function)
    hess_x = get_hessian_1d(function)
    trajectory_x = np.zeros(num_steps + 1)
    trajectory_y = np.zeros(num_steps + 1)
    trajectory_x[0] = initial_x
    trajectory_y[0] = numexpr.evaluate(function, local_dict={'x': initial_x})
    for i in range(num_steps):
        grad_x_val = float(grad_x.evalf(subs={'x': trajectory_x[i]}))
        hess_x_val = float(hess_x.evalf(subs={'x': trajectory_x[i]}))
        if hess_x_val == 0:
            # Singular second derivative: stop, holding the last iterate so
            # the tail of the arrays is not left as misleading zeros.
            trajectory_x[i + 1:] = trajectory_x[i]
            trajectory_y[i + 1:] = trajectory_y[i]
            break
        trajectory_x[i + 1] = trajectory_x[i] - grad_x_val / hess_x_val
        trajectory_y[i + 1] = numexpr.evaluate(function, local_dict={'x': trajectory_x[i + 1]})
    return trajectory_x, trajectory_y
def get_gd_trajectory_2d(function, initial_x, initial_y, learning_rate, momentum, num_steps):
    """Heavy-ball gradient descent on a 2-D function given as a string in x, y.

    Returns (xs, ys, zs): arrays of length num_steps + 1 with the iterates
    and the function values along the path.
    """
    dfdx, dfdy = get_gradient_2d(function)
    xs = np.zeros(num_steps + 1)
    ys = np.zeros(num_steps + 1)
    zs = np.zeros(num_steps + 1)
    xs[0], ys[0] = initial_x, initial_y
    zs[0] = numexpr.evaluate(function, local_dict={'x': initial_x, 'y': initial_y})
    for step in range(num_steps):
        point = {'x': xs[step], 'y': ys[step]}
        gx = float(dfdx.evalf(subs=point))
        gy = float(dfdy.evalf(subs=point))
        if step == 0:
            vx = vy = 0.0
        else:
            # Momentum approximated by the previous displacement.
            vx = momentum * (xs[step] - xs[step - 1])
            vy = momentum * (ys[step] - ys[step - 1])
        xs[step + 1] = xs[step] - learning_rate * gx + vx
        ys[step + 1] = ys[step] - learning_rate * gy + vy
        zs[step + 1] = numexpr.evaluate(function, local_dict={'x': xs[step + 1], 'y': ys[step + 1]})
    return xs, ys, zs
def get_nesterov_trajectory_2d(function, initial_x, initial_y, learning_rate, momentum, num_steps):
    """Nesterov accelerated gradient on a 2-D function given as a string in x, y.

    The gradient is evaluated at the look-ahead point (current iterate plus
    the momentum term), and the momentum term is also part of the position
    update itself — the defining property of Nesterov's method.

    Returns (trajectory_x, trajectory_y, trajectory_z): arrays of length
    num_steps + 1 with the iterates and the function values.
    """
    grad_x, grad_y = get_gradient_2d(function)
    trajectory_x = np.zeros(num_steps + 1)
    trajectory_y = np.zeros(num_steps + 1)
    trajectory_z = np.zeros(num_steps + 1)
    trajectory_x[0] = initial_x
    trajectory_y[0] = initial_y
    trajectory_z[0] = numexpr.evaluate(function, local_dict={'x': initial_x, 'y': initial_y})
    for i in range(num_steps):
        if i == 0:
            momentum_x = 0
            momentum_y = 0
        else:
            # Velocity approximated by the previous displacement.
            momentum_x = momentum * (trajectory_x[i] - trajectory_x[i - 1])
            momentum_y = momentum * (trajectory_y[i] - trajectory_y[i - 1])
        # Look-ahead point at which the gradient is evaluated.
        x = trajectory_x[i] + momentum_x
        y = trajectory_y[i] + momentum_y
        grad_x_val = float(grad_x.evalf(subs={'x': x, 'y': y}))
        grad_y_val = float(grad_y.evalf(subs={'x': x, 'y': y}))
        # BUG FIX: the momentum term must be applied to the step as well, not
        # only to the gradient look-ahead; previously it was dropped here,
        # reducing the method to plain gradient descent at a shifted point.
        trajectory_x[i + 1] = trajectory_x[i] + momentum_x - learning_rate * grad_x_val
        trajectory_y[i + 1] = trajectory_y[i] + momentum_y - learning_rate * grad_y_val
        trajectory_z[i + 1] = numexpr.evaluate(function, local_dict={'x': trajectory_x[i + 1], 'y': trajectory_y[i + 1]})
    return trajectory_x, trajectory_y, trajectory_z
def get_adam_trajectory_2d(function, initial_x, initial_y, learning_rate, rho1, rho2, epsilon, num_steps):
    """Adam optimiser on a 2-D function given as a string in x, y.

    rho1/rho2 are the first/second-moment decay rates (beta1/beta2 in the
    Adam paper); *epsilon* is the numerical-stability constant. BUG FIX: the
    caller-supplied *epsilon* was previously overwritten with a hard-coded
    1e-8 inside the function, making the parameter dead.

    Returns (trajectory_x, trajectory_y, trajectory_z): arrays of length
    num_steps + 1 with the iterates and the function values.
    """
    grad_x, grad_y = get_gradient_2d(function)
    trajectory_x = np.zeros(num_steps + 1)
    trajectory_y = np.zeros(num_steps + 1)
    trajectory_z = np.zeros(num_steps + 1)
    trajectory_x[0] = initial_x
    trajectory_y[0] = initial_y
    trajectory_z[0] = numexpr.evaluate(function, local_dict={'x': initial_x, 'y': initial_y})
    m_x, m_y = 0, 0  # first-moment (mean) estimates
    v_x, v_y = 0, 0  # second-moment (uncentred variance) estimates
    for i in range(num_steps):
        grad_x_val = float(grad_x.evalf(subs={'x': trajectory_x[i], 'y': trajectory_y[i]}))
        grad_y_val = float(grad_y.evalf(subs={'x': trajectory_x[i], 'y': trajectory_y[i]}))
        m_x = rho1 * m_x + (1 - rho1) * grad_x_val
        m_y = rho1 * m_y + (1 - rho1) * grad_y_val
        v_x = rho2 * v_x + (1 - rho2) * (grad_x_val ** 2)
        v_y = rho2 * v_y + (1 - rho2) * (grad_y_val ** 2)
        # Bias correction for the zero-initialised moment estimates.
        m_hat_x = m_x / (1 - rho1 ** (i + 1))
        m_hat_y = m_y / (1 - rho1 ** (i + 1))
        v_hat_x = v_x / (1 - rho2 ** (i + 1))
        v_hat_y = v_y / (1 - rho2 ** (i + 1))
        trajectory_x[i + 1] = trajectory_x[i] - learning_rate * m_hat_x / np.sqrt(v_hat_x + epsilon)
        trajectory_y[i + 1] = trajectory_y[i] - learning_rate * m_hat_y / np.sqrt(v_hat_y + epsilon)
        trajectory_z[i + 1] = numexpr.evaluate(function, local_dict={'x': trajectory_x[i + 1], 'y': trajectory_y[i + 1]})
    return trajectory_x, trajectory_y, trajectory_z
def get_newton_trajectory_2d(function, initial_x, initial_y, num_steps):
    """Newton's method on a 2-D function given as a string in x, y.

    Returns (trajectory_x, trajectory_y, trajectory_z): arrays of length
    num_steps + 1 with the iterates and the function values. If the Hessian
    becomes singular, iteration stops and the remaining entries are held at
    the last valid point (previously they were left as zeros, which
    misrepresented the trajectory when plotted).
    """
    grad_x, grad_y = get_gradient_2d(function)
    hess_xx, hess_xy, hess_yx, hess_yy = get_hessian_2d(function)
    trajectory_x = np.zeros(num_steps + 1)
    trajectory_y = np.zeros(num_steps + 1)
    trajectory_z = np.zeros(num_steps + 1)
    trajectory_x[0] = initial_x
    trajectory_y[0] = initial_y
    trajectory_z[0] = numexpr.evaluate(function, local_dict={'x': initial_x, 'y': initial_y})
    for i in range(num_steps):
        point = {'x': trajectory_x[i], 'y': trajectory_y[i]}
        gradient_vector = np.array([
            float(grad_x.evalf(subs=point)),
            float(grad_y.evalf(subs=point)),
        ])
        hessian_matrix = np.array([
            [float(hess_xx.evalf(subs=point)), float(hess_xy.evalf(subs=point))],
            [float(hess_yx.evalf(subs=point)), float(hess_yy.evalf(subs=point))],
        ])
        try:
            # Solve H @ step = grad directly; cheaper and better conditioned
            # than forming the explicit inverse.
            step = np.linalg.solve(hessian_matrix, gradient_vector)
        except np.linalg.LinAlgError:
            # Singular Hessian: stop, holding the last iterate so the tail of
            # the arrays is not left as misleading zeros.
            trajectory_x[i + 1:] = trajectory_x[i]
            trajectory_y[i + 1:] = trajectory_y[i]
            trajectory_z[i + 1:] = trajectory_z[i]
            break
        trajectory_x[i + 1] = trajectory_x[i] - step[0]
        trajectory_y[i + 1] = trajectory_y[i] - step[1]
        trajectory_z[i + 1] = numexpr.evaluate(function, local_dict={'x': trajectory_x[i + 1], 'y': trajectory_y[i + 1]})
    return trajectory_x, trajectory_y, trajectory_z
def get_adagrad_trajectory_2d(function, initial_x, initial_y, learning_rate, epsilon, num_steps):
    """AdaGrad on a 2-D function given as a string in x, y.

    The step size along each axis is scaled by the inverse root of the
    accumulated squared gradients; *epsilon* guards the division.

    Returns (xs, ys, zs): arrays of length num_steps + 1 with the iterates
    and the function values.
    """
    dfdx, dfdy = get_gradient_2d(function)
    xs = np.zeros(num_steps + 1)
    ys = np.zeros(num_steps + 1)
    zs = np.zeros(num_steps + 1)
    xs[0], ys[0] = initial_x, initial_y
    zs[0] = numexpr.evaluate(function, local_dict={'x': initial_x, 'y': initial_y})
    accum_x = 0.0
    accum_y = 0.0
    for step in range(num_steps):
        point = {'x': xs[step], 'y': ys[step]}
        gx = float(dfdx.evalf(subs=point))
        gy = float(dfdy.evalf(subs=point))
        accum_x += gx ** 2
        accum_y += gy ** 2
        xs[step + 1] = xs[step] - learning_rate / np.sqrt(accum_x + epsilon) * gx
        ys[step + 1] = ys[step] - learning_rate / np.sqrt(accum_y + epsilon) * gy
        zs[step + 1] = numexpr.evaluate(function, local_dict={'x': xs[step + 1], 'y': ys[step + 1]})
    return xs, ys, zs
def get_rmsprop_trajectory_2d(function, initial_x, initial_y, learning_rate, rho, epsilon, num_steps):
    """RMSProp on a 2-D function given as a string in x, y.

    Like AdaGrad, but the squared-gradient accumulator is an exponential
    moving average with decay *rho*; *epsilon* guards the division.

    Returns (xs, ys, zs): arrays of length num_steps + 1 with the iterates
    and the function values.
    """
    dfdx, dfdy = get_gradient_2d(function)
    xs = np.zeros(num_steps + 1)
    ys = np.zeros(num_steps + 1)
    zs = np.zeros(num_steps + 1)
    xs[0], ys[0] = initial_x, initial_y
    zs[0] = numexpr.evaluate(function, local_dict={'x': initial_x, 'y': initial_y})
    mean_sq_x = 0.0
    mean_sq_y = 0.0
    for step in range(num_steps):
        point = {'x': xs[step], 'y': ys[step]}
        gx = float(dfdx.evalf(subs=point))
        gy = float(dfdy.evalf(subs=point))
        mean_sq_x = rho * mean_sq_x + (1 - rho) * (gx ** 2)
        mean_sq_y = rho * mean_sq_y + (1 - rho) * (gy ** 2)
        xs[step + 1] = xs[step] - learning_rate / np.sqrt(mean_sq_x + epsilon) * gx
        ys[step + 1] = ys[step] - learning_rate / np.sqrt(mean_sq_y + epsilon) * gy
        zs[step + 1] = numexpr.evaluate(function, local_dict={'x': xs[step + 1], 'y': ys[step + 1]})
    return xs, ys, zs
def get_adadelta_trajectory_2d(function, initial_x, initial_y, learning_rate, rho, epsilon, num_steps):
    """AdaDelta on a 2-D function given as a string in x, y.

    Maintains exponential moving averages of squared gradients and of squared
    parameter updates; their ratio rescales each step. A *learning_rate*
    factor is additionally applied to the computed delta.

    Returns (xs, ys, zs): arrays of length num_steps + 1 with the iterates
    and the function values.
    """
    dfdx, dfdy = get_gradient_2d(function)
    xs = np.zeros(num_steps + 1)
    ys = np.zeros(num_steps + 1)
    zs = np.zeros(num_steps + 1)
    xs[0], ys[0] = initial_x, initial_y
    zs[0] = numexpr.evaluate(function, local_dict={'x': initial_x, 'y': initial_y})
    grad_sq_x = grad_sq_y = 0.0  # EMA of squared gradients
    delta_sq_x = delta_sq_y = 0.0  # EMA of squared updates
    for step in range(num_steps):
        point = {'x': xs[step], 'y': ys[step]}
        gx = float(dfdx.evalf(subs=point))
        gy = float(dfdy.evalf(subs=point))
        grad_sq_x = rho * grad_sq_x + (1 - rho) * (gx ** 2)
        grad_sq_y = rho * grad_sq_y + (1 - rho) * (gy ** 2)
        delta_x = np.sqrt(delta_sq_x + epsilon) / np.sqrt(grad_sq_x + epsilon) * gx
        delta_y = np.sqrt(delta_sq_y + epsilon) / np.sqrt(grad_sq_y + epsilon) * gy
        # Accumulate the squared updates AFTER computing the step, per the
        # AdaDelta update ordering.
        delta_sq_x = rho * delta_sq_x + (1 - rho) * delta_x ** 2
        delta_sq_y = rho * delta_sq_y + (1 - rho) * delta_y ** 2
        xs[step + 1] = xs[step] - learning_rate * delta_x
        ys[step + 1] = ys[step] - learning_rate * delta_y
        zs[step + 1] = numexpr.evaluate(function, local_dict={'x': xs[step + 1], 'y': ys[step + 1]})
    return xs, ys, zs