import numpy as np
from sympy import Expr, lambdify


def _gradient_values(fx, fy, x: float, y: float) -> list:
    """Evaluate the gradient [df/dx, df/dy] at (x, y) as plain floats."""
    return [float(fx(x, y)), float(fy(x, y))]


def _hessian_values(fxx, fxy, fyy, x: float, y: float) -> list:
    """Evaluate the (symmetric) 2x2 Hessian at (x, y) as nested lists of floats."""
    return [
        [float(fxx(x, y)), float(fxy(x, y))],
        [float(fxy(x, y)), float(fyy(x, y))],
    ]


def _univariate_callables(function: Expr):
    """Lambdify a univariate expression in x.

    Returns (f, f', f'') as numpy-backed callables.
    """
    f = lambdify('x', function, modules=['numpy'])
    f_prime = lambdify('x', function.diff('x'), modules=['numpy'])
    f_prime_prime = lambdify('x', function.diff('x', 2), modules=['numpy'])
    return f, f_prime, f_prime_prime


def _bivariate_callables(function: Expr):
    """Lambdify a bivariate expression in x and y.

    Returns (f, fx, fy, fxx, fyy, fxy) as numpy-backed callables.
    """
    args = ('x', 'y')
    f = lambdify(args, function, modules=['numpy'])
    fx = lambdify(args, function.diff('x'), modules=['numpy'])
    fy = lambdify(args, function.diff('y'), modules=['numpy'])
    fxx = lambdify(args, function.diff('x', 2), modules=['numpy'])
    fyy = lambdify(args, function.diff('y', 2), modules=['numpy'])
    fxy = lambdify(args, function.diff('x', 'y'), modules=['numpy'])
    return f, fx, fy, fxx, fyy, fxy


def gd_univariate(
    function: Expr,
    x0: float,
    learning_rate: float,
    momentum: float,
    steps: int,
) -> dict:
    """Perform gradient descent with heavy-ball momentum on a univariate function.

    Assumes `function` is a valid sympy expression in terms of x.
    The momentum term is `momentum * (x_t - x_{t-1})` (zero on the first step).

    Returns a dict with keys "x", "y", "derivative", "secondDerivative",
    each a list of length `steps` tracing the iterates (including x0).
    """
    f, f_prime, f_prime_prime = _univariate_callables(function)

    x_values = [x0]
    y_values = [f(x0)]
    derivative_values = [f_prime(x0)]
    second_derivative_values = [f_prime_prime(x0)]

    x = x0
    for i in range(steps - 1):
        # No previous displacement on the first iteration.
        if i == 0:
            m = 0
        else:
            m = momentum * (x_values[-1] - x_values[-2])
        x = x - learning_rate * f_prime(x) + m
        x_values.append(x)
        y_values.append(f(x))
        derivative_values.append(f_prime(x))
        second_derivative_values.append(f_prime_prime(x))

    return {
        "x": x_values,
        "y": y_values,
        "derivative": derivative_values,
        "secondDerivative": second_derivative_values,
    }


def gd_bivariate(
    function: Expr,
    x0: float,
    y0: float,
    learning_rate: float,
    momentum: float,
    steps: int,
) -> dict:
    """Perform gradient descent with heavy-ball momentum on a bivariate function.

    Assumes `function` is a valid sympy expression in terms of x and y.

    Returns a dict with keys "x", "y", "z", "gradient", "hessian",
    each a list of length `steps` tracing the iterates (including the start).
    """
    f, fx, fy, fxx, fyy, fxy = _bivariate_callables(function)

    x_values = [x0]
    y_values = [y0]
    z_values = [f(x0, y0)]
    gradient_values = [_gradient_values(fx, fy, x0, y0)]
    hessian_values = [_hessian_values(fxx, fxy, fyy, x0, y0)]

    x = x0
    y = y0
    for i in range(steps - 1):
        if i == 0:
            mx = 0
            my = 0
        else:
            mx = momentum * (x_values[-1] - x_values[-2])
            my = momentum * (y_values[-1] - y_values[-2])
        # Evaluate both partials at the current point BEFORE moving, so x and
        # y are updated simultaneously (previously fy saw the already-updated x).
        gx = fx(x, y)
        gy = fy(x, y)
        x = x - learning_rate * gx + mx
        y = y - learning_rate * gy + my
        x_values.append(x)
        y_values.append(y)
        z_values.append(f(x, y))
        gradient_values.append(_gradient_values(fx, fy, x, y))
        hessian_values.append(_hessian_values(fxx, fxy, fyy, x, y))

    return {
        "x": x_values,
        "y": y_values,
        "z": z_values,
        "gradient": gradient_values,
        "hessian": hessian_values,
    }


def nesterov_univariate(
    function: Expr,
    x0: float,
    learning_rate: float,
    momentum: float,
    steps: int,
) -> dict:
    """Perform Nesterov accelerated gradient descent on a univariate function.

    The gradient is evaluated at the momentum look-ahead point `x + m`
    rather than at the current iterate.

    Returns a dict with keys "x", "y", "derivative", "secondDerivative".
    """
    f, f_prime, f_prime_prime = _univariate_callables(function)

    x_values = [x0]
    y_values = [f(x0)]
    derivative_values = [f_prime(x0)]
    second_derivative_values = [f_prime_prime(x0)]

    x = x0
    for i in range(steps - 1):
        if i == 0:
            m = 0
        else:
            m = momentum * (x_values[-1] - x_values[-2])
        x_lookahead = x + m
        x = x_lookahead - learning_rate * f_prime(x_lookahead)
        x_values.append(x)
        y_values.append(f(x))
        derivative_values.append(f_prime(x))
        second_derivative_values.append(f_prime_prime(x))

    return {
        "x": x_values,
        "y": y_values,
        "derivative": derivative_values,
        "secondDerivative": second_derivative_values,
    }


def nesterov_bivariate(
    function: Expr,
    x0: float,
    y0: float,
    learning_rate: float,
    momentum: float,
    steps: int,
) -> dict:
    """Perform Nesterov accelerated gradient descent on a bivariate function.

    Both partial derivatives are evaluated at the look-ahead point
    (x + mx, y + my) before either coordinate is updated.

    Returns a dict with keys "x", "y", "z", "gradient", "hessian".
    """
    f, fx, fy, fxx, fyy, fxy = _bivariate_callables(function)

    x_values = [x0]
    y_values = [y0]
    z_values = [f(x0, y0)]
    gradient_values = [_gradient_values(fx, fy, x0, y0)]
    hessian_values = [_hessian_values(fxx, fxy, fyy, x0, y0)]

    x = x0
    y = y0
    for i in range(steps - 1):
        if i == 0:
            mx = 0
            my = 0
        else:
            mx = momentum * (x_values[-1] - x_values[-2])
            my = momentum * (y_values[-1] - y_values[-2])
        x_lookahead = x + mx
        y_lookahead = y + my
        x = x_lookahead - learning_rate * fx(x_lookahead, y_lookahead)
        y = y_lookahead - learning_rate * fy(x_lookahead, y_lookahead)
        x_values.append(x)
        y_values.append(y)
        z_values.append(f(x, y))
        gradient_values.append(_gradient_values(fx, fy, x, y))
        hessian_values.append(_hessian_values(fxx, fxy, fyy, x, y))

    return {
        "x": x_values,
        "y": y_values,
        "z": z_values,
        "gradient": gradient_values,
        "hessian": hessian_values,
    }


def newton_univariate(
    function: Expr,
    x0: float,
    steps: int,
) -> dict:
    """Perform Newton's method on a univariate function.

    Update rule: x <- x - f'(x) / f''(x). Stops early if the second
    derivative vanishes (step undefined), mirroring the singular-Hessian
    handling in `newton_bivariate`.

    Returns a dict with keys "x", "y", "derivative", "secondDerivative".
    """
    f, f_prime, f_prime_prime = _univariate_callables(function)

    x_values = [x0]
    y_values = [f(x0)]
    derivative_values = [f_prime(x0)]
    second_derivative_values = [f_prime_prime(x0)]

    x = x0
    for i in range(steps - 1):
        curvature = f_prime_prime(x)
        if curvature == 0:
            # Zero second derivative - Newton step undefined, cannot proceed.
            break
        x = x - f_prime(x) / curvature
        x_values.append(x)
        y_values.append(f(x))
        derivative_values.append(f_prime(x))
        second_derivative_values.append(f_prime_prime(x))

    return {
        "x": x_values,
        "y": y_values,
        "derivative": derivative_values,
        "secondDerivative": second_derivative_values,
    }


def newton_bivariate(
    function: Expr,
    x0: float,
    y0: float,
    steps: int,
) -> dict:
    """Perform Newton's method on a bivariate function.

    Solves H * delta = grad each step instead of explicitly inverting the
    Hessian; stops early if the Hessian is singular.

    Returns a dict with keys "x", "y", "z", "gradient", "hessian".
    """
    f, fx, fy, fxx, fyy, fxy = _bivariate_callables(function)

    x_values = [x0]
    y_values = [y0]
    z_values = [f(x0, y0)]
    gradient_values = [_gradient_values(fx, fy, x0, y0)]
    hessian_values = [_hessian_values(fxx, fxy, fyy, x0, y0)]

    x = x0
    y = y0
    for i in range(steps - 1):
        hessian = np.array(
            [
                [fxx(x, y), fxy(x, y)],
                [fxy(x, y), fyy(x, y)],
            ],
        )
        grad = np.array([fx(x, y), fy(x, y)])
        try:
            # delta = hessian^-1 * grad
            delta = np.linalg.solve(hessian, grad)
        except np.linalg.LinAlgError:
            # singular hessian - cannot proceed
            break
        x = x - delta[0]
        y = y - delta[1]
        x_values.append(x)
        y_values.append(y)
        z_values.append(f(x, y))
        gradient_values.append(_gradient_values(fx, fy, x, y))
        hessian_values.append(_hessian_values(fxx, fxy, fyy, x, y))

    return {
        "x": x_values,
        "y": y_values,
        "z": z_values,
        "gradient": gradient_values,
        "hessian": hessian_values,
    }


def adagrad_univariate(
    function: Expr,
    x0: float,
    learning_rate: float,
    epsilon: float,
    steps: int,
) -> dict:
    """Perform AdaGrad on a univariate function.

    The step size is scaled by the accumulated sum of squared gradients;
    `epsilon` guards against division by zero.

    Returns a dict with keys "x", "y", "derivative", "secondDerivative".
    """
    f, f_prime, f_prime_prime = _univariate_callables(function)

    x_values = [x0]
    y_values = [f(x0)]
    derivative_values = [f_prime(x0)]
    second_derivative_values = [f_prime_prime(x0)]

    x = x0
    v = 0  # accumulated squared gradients
    for i in range(steps - 1):
        g = f_prime(x)
        v += g ** 2
        x = x - (learning_rate / (np.sqrt(v + epsilon))) * g
        x_values.append(x)
        y_values.append(f(x))
        derivative_values.append(f_prime(x))
        second_derivative_values.append(f_prime_prime(x))

    return {
        "x": x_values,
        "y": y_values,
        "derivative": derivative_values,
        "secondDerivative": second_derivative_values,
    }


def adagrad_bivariate(
    function: Expr,
    x0: float,
    y0: float,
    learning_rate: float,
    epsilon: float,
    steps: int,
) -> dict:
    """Perform AdaGrad on a bivariate function (per-coordinate accumulators).

    Returns a dict with keys "x", "y", "z", "gradient", "hessian".
    """
    f, fx, fy, fxx, fyy, fxy = _bivariate_callables(function)

    x_values = [x0]
    y_values = [y0]
    z_values = [f(x0, y0)]
    gradient_values = [_gradient_values(fx, fy, x0, y0)]
    hessian_values = [_hessian_values(fxx, fxy, fyy, x0, y0)]

    x = x0
    y = y0
    # accumulated squared gradients
    vx = 0
    vy = 0
    for i in range(steps - 1):
        gx = fx(x, y)
        gy = fy(x, y)
        vx += gx ** 2
        vy += gy ** 2
        x = x - (learning_rate / (np.sqrt(vx + epsilon))) * gx
        y = y - (learning_rate / (np.sqrt(vy + epsilon))) * gy
        x_values.append(x)
        y_values.append(y)
        z_values.append(f(x, y))
        gradient_values.append(_gradient_values(fx, fy, x, y))
        hessian_values.append(_hessian_values(fxx, fxy, fyy, x, y))

    return {
        "x": x_values,
        "y": y_values,
        "z": z_values,
        "gradient": gradient_values,
        "hessian": hessian_values,
    }


def rmsprop_univariate(
    function: Expr,
    x0: float,
    learning_rate: float,
    beta: float,
    epsilon: float,
    steps: int,
) -> dict:
    """Perform RMSProp on a univariate function.

    `beta` is the decay rate of the exponentially weighted average of
    squared gradients.

    Returns a dict with keys "x", "y", "derivative", "secondDerivative".
    """
    f, f_prime, f_prime_prime = _univariate_callables(function)

    x_values = [x0]
    y_values = [f(x0)]
    derivative_values = [f_prime(x0)]
    second_derivative_values = [f_prime_prime(x0)]

    x = x0
    v = 0  # exponentially weighted average of squared gradients
    for i in range(steps - 1):
        g = f_prime(x)
        v = beta * v + (1 - beta) * g ** 2
        x = x - (learning_rate / (np.sqrt(v + epsilon))) * g
        x_values.append(x)
        y_values.append(f(x))
        derivative_values.append(f_prime(x))
        second_derivative_values.append(f_prime_prime(x))

    return {
        "x": x_values,
        "y": y_values,
        "derivative": derivative_values,
        "secondDerivative": second_derivative_values,
    }


def rmsprop_bivariate(
    function: Expr,
    x0: float,
    y0: float,
    learning_rate: float,
    beta: float,
    epsilon: float,
    steps: int,
) -> dict:
    """Perform RMSProp on a bivariate function (per-coordinate averages).

    Returns a dict with keys "x", "y", "z", "gradient", "hessian".
    """
    f, fx, fy, fxx, fyy, fxy = _bivariate_callables(function)

    x_values = [x0]
    y_values = [y0]
    z_values = [f(x0, y0)]
    gradient_values = [_gradient_values(fx, fy, x0, y0)]
    hessian_values = [_hessian_values(fxx, fxy, fyy, x0, y0)]

    x = x0
    y = y0
    # exponentially weighted average of squared gradients
    vx = 0
    vy = 0
    for i in range(steps - 1):
        gx = fx(x, y)
        gy = fy(x, y)
        vx = beta * vx + (1 - beta) * gx ** 2
        vy = beta * vy + (1 - beta) * gy ** 2
        x = x - (learning_rate / (np.sqrt(vx + epsilon))) * gx
        y = y - (learning_rate / (np.sqrt(vy + epsilon))) * gy
        x_values.append(x)
        y_values.append(y)
        z_values.append(f(x, y))
        gradient_values.append(_gradient_values(fx, fy, x, y))
        hessian_values.append(_hessian_values(fxx, fxy, fyy, x, y))

    return {
        "x": x_values,
        "y": y_values,
        "z": z_values,
        "gradient": gradient_values,
        "hessian": hessian_values,
    }


def adadelta_univariate(
    function: Expr,
    x0: float,
    beta: float,
    epsilon: float,
    steps: int,
) -> dict:
    """Perform AdaDelta on a univariate function.

    No explicit learning rate: the step is scaled by the ratio of the RMS
    of past updates to the RMS of past gradients.

    Returns a dict with keys "x", "y", "derivative", "secondDerivative".
    """
    f, f_prime, f_prime_prime = _univariate_callables(function)

    x_values = [x0]
    y_values = [f(x0)]
    derivative_values = [f_prime(x0)]
    second_derivative_values = [f_prime_prime(x0)]

    x = x0
    v = 0  # exponentially weighted average of squared gradients
    s = 0  # exponentially weighted average of squared updates
    for i in range(steps - 1):
        g = f_prime(x)
        v = beta * v + (1 - beta) * g ** 2
        delta_x = - (np.sqrt(s + epsilon) / np.sqrt(v + epsilon)) * g
        x = x + delta_x
        # Update the squared-update average AFTER applying the step.
        s = beta * s + (1 - beta) * delta_x ** 2
        x_values.append(x)
        y_values.append(f(x))
        derivative_values.append(f_prime(x))
        second_derivative_values.append(f_prime_prime(x))

    return {
        "x": x_values,
        "y": y_values,
        "derivative": derivative_values,
        "secondDerivative": second_derivative_values,
    }


def adadelta_bivariate(
    function: Expr,
    x0: float,
    y0: float,
    beta: float,
    epsilon: float,
    steps: int,
) -> dict:
    """Perform AdaDelta on a bivariate function (per-coordinate averages).

    Returns a dict with keys "x", "y", "z", "gradient", "hessian".
    """
    f, fx, fy, fxx, fyy, fxy = _bivariate_callables(function)

    x_values = [x0]
    y_values = [y0]
    z_values = [f(x0, y0)]
    gradient_values = [_gradient_values(fx, fy, x0, y0)]
    hessian_values = [_hessian_values(fxx, fxy, fyy, x0, y0)]

    x = x0
    y = y0
    # exponentially weighted average of squared gradients
    vx = 0
    vy = 0
    # exponentially weighted average of squared updates
    sx = 0
    sy = 0
    for i in range(steps - 1):
        gx = fx(x, y)
        gy = fy(x, y)
        vx = beta * vx + (1 - beta) * gx ** 2
        vy = beta * vy + (1 - beta) * gy ** 2
        delta_x = - (np.sqrt(sx + epsilon) / np.sqrt(vx + epsilon)) * gx
        delta_y = - (np.sqrt(sy + epsilon) / np.sqrt(vy + epsilon)) * gy
        x = x + delta_x
        y = y + delta_y
        sx = beta * sx + (1 - beta) * delta_x ** 2
        sy = beta * sy + (1 - beta) * delta_y ** 2
        x_values.append(x)
        y_values.append(y)
        z_values.append(f(x, y))
        gradient_values.append(_gradient_values(fx, fy, x, y))
        hessian_values.append(_hessian_values(fxx, fxy, fyy, x, y))

    return {
        "x": x_values,
        "y": y_values,
        "z": z_values,
        "gradient": gradient_values,
        "hessian": hessian_values,
    }


def adam_univariate(
    function: Expr,
    x0: float,
    learning_rate: float,
    beta1: float,
    beta2: float,
    epsilon: float,
    steps: int,
) -> dict:
    """Perform Adam on a univariate function.

    `beta1`/`beta2` are the decay rates of the first/second moment
    estimates; both moments are bias-corrected by 1 - beta^t.

    Returns a dict with keys "x", "y", "derivative", "secondDerivative".
    """
    f, f_prime, f_prime_prime = _univariate_callables(function)

    x_values = [x0]
    y_values = [f(x0)]
    derivative_values = [f_prime(x0)]
    second_derivative_values = [f_prime_prime(x0)]

    x = x0
    m = 0  # first moment
    v = 0  # second moment
    for i in range(steps - 1):
        g = f_prime(x)
        m = beta1 * m + (1 - beta1) * g
        v = beta2 * v + (1 - beta2) * g ** 2
        # Bias correction: timestep t = i + 1.
        m_hat = m / (1 - beta1 ** (i + 1))
        v_hat = v / (1 - beta2 ** (i + 1))
        x = x - (learning_rate / (np.sqrt(v_hat) + epsilon)) * m_hat
        x_values.append(x)
        y_values.append(f(x))
        derivative_values.append(f_prime(x))
        second_derivative_values.append(f_prime_prime(x))

    return {
        "x": x_values,
        "y": y_values,
        "derivative": derivative_values,
        "secondDerivative": second_derivative_values,
    }


def adam_bivariate(
    function: Expr,
    x0: float,
    y0: float,
    learning_rate: float,
    beta1: float,
    beta2: float,
    epsilon: float,
    steps: int,
) -> dict:
    """Perform Adam on a bivariate function (per-coordinate moments).

    Returns a dict with keys "x", "y", "z", "gradient", "hessian".
    """
    f, fx, fy, fxx, fyy, fxy = _bivariate_callables(function)

    x_values = [x0]
    y_values = [y0]
    z_values = [f(x0, y0)]
    gradient_values = [_gradient_values(fx, fy, x0, y0)]
    hessian_values = [_hessian_values(fxx, fxy, fyy, x0, y0)]

    x = x0
    y = y0
    # first moments
    mx = 0
    my = 0
    # second moments
    vx = 0
    vy = 0
    for i in range(steps - 1):
        gx = fx(x, y)
        gy = fy(x, y)
        mx = beta1 * mx + (1 - beta1) * gx
        my = beta1 * my + (1 - beta1) * gy
        vx = beta2 * vx + (1 - beta2) * gx ** 2
        vy = beta2 * vy + (1 - beta2) * gy ** 2
        # Bias correction: timestep t = i + 1.
        mx_hat = mx / (1 - beta1 ** (i + 1))
        my_hat = my / (1 - beta1 ** (i + 1))
        vx_hat = vx / (1 - beta2 ** (i + 1))
        vy_hat = vy / (1 - beta2 ** (i + 1))
        x = x - (learning_rate / (np.sqrt(vx_hat) + epsilon)) * mx_hat
        y = y - (learning_rate / (np.sqrt(vy_hat) + epsilon)) * my_hat
        x_values.append(x)
        y_values.append(y)
        z_values.append(f(x, y))
        gradient_values.append(_gradient_values(fx, fy, x, y))
        hessian_values.append(_hessian_values(fxx, fxy, fyy, x, y))

    return {
        "x": x_values,
        "y": y_values,
        "z": z_values,
        "gradient": gradient_values,
        "hessian": hessian_values,
    }