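"""Step-by-step optimizers for SymPy expressions in one and two variables.

Implements gradient descent with momentum, Nesterov accelerated gradient
descent, Newton's method, AdaGrad, RMSProp, AdaDelta, and Adam. Each routine
lambdifies the expression and its derivatives, iterates the update rule, and
returns the full trajectory (including the starting point, `steps` points in
total) together with derivative or gradient/Hessian information.
"""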
import numpy as np
from sympy import Expr, lambdify


def _gradient_values(fx, fy, x: float, y: float) -> list:
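    """Evaluate the gradient [df/dx, df/dy] at (x, y) as plain floats."""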
    return [float(fx(x, y)), float(fy(x, y))]


def _hessian_values(fxx, fxy, fyy, x: float, y: float) -> list:
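    """Evaluate the symmetric Hessian [[fxx, fxy], [fxy, fyy]] at (x, y)."""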
    return [
        [float(fxx(x, y)), float(fxy(x, y))],
        [float(fxy(x, y)), float(fyy(x, y))],
    ]


def gd_univariate(
    function: Expr,
    x0: float,
    learning_rate: float,
    momentum: float,
    steps: int,
) -> dict:
    """
    Perform gradient descent with momentum on a univariate function.
    Assumes function is valid and in terms of x.
    """
    f = lambdify('x', function, modules=['numpy'])
    f_prime = lambdify('x', function.diff('x'), modules=['numpy'])
    f_prime_prime = lambdify('x', function.diff('x', 2), modules=['numpy'])
    x_values = [x0]
    y_values = [f(x0)]
    derivative_values = [f_prime(x0)]
    second_derivative_values = [f_prime_prime(x0)]
    x = x0
    for i in range(steps - 1):
        if i == 0:
            m = 0
        else:
            m = momentum * (x_values[-1] - x_values[-2])
        x = x - learning_rate * f_prime(x) + m
        x_values.append(x)
        y_values.append(f(x))
        derivative_values.append(f_prime(x))
        second_derivative_values.append(f_prime_prime(x))
    return {
        "x": x_values,
        "y": y_values,
        "derivative": derivative_values,
        "secondDerivative": second_derivative_values,
    }


def gd_bivariate(
    function: Expr,
    x0: float,
    y0: float,
    learning_rate: float,
    momentum: float,
    steps: int,
) -> dict:
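    """
    Perform gradient descent with momentum on a bivariate function.
    Assumes function is valid and in terms of x and y.
    """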
    f = lambdify(('x', 'y'), function, modules=['numpy'])
    fx = lambdify(('x', 'y'), function.diff('x'), modules=['numpy'])
    fy = lambdify(('x', 'y'), function.diff('y'), modules=['numpy'])
    fxx = lambdify(('x', 'y'), function.diff('x', 2), modules=['numpy'])
    fyy = lambdify(('x', 'y'), function.diff('y', 2), modules=['numpy'])
    fxy = lambdify(('x', 'y'), function.diff('x', 'y'), modules=['numpy'])
    x_values = [x0]
    y_values = [y0]
    z_values = [f(x0, y0)]
    gradient_values = [_gradient_values(fx, fy, x0, y0)]
    hessian_values = [_hessian_values(fxx, fxy, fyy, x0, y0)]
    x = x0
    y = y0
    for i in range(steps - 1):
        if i == 0:
            mx = 0
            my = 0
        else:
            mx = momentum * (x_values[-1] - x_values[-2])
            my = momentum * (y_values[-1] - y_values[-2])
        # Evaluate both partials at the current point before updating,
        # so the y-update does not see the already-updated x.
        gx = fx(x, y)
        gy = fy(x, y)
        x = x - learning_rate * gx + mx
        y = y - learning_rate * gy + my
        x_values.append(x)
        y_values.append(y)
        z_values.append(f(x, y))
        gradient_values.append(_gradient_values(fx, fy, x, y))
        hessian_values.append(_hessian_values(fxx, fxy, fyy, x, y))
    return {
        "x": x_values,
        "y": y_values,
        "z": z_values,
        "gradient": gradient_values,
        "hessian": hessian_values,
    }


def nesterov_univariate(
    function: Expr,
    x0: float,
    learning_rate: float,
    momentum: float,
    steps: int,
) -> dict:
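    """
    Perform Nesterov accelerated gradient descent on a univariate function.
    Assumes function is valid and in terms of x.
    """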
    f = lambdify('x', function, modules=['numpy'])
    f_prime = lambdify('x', function.diff('x'), modules=['numpy'])
    f_prime_prime = lambdify('x', function.diff('x', 2), modules=['numpy'])
    x_values = [x0]
    y_values = [f(x0)]
    derivative_values = [f_prime(x0)]
    second_derivative_values = [f_prime_prime(x0)]
    x = x0
    for i in range(steps - 1):
        if i == 0:
            m = 0
        else:
            m = momentum * (x_values[-1] - x_values[-2])
        # Nesterov: evaluate the derivative at the look-ahead point.
        x_lookahead = x + m
        x = x_lookahead - learning_rate * f_prime(x_lookahead)
        x_values.append(x)
        y_values.append(f(x))
        derivative_values.append(f_prime(x))
        second_derivative_values.append(f_prime_prime(x))
    return {
        "x": x_values,
        "y": y_values,
        "derivative": derivative_values,
        "secondDerivative": second_derivative_values,
    }


def nesterov_bivariate(
    function: Expr,
    x0: float,
    y0: float,
    learning_rate: float,
    momentum: float,
    steps: int,
) -> dict:
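    """
    Perform Nesterov accelerated gradient descent on a bivariate function.
    Assumes function is valid and in terms of x and y.
    """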
    f = lambdify(('x', 'y'), function, modules=['numpy'])
    fx = lambdify(('x', 'y'), function.diff('x'), modules=['numpy'])
    fy = lambdify(('x', 'y'), function.diff('y'), modules=['numpy'])
    fxx = lambdify(('x', 'y'), function.diff('x', 2), modules=['numpy'])
    fyy = lambdify(('x', 'y'), function.diff('y', 2), modules=['numpy'])
    fxy = lambdify(('x', 'y'), function.diff('x', 'y'), modules=['numpy'])
    x_values = [x0]
    y_values = [y0]
    z_values = [f(x0, y0)]
    gradient_values = [_gradient_values(fx, fy, x0, y0)]
    hessian_values = [_hessian_values(fxx, fxy, fyy, x0, y0)]
    x = x0
    y = y0
    for i in range(steps - 1):
        if i == 0:
            mx = 0
            my = 0
        else:
            mx = momentum * (x_values[-1] - x_values[-2])
            my = momentum * (y_values[-1] - y_values[-2])
        # Nesterov: evaluate both partials at the look-ahead point.
        x_lookahead = x + mx
        y_lookahead = y + my
        x = x_lookahead - learning_rate * fx(x_lookahead, y_lookahead)
        y = y_lookahead - learning_rate * fy(x_lookahead, y_lookahead)
        x_values.append(x)
        y_values.append(y)
        z_values.append(f(x, y))
        gradient_values.append(_gradient_values(fx, fy, x, y))
        hessian_values.append(_hessian_values(fxx, fxy, fyy, x, y))
    return {
        "x": x_values,
        "y": y_values,
        "z": z_values,
        "gradient": gradient_values,
        "hessian": hessian_values,
    }


def newton_univariate(
    function: Expr,
    x0: float,
    steps: int,
) -> dict:
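    """
    Perform Newton's method on a univariate function.
    Assumes function is valid and in terms of x.
    """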
    f = lambdify('x', function, modules=['numpy'])
    f_prime = lambdify('x', function.diff('x'), modules=['numpy'])
    f_prime_prime = lambdify('x', function.diff('x', 2), modules=['numpy'])
    x_values = [x0]
    y_values = [f(x0)]
    derivative_values = [f_prime(x0)]
    second_derivative_values = [f_prime_prime(x0)]
    x = x0
    for i in range(steps - 1):
        curvature = f_prime_prime(x)
        if curvature == 0:
            # Vanishing second derivative - cannot proceed.
            break
        x = x - f_prime(x) / curvature
        x_values.append(x)
        y_values.append(f(x))
        derivative_values.append(f_prime(x))
        second_derivative_values.append(f_prime_prime(x))
    return {
        "x": x_values,
        "y": y_values,
        "derivative": derivative_values,
        "secondDerivative": second_derivative_values,
    }


def newton_bivariate(
    function: Expr,
    x0: float,
    y0: float,
    steps: int,
) -> dict:
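    """
    Perform Newton's method on a bivariate function.
    Assumes function is valid and in terms of x and y.
    """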
    f = lambdify(('x', 'y'), function, modules=['numpy'])
    fx = lambdify(('x', 'y'), function.diff('x'), modules=['numpy'])
    fy = lambdify(('x', 'y'), function.diff('y'), modules=['numpy'])
    fxx = lambdify(('x', 'y'), function.diff('x', 2), modules=['numpy'])
    fyy = lambdify(('x', 'y'), function.diff('y', 2), modules=['numpy'])
    fxy = lambdify(('x', 'y'), function.diff('x', 'y'), modules=['numpy'])
    x_values = [x0]
    y_values = [y0]
    z_values = [f(x0, y0)]
    gradient_values = [_gradient_values(fx, fy, x0, y0)]
    hessian_values = [_hessian_values(fxx, fxy, fyy, x0, y0)]
    x = x0
    y = y0
    for i in range(steps - 1):
        hessian = np.array(
            [
                [fxx(x, y), fxy(x, y)],
                [fxy(x, y), fyy(x, y)],
            ],
        )
        grad = np.array([fx(x, y), fy(x, y)])
        try:
            # delta = hessian^-1 * grad
            delta = np.linalg.solve(hessian, grad)
        except np.linalg.LinAlgError:
            # Singular Hessian - cannot proceed.
            break
        x = x - delta[0]
        y = y - delta[1]
        x_values.append(x)
        y_values.append(y)
        z_values.append(f(x, y))
        gradient_values.append(_gradient_values(fx, fy, x, y))
        hessian_values.append(_hessian_values(fxx, fxy, fyy, x, y))
    return {
        "x": x_values,
        "y": y_values,
        "z": z_values,
        "gradient": gradient_values,
        "hessian": hessian_values,
    }


def adagrad_univariate(
    function: Expr,
    x0: float,
    learning_rate: float,
    epsilon: float,
    steps: int,
) -> dict:
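    """
    Perform AdaGrad gradient descent on a univariate function.
    Assumes function is valid and in terms of x.
    """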
    f = lambdify('x', function, modules=['numpy'])
    f_prime = lambdify('x', function.diff('x'), modules=['numpy'])
    f_prime_prime = lambdify('x', function.diff('x', 2), modules=['numpy'])
    x_values = [x0]
    y_values = [f(x0)]
    derivative_values = [f_prime(x0)]
    second_derivative_values = [f_prime_prime(x0)]
    x = x0
    v = 0  # accumulated squared gradients
    for i in range(steps - 1):
        g = f_prime(x)
        v += g ** 2
        x = x - (learning_rate / np.sqrt(v + epsilon)) * g
        x_values.append(x)
        y_values.append(f(x))
        derivative_values.append(f_prime(x))
        second_derivative_values.append(f_prime_prime(x))
    return {
        "x": x_values,
        "y": y_values,
        "derivative": derivative_values,
        "secondDerivative": second_derivative_values,
    }


def adagrad_bivariate(
    function: Expr,
    x0: float,
    y0: float,
    learning_rate: float,
    epsilon: float,
    steps: int,
) -> dict:
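    """
    Perform AdaGrad gradient descent on a bivariate function.
    Assumes function is valid and in terms of x and y.
    """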
    f = lambdify(('x', 'y'), function, modules=['numpy'])
    fx = lambdify(('x', 'y'), function.diff('x'), modules=['numpy'])
    fy = lambdify(('x', 'y'), function.diff('y'), modules=['numpy'])
    fxx = lambdify(('x', 'y'), function.diff('x', 2), modules=['numpy'])
    fyy = lambdify(('x', 'y'), function.diff('y', 2), modules=['numpy'])
    fxy = lambdify(('x', 'y'), function.diff('x', 'y'), modules=['numpy'])
    x_values = [x0]
    y_values = [y0]
    z_values = [f(x0, y0)]
    gradient_values = [_gradient_values(fx, fy, x0, y0)]
    hessian_values = [_hessian_values(fxx, fxy, fyy, x0, y0)]
    x = x0
    y = y0
    # accumulated squared gradients
    vx = 0
    vy = 0
    for i in range(steps - 1):
        gx = fx(x, y)
        gy = fy(x, y)
        vx += gx ** 2
        vy += gy ** 2
        x = x - (learning_rate / np.sqrt(vx + epsilon)) * gx
        y = y - (learning_rate / np.sqrt(vy + epsilon)) * gy
        x_values.append(x)
        y_values.append(y)
        z_values.append(f(x, y))
        gradient_values.append(_gradient_values(fx, fy, x, y))
        hessian_values.append(_hessian_values(fxx, fxy, fyy, x, y))
    return {
        "x": x_values,
        "y": y_values,
        "z": z_values,
        "gradient": gradient_values,
        "hessian": hessian_values,
    }


def rmsprop_univariate(
    function: Expr,
    x0: float,
    learning_rate: float,
    beta: float,
    epsilon: float,
    steps: int,
) -> dict:
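    """
    Perform RMSProp gradient descent on a univariate function.
    Assumes function is valid and in terms of x.
    """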
    f = lambdify('x', function, modules=['numpy'])
    f_prime = lambdify('x', function.diff('x'), modules=['numpy'])
    f_prime_prime = lambdify('x', function.diff('x', 2), modules=['numpy'])
    x_values = [x0]
    y_values = [f(x0)]
    derivative_values = [f_prime(x0)]
    second_derivative_values = [f_prime_prime(x0)]
    x = x0
    v = 0  # exponentially weighted average of squared gradients
    for i in range(steps - 1):
        g = f_prime(x)
        v = beta * v + (1 - beta) * g ** 2
        x = x - (learning_rate / np.sqrt(v + epsilon)) * g
        x_values.append(x)
        y_values.append(f(x))
        derivative_values.append(f_prime(x))
        second_derivative_values.append(f_prime_prime(x))
    return {
        "x": x_values,
        "y": y_values,
        "derivative": derivative_values,
        "secondDerivative": second_derivative_values,
    }


def rmsprop_bivariate(
    function: Expr,
    x0: float,
    y0: float,
    learning_rate: float,
    beta: float,
    epsilon: float,
    steps: int,
) -> dict:
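    """
    Perform RMSProp gradient descent on a bivariate function.
    Assumes function is valid and in terms of x and y.
    """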
    f = lambdify(('x', 'y'), function, modules=['numpy'])
    fx = lambdify(('x', 'y'), function.diff('x'), modules=['numpy'])
    fy = lambdify(('x', 'y'), function.diff('y'), modules=['numpy'])
    fxx = lambdify(('x', 'y'), function.diff('x', 2), modules=['numpy'])
    fyy = lambdify(('x', 'y'), function.diff('y', 2), modules=['numpy'])
    fxy = lambdify(('x', 'y'), function.diff('x', 'y'), modules=['numpy'])
    x_values = [x0]
    y_values = [y0]
    z_values = [f(x0, y0)]
    gradient_values = [_gradient_values(fx, fy, x0, y0)]
    hessian_values = [_hessian_values(fxx, fxy, fyy, x0, y0)]
    x = x0
    y = y0
    # exponentially weighted averages of squared gradients
    vx = 0
    vy = 0
    for i in range(steps - 1):
        gx = fx(x, y)
        gy = fy(x, y)
        vx = beta * vx + (1 - beta) * gx ** 2
        vy = beta * vy + (1 - beta) * gy ** 2
        x = x - (learning_rate / np.sqrt(vx + epsilon)) * gx
        y = y - (learning_rate / np.sqrt(vy + epsilon)) * gy
        x_values.append(x)
        y_values.append(y)
        z_values.append(f(x, y))
        gradient_values.append(_gradient_values(fx, fy, x, y))
        hessian_values.append(_hessian_values(fxx, fxy, fyy, x, y))
    return {
        "x": x_values,
        "y": y_values,
        "z": z_values,
        "gradient": gradient_values,
        "hessian": hessian_values,
    }


def adadelta_univariate(
    function: Expr,
    x0: float,
    beta: float,
    epsilon: float,
    steps: int,
) -> dict:
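    """
    Perform AdaDelta gradient descent on a univariate function; the step
    size is derived from the update history, so no learning rate is needed.
    Assumes function is valid and in terms of x.
    """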
    f = lambdify('x', function, modules=['numpy'])
    f_prime = lambdify('x', function.diff('x'), modules=['numpy'])
    f_prime_prime = lambdify('x', function.diff('x', 2), modules=['numpy'])
    x_values = [x0]
    y_values = [f(x0)]
    derivative_values = [f_prime(x0)]
    second_derivative_values = [f_prime_prime(x0)]
    x = x0
    v = 0  # exponentially weighted average of squared gradients
    s = 0  # exponentially weighted average of squared updates
    for i in range(steps - 1):
        g = f_prime(x)
        v = beta * v + (1 - beta) * g ** 2
        delta_x = -(np.sqrt(s + epsilon) / np.sqrt(v + epsilon)) * g
        x = x + delta_x
        s = beta * s + (1 - beta) * delta_x ** 2
        x_values.append(x)
        y_values.append(f(x))
        derivative_values.append(f_prime(x))
        second_derivative_values.append(f_prime_prime(x))
    return {
        "x": x_values,
        "y": y_values,
        "derivative": derivative_values,
        "secondDerivative": second_derivative_values,
    }


def adadelta_bivariate(
    function: Expr,
    x0: float,
    y0: float,
    beta: float,
    epsilon: float,
    steps: int,
) -> dict:
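    """
    Perform AdaDelta gradient descent on a bivariate function; the step
    sizes are derived from the update history, so no learning rate is needed.
    Assumes function is valid and in terms of x and y.
    """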
    f = lambdify(('x', 'y'), function, modules=['numpy'])
    fx = lambdify(('x', 'y'), function.diff('x'), modules=['numpy'])
    fy = lambdify(('x', 'y'), function.diff('y'), modules=['numpy'])
    fxx = lambdify(('x', 'y'), function.diff('x', 2), modules=['numpy'])
    fyy = lambdify(('x', 'y'), function.diff('y', 2), modules=['numpy'])
    fxy = lambdify(('x', 'y'), function.diff('x', 'y'), modules=['numpy'])
    x_values = [x0]
    y_values = [y0]
    z_values = [f(x0, y0)]
    gradient_values = [_gradient_values(fx, fy, x0, y0)]
    hessian_values = [_hessian_values(fxx, fxy, fyy, x0, y0)]
    x = x0
    y = y0
    # exponentially weighted averages of squared gradients
    vx = 0
    vy = 0
    # exponentially weighted averages of squared updates
    sx = 0
    sy = 0
    for i in range(steps - 1):
        gx = fx(x, y)
        gy = fy(x, y)
        vx = beta * vx + (1 - beta) * gx ** 2
        vy = beta * vy + (1 - beta) * gy ** 2
        delta_x = -(np.sqrt(sx + epsilon) / np.sqrt(vx + epsilon)) * gx
        delta_y = -(np.sqrt(sy + epsilon) / np.sqrt(vy + epsilon)) * gy
        x = x + delta_x
        y = y + delta_y
        sx = beta * sx + (1 - beta) * delta_x ** 2
        sy = beta * sy + (1 - beta) * delta_y ** 2
        x_values.append(x)
        y_values.append(y)
        z_values.append(f(x, y))
        gradient_values.append(_gradient_values(fx, fy, x, y))
        hessian_values.append(_hessian_values(fxx, fxy, fyy, x, y))
    return {
        "x": x_values,
        "y": y_values,
        "z": z_values,
        "gradient": gradient_values,
        "hessian": hessian_values,
    }


def adam_univariate(
    function: Expr,
    x0: float,
    learning_rate: float,
    beta1: float,
    beta2: float,
    epsilon: float,
    steps: int,
) -> dict:
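    """
    Perform Adam gradient descent on a univariate function.
    Assumes function is valid and in terms of x.
    """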
    f = lambdify('x', function, modules=['numpy'])
    f_prime = lambdify('x', function.diff('x'), modules=['numpy'])
    f_prime_prime = lambdify('x', function.diff('x', 2), modules=['numpy'])
    x_values = [x0]
    y_values = [f(x0)]
    derivative_values = [f_prime(x0)]
    second_derivative_values = [f_prime_prime(x0)]
    x = x0
    m = 0  # first moment
    v = 0  # second moment
    for i in range(steps - 1):
        g = f_prime(x)
        m = beta1 * m + (1 - beta1) * g
        v = beta2 * v + (1 - beta2) * g ** 2
        # Bias-corrected moment estimates (step count is i + 1).
        m_hat = m / (1 - beta1 ** (i + 1))
        v_hat = v / (1 - beta2 ** (i + 1))
        x = x - (learning_rate / (np.sqrt(v_hat) + epsilon)) * m_hat
        x_values.append(x)
        y_values.append(f(x))
        derivative_values.append(f_prime(x))
        second_derivative_values.append(f_prime_prime(x))
    return {
        "x": x_values,
        "y": y_values,
        "derivative": derivative_values,
        "secondDerivative": second_derivative_values,
    }


def adam_bivariate(
    function: Expr,
    x0: float,
    y0: float,
    learning_rate: float,
    beta1: float,
    beta2: float,
    epsilon: float,
    steps: int,
) -> dict:
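    """
    Perform Adam gradient descent on a bivariate function.
    Assumes function is valid and in terms of x and y.
    """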
    f = lambdify(('x', 'y'), function, modules=['numpy'])
    fx = lambdify(('x', 'y'), function.diff('x'), modules=['numpy'])
    fy = lambdify(('x', 'y'), function.diff('y'), modules=['numpy'])
    fxx = lambdify(('x', 'y'), function.diff('x', 2), modules=['numpy'])
    fyy = lambdify(('x', 'y'), function.diff('y', 2), modules=['numpy'])
    fxy = lambdify(('x', 'y'), function.diff('x', 'y'), modules=['numpy'])
    x_values = [x0]
    y_values = [y0]
    z_values = [f(x0, y0)]
    gradient_values = [_gradient_values(fx, fy, x0, y0)]
    hessian_values = [_hessian_values(fxx, fxy, fyy, x0, y0)]
    x = x0
    y = y0
    # first moments
    mx = 0
    my = 0
    # second moments
    vx = 0
    vy = 0
    for i in range(steps - 1):
        gx = fx(x, y)
        gy = fy(x, y)
        mx = beta1 * mx + (1 - beta1) * gx
        my = beta1 * my + (1 - beta1) * gy
        vx = beta2 * vx + (1 - beta2) * gx ** 2
        vy = beta2 * vy + (1 - beta2) * gy ** 2
        # Bias-corrected moment estimates (step count is i + 1).
        mx_hat = mx / (1 - beta1 ** (i + 1))
        my_hat = my / (1 - beta1 ** (i + 1))
        vx_hat = vx / (1 - beta2 ** (i + 1))
        vy_hat = vy / (1 - beta2 ** (i + 1))
        x = x - (learning_rate / (np.sqrt(vx_hat) + epsilon)) * mx_hat
        y = y - (learning_rate / (np.sqrt(vy_hat) + epsilon)) * my_hat
        x_values.append(x)
        y_values.append(y)
        z_values.append(f(x, y))
        gradient_values.append(_gradient_values(fx, fy, x, y))
        hessian_values.append(_hessian_values(fxx, fxy, fyy, x, y))
    return {
        "x": x_values,
        "y": y_values,
        "z": z_values,
        "gradient": gradient_values,
        "hessian": hessian_values,
    }
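

# A minimal usage sketch (illustrative assumptions only: the expressions,
# starting points, and hyperparameters below are not prescribed by this
# module). Each optimizer returns its full trajectory as a dict of lists.
if __name__ == "__main__":
    from sympy import symbols

    x_sym, y_sym = symbols('x y')

    # Univariate: minimize x^2 starting from x = 3 with momentum GD.
    trace = gd_univariate(x_sym ** 2, x0=3.0, learning_rate=0.1,
                          momentum=0.5, steps=25)
    print("gd_univariate final x:", trace["x"][-1])

    # Bivariate: minimize x^2 + 2*y^2 starting from (2, 1) with Adam.
    trace = adam_bivariate(x_sym ** 2 + 2 * y_sym ** 2, x0=2.0, y0=1.0,
                           learning_rate=0.1, beta1=0.9, beta2=0.999,
                           epsilon=1e-8, steps=50)
    print("adam_bivariate final (x, y):", trace["x"][-1], trace["y"][-1])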