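"""Gradio "Bivariate" tab: visualise 2D optimisation trajectories on a user-supplied function.

The function string is evaluated with numexpr; gradients, Hessians, and the
per-optimiser trajectories come from the companion `optimisers` module.
"""
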
import io
import logging

import gradio as gr
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numexpr
import numpy as np
from PIL import Image

from optimisers import (
    get_gradient_2d,
    get_hessian_2d,
    get_gd_trajectory_2d,
    get_nesterov_trajectory_2d,
    get_adagrad_trajectory_2d,
    get_rmsprop_trajectory_2d,
    get_adadelta_trajectory_2d,
    get_adam_trajectory_2d,
    get_newton_trajectory_2d,
)

logging.basicConfig(
    level=logging.INFO,  # minimum level to capture (DEBUG, INFO, WARNING, ERROR, CRITICAL)
    format="%(asctime)s [%(levelname)s] %(message)s",
)
logger = logging.getLogger("ELVIS")

def format_gradient(xx, yy):
    return f"[{xx}, {yy}]"


def format_hessian(xx, xy, yx, yy):
    return f"[[{xx}, {xy}], [{yx}, {yy}]]"
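
# For example, with the default function "x**2 + 9 * y**2" (assuming
# get_gradient_2d and get_hessian_2d return symbolic expression strings),
# format_gradient("2*x", "18*y") gives "[2*x, 18*y]" and
# format_hessian("2", "0", "0", "18") gives "[[2, 0], [0, 18]]".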

class Bivariate:
    DEFAULT_FUNCTION = "x**2 + 9 * y**2"
    DEFAULT_INIT_X = 0.5
    DEFAULT_INIT_Y = 0.5
    DEFAULT_OPTIMISER = "Gradient Descent"
    DEFAULT_NUM_STEPS = 20
    DEFAULT_LEARNING_RATE = 0.1
    DEFAULT_MOMENTUM = 0
    DEFAULT_EPS = 1e-8
    DEFAULT_RHO_RMSPROP = 0.99
    DEFAULT_RHO_ADADELTA = 0.9
    DEFAULT_EPS_ADADELTA = 1e-2
    DEFAULT_RHO1_ADAM = 0.9
    DEFAULT_RHO2_ADAM = 0.999
    def __init__(self, width=1200, height=900):
        self.width = width
        self.height = height
        self.function = self.DEFAULT_FUNCTION
        self.initial_x = self.DEFAULT_INIT_X
        self.initial_y = self.DEFAULT_INIT_Y
        # common optimisation hyperparameters
        self.optimiser_type = self.DEFAULT_OPTIMISER
        self.num_steps = self.DEFAULT_NUM_STEPS
        self.learning_rate = self.DEFAULT_LEARNING_RATE
        # gradient descent and Nesterov only
        self.momentum = self.DEFAULT_MOMENTUM
        # adaptive-gradient methods
        self.rho_rmsprop = self.DEFAULT_RHO_RMSPROP
        self.rho_adadelta = self.DEFAULT_RHO_ADADELTA
        self.rho1_adam = self.DEFAULT_RHO1_ADAM
        self.rho2_adam = self.DEFAULT_RHO2_ADAM
        self.eps = self.DEFAULT_EPS
        self.eps_adadelta = self.DEFAULT_EPS_ADADELTA
        self.trajectory_x, self.trajectory_y, self.trajectory_z = self.get_optimisation_trajectory()
        self.trajectory_idx = 0
        self.plots = []
        self.generate_plots()
    def get_optimisation_trajectory(self):
        if self.optimiser_type == "Gradient Descent":
            return get_gd_trajectory_2d(
                self.function,
                self.initial_x,
                self.initial_y,
                self.learning_rate,
                self.momentum,
                self.num_steps,
            )
        elif self.optimiser_type == "Nesterov":
            return get_nesterov_trajectory_2d(
                self.function,
                self.initial_x,
                self.initial_y,
                self.learning_rate,
                self.momentum,
                self.num_steps,
            )
        elif self.optimiser_type == "AdaGrad":
            return get_adagrad_trajectory_2d(
                self.function,
                self.initial_x,
                self.initial_y,
                self.learning_rate,
                self.eps,
                self.num_steps,
            )
        elif self.optimiser_type == "RMSProp":
            return get_rmsprop_trajectory_2d(
                self.function,
                self.initial_x,
                self.initial_y,
                self.learning_rate,
                self.rho_rmsprop,
                self.eps,
                self.num_steps,
            )
        elif self.optimiser_type == "AdaDelta":
            return get_adadelta_trajectory_2d(
                self.function,
                self.initial_x,
                self.initial_y,
                self.learning_rate,
                self.rho_adadelta,
                self.eps_adadelta,
                self.num_steps,
            )
        elif self.optimiser_type == "Adam":
            return get_adam_trajectory_2d(
                self.function,
                self.initial_x,
                self.initial_y,
                self.learning_rate,
                self.rho1_adam,
                self.rho2_adam,
                self.eps,
                self.num_steps,
            )
        elif self.optimiser_type == "Newton":
            return get_newton_trajectory_2d(
                self.function,
                self.initial_x,
                self.initial_y,
                self.num_steps,
            )
        else:
            raise ValueError(f"Unknown optimiser type: {self.optimiser_type}")
    def generate_plots(self):
        self.plots.clear()
        fig, ax = plt.subplots()
        cbar = None
        # precompute the surface on the default [-1, 1] x [-1, 1] domain
        x = np.linspace(-1, 1, 100)
        y = np.linspace(-1, 1, 100)
        xx, yy = np.meshgrid(x, y)
        try:
            zz = numexpr.evaluate(self.function, local_dict={'x': xx, 'y': yy})
        except Exception as e:
            logger.error("Error evaluating function '%s': %s", self.function, e)
            zz = np.zeros_like(xx)
        # fix the colour scale to the initial domain so frames are comparable
        norm = mcolors.Normalize(vmin=zz.min(), vmax=zz.max())
        for idx in range(self.num_steps):
            # widen the plotted domain whenever the trajectory escapes [-1, 1]
            x_radius = np.maximum(np.abs(np.min(self.trajectory_x[:idx + 1])), np.abs(np.max(self.trajectory_x[:idx + 1])))
            y_radius = np.maximum(np.abs(np.min(self.trajectory_y[:idx + 1])), np.abs(np.max(self.trajectory_y[:idx + 1])))
            radius = np.maximum(x_radius, y_radius)
            if radius > 1:
                x = np.linspace(-1.2 * radius, 1.2 * radius, 100)
                y = np.linspace(-1.2 * radius, 1.2 * radius, 100)
                xx, yy = np.meshgrid(x, y)
                try:
                    zz = numexpr.evaluate(self.function, local_dict={'x': xx, 'y': yy})
                except Exception as e:
                    logger.error("Error evaluating function '%s': %s", self.function, e)
                    zz = np.zeros_like(xx)
            ax.clear()
            contour = ax.contourf(xx, yy, zz, levels=50, cmap='viridis', norm=norm)
            ax.plot(self.trajectory_x[:idx + 1], self.trajectory_y[:idx + 1], marker='o', color='indianred')
            ax.plot(self.trajectory_x[idx], self.trajectory_y[idx], marker='o', color='red')
            if cbar is None:
                cbar = fig.colorbar(contour, ax=ax)
            else:
                cbar.update_normal(contour)
            ax.set_xlabel("x")
            ax.set_ylabel("y")
            cbar.set_label("f(x, y)")
            # render the frame to an in-memory PNG
            buf = io.BytesIO()
            fig.savefig(buf, format="png", bbox_inches="tight", pad_inches=0)
            buf.seek(0)
            img = Image.open(buf)
            self.plots.append(img)
        # close the figure once all frames are rendered, not inside the loop
        plt.close(fig)
    def update_plot(self):
        plot = self.plots[self.trajectory_idx]
        self.plot = plot
        return plot

    def handle_trajectory_change(self):
        self.trajectory_x, self.trajectory_y, self.trajectory_z = self.get_optimisation_trajectory()
        self.trajectory_idx = 0
        self.generate_plots()
        self.update_plot()

    def handle_slider_change(self, trajectory_idx):
        self.trajectory_idx = trajectory_idx
        return self.update_plot()

    def handle_trajectory_button(self):
        if self.trajectory_idx < self.num_steps - 1:
            self.trajectory_idx += 1
        # the plot itself is refreshed by the slider's change event
        return self.trajectory_idx
    def handle_function_change(self, function):
        self.function = function
        self.handle_trajectory_change()
        gradient = get_gradient_2d(function)
        hessian = get_hessian_2d(function)
        return format_gradient(*gradient), format_hessian(*hessian), self.trajectory_idx, self.plot

    def handle_learning_rate_change(self, learning_rate):
        self.learning_rate = learning_rate
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_momentum_change(self, momentum):
        self.momentum = momentum
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_rho_rmsprop_change(self, rho_rmsprop):
        self.rho_rmsprop = rho_rmsprop
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_rho_adadelta_change(self, rho_adadelta):
        self.rho_adadelta = rho_adadelta
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_rho1_adam_change(self, rho1_adam):
        self.rho1_adam = rho1_adam
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_rho2_adam_change(self, rho2_adam):
        self.rho2_adam = rho2_adam
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_eps_change(self, eps):
        self.eps = eps
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_eps_adadelta_change(self, eps_adadelta):
        self.eps_adadelta = eps_adadelta
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_initial_x_change(self, initial_x):
        self.initial_x = initial_x
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_initial_y_change(self, initial_y):
        self.initial_y = initial_y
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot
    # which hyperparameter controls are visible for each optimiser; the keys
    # must match the dropdown choices in build()
    VISIBLE_CONTROLS = {
        "Gradient Descent": {"learning_rate", "momentum"},
        "Nesterov": {"learning_rate", "momentum"},
        "AdaGrad": {"learning_rate", "eps"},
        "RMSProp": {"learning_rate", "rho_rmsprop", "eps"},
        "AdaDelta": {"learning_rate", "rho_adadelta", "eps_adadelta"},
        "Adam": {"learning_rate", "rho1_adam", "rho2_adam", "eps"},
        "Newton": {"hessian"},
    }

    def handle_optimiser_change(self, optimiser_type):
        self.optimiser_type = optimiser_type
        self.handle_trajectory_change()
        if optimiser_type not in self.VISIBLE_CONTROLS:
            raise ValueError(f"Unknown optimiser type: {optimiser_type}")
        visible = self.VISIBLE_CONTROLS[optimiser_type]
        # one gr.update per control, in the same order as the outputs list in build()
        controls = (
            "hessian",
            "learning_rate",
            "momentum",
            "rho_rmsprop",
            "rho_adadelta",
            "rho1_adam",
            "rho2_adam",
            "eps",
            "eps_adadelta",
        )
        updates = tuple(gr.update(visible=name in visible) for name in controls)
        return updates + (self.trajectory_idx, self.plot)
    def reset(self):
        self.function = self.DEFAULT_FUNCTION
        self.initial_x = self.DEFAULT_INIT_X
        self.initial_y = self.DEFAULT_INIT_Y
        self.num_steps = self.DEFAULT_NUM_STEPS
        self.optimiser_type = self.DEFAULT_OPTIMISER
        self.learning_rate = self.DEFAULT_LEARNING_RATE
        # gradient descent and Nesterov only
        self.momentum = self.DEFAULT_MOMENTUM
        # adaptive-gradient methods
        self.rho_rmsprop = self.DEFAULT_RHO_RMSPROP
        self.rho_adadelta = self.DEFAULT_RHO_ADADELTA
        self.rho1_adam = self.DEFAULT_RHO1_ADAM
        self.rho2_adam = self.DEFAULT_RHO2_ADAM
        self.eps = self.DEFAULT_EPS
        self.eps_adadelta = self.DEFAULT_EPS_ADADELTA
        self.trajectory_x, self.trajectory_y, self.trajectory_z = self.get_optimisation_trajectory()
        self.trajectory_idx = 0
        self.plots = []
        self.generate_plots()
    def build(self):
        with gr.Tab("Bivariate"):
            with gr.Row():
                with gr.Column(scale=2):
                    # NB: self.plot does double duty: here it is the gr.Image
                    # component targeted by the output bindings below, but every
                    # call to update_plot() rebinds it to the current PIL frame,
                    # which is what the handlers return
                    self.plot = gr.Image(
                        value=self.update_plot(),
                        container=True,
                    )
                with gr.Column(scale=1):
                    with gr.Tab("Settings"):
                        function = gr.Textbox(
                            label="Function",
                            value=self.function,
                            interactive=True,
                            container=True,
                        )
                        gradient = gr.Textbox(
                            label="Gradient",
                            value=format_gradient(*get_gradient_2d(self.function)),
                            interactive=False,
                            container=True,
                        )
                        hessian = gr.Textbox(
                            label="Hessian",
                            value=format_hessian(*get_hessian_2d(self.function)),
                            interactive=False,
                            container=True,
                            visible=False,
                        )
                        with gr.Row():
                            optimiser_type = gr.Dropdown(
                                label="Optimiser",
                                choices=["Gradient Descent", "Nesterov", "AdaGrad", "RMSProp", "AdaDelta", "Adam", "Newton"],
                                value=self.optimiser_type,
                                interactive=True,
                                container=True,
                            )
                        with gr.Row():
                            initial_x = gr.Number(
                                label="Initial x",
                                value=self.initial_x,
                                interactive=True,
                                container=True,
                            )
                            initial_y = gr.Number(
                                label="Initial y",
                                value=self.initial_y,
                                interactive=True,
                                container=True,
                            )
                        # hyperparameter row
                        with gr.Row():
                            learning_rate = gr.Number(
                                label="Learning Rate",
                                value=self.learning_rate,
                                interactive=True,
                                container=True,
                            )
                            momentum = gr.Number(
                                label="Momentum",
                                value=self.momentum,
                                interactive=True,
                                container=True,
                            )
                            rho_rmsprop = gr.Number(
                                label="rho",
                                value=self.DEFAULT_RHO_RMSPROP,
                                interactive=True,
                                container=True,
                                visible=False,
                            )
                            rho_adadelta = gr.Number(
                                label="rho",
                                value=self.DEFAULT_RHO_ADADELTA,
                                interactive=True,
                                container=True,
                                visible=False,
                            )
                            rho1_adam = gr.Number(
                                label="rho1",
                                value=self.DEFAULT_RHO1_ADAM,
                                interactive=True,
                                container=True,
                                visible=False,
                            )
                            rho2_adam = gr.Number(
                                label="rho2",
                                value=self.DEFAULT_RHO2_ADAM,
                                interactive=True,
                                container=True,
                                visible=False,
                            )
                            eps = gr.Number(
                                label="Epsilon",
                                value=self.eps,
                                interactive=True,
                                container=True,
                                visible=False,
                            )
                            eps_adadelta = gr.Number(
                                label="Epsilon",
                                value=self.eps_adadelta,
                                interactive=True,
                                container=True,
                                visible=False,
                            )
                    with gr.Tab("Trajectory"):
                        trajectory_slider = gr.Slider(
                            label="Optimisation step",
                            minimum=0,
                            maximum=self.num_steps - 1,
                            step=1,
                            value=0,
                            interactive=True,
                        )
                        trajectory_button = gr.Button("Optimisation step")
            function.submit(
                self.handle_function_change,
                inputs=[function],
                outputs=[gradient, hessian, trajectory_slider, self.plot],
            )
            optimiser_type.change(
                self.handle_optimiser_change,
                inputs=[optimiser_type],
                outputs=[hessian, learning_rate, momentum, rho_rmsprop, rho_adadelta, rho1_adam, rho2_adam, eps, eps_adadelta, trajectory_slider, self.plot],
            )
            initial_x.submit(
                self.handle_initial_x_change,
                inputs=[initial_x],
                outputs=[trajectory_slider, self.plot],
            )
            initial_y.submit(
                self.handle_initial_y_change,
                inputs=[initial_y],
                outputs=[trajectory_slider, self.plot],
            )
            learning_rate.submit(
                self.handle_learning_rate_change,
                inputs=[learning_rate],
                outputs=[trajectory_slider, self.plot],
            )
            momentum.submit(
                self.handle_momentum_change,
                inputs=[momentum],
                outputs=[trajectory_slider, self.plot],
            )
            rho_rmsprop.submit(
                self.handle_rho_rmsprop_change,
                inputs=[rho_rmsprop],
                outputs=[trajectory_slider, self.plot],
            )
            rho_adadelta.submit(
                self.handle_rho_adadelta_change,
                inputs=[rho_adadelta],
                outputs=[trajectory_slider, self.plot],
            )
            rho1_adam.submit(
                self.handle_rho1_adam_change,
                inputs=[rho1_adam],
                outputs=[trajectory_slider, self.plot],
            )
            rho2_adam.submit(
                self.handle_rho2_adam_change,
                inputs=[rho2_adam],
                outputs=[trajectory_slider, self.plot],
            )
            eps.submit(
                self.handle_eps_change,
                inputs=[eps],
                outputs=[trajectory_slider, self.plot],
            )
            eps_adadelta.submit(
                self.handle_eps_adadelta_change,
                inputs=[eps_adadelta],
                outputs=[trajectory_slider, self.plot],
            )
            trajectory_slider.change(
                self.handle_slider_change,
                inputs=[trajectory_slider],
                outputs=[self.plot],
            )
            trajectory_button.click(
                self.handle_trajectory_button,
                outputs=[trajectory_slider],
            )
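

# Minimal usage sketch, an assumption rather than the Space's actual entry
# point (the real app entry point and the `optimisers` module are not shown
# here). build() only registers a gr.Tab, so it must run inside a Blocks context:
if __name__ == "__main__":
    with gr.Blocks() as demo:
        Bivariate().build()
    demo.launch()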