import io
import logging

import gradio as gr
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numexpr
import numpy as np
from PIL import Image

logging.basicConfig(
    level=logging.INFO,  # set minimum level to capture (DEBUG, INFO, WARNING, ERROR, CRITICAL)
    format="%(asctime)s [%(levelname)s] %(message)s",  # log format
)
logger = logging.getLogger("ELVIS")

from optimisers import (
    get_gradient_2d,
    get_hessian_2d,
    get_gd_trajectory_2d,
    get_nesterov_trajectory_2d,
    get_adagrad_trajectory_2d,
    get_rmsprop_trajectory_2d,
    get_adadelta_trajectory_2d,
    get_adam_trajectory_2d,
    get_newton_trajectory_2d,
)


def format_gradient(xx, yy):
    """Render the two gradient components as a display string ``[dx, dy]``."""
    return f"[{xx}, {yy}]"


def format_hessian(xx, xy, yx, yy):
    """Render the four Hessian entries as a display string ``[[a, b], [c, d]]``."""
    return f"[[{xx}, {xy}], [{yx}, {yy}]]"


class Bivariate:
    """Gradio tab visualising optimiser trajectories on a bivariate function.

    Pre-renders one contour frame per optimisation step so the UI slider
    can flip through them instantly.
    """

    DEFAULT_FUNCTION = "x**2 + 9 * y**2"
    DEFAULT_INIT_X = 0.5
    DEFAULT_INIT_Y = 0.5
    DEFAULT_OPTIMISER = "Gradient Descent"
    DEFAULT_NUM_STEPS = 20
    DEFAULT_LEARNING_RATE = 0.1
    DEFAULT_MOMENTUM = 0
    DEFAULT_EPS = 1e-8
    DEFAULT_RHO_RMSPROP = 0.99
    DEFAULT_RHO_ADADELTA = 0.9
    DEFAULT_EPS_ADADELTA = 1e-2
    DEFAULT_RHO1_ADAM = 0.9
    DEFAULT_RHO2_ADAM = 0.999

    # Which hyperparameter controls are visible for each optimiser.
    # Keys are the Dropdown choices; values name the controls to show.
    _VISIBLE_CONTROLS = {
        "Gradient Descent": {"learning_rate", "momentum"},
        "Nesterov": {"learning_rate", "momentum"},
        "AdaGrad": {"learning_rate", "eps"},
        "RMSProp": {"learning_rate", "rho_rmsprop", "eps"},
        "AdaDelta": {"learning_rate", "rho_adadelta", "eps_adadelta"},
        "Adam": {"learning_rate", "rho1_adam", "rho2_adam", "eps"},
        "Newton": {"hessian"},
    }
    # Fixed ordering of the controls returned by handle_optimiser_change.
    _CONTROL_ORDER = (
        "hessian",
        "learning_rate",
        "momentum",
        "rho_rmsprop",
        "rho_adadelta",
        "rho1_adam",
        "rho2_adam",
        "eps",
        "eps_adadelta",
    )

    def __init__(self, width=1200, height=900):
        self.width = width
        self.height = height
        self._apply_defaults()
        self._recompute()

    def _apply_defaults(self):
        """Restore every function/optimiser hyperparameter to its default."""
        self.function = self.DEFAULT_FUNCTION
        self.initial_x = self.DEFAULT_INIT_X
        self.initial_y = self.DEFAULT_INIT_Y
        # common optimisation hyperparameters
        self.optimiser_type = self.DEFAULT_OPTIMISER
        self.num_steps = self.DEFAULT_NUM_STEPS
        self.learning_rate = self.DEFAULT_LEARNING_RATE
        # gradient descent and nesterov only
        self.momentum = self.DEFAULT_MOMENTUM
        # adaptive gradients
        self.rho_rmsprop = self.DEFAULT_RHO_RMSPROP
        self.rho_adadelta = self.DEFAULT_RHO_ADADELTA
        self.rho1_adam = self.DEFAULT_RHO1_ADAM
        self.rho2_adam = self.DEFAULT_RHO2_ADAM
        self.eps = self.DEFAULT_EPS
        self.eps_adadelta = self.DEFAULT_EPS_ADADELTA

    def _recompute(self):
        """Recompute the trajectory and re-render all plot frames from scratch."""
        self.trajectory_x, self.trajectory_y, self.trajectory_z = self.get_optimisation_trajectory()
        self.trajectory_idx = 0
        self.plots = []
        self.generate_plots()

    def get_optimisation_trajectory(self):
        """Run the selected optimiser and return (xs, ys, zs) trajectory arrays.

        Raises:
            ValueError: if ``self.optimiser_type`` is not a known optimiser.
        """
        start = (self.function, self.initial_x, self.initial_y)
        if self.optimiser_type == "Gradient Descent":
            return get_gd_trajectory_2d(
                *start, self.learning_rate, self.momentum, self.num_steps,
            )
        elif self.optimiser_type == "Nesterov":
            return get_nesterov_trajectory_2d(
                *start, self.learning_rate, self.momentum, self.num_steps,
            )
        elif self.optimiser_type == "AdaGrad":
            return get_adagrad_trajectory_2d(
                *start, self.learning_rate, self.eps, self.num_steps,
            )
        elif self.optimiser_type == "RMSProp":
            return get_rmsprop_trajectory_2d(
                *start, self.learning_rate, self.rho_rmsprop, self.eps, self.num_steps,
            )
        elif self.optimiser_type == "AdaDelta":
            return get_adadelta_trajectory_2d(
                *start, self.learning_rate, self.rho_adadelta, self.eps_adadelta, self.num_steps,
            )
        elif self.optimiser_type == "Adam":
            return get_adam_trajectory_2d(
                *start, self.learning_rate, self.rho1_adam, self.rho2_adam, self.eps, self.num_steps,
            )
        elif self.optimiser_type == "Newton":
            return get_newton_trajectory_2d(*start, self.num_steps)
        else:
            raise ValueError(f"Unknown optimiser type: {self.optimiser_type}")

    def _evaluate_surface(self, xx, yy):
        """Evaluate ``self.function`` on a meshgrid; fall back to zeros on error.

        Fix: the original logged the undefined name ``function`` here, so the
        except branch itself raised NameError and masked the real error.
        """
        try:
            return numexpr.evaluate(self.function, local_dict={'x': xx, 'y': yy})
        except Exception as e:
            logger.error("Error evaluating function '%s': %s", self.function, e)
            return np.zeros_like(xx)

    def generate_plots(self):
        """Pre-render one PNG contour frame per optimisation step into ``self.plots``."""
        self.plots.clear()
        fig, ax = plt.subplots()
        cbar = None

        # precompute for [-1, 1] domain
        x = np.linspace(-1, 1, 100)
        y = np.linspace(-1, 1, 100)
        xx, yy = np.meshgrid(x, y)
        zz = self._evaluate_surface(xx, yy)
        # one shared colour scale so all frames are comparable
        norm = mcolors.Normalize(vmin=zz.min(), vmax=zz.max())

        for idx in range(self.num_steps):
            # widen the plotted domain if the trajectory has escaped [-1, 1]
            x_radius = np.maximum(np.abs(np.min(self.trajectory_x[:idx + 1])),
                                  np.abs(np.max(self.trajectory_x[:idx + 1])))
            y_radius = np.maximum(np.abs(np.min(self.trajectory_y[:idx + 1])),
                                  np.abs(np.max(self.trajectory_y[:idx + 1])))
            radius = np.maximum(x_radius, y_radius)
            if radius > 1:
                x = np.linspace(-1.2 * radius, 1.2 * radius, 100)
                y = np.linspace(-1.2 * radius, 1.2 * radius, 100)
                xx, yy = np.meshgrid(x, y)
                zz = self._evaluate_surface(xx, yy)

            ax.clear()
            contour = ax.contourf(xx, yy, zz, levels=50, cmap='viridis', norm=norm)
            # full path so far in muted red, current iterate highlighted in red
            ax.plot(self.trajectory_x[:idx + 1], self.trajectory_y[:idx + 1],
                    marker='o', color='indianred')
            ax.plot(self.trajectory_x[idx], self.trajectory_y[idx], marker='o', color='red')
            if cbar is None:
                cbar = fig.colorbar(contour, ax=ax)
            else:
                cbar.update_normal(contour)
            ax.set_xlabel("x")
            ax.set_ylabel("y")
            cbar.set_label("f(x, y)")

            buf = io.BytesIO()
            fig.savefig(buf, format="png", bbox_inches="tight", pad_inches=0)
            buf.seek(0)
            self.plots.append(Image.open(buf))

        # Fix: the original called plt.close(fig) inside the loop and kept
        # drawing on the closed figure; close once after all frames are saved.
        plt.close(fig)

    def update_plot(self):
        """Return the frame for the current step and cache it on ``self.plot``."""
        plot = self.plots[self.trajectory_idx]
        self.plot = plot
        return plot

    def handle_trajectory_change(self):
        """Recompute trajectory and frames after any hyperparameter change."""
        self.trajectory_x, self.trajectory_y, self.trajectory_z = self.get_optimisation_trajectory()
        self.trajectory_idx = 0
        self.generate_plots()
        self.update_plot()

    def handle_slider_change(self, trajectory_idx):
        """Show the frame selected on the step slider."""
        self.trajectory_idx = trajectory_idx
        return self.update_plot()

    def handle_trajectory_button(self):
        """Advance one optimisation step (capped at the last frame)."""
        if self.trajectory_idx < self.num_steps - 1:
            self.trajectory_idx += 1
        # plot is updated from slider changing
        return self.trajectory_idx

    def handle_function_change(self, function):
        """Replace the objective function; refresh gradient/Hessian displays."""
        self.function = function
        self.handle_trajectory_change()
        gradient = get_gradient_2d(function)
        hessian = get_hessian_2d(function)
        return format_gradient(*gradient), format_hessian(*hessian), self.trajectory_idx, self.plot

    def handle_learning_rate_change(self, learning_rate):
        self.learning_rate = learning_rate
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_momentum_change(self, momentum):
        self.momentum = momentum
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_rho_rmsprop_change(self, rho_rmsprop):
        self.rho_rmsprop = rho_rmsprop
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_rho_adadelta_change(self, rho_adadelta):
        self.rho_adadelta = rho_adadelta
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_rho1_adam_change(self, rho1_adam):
        self.rho1_adam = rho1_adam
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_rho2_adam_change(self, rho2_adam):
        self.rho2_adam = rho2_adam
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_eps_change(self, eps):
        self.eps = eps
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_eps_adadelta_change(self, eps_adadelta):
        self.eps_adadelta = eps_adadelta
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_initial_x_change(self, initial_x):
        self.initial_x = initial_x
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_initial_y_change(self, initial_y):
        self.initial_y = initial_y
        self.handle_trajectory_change()
        return self.trajectory_idx, self.plot

    def handle_optimiser_change(self, optimiser_type):
        """Switch optimiser, recompute, and toggle hyperparameter visibility.

        Returns the nine control visibility updates (in ``_CONTROL_ORDER``)
        followed by the reset slider position and the new plot.

        Raises:
            ValueError: if ``optimiser_type`` is not a known optimiser
            (raised before any state is mutated).
        """
        if optimiser_type not in self._VISIBLE_CONTROLS:
            raise ValueError(f"Unknown optimiser type: {optimiser_type}")
        self.optimiser_type = optimiser_type
        self.handle_trajectory_change()
        visible = self._VISIBLE_CONTROLS[optimiser_type]
        updates = tuple(
            gr.update(visible=(control in visible)) for control in self._CONTROL_ORDER
        )
        return updates + (self.trajectory_idx, self.plot)

    def reset(self):
        """Restore all defaults and re-render from scratch."""
        self._apply_defaults()
        self._recompute()

    def build(self):
        """Build the Gradio tab and wire every control to its handler."""
        with gr.Tab("Bivariate"):
            with gr.Row():
                with gr.Column(scale=2):
                    self.plot = gr.Image(
                        value=self.update_plot(),
                        container=True,
                    )
                with gr.Column(scale=1):
                    with gr.Tab("Settings"):
                        function = gr.Textbox(
                            label="Function",
                            value=self.function,
                            interactive=True,
                            container=True,
                        )
                        gradient = gr.Textbox(
                            label="Gradient",
                            value=format_gradient(*get_gradient_2d(self.function)),
                            interactive=False,
                            container=True,
                        )
                        # only shown for Newton's method
                        hessian = gr.Textbox(
                            label="Hessian",
                            value=format_hessian(*get_hessian_2d(self.function)),
                            interactive=False,
                            container=True,
                            visible=False,
                        )
                        with gr.Row():
                            optimiser_type = gr.Dropdown(
                                label="Optimiser",
                                choices=["Gradient Descent", "Nesterov", "AdaGrad",
                                         "RMSProp", "AdaDelta", "Adam", "Newton"],
                                value=self.optimiser_type,
                                interactive=True,
                                container=True,
                            )
                        with gr.Row():
                            initial_x = gr.Number(
                                label="Initial x",
                                value=self.initial_x,
                                interactive=True,
                                container=True,
                            )
                            initial_y = gr.Number(
                                label="Initial y",
                                value=self.initial_y,
                                interactive=True,
                                container=True,
                            )
                        # hyperparameter row; visibility toggled per optimiser
                        with gr.Row():
                            learning_rate = gr.Number(
                                label="Learning Rate",
                                value=self.learning_rate,
                                interactive=True,
                                container=True,
                            )
                            momentum = gr.Number(
                                label="Momentum",
                                value=self.momentum,
                                interactive=True,
                                container=True,
                            )
                            rho_rmsprop = gr.Number(
                                label="rho",
                                value=self.rho_rmsprop,
                                interactive=True,
                                container=True,
                                visible=False,
                            )
                            rho_adadelta = gr.Number(
                                label="rho",
                                value=self.rho_adadelta,
                                interactive=True,
                                container=True,
                                visible=False,
                            )
                            rho1_adam = gr.Number(
                                label="rho1",
                                value=self.rho1_adam,
                                interactive=True,
                                container=True,
                                visible=False,
                            )
                            rho2_adam = gr.Number(
                                label="rho2",
                                value=self.rho2_adam,
                                interactive=True,
                                container=True,
                                visible=False,
                            )
                            eps = gr.Number(
                                label="Epsilon",
                                value=self.eps,
                                interactive=True,
                                container=True,
                                visible=False,
                            )
                            eps_adadelta = gr.Number(
                                label="Epsilon",
                                value=self.eps_adadelta,
                                interactive=True,
                                container=True,
                                visible=False,
                            )
                    with gr.Tab("Trajectory"):
                        trajectory_slider = gr.Slider(
                            label="Optimisation step",
                            minimum=0,
                            maximum=self.num_steps - 1,
                            step=1,
                            value=0,
                            interactive=True,
                        )
                        trajectory_button = gr.Button("Optimisation step")

            function.submit(
                self.handle_function_change,
                inputs=[function],
                outputs=[gradient, hessian, trajectory_slider, self.plot],
            )
            optimiser_type.change(
                self.handle_optimiser_change,
                inputs=[optimiser_type],
                outputs=[hessian, learning_rate, momentum, rho_rmsprop, rho_adadelta,
                         rho1_adam, rho2_adam, eps, eps_adadelta, trajectory_slider, self.plot],
            )
            initial_x.submit(
                self.handle_initial_x_change,
                inputs=[initial_x],
                outputs=[trajectory_slider, self.plot],
            )
            initial_y.submit(
                self.handle_initial_y_change,
                inputs=[initial_y],
                outputs=[trajectory_slider, self.plot],
            )
            learning_rate.submit(
                self.handle_learning_rate_change,
                inputs=[learning_rate],
                outputs=[trajectory_slider, self.plot],
            )
            momentum.submit(
                self.handle_momentum_change,
                inputs=[momentum],
                outputs=[trajectory_slider, self.plot],
            )
            rho_rmsprop.submit(
                self.handle_rho_rmsprop_change,
                inputs=[rho_rmsprop],
                outputs=[trajectory_slider, self.plot],
            )
            rho_adadelta.submit(
                self.handle_rho_adadelta_change,
                inputs=[rho_adadelta],
                outputs=[trajectory_slider, self.plot],
            )
            rho1_adam.submit(
                self.handle_rho1_adam_change,
                inputs=[rho1_adam],
                outputs=[trajectory_slider, self.plot],
            )
            rho2_adam.submit(
                self.handle_rho2_adam_change,
                inputs=[rho2_adam],
                outputs=[trajectory_slider, self.plot],
            )
            eps.submit(
                self.handle_eps_change,
                inputs=[eps],
                outputs=[trajectory_slider, self.plot],
            )
            eps_adadelta.submit(
                self.handle_eps_adadelta_change,
                inputs=[eps_adadelta],
                outputs=[trajectory_slider, self.plot],
            )
            trajectory_slider.change(
                self.handle_slider_change,
                inputs=[trajectory_slider],
                outputs=[self.plot],
            )
            trajectory_button.click(
                self.handle_trajectory_button,
                outputs=[trajectory_slider],
            )