Spaces:

elvis-hf
/

gp_visualizer

Sleeping

App Files Files Community

gp_visualizer / old_code /gp_visualizer.py

joel-woodfield

Refactor file structure

03e72c7 3 months ago

raw

history blame contribute delete

15.4 kB

	from dataclasses import dataclass
	import time

	import ast
	import gradio as gr
	import io
	import matplotlib.pyplot as plt
	import numpy as np
	from PIL import Image
	from sklearn.gaussian_process import GaussianProcessRegressor
	from sklearn.gaussian_process.kernels import (
	DotProduct,
	WhiteKernel,
	ConstantKernel,
	RBF,
	Matern,
	RationalQuadratic,
	ExpSineSquared,
	Kernel,
	)

	import logging
	# Configure the root logger when this module is loaded, then create the
	# module-level logger used below for timing and error messages.
	logging.basicConfig(
	level=logging.INFO, # set minimum level to capture (DEBUG, INFO, WARNING, ERROR, CRITICAL)
	format="%(asctime)s [%(levelname)s] %(message)s", # log format
	)
	logger = logging.getLogger("ELVIS")

	from dataset import Dataset, DatasetView, get_function


	@dataclass(frozen=True)
	class PlotOptions:
	    """Immutable set of switches selecting which layers of the plot are drawn."""

	    show_training_data: bool = True
	    show_true_function: bool = True
	    show_mean_prediction: bool = True
	    show_prediction_interval: bool = True

	    def update(self, **kwargs):
	        """Return a new PlotOptions with the given flags overridden.

	        Flags that are not passed keep this instance's value. Keyword
	        arguments that do not name a field are ignored.
	        """
	        merged = {
	            name: kwargs.get(name, getattr(self, name))
	            for name in self.__dataclass_fields__
	        }
	        return PlotOptions(**merged)

	    def __hash__(self):
	        """Hash the four flags as a tuple, in declaration order."""
	        flags = tuple(getattr(self, name) for name in self.__dataclass_fields__)
	        return hash(flags)


	def eval_kernel(kernel_str) -> Kernel:
	    """Build a scikit-learn kernel from a Python expression string.

	    The expression may call the kernel constructors listed in
	    ``allowed_names`` and combine them with arithmetic operators, for
	    example ``"ConstantKernel(1.0) * RBF(length_scale=0.5) + WhiteKernel()"``.

	    Args:
	        kernel_str: Expression text describing the kernel.

	    Returns:
	        The kernel object the expression evaluates to.

	    Raises:
	        ValueError: If the text is not valid syntax, uses a construct or
	            name outside the whitelist, fails during evaluation, or does
	            not evaluate to a ``Kernel``.
	    """
	    # List of allowed kernel constructors
	    allowed_names = {
	        'RBF': RBF,
	        'Matern': Matern,
	        'RationalQuadratic': RationalQuadratic,
	        'ExpSineSquared': ExpSineSquared,
	        'DotProduct': DotProduct,
	        'WhiteKernel': WhiteKernel,
	        'ConstantKernel': ConstantKernel,
	    }

	    # Syntax that a kernel expression legitimately needs: constructor calls
	    # with literal arguments, combined by arithmetic. Attribute access,
	    # subscripts, lambdas, comprehensions etc. are deliberately absent,
	    # because they are how an empty-builtins eval sandbox gets escaped.
	    allowed_nodes = (
	        ast.Expression,
	        ast.Call,
	        ast.keyword,
	        ast.Name,
	        ast.Load,
	        ast.Constant,
	        ast.BinOp,
	        ast.Add,
	        ast.Sub,
	        ast.Mult,
	        ast.Div,
	        ast.Pow,
	        ast.UnaryOp,
	        ast.UAdd,
	        ast.USub,
	        ast.Tuple,
	        ast.List,
	    )

	    # Parse the expression; this only establishes that it is valid syntax
	    try:
	        tree = ast.parse(kernel_str, mode='eval')
	    except (SyntaxError, ValueError) as e:
	        raise ValueError(f"Invalid syntax: {e}") from e

	    # Reject anything outside the whitelist before any code runs
	    for node in ast.walk(tree):
	        if not isinstance(node, allowed_nodes):
	            raise ValueError(
	                f"Unsupported syntax in kernel expression: {type(node).__name__}"
	            )
	        if isinstance(node, ast.Name) and node.id not in allowed_names:
	            raise ValueError(f"Unknown name in kernel expression: {node.id}")

	    # SECURITY NOTE: this is eval() on user-supplied text. The node whitelist
	    # above is what restricts it; removing builtins alone is not sufficient.
	    # NOTE(review): large constant arithmetic (e.g. nested powers) can still
	    # be expensive to evaluate - consider a length/size limit if exposed publicly.
	    try:
	        result = eval(
	            compile(tree, '<string>', 'eval'),
	            {"__builtins__": None},  # disable access to Python builtins like open
	            allowed_names,  # only allow things in this list
	        )
	    except Exception as e:
	        raise ValueError(f"Error evaluating kernel: {e}") from e

	    # A bare literal such as "1.0" evaluates fine but is not a kernel; fail
	    # here with a clear message instead of later inside the regressor.
	    if not isinstance(result, Kernel):
	        raise ValueError(
	            f"Error evaluating kernel: expression produced "
	            f"{type(result).__name__}, not a kernel"
	        )

	    return result


	@dataclass
	class ModelState:
	    """A GP regressor together with the settings it was built from."""

	    # the regressor instance
	    model: GaussianProcessRegressor
	    # kernel expression text
	    kernel: str
	    # distribution name
	    distribution: str

	    def __hash__(self):
	        """Hash on the kernel text and distribution only; the model is left out."""
	        identity = (self.kernel, self.distribution)
	        return hash(identity)


	class GpVisualizer:
	    """Gradio app that fits a Gaussian process to a dataset and plots the result."""

	    def __init__(self, width, height):
	        """Store the canvas dimensions, the colour map and the page CSS.

	        Args:
	            width: Stored as ``canvas_width``.
	            height: Stored as ``canvas_height``.
	        """
	        self.canvas_width = width
	        self.canvas_height = height

	        self.plot_cmap = plt.get_cmap("tab20")

	        self.css = """
	        .hidden-button {
	            display: none;
	        }"""

	    def plot(
	        self,
	        dataset: Dataset,
	        model_state: ModelState,
	        plot_options: PlotOptions,
	        sample_y: bool = False,
	        sample_y_seed: int = 0,
	    ) -> Image.Image:
	        """Render the dataset and the model's prediction to an image.

	        Args:
	            dataset: Supplies the training points (``x``, ``y``). When its
	                ``mode`` is ``"generate"``, ``dataset.function`` is passed to
	                ``get_function`` to obtain the reference curve.
	            model_state: Holds the regressor used for ``predict`` / ``sample_y``.
	            plot_options: Flags selecting which layers are drawn.
	            sample_y: When true, also draw one function sampled from the model.
	            sample_y_seed: ``random_state`` forwarded to ``sample_y``.

	        Returns:
	            The figure rendered as PNG and opened as a PIL image.
	        """
	        logger.info("Plotting")
	        t1 = time.time()

	        x_train = dataset.x
	        y_train = dataset.y

	        # Predictions are computed before any figure exists so that a failing
	        # predict() cannot leave an open figure behind.
	        if dataset.mode == "generate":
	            x_test, y_test = get_function(dataset.function, xlim=(-2, 2), nsample=100)
	            y_pred, y_std = model_state.model.predict(x_test, return_std=True)
	        elif x_train.shape[0] > 0:
	            x_test = np.linspace(x_train.min() - 1, x_train.max() + 1, 100).reshape(-1, 1)
	            y_test = None
	            y_pred, y_std = model_state.model.predict(x_test, return_std=True)
	        else:
	            x_test = None
	            y_test = None
	            y_pred = None
	            y_std = None

	        # Bounds of the 95% band, reused for the axis limits and the shading
	        if y_pred is not None and y_std is not None:
	            lower = y_pred - 1.96 * y_std
	            upper = y_pred + 1.96 * y_std
	        else:
	            lower = None
	            upper = None

	        # A single figure per call; it is always closed in the finally block,
	        # otherwise pyplot keeps every figure alive and memory grows per render.
	        fig, ax = plt.subplots(figsize=(8, 8))
	        try:
	            ax.set_title("")
	            ax.set_xlabel("x")
	            ax.set_ylabel("y")

	            if y_test is not None:
	                min_y = min(y_test.min(), lower.min())
	                max_y = max(y_test.max(), upper.max())
	                ax.set_ylim(min_y - 1, max_y + 1)
	            elif y_train.shape[0] > 0 and lower is not None:
	                min_y = min(y_train.min(), lower.min())
	                max_y = max(y_train.max(), upper.max())
	                ax.set_ylim(min_y - 1, max_y + 1)

	            # Draw on the explicit axes rather than pyplot's "current" axes
	            if plot_options.show_training_data:
	                ax.scatter(
	                    x_train.flatten(),
	                    y_train,
	                    label='training data',
	                    color=self.plot_cmap(0),
	                )

	            if plot_options.show_true_function and x_test is not None and y_test is not None:
	                ax.plot(
	                    x_test.flatten(),
	                    y_test,
	                    label='true function',
	                    color=self.plot_cmap(1),
	                )

	            if plot_options.show_mean_prediction and x_test is not None and y_pred is not None:
	                ax.plot(
	                    x_test.flatten(),
	                    y_pred,
	                    linestyle="--",
	                    label='mean prediction',
	                    color=self.plot_cmap(2),
	                )

	            if plot_options.show_prediction_interval and x_test is not None and y_std is not None:
	                ax.fill_between(
	                    x_test.flatten(),
	                    lower,
	                    upper,
	                    color=self.plot_cmap(3),
	                    alpha=0.2,
	                    label='95% prediction interval',
	                )

	            if x_test is not None and sample_y:
	                y_sample = model_state.model.sample_y(
	                    x_test, random_state=sample_y_seed
	                ).flatten()

	                ax.plot(
	                    x_test.flatten(),
	                    y_sample,
	                    linestyle=":",
	                    label="model sample",
	                    color=self.plot_cmap(4),
	                )

	            ax.legend()

	            buf = io.BytesIO()
	            fig.savefig(buf, format="png", bbox_inches="tight", pad_inches=0)
	        finally:
	            plt.close(fig)

	        buf.seek(0)
	        img = Image.open(buf)

	        t2 = time.time()
	        logger.info(f"Plotting took {t2 - t1:.4f} seconds")

	        return img

	    def init_model(
	        self,
	        kernel: str,
	        dataset: Dataset,
	        distribution: str,
	    ) -> GaussianProcessRegressor:
	        """Create a regressor for the kernel and, for the posterior, fit it.

	        Args:
	            kernel: Kernel expression text, parsed by ``eval_kernel``.
	            dataset: Training data; only used when ``distribution`` is
	                ``"posterior"`` and the dataset has at least one point.
	            distribution: ``"posterior"`` or ``"prior"``.

	        Returns:
	            The regressor, fitted only in the posterior case with data.

	        Raises:
	            ValueError: If ``distribution`` is neither of the two names, or
	                if ``eval_kernel`` rejects the kernel text.
	        """
	        model = GaussianProcessRegressor(kernel=eval_kernel(kernel))
	        if distribution == "posterior":
	            # an empty dataset leaves the model unfitted
	            if dataset.x.shape[0] > 0:
	                model.fit(dataset.x, dataset.y)
	        elif distribution != "prior":
	            raise ValueError(f"Unknown distribution: {distribution}")

	        return model

	    def update_dataset(
	        self,
	        dataset: Dataset,
	        model_state: ModelState,
	        plot_options: PlotOptions,
	    ) -> tuple[ModelState, Image.Image]:
	        """Rebuild the model for a changed dataset and redraw.

	        The kernel and distribution are taken over from ``model_state``.

	        Returns:
	            The new model state and the re-rendered image.
	        """
	        logger.info("updating dataset")
	        model = self.init_model(
	            model_state.kernel,
	            dataset,
	            model_state.distribution,
	        )
	        model_state = ModelState(
	            model=model, kernel=model_state.kernel, distribution=model_state.distribution
	        )

	        new_canvas = self.plot(dataset, model_state, plot_options)

	        return model_state, new_canvas

	    def update_model(
	        self,
	        kernel_str: str,
	        distribution: str,
	        model_state: ModelState,
	        dataset: Dataset,
	        plot_options: PlotOptions,
	    ) -> tuple[ModelState, Image.Image]:
	        """Rebuild the model from new kernel text / distribution and redraw.

	        If building the model fails, the error is logged and shown to the
	        user, and the previous ``model_state`` is kept and re-plotted.

	        Args:
	            kernel_str: Kernel expression text from the UI.
	            distribution: Distribution name from the UI; lower-cased before use.
	            model_state: Current state, returned unchanged on failure.
	            dataset: Training data for the fit.
	            plot_options: Flags for the re-render.

	        Returns:
	            The (possibly unchanged) model state and the re-rendered image.
	        """
	        logger.info("updating kernel")
	        try:
	            normalized = distribution.lower()
	            model = self.init_model(
	                kernel_str,
	                dataset,
	                normalized,
	            )
	        except Exception as e:
	            # UI boundary: report the problem instead of crashing the handler
	            logger.error(f"Error updating kernel: {e}")
	            gr.Info(f" ⚠️ Error updating kernel: {e}")
	        else:
	            model_state = ModelState(
	                model=model, kernel=kernel_str, distribution=normalized
	            )

	        new_canvas = self.plot(dataset, model_state, plot_options)

	        return model_state, new_canvas

	    def sample(
	        self,
	        model_state: ModelState,
	        dataset: Dataset,
	        plot_options: PlotOptions,
	    ) -> Image.Image:
	        """Redraw the plot including one function sampled from the model.

	        The sampling seed is derived from the current time, so repeated
	        calls generally draw different samples.
	        """
	        logger.info("sampling from model")
	        seed = int(time.time() * 100) % 10000

	        new_canvas = self.plot(
	            dataset,
	            model_state,
	            plot_options,
	            sample_y=True,
	            sample_y_seed=seed,
	        )

	        return new_canvas

	    def clear_sample(
	        self,
	        model_state: ModelState,
	        dataset: Dataset,
	        plot_options: PlotOptions,
	    ) -> Image.Image:
	        """Redraw the plot without the sampled function."""
	        logger.info("clearing sample from model")

	        new_canvas = self.plot(
	            dataset,
	            model_state,
	            plot_options,
	            sample_y=False,
	        )

	        return new_canvas

	    def launch(self):
	        """Build the Gradio Blocks UI, wire the event handlers and start the app."""
	        # NOTE(review): the nesting below was reconstructed; in particular the
	        # Sample / Clear Sample buttons are assumed to belong to the Model tab.
	        # build the Gradio interface
	        with gr.Blocks(css=self.css) as demo:
	            # app title
	            gr.HTML("<div style='text-align:left; font-size:40px; font-weight: bold;'>Gaussian Process Visualizer</div>")

	            # states
	            dataset = gr.State(Dataset())
	            plot_options = gr.State(PlotOptions())

	            kernel = "RBF() + WhiteKernel()"
	            model = self.init_model(kernel, dataset.value, "posterior")
	            model_state = gr.State(
	                ModelState(model=model, kernel=kernel, distribution="posterior")
	            )

	            # GUI elements and layout
	            with gr.Row():
	                with gr.Column(scale=2):
	                    canvas = gr.Image(
	                        value=self.plot(
	                            dataset.value,
	                            model_state.value,
	                            plot_options.value,
	                        ),
	                        # show_download_button=False,
	                        container=True,
	                    )

	                with gr.Column(scale=1):
	                    with gr.Tab("Dataset"):
	                        dataset_view = DatasetView()
	                        dataset_view.build(state=dataset)
	                        dataset.change(
	                            fn=self.update_dataset,
	                            inputs=[dataset, model_state, plot_options],
	                            outputs=[model_state, canvas],
	                        )

	                    with gr.Tab("Model"):
	                        kernel_box = gr.Textbox(
	                            label="Kernel",
	                            value=model_state.value.kernel,
	                            interactive=True,
	                        )
	                        kernel_submit = gr.Button("Update Kernel")
	                        distribution = gr.Radio(
	                            label="Distribution",
	                            choices=["Prior", "Posterior"],
	                            value="Posterior",
	                        )
	                        # Enter in the textbox, the button and the radio all
	                        # trigger the same model rebuild.
	                        kernel_box.submit(
	                            fn=self.update_model,
	                            inputs=[kernel_box, distribution, model_state, dataset, plot_options],
	                            outputs=[model_state, canvas],
	                        )
	                        kernel_submit.click(
	                            fn=self.update_model,
	                            inputs=[kernel_box, distribution, model_state, dataset, plot_options],
	                            outputs=[model_state, canvas],
	                        )
	                        distribution.change(
	                            fn=self.update_model,
	                            inputs=[kernel_box, distribution, model_state, dataset, plot_options],
	                            outputs=[model_state, canvas],
	                        )

	                        sample_button = gr.Button("Sample")
	                        clear_sample_button = gr.Button("Clear Sample")
	                        sample_button.click(
	                            fn=self.sample,
	                            inputs=[model_state, dataset, plot_options],
	                            outputs=[canvas],
	                        )
	                        clear_sample_button.click(
	                            fn=self.clear_sample,
	                            inputs=[model_state, dataset, plot_options],
	                            outputs=[canvas],
	                        )

	                    with gr.Tab("Plot Options"):
	                        show_training_data = gr.Checkbox(
	                            label="Show Training Data",
	                            value=True,
	                        )
	                        show_true_function = gr.Checkbox(
	                            label="Show True Function",
	                            value=True,
	                        )
	                        show_mean_prediction = gr.Checkbox(
	                            label="Show Mean Prediction",
	                            value=True,
	                        )
	                        show_prediction_interval = gr.Checkbox(
	                            label="Show Prediction Interval",
	                            value=True,
	                        )
	                        # Each checkbox writes a new PlotOptions into the
	                        # state; the state's change event then redraws.
	                        show_training_data.change(
	                            fn=lambda val, options: options.update(show_training_data=val),
	                            inputs=[show_training_data, plot_options],
	                            outputs=[plot_options],
	                        )
	                        show_true_function.change(
	                            fn=lambda val, options: options.update(show_true_function=val),
	                            inputs=[show_true_function, plot_options],
	                            outputs=[plot_options],
	                        )
	                        show_mean_prediction.change(
	                            fn=lambda val, options: options.update(show_mean_prediction=val),
	                            inputs=[show_mean_prediction, plot_options],
	                            outputs=[plot_options],
	                        )
	                        show_prediction_interval.change(
	                            fn=lambda val, options: options.update(show_prediction_interval=val),
	                            inputs=[show_prediction_interval, plot_options],
	                            outputs=[plot_options],
	                        )
	                        plot_options.change(
	                            fn=self.plot,
	                            inputs=[dataset, model_state, plot_options],
	                            outputs=[canvas],
	                        )

	                    with gr.Tab("Usage"):
	                        # path is relative to the working directory
	                        with open("usage.md", "r", encoding="utf-8") as f:
	                            usage_md = f.read()
	                        gr.Markdown(usage_md)

	        demo.launch()

	# Script entry point: create the visualizer and start the Gradio app.
	# NOTE(review): there is no `if __name__ == "__main__":` guard, so this also
	# runs when the module is imported.
	visualizer = GpVisualizer(width=1200, height=900)
	visualizer.launch()