# gp_visualizer/backend/src/logic.py
# Joel Woodfield
# Refactor to use manager in backend
# de9ce02
import ast
from dataclasses import dataclass
from typing import Literal
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import (
Kernel,
RBF,
Matern,
RationalQuadratic,
ExpSineSquared,
DotProduct,
WhiteKernel,
ConstantKernel,
)
from sympy import Expr, lambdify
@dataclass
class DataGenerationOptions:
    """Settings that control how a synthetic dataset is sampled.

    Attributes:
        method: ``"grid"`` for evenly spaced inputs, ``"random"`` for inputs
            drawn uniformly at random.
        num_samples: Number of input points to generate.
        noise: Standard deviation of the Gaussian noise added to the
            outputs; noise is only applied when this is positive.
    """

    method: Literal["grid", "random"]
    num_samples: int
    noise: float = 0.0
@dataclass
class Dataset:
    """Paired one-dimensional samples stored as plain Python lists.

    Attributes:
        x: Input values.
        y: Output values, one per entry of ``x``.
    """

    x: list[float]
    y: list[float]
@dataclass
class PlotData:
x: np.ndarray
pred_mean: np.ndarray
pred_std: np.ndarray
y: np.ndarray | None = None
def generate_dataset(
    function: Expr,
    xlim: tuple[float, float],
    generation_options: DataGenerationOptions,
) -> Dataset:
    """Sample a (possibly noisy) dataset from a symbolic function of ``x``.

    Args:
        function: SymPy expression in the variable ``x``.
        xlim: ``(lower, upper)`` bounds of the sampled input interval.
        generation_options: Sampling method, number of samples and noise
            standard deviation.

    Returns:
        A ``Dataset`` holding the sampled inputs and the corresponding
        function values, with Gaussian noise added when
        ``generation_options.noise`` is positive.

    Raises:
        ValueError: If ``generation_options.method`` is neither ``"grid"``
            nor ``"random"``.
    """
    f = lambdify("x", function, modules='numpy')

    lower, upper = xlim[0], xlim[1]
    if generation_options.method == 'grid':
        x = np.linspace(lower, upper, generation_options.num_samples)
    elif generation_options.method == 'random':
        x = np.random.uniform(lower, upper, generation_options.num_samples)
    else:
        raise ValueError(f"Unknown generation method: {generation_options.method}")

    # A lambdified constant expression returns a scalar rather than an array,
    # and the identity expression ``x`` returns the input array itself.
    # Broadcasting to x's shape and copying (astype) handles both: y always
    # has one value per sample and never aliases x.
    values = np.asarray(f(x))
    y = np.broadcast_to(values, x.shape).astype(
        np.result_type(values.dtype, np.float64)
    )

    if generation_options.noise > 0:
        # Out-of-place addition so the noise can never leak into x.
        y = y + np.random.normal(0, generation_options.noise, size=y.shape)

    return Dataset(x=x.tolist(), y=y.tolist())
def load_dataset_from_csv(
    file_path: str, header: bool, x_col: int, y_col: int
) -> Dataset:
    """Load two numeric columns of a comma-separated file as a ``Dataset``.

    Args:
        file_path: Path of the CSV file to read.
        header: Whether the first line is a header row that must be skipped.
        x_col: Index of the column holding the input values.
        y_col: Index of the column holding the output values.

    Returns:
        A ``Dataset`` built from the rows whose ``x_col`` and ``y_col``
        entries are both valid numbers.

    Raises:
        IndexError: If ``x_col`` or ``y_col`` is outside the available
            columns.
    """
    # ndmin=2 (NumPy >= 1.23) keeps the result two-dimensional even when the
    # file holds a single data row or a single column; without it the row
    # filter below fails on the collapsed 1-D array.
    data = np.genfromtxt(
        file_path,
        delimiter=',',
        skip_header=1 if header else 0,
        ndmin=2,
    )

    # Only the requested columns decide whether a row is usable. Non-numeric
    # or blank cells in unrelated columns are parsed as NaN and must not
    # discard otherwise valid rows.
    selected = data[:, [x_col, y_col]]
    complete_rows = ~np.isnan(selected).any(axis=1)
    selected = selected[complete_rows]

    return Dataset(x=selected[:, 0].tolist(), y=selected[:, 1].tolist())
def generate_true_curve(
    function: Expr,
    xlim: tuple[float, float],
    num_points: int = 1000,
) -> Dataset:
    """Evaluate a symbolic function of ``x`` on an evenly spaced grid.

    Args:
        function: SymPy expression in the variable ``x``.
        xlim: ``(lower, upper)`` bounds of the evaluation interval.
        num_points: Number of grid points between the bounds (inclusive).

    Returns:
        A ``Dataset`` with the grid points as ``x`` and the noise-free
        function values as ``y``.
    """
    f = lambdify("x", function, modules='numpy')
    x = np.linspace(xlim[0], xlim[1], num_points)

    # A lambdified constant expression returns a scalar instead of an array;
    # broadcast so there is always exactly one y value per grid point.
    values = np.asarray(f(x))
    y = np.broadcast_to(values, x.shape).astype(
        np.result_type(values.dtype, np.float64)
    )

    return Dataset(x=x.tolist(), y=y.tolist())
def train_model(
    dataset: Dataset,
    kernel: Kernel,
    distribution: Literal["Prior", "Posterior"],
) -> GaussianProcessRegressor:
    """Build a Gaussian-process regressor, fitting it only for the posterior.

    Args:
        dataset: Training samples; used only when ``distribution`` is
            ``"Posterior"``.
        kernel: Covariance kernel handed to the regressor.
        distribution: ``"Prior"`` returns the unfitted regressor,
            ``"Posterior"`` fits it to ``dataset`` first.

    Returns:
        The (possibly fitted) ``GaussianProcessRegressor``.

    Raises:
        ValueError: If ``distribution`` is neither ``"Prior"`` nor
            ``"Posterior"``.
    """
    regressor = GaussianProcessRegressor(kernel=kernel)

    # The prior needs no training data at all.
    if distribution == "Prior":
        return regressor
    if distribution != "Posterior":
        raise ValueError(f"Unknown distribution type: {distribution}")

    # The regressor expects a 2-D feature matrix: one column, one row per sample.
    features = np.array(dataset.x).reshape(-1, 1)
    targets = np.array(dataset.y)
    regressor.fit(features, targets)
    return regressor
def predict(
    model: GaussianProcessRegressor,
    x: np.ndarray,
) -> tuple[np.ndarray, np.ndarray]:
    """Return the model's predictive mean and standard deviation at ``x``.

    Args:
        model: Regressor to query.
        x: Input locations, passed unchanged to ``model.predict``.

    Returns:
        A ``(mean, std)`` tuple as produced by
        ``model.predict(x, return_std=True)``.
    """
    mean, std = model.predict(x, return_std=True)
    return mean, std
def sample(
    model: GaussianProcessRegressor,
    x: np.ndarray,
    random_state: int | np.random.RandomState | None = 0,
) -> np.ndarray:
    """Draw one function sample from the Gaussian process at ``x``.

    Args:
        model: Regressor to sample from.
        x: Input locations, passed unchanged to ``model.sample_y``.
        random_state: Seed or random state forwarded to ``sample_y``. The
            default of ``0`` mirrors scikit-learn's own default, which means
            repeated calls return the *same* draw; pass ``None`` (or a
            different seed) to obtain a fresh sample on each call.

    Returns:
        The single sampled function, flattened to one dimension.
    """
    draws = model.sample_y(x, n_samples=1, random_state=random_state)
    return draws.flatten()
def eval_kernel(kernel: str) -> Kernel:
    """Turn a textual kernel expression into a scikit-learn kernel object.

    The expression may call the whitelisted kernel constructors and combine
    them with arithmetic operators, e.g.
    ``"ConstantKernel(1.0) * RBF(length_scale=2.0) + WhiteKernel()"``.

    Args:
        kernel: Kernel expression written in Python syntax.

    Returns:
        The kernel produced by evaluating the expression.

    Raises:
        ValueError: If the expression is not valid syntax, uses a construct
            outside the supported subset, fails during evaluation, or does
            not evaluate to a kernel.
    """
    # List of allowed kernel constructors
    allowed_names = {
        'RBF': RBF,
        'Matern': Matern,
        'RationalQuadratic': RationalQuadratic,
        'ExpSineSquared': ExpSineSquared,
        'DotProduct': DotProduct,
        'WhiteKernel': WhiteKernel,
        'ConstantKernel': ConstantKernel,
    }
    # Syntax needed for kernel expressions: constructor calls with positional
    # or keyword arguments, literals, tuples/lists (e.g. bounds) and
    # arithmetic. Everything else is rejected before evaluation. This notably
    # covers attribute access, subscripts, lambdas and comprehensions.
    allowed_nodes = (
        ast.Expression,
        ast.Call,
        ast.keyword,
        ast.Name,
        ast.Load,
        ast.Constant,
        ast.BinOp,
        ast.operator,
        ast.UnaryOp,
        ast.unaryop,
        ast.Tuple,
        ast.List,
    )

    # Parse and check the syntax safely. Surrounding whitespace is stripped
    # so that an accidental leading space is not reported as a syntax error.
    try:
        tree = ast.parse(kernel.strip(), mode='eval')
    except SyntaxError as e:
        raise ValueError(f"Invalid syntax: {e}") from e

    for node in ast.walk(tree):
        if not isinstance(node, allowed_nodes):
            raise ValueError(
                "Error evaluating kernel: unsupported syntax "
                f"'{type(node).__name__}'"
            )

    # SECURITY: this still calls eval() on caller-supplied text. Removing
    # builtins alone is not a sandbox (attribute access can reach arbitrary
    # objects), which is why the node whitelist above is enforced first.
    # Arithmetic on large literals can still consume significant CPU/memory,
    # so do not expose this to untrusted users without further limits.
    try:
        result = eval(
            compile(tree, '<string>', 'eval'),
            {"__builtins__": None},  # disable access to Python builtins like open
            allowed_names,  # only allow things in this list
        )
    except Exception as e:
        raise ValueError(f"Error evaluating kernel: {e}") from e

    # Expressions such as "1" evaluate fine but are not kernels; fail here
    # with a clear message instead of deep inside the regressor later on.
    if not isinstance(result, Kernel):
        raise ValueError(
            "Error evaluating kernel: expression did not produce a kernel "
            f"(got {type(result).__name__})"
        )
    return result
def compute_plot_values(
    dataset: Dataset,
    kernel_input: str,
    distribution: Literal["Prior", "Posterior"],
    xmin: float,
    xmax: float,
    num_points: int = 1000,
) -> PlotData:
    """Compute the predictive mean and standard deviation over a plot range.

    Args:
        dataset: Training samples; only used when ``distribution`` is
            ``"Posterior"``.
        kernel_input: Kernel expression, parsed with ``eval_kernel``.
        distribution: ``"Prior"`` or ``"Posterior"``.
        xmin: Lower bound of the plotted input range.
        xmax: Upper bound of the plotted input range.
        num_points: Number of evenly spaced evaluation points between
            ``xmin`` and ``xmax`` (inclusive). Defaults to 1000.

    Returns:
        A ``PlotData`` with the flattened evaluation grid and the predictive
        mean and standard deviation at each grid point; its ``y`` field is
        left as ``None``.

    Raises:
        ValueError: If the kernel expression is invalid or ``distribution``
            is not recognised.
    """
    kernel = eval_kernel(kernel_input)
    model = train_model(dataset, kernel, distribution)

    # The regressor expects a column of inputs: one row per evaluation point.
    x_plot = np.linspace(xmin, xmax, num_points).reshape(-1, 1)
    y_mean, y_std = predict(model, x_plot)

    return PlotData(x=x_plot.flatten(), pred_mean=y_mean, pred_std=y_std)