Spaces:
Sleeping
Sleeping
| import ast | |
| from dataclasses import dataclass | |
| from typing import Literal | |
| import numpy as np | |
| from sklearn.gaussian_process import GaussianProcessRegressor | |
| from sklearn.gaussian_process.kernels import ( | |
| Kernel, | |
| RBF, | |
| Matern, | |
| RationalQuadratic, | |
| ExpSineSquared, | |
| DotProduct, | |
| WhiteKernel, | |
| ConstantKernel, | |
| ) | |
| from sympy import Expr, lambdify | |
@dataclass
class DataGenerationOptions:
    """Settings that control how a synthetic dataset is sampled.

    Attributes:
        method: ``"grid"`` for evenly spaced inputs, ``"random"`` for
            uniformly drawn inputs.
        num_samples: Number of input points to generate.
        noise: Scale (standard deviation) of the Gaussian noise added to
            the outputs; values ``<= 0`` disable noise.
    """

    method: Literal["grid", "random"]
    num_samples: int
    noise: float = 0.
@dataclass
class Dataset:
    """Paired one-dimensional samples stored as plain Python lists.

    Attributes:
        x: Input locations.
        y: Observed (or true) function values, one per entry of ``x``.
    """

    x: list[float]
    y: list[float]
| class PlotData: | |
| x: np.ndarray | |
| pred_mean: np.ndarray | |
| pred_std: np.ndarray | |
| y: np.ndarray | None = None | |
def generate_dataset(
    function: Expr,
    xlim: tuple[float, float],
    generation_options: DataGenerationOptions,
) -> Dataset:
    """Sample a dataset from a symbolic function of ``x``.

    Args:
        function: Expression in the symbol ``x`` to evaluate.
        xlim: ``(lower, upper)`` bounds of the input locations.
        generation_options: Sampling method, number of samples and noise
            level.

    Returns:
        A ``Dataset`` whose ``x`` and ``y`` lists have ``num_samples``
        entries each; ``y`` includes Gaussian noise when
        ``generation_options.noise > 0``.

    Raises:
        ValueError: If ``generation_options.method`` is neither ``"grid"``
            nor ``"random"``.
    """
    f = lambdify("x", function, modules='numpy')

    if generation_options.method == 'grid':
        x = np.linspace(xlim[0], xlim[1], generation_options.num_samples)
    elif generation_options.method == 'random':
        x = np.random.uniform(xlim[0], xlim[1], generation_options.num_samples)
    else:
        raise ValueError(f"Unknown generation method: {generation_options.method}")

    # A constant expression evaluates to a scalar rather than an array, so
    # broadcast the result to one value per input location.
    y = np.broadcast_to(np.asarray(f(x)), x.shape)

    if generation_options.noise > 0:
        # Add out of place: for the expression `x` the lambdified function
        # hands back the input array itself, so an in-place `+=` would also
        # perturb `x`. This also avoids in-place casting issues for
        # integer-valued results.
        y = y + np.random.normal(0, generation_options.noise, size=x.shape)

    return Dataset(x=x.tolist(), y=y.tolist())
def load_dataset_from_csv(
    file_path: str, header: bool, x_col: int, y_col: int
) -> Dataset:
    """Load two numeric columns of a comma-separated file as a dataset.

    Args:
        file_path: Path of the CSV file.
        header: Whether the first line is a header row to skip.
        x_col: Index of the column holding the inputs.
        y_col: Index of the column holding the outputs.

    Returns:
        A ``Dataset`` containing every row whose ``x`` and ``y`` entries
        are both valid numbers; an empty dataset if the file has no data
        rows.

    Raises:
        IndexError: If ``x_col`` or ``y_col`` is outside the columns of
            the file.
    """
    # ndmin=2 keeps the table two-dimensional even when the file holds a
    # single row or a single column; otherwise the row-wise NaN filter
    # below would fail on a squeezed 1-D array.
    table = np.genfromtxt(
        file_path,
        delimiter=',',
        skip_header=1 if header else 0,
        ndmin=2,
    )
    if table.size == 0:
        return Dataset(x=[], y=[])

    # Only the requested columns decide whether a row is usable, so a
    # non-numeric or missing value in an unrelated column no longer
    # discards the row.
    selected = table[:, [x_col, y_col]]
    selected = selected[~np.isnan(selected).any(axis=1)]
    return Dataset(x=selected[:, 0].tolist(), y=selected[:, 1].tolist())
def generate_true_curve(
    function: Expr,
    xlim: tuple[float, float],
    num_points: int = 1000,
) -> Dataset:
    """Evaluate a symbolic function of ``x`` on an evenly spaced grid.

    Args:
        function: Expression in the symbol ``x`` to evaluate.
        xlim: ``(lower, upper)`` bounds of the grid.
        num_points: Number of grid points.

    Returns:
        A ``Dataset`` with ``num_points`` noise-free ``(x, y)`` pairs.
    """
    f = lambdify("x", function, modules='numpy')
    x = np.linspace(xlim[0], xlim[1], num_points)
    # A constant expression evaluates to a scalar; broadcast it so that
    # `y` always has one value per grid point.
    y = np.broadcast_to(np.asarray(f(x)), x.shape)
    return Dataset(x=x.tolist(), y=y.tolist())
def train_model(
    dataset: Dataset,
    kernel: Kernel,
    distribution: Literal["Prior", "Posterior"],
) -> GaussianProcessRegressor:
    """Build a Gaussian-process regressor for the requested distribution.

    Args:
        dataset: Training samples; only used for ``"Posterior"``.
        kernel: Covariance kernel passed to the regressor.
        distribution: ``"Prior"`` returns the regressor unfitted,
            ``"Posterior"`` fits it to ``dataset`` first.

    Returns:
        The (possibly fitted) regressor.

    Raises:
        ValueError: If ``distribution`` is neither ``"Prior"`` nor
            ``"Posterior"``.
    """
    regressor = GaussianProcessRegressor(kernel=kernel)

    if distribution == "Prior":
        # The prior is represented by the regressor before any fitting.
        return regressor
    if distribution != "Posterior":
        raise ValueError(f"Unknown distribution type: {distribution}")

    # The inputs are reshaped into a single-feature column before fitting.
    inputs = np.array(dataset.x).reshape(-1, 1)
    targets = np.array(dataset.y)
    regressor.fit(inputs, targets)
    return regressor
def predict(
    model: GaussianProcessRegressor,
    x: np.ndarray,
) -> tuple[np.ndarray, np.ndarray]:
    """Return the model's predictive mean and standard deviation at ``x``.

    Args:
        model: Regressor to query.
        x: Locations to evaluate, passed to ``model.predict`` unchanged.

    Returns:
        ``(mean, std)`` as produced by ``model.predict(x, return_std=True)``.
    """
    mean, std = model.predict(x, return_std=True)
    return (mean, std)
def sample(
    model: GaussianProcessRegressor,
    x: np.ndarray,
) -> np.ndarray:
    """Draw one function sample from the model at the locations ``x``.

    Args:
        model: Regressor to sample from.
        x: Locations to evaluate, passed to ``model.sample_y`` unchanged.

    Returns:
        The single drawn sample flattened to one dimension.
    """
    # NOTE(review): no random_state is passed to sample_y; confirm whether
    # the library default makes repeated calls return the same draw.
    draws = model.sample_y(x, n_samples=1)
    return draws.flatten()
# Syntax permitted in a kernel expression: constructor calls on whitelisted
# names, literals, tuples/lists of those, and arithmetic between them.
_SAFE_KERNEL_NODES = (
    ast.Expression,
    ast.Call,
    ast.keyword,
    ast.Name,
    ast.Load,
    ast.Constant,
    ast.Tuple,
    ast.List,
    ast.BinOp,
    ast.UnaryOp,
    ast.Add,
    ast.Sub,
    ast.Mult,
    ast.Div,
    ast.Pow,
    ast.UAdd,
    ast.USub,
)


def eval_kernel(kernel: str) -> Kernel:
    """Turn a kernel expression string into a scikit-learn kernel object.

    The expression may combine the whitelisted kernel constructors with
    literals and the operators ``+ - * / **``, for example
    ``"ConstantKernel(1.0) * RBF(length_scale=2.0) + WhiteKernel()"``.

    Args:
        kernel: Source text of the kernel expression.

    Returns:
        The kernel the expression evaluates to.

    Raises:
        ValueError: If the text is not a valid expression, uses syntax or
            names outside the whitelist, fails during evaluation, or does
            not evaluate to a kernel.
    """
    # List of allowed kernel constructors
    allowed_names = {
        'RBF': RBF,
        'Matern': Matern,
        'RationalQuadratic': RationalQuadratic,
        'ExpSineSquared': ExpSineSquared,
        'DotProduct': DotProduct,
        'WhiteKernel': WhiteKernel,
        'ConstantKernel': ConstantKernel,
    }

    # Parse and check the syntax safely
    try:
        tree = ast.parse(kernel, mode='eval')
    except SyntaxError as e:
        raise ValueError(f"Invalid syntax: {e}") from e

    # SECURITY: the string is evaluated with eval(). Removing builtins is
    # not a sandbox on its own, because attribute access on any reachable
    # object (e.g. `RBF.__init__.__globals__`) leads back to the full
    # interpreter. Reject everything except the small expression subset
    # needed to build kernels before evaluating.
    for node in ast.walk(tree):
        if not isinstance(node, _SAFE_KERNEL_NODES):
            raise ValueError(
                f"Unsupported syntax in kernel expression: {type(node).__name__}"
            )
        if isinstance(node, ast.Name) and node.id not in allowed_names:
            raise ValueError(f"Unknown name in kernel expression: {node.id}")
        if isinstance(node, ast.Call) and not isinstance(node.func, ast.Name):
            raise ValueError("Only direct calls to kernel constructors are allowed")

    # Evaluate in restricted namespace
    try:
        result = eval(
            compile(tree, '<string>', 'eval'),
            {"__builtins__": None},  # disable access to Python builtins like open
            allowed_names,  # only allow things in this list
        )
    except Exception as e:
        raise ValueError(f"Error evaluating kernel: {e}") from e

    # A bare literal such as "1.0" evaluates fine but is not a kernel.
    if not isinstance(result, Kernel):
        raise ValueError(
            f"Expression does not evaluate to a kernel: {type(result).__name__}"
        )
    return result
def compute_plot_values(
    dataset: Dataset,
    kernel_input: str,
    distribution: Literal["Prior", "Posterior"],
    xmin: float,
    xmax: float,
) -> PlotData:
    """Compute the predictive mean and standard deviation used for plotting.

    Args:
        dataset: Training samples handed to ``train_model``.
        kernel_input: Kernel expression string parsed by ``eval_kernel``.
        distribution: ``"Prior"`` or ``"Posterior"``, forwarded to
            ``train_model``.
        xmin: Lower bound of the plotting grid.
        xmax: Upper bound of the plotting grid.

    Returns:
        ``PlotData`` holding a 1000-point grid between ``xmin`` and
        ``xmax`` with the prediction at each point; ``y`` is left unset.
    """
    gp = train_model(dataset, eval_kernel(kernel_input), distribution)

    # The grid is built as a single-feature column for prediction and
    # flattened again for the returned plot data.
    grid = np.linspace(xmin, xmax, 1000).reshape(-1, 1)
    mean, std = predict(gp, grid)

    return PlotData(x=grid.flatten(), pred_mean=mean, pred_std=std)