"""
Create simple test datasets for PPO symbolic regression experiments.
No constants (C) - just simple expressions to verify PPO works.
"""
|
|
| import numpy as np |
| import pandas as pd |
| from pathlib import Path |
|
|
def create_dataset(formula_func, formula_name, n_vars, n_samples=500,
                   x_range=(-2, 2), output_dir="./data/ppo_test", seed=42):
    """
    Create a synthetic regression dataset and write it to a CSV file.

    Inputs are drawn uniformly from ``x_range``; the target column is
    obtained by applying ``formula_func`` to the whole input matrix. The
    CSV has columns ``x_1 .. x_{n_vars}`` followed by ``y``.

    Args:
        formula_func: Function that takes the X array of shape
            (n_samples, n_vars) and returns y with one value per row
            (shape (n_samples,) or (n_samples, 1)).
        formula_name: Name for the dataset (used as the CSV file stem).
        n_vars: Number of input variables (columns of X).
        n_samples: Number of data points; must be at least 1.
        x_range: (low, high) bounds for the uniformly sampled X values.
        output_dir: Directory to save CSV files (created if missing).
        seed: Seed for the private random generator. The default of 42
            reproduces the data that ``np.random.seed(42)`` followed by
            ``np.random.uniform`` would generate. ``None`` gives
            non-reproducible data.

    Returns:
        pathlib.Path of the CSV file that was written.

    Raises:
        ValueError: If ``n_samples`` is smaller than 1, or if
            ``formula_func`` does not return one value per sample.
    """
    # Fail before touching the filesystem: an empty dataset would otherwise
    # be written to disk and only then crash while computing the y range.
    if n_samples < 1:
        raise ValueError(f"n_samples must be at least 1, got {n_samples}")

    low, high = x_range

    # A private RandomState yields the same legacy stream as seeding the
    # global generator, without clobbering the caller's global RNG state.
    rng = np.random.RandomState(seed)
    X = rng.uniform(low, high, (n_samples, n_vars))

    y = np.asarray(formula_func(X))
    if y.shape not in ((n_samples,), (n_samples, 1)):
        raise ValueError(
            f"formula_func must return one value per sample "
            f"(expected shape ({n_samples},)), got shape {y.shape}"
        )
    y = y.reshape(n_samples)

    columns = [f"x_{i + 1}" for i in range(n_vars)] + ["y"]
    df = pd.DataFrame(np.column_stack([X, y]), columns=columns)

    # Create the directory only once the data is known to be valid.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    output_file = output_dir / f"{formula_name}.csv"
    df.to_csv(output_file, index=False)

    print(f"Created: {output_file} ({n_samples} samples, {n_vars} vars)")
    print(f" Formula: {formula_name}")
    print(f" y range: [{y.min():.3f}, {y.max():.3f}]")

    return output_file
|
|
|
|
def main():
    """
    Generate the full suite of constant-free PPO test datasets.

    The datasets are declared in a single table grouped by difficulty, so
    adding or removing a formula only means editing one entry. The closing
    summary reports the number of files actually written and the directory
    they were written to, instead of repeating hard-coded values.
    """
    output_dir = "./data/ppo_test"

    # (section title, [(dataset name, number of variables, formula), ...])
    suites = [
        ("EASY DATASETS (2 variables)", [
            ("add_x1_x2", 2, lambda X: X[:, 0] + X[:, 1]),
            ("mul_x1_x2", 2, lambda X: X[:, 0] * X[:, 1]),
            ("sub_x1_x2", 2, lambda X: X[:, 0] - X[:, 1]),
        ]),
        ("MEDIUM DATASETS (unary functions)", [
            ("sin_x1", 1, lambda X: np.sin(X[:, 0])),
            ("cos_x1", 1, lambda X: np.cos(X[:, 0])),
            ("square_x1", 1, lambda X: X[:, 0] * X[:, 0]),
        ]),
        ("HARD DATASETS (composed expressions)", [
            ("sin_x1_plus_x2", 2, lambda X: np.sin(X[:, 0]) + X[:, 1]),
            ("x1_mul_sin_x2", 2, lambda X: X[:, 0] * np.sin(X[:, 1])),
            ("sin_x1_plus_x2_composed", 2,
             lambda X: np.sin(X[:, 0] + X[:, 1])),
            ("x1_mul_x2_plus_x1", 2,
             lambda X: X[:, 0] * X[:, 1] + X[:, 0]),
        ]),
    ]

    banner = "=" * 60
    print(banner)
    print("Creating PPO Test Datasets (No Constants)")
    print(banner)

    created = []
    for title, specs in suites:
        print(f"\n--- {title} ---")
        for name, n_vars, formula in specs:
            created.append(
                create_dataset(
                    formula_func=formula,
                    formula_name=name,
                    n_vars=n_vars,
                    output_dir=output_dir,
                )
            )

    print("\n" + banner)
    # Derive the count from what was written so the summary cannot go stale.
    print(f"Done! Created {len(created)} test datasets in {output_dir}/")
    print(banner)
|
|
|
|
# Build the datasets only when run as a script, so importing this module
# (e.g. to reuse create_dataset) has no side effects.
if __name__ == "__main__":
    main()
|
|