| |
| """Generate test data for self-healing ML system.""" |
| import numpy as np |
| import pandas as pd |
| from pathlib import Path |
|
|
def generate_test_data(
    n_samples: int = 1000,
    n_features: int = 10,
    *,
    seed: "int | None" = None,
) -> pd.DataFrame:
    """Generate a synthetic binary-classification dataset.

    Features are drawn from a standard normal distribution. The label is 1
    where a random linear combination of the features plus Gaussian noise
    (standard deviation 0.5) is positive, and 0 otherwise.

    Args:
        n_samples: Number of rows to generate.
        n_features: Number of feature columns to generate.
        seed: Optional seed for reproducible output. When ``None`` (the
            default) the global ``np.random`` state is used, so callers that
            seed it with ``np.random.seed(...)`` keep their existing
            behaviour.

    Returns:
        A DataFrame with columns ``feature_0`` .. ``feature_{n_features-1}``
        followed by an integer ``target`` column containing 0/1 labels.
    """
    # A private RandomState keeps seeded calls isolated from (and from
    # disturbing) the global stream; unseeded calls fall back to the module
    # level functions, which share the same ``randn`` API.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Draw order (features, coefficients, noise) is kept fixed so that the
    # sequence consumed from the random stream stays stable.
    X = rng.randn(n_samples, n_features)
    coef = rng.randn(n_features)
    logits = X @ coef + rng.randn(n_samples) * 0.5
    y = (logits > 0).astype(int)

    feature_names = [f"feature_{i}" for i in range(n_features)]
    df = pd.DataFrame(X, columns=feature_names)
    df['target'] = y

    return df
|
|
if __name__ == "__main__":
    # Output directory for the generated CSV files; created if missing.
    data_dir = Path("data/raw")
    data_dir.mkdir(parents=True, exist_ok=True)

    n_train, n_test = 1000, 200

    # Generate ONE dataset and split it. Each call to generate_test_data()
    # draws its own random coefficient vector, so two separate calls would
    # give the test set a different feature->label relationship than the
    # training set. Rows are i.i.d., so a positional split is sufficient.
    full_data = generate_test_data(n_train + n_test, 10)
    train_data = full_data.iloc[:n_train]
    test_data = full_data.iloc[n_train:].reset_index(drop=True)

    train_data.to_csv(data_dir / "train_data.csv", index=False)
    test_data.to_csv(data_dir / "test_data.csv", index=False)

    print(f"Generated test data: {len(train_data)} training, {len(test_data)} test samples")
|
|
|