File size: 1,151 Bytes
#!/usr/bin/env python3
"""Generate test data for self-healing ML system."""
import numpy as np
import pandas as pd
from pathlib import Path
def generate_test_data(
    n_samples: int = 1000,
    n_features: int = 10,
    *,
    seed=None,
) -> pd.DataFrame:
    """Generate a synthetic binary-classification dataset.

    Features are independent standard-normal draws. Labels come from a
    random linear model: a coefficient vector is drawn from a standard
    normal, Gaussian noise scaled by 0.5 is added to the linear score, and
    the label is 1 where the noisy score is positive and 0 otherwise.

    Args:
        n_samples: Number of rows to generate.
        n_features: Number of feature columns to generate.
        seed: Optional seed for reproducible output. When ``None`` (the
            default) the draws come from NumPy's global random state, so
            callers that control it via ``np.random.seed`` get the same
            values as before. Any other value is passed to
            ``np.random.default_rng`` and the result depends only on it.

    Returns:
        A DataFrame with columns ``feature_0`` .. ``feature_{n_features-1}``
        followed by an integer ``target`` column holding the 0/1 labels.
    """
    if seed is None:
        # Legacy global stream; standard_normal(size=...) is what randn()
        # delegates to, so existing np.random.seed() users are unaffected.
        standard_normal = np.random.standard_normal
    else:
        # Private generator: reproducible without touching global state.
        standard_normal = np.random.default_rng(seed).standard_normal

    # Draw order (features, coefficients, noise) is kept fixed so a given
    # random state always maps to the same dataset.
    X = standard_normal(size=(n_samples, n_features))
    coef = standard_normal(size=n_features)
    noise = standard_normal(size=n_samples) * 0.5

    # Binary label: sign of the noisy linear score.
    logits = X @ coef + noise
    y = (logits > 0).astype(int)

    feature_names = [f"feature_{i}" for i in range(n_features)]
    df = pd.DataFrame(X, columns=feature_names)
    df['target'] = y
    return df
def main() -> None:
    """Generate the train/test CSV files under ``data/raw``.

    Writes ``train_data.csv`` (1000 rows) and ``test_data.csv`` (200 rows),
    creating the directory if needed, then prints a one-line summary.
    """
    data_dir = Path("data/raw")
    data_dir.mkdir(parents=True, exist_ok=True)

    n_train, n_test, n_features = 1000, 200, 10

    # Generate both splits in ONE call. Each call to generate_test_data
    # draws its own random coefficient vector, so two separate calls would
    # label the train and test sets with different linear models and the
    # test file could not measure generalisation. Rows are i.i.d., so a
    # plain positional slice is a valid split.
    full_data = generate_test_data(n_train + n_test, n_features)
    train_data = full_data.iloc[:n_train]
    test_data = full_data.iloc[n_train:]

    train_data.to_csv(data_dir / "train_data.csv", index=False)
    test_data.to_csv(data_dir / "test_data.csv", index=False)
    print(f"Generated test data: {len(train_data)} training, {len(test_data)} test samples")


if __name__ == "__main__":
    main()
|