Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """Run the full mutagenicity classification pipeline.""" | |
| from pathlib import Path | |
| from preprocess_data import save_preprocessed | |
| from train_models import run_and_save | |
| from utils import WORKSPACE | |
# Directory passed to the preprocessing step in main().
DATA_DIR = WORKSPACE / "data"
# Directory passed to the training step in main() and reported as the
# results location at the end of a run.
RESULTS_DIR = WORKSPACE / "results"
def main():
    """Run preprocessing and model training, then print a metrics summary.

    The function first verifies that a dataset CSV is present directly under
    ``WORKSPACE``, then calls ``save_preprocessed(DATA_DIR)`` followed by
    ``run_and_save(RESULTS_DIR)`` and prints the F1 and ROC-AUC value reported
    for each entry of the returned metrics mapping.

    Raises:
        FileNotFoundError: If neither accepted dataset file name exists
            under ``WORKSPACE``.
    """
    # Accepted file names, checked in this order; the first hit is enough.
    candidates = ("Mutagenicity_N6512 2.csv", "Mutagenicity_N6512.csv")
    # NOTE(review): this is an existence check only -- the located path is not
    # passed on. Presumably save_preprocessed finds the CSV itself; confirm.
    if not any((WORKSPACE / fname).exists() for fname in candidates):
        raise FileNotFoundError(
            f"Dataset not found. Expected Mutagenicity_N6512.csv in {WORKSPACE}"
        )

    print("Step 1: Preprocessing...")
    # The third returned value is not needed here.
    X, y, _names = save_preprocessed(DATA_DIR)
    print(f" Loaded {len(y)} molecules, {X.shape[1]} features")

    print("Step 2: Training models...")
    metrics = run_and_save(RESULTS_DIR)

    print("\nMetrics:")
    for name, m in metrics.items():
        print(f" {name}: F1={m['f1']:.4f} AUC={m['roc_auc']:.4f}")
    print(f"\nResults saved to {RESULTS_DIR}/")
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()