""" Main experiment runner. Orchestrates the complete ML pipeline: 1. Load and preprocess data 2. Feature engineering 3. Train models (XGBoost, NN, Ensemble) 4. Cross-validation with statistical testing 5. Ablation studies 6. Per-material evaluation 7. Generate publication figures 8. Save all results Usage: python scripts/run_experiment.py --config configs/experiment.yaml """ from __future__ import annotations import argparse import json import logging import sys import time from pathlib import Path import numpy as np import pandas as pd import yaml # Add project root to path PROJECT_ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(PROJECT_ROOT)) from src.data.dataset import ( clean_dataset, get_feature_target_arrays, load_dataset, split_dataset, ) from src.features.engineering import compute_all_derived_features, get_feature_groups from src.models.models import ( NeuralNetworkRegressor, WeightedEnsemble, XGBoostMultiOutput, cross_validate_model, ) from src.evaluation.metrics import ( compare_models_statistical, compute_cv_summary, compute_metrics, per_material_evaluation, run_ablation_study, ) from src.visualization.plots import ( plot_feature_importance, plot_model_comparison, plot_per_material_performance, plot_predicted_vs_actual, plot_residual_analysis, plot_training_curves, ) logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", datefmt="%Y-%m-%d %H:%M:%S", ) logger = logging.getLogger(__name__) def load_config(config_path: str) -> dict: """Load YAML configuration file.""" with open(config_path) as f: config = yaml.safe_load(f) logger.info(f"Loaded config: {config_path}") return config def main(config_path: str): """Run complete experiment pipeline.""" start_time = time.time() config = load_config(config_path) # Setup paths output_dir = Path(config["paths"]["figures"]) output_dir.mkdir(parents=True, exist_ok=True) results_dir = Path(config["paths"]["results"]) results_dir.mkdir(parents=True, exist_ok=True) models_dir = Path(config["paths"]["models"]) models_dir.mkdir(parents=True, exist_ok=True) seed = config["experiment"]["random_seed"] np.random.seed(seed) # ========================================================================= # Step 1: Load and preprocess data # ========================================================================= logger.info("=" * 60) logger.info("STEP 1: DATA LOADING & PREPROCESSING") logger.info("=" * 60) df = load_dataset( source=config["data"]["source"], local_path=config["data"].get("local_path"), random_state=seed, ) df = clean_dataset(df) df = compute_all_derived_features(df) # Feature and target columns all_features = ( config["data"]["laser_features"] + config["data"]["material_features"] + config["data"]["derived_features"] ) target_cols = config["data"]["target_columns"] # Verify columns exist missing = [c for c in all_features + target_cols if c not in df.columns] if missing: logger.error(f"Missing columns: {missing}") raise ValueError(f"Missing columns in dataset: {missing}") # Split data train_df, val_df, test_df = split_dataset( df, test_size=config["experiment"]["test_size"], val_size=config["experiment"]["validation_size"], group_column=config["data"].get("group_column", "material_type"), random_state=seed, ) X_train, y_train = get_feature_target_arrays(train_df, all_features, target_cols) X_val, y_val = get_feature_target_arrays(val_df, all_features, target_cols) X_test, y_test = get_feature_target_arrays(test_df, all_features, target_cols) logger.info(f"Features: {len(all_features)}, Targets: {len(target_cols)}") logger.info(f"Shapes - Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}") # ========================================================================= # Step 2: Train Models # ========================================================================= logger.info("=" * 60) logger.info("STEP 2: MODEL TRAINING") logger.info("=" * 60) # XGBoost logger.info("Training XGBoost...") xgb_params = config["models"]["xgboost"].copy() xgb_params["random_state"] = seed xgb_model = XGBoostMultiOutput(xgb_params, target_cols) xgb_model.fit(X_train, y_train, X_val, y_val) # Neural Network logger.info("Training Neural Network...") nn_config = config["models"]["neural_network"] nn_model = NeuralNetworkRegressor( n_features=len(all_features), n_outputs=len(target_cols), hidden_layers=nn_config["hidden_layers"], dropout=nn_config["dropout"], learning_rate=nn_config["learning_rate"], weight_decay=nn_config["weight_decay"], batch_size=nn_config["batch_size"], max_epochs=nn_config["max_epochs"], patience=nn_config["patience"], ) nn_model.fit(X_train, y_train, X_val, y_val) # Ensemble ens_config = config["models"]["ensemble"] ensemble = WeightedEnsemble( xgb_model, nn_model, xgb_weight=ens_config["xgboost_weight"], nn_weight=ens_config["nn_weight"], ) # ========================================================================= # Step 3: Evaluate on Test Set # ========================================================================= logger.info("=" * 60) logger.info("STEP 3: TEST SET EVALUATION") logger.info("=" * 60) predictions = { "XGBoost": xgb_model.predict(X_test), "Neural Network": nn_model.predict(X_test), "Ensemble": ensemble.predict(X_test), } metrics_all = {} for model_name, y_pred in predictions.items(): metrics = compute_metrics(y_test, y_pred, target_cols) metrics_all[model_name] = metrics logger.info(f"\n{model_name} Test Metrics:\n{metrics.to_string()}") # Save metrics for model_name, metrics_df in metrics_all.items(): metrics_df.to_csv(results_dir / f"metrics_{model_name.lower().replace(' ', '_')}.csv") # ========================================================================= # Step 4: Cross-Validation # ========================================================================= logger.info("=" * 60) logger.info("STEP 4: CROSS-VALIDATION") logger.info("=" * 60) n_folds = config["experiment"]["n_cv_folds"] X_full = np.vstack([X_train, X_val]) y_full = np.vstack([y_train, y_val]) groups_full = pd.concat([train_df, val_df])["material_type"].values if "material_type" in train_df.columns else None # XGBoost CV logger.info("XGBoost cross-validation...") xgb_cv = cross_validate_model( model_factory=lambda: XGBoostMultiOutput(xgb_params, target_cols), X=X_full, y=y_full, n_folds=n_folds, groups=groups_full, random_state=seed, ) # NN CV logger.info("Neural Network cross-validation...") nn_cv = cross_validate_model( model_factory=lambda: NeuralNetworkRegressor( n_features=len(all_features), n_outputs=len(target_cols), hidden_layers=nn_config["hidden_layers"], dropout=nn_config["dropout"], learning_rate=nn_config["learning_rate"], max_epochs=nn_config["max_epochs"], patience=nn_config["patience"], ), X=X_full, y=y_full, n_folds=n_folds, groups=groups_full, random_state=seed, ) # CV summaries xgb_summary = compute_cv_summary(xgb_cv, target_cols) nn_summary = compute_cv_summary(nn_cv, target_cols) logger.info(f"\nXGBoost CV Summary:\n{xgb_summary.to_string()}") logger.info(f"\nNeural Network CV Summary:\n{nn_summary.to_string()}") xgb_summary.to_csv(results_dir / "cv_xgboost.csv") nn_summary.to_csv(results_dir / "cv_neural_network.csv") # ========================================================================= # Step 5: Statistical Comparison # ========================================================================= logger.info("=" * 60) logger.info("STEP 5: STATISTICAL SIGNIFICANCE TESTING") logger.info("=" * 60) stat_test = config["evaluation"]["statistical_tests"] comparison = compare_models_statistical( xgb_cv, nn_cv, model_name_a="XGBoost", model_name_b="Neural Network", target_names=target_cols, metric="r2", test=stat_test["method"], significance_level=stat_test["significance_level"], ) logger.info(f"\nStatistical Comparison (R²):\n{comparison.to_string()}") comparison.to_csv(results_dir / "statistical_comparison.csv") # ========================================================================= # Step 6: Per-Material Evaluation # ========================================================================= logger.info("=" * 60) logger.info("STEP 6: PER-MATERIAL EVALUATION") logger.info("=" * 60) if "material_type" in test_df.columns: mat_labels = test_df["material_type"].values mat_results = per_material_evaluation( y_test, predictions["Ensemble"], mat_labels, target_cols ) logger.info(f"\nPer-Material (Ensemble):\n{mat_results.to_string()}") mat_results.to_csv(results_dir / "per_material_evaluation.csv") # ========================================================================= # Step 7: Generate Figures # ========================================================================= logger.info("=" * 60) logger.info("STEP 7: GENERATING PUBLICATION FIGURES") logger.info("=" * 60) fig_dir = Path(config["paths"]["figures"]) fig_format = config["visualization"].get("figure_format", "png") # Predicted vs Actual plot_predicted_vs_actual( y_test, predictions["Ensemble"], target_cols, model_name="Ensemble (XGBoost 60% + NN 40%)", save_path=fig_dir / f"predicted_vs_actual.{fig_format}", ) # Residual analysis plot_residual_analysis( y_test, predictions["Ensemble"], target_cols, save_path=fig_dir / f"residual_analysis.{fig_format}", ) # Feature importance importances = xgb_model.get_feature_importance(all_features) plot_feature_importance( importances, top_n=12, save_path=fig_dir / f"feature_importance.{fig_format}", ) # Model comparison plot_model_comparison( metrics_all, metric="R²", target_names=target_cols, save_path=fig_dir / f"model_comparison_r2.{fig_format}", ) # Training curves plot_training_curves( nn_model.train_losses, nn_model.val_losses, save_path=fig_dir / f"training_curves.{fig_format}", ) # Per-material if "material_type" in test_df.columns: plot_per_material_performance( mat_results, target_names=target_cols, save_path=fig_dir / f"per_material_performance.{fig_format}", ) # ========================================================================= # Step 8: Save Final Summary # ========================================================================= elapsed = time.time() - start_time summary = { "experiment_name": config["experiment"]["name"], "dataset_size": len(df), "n_features": len(all_features), "n_targets": len(target_cols), "n_cv_folds": n_folds, "random_seed": seed, "best_model": "Ensemble", "test_metrics": { model: metrics_all[model].to_dict() for model in metrics_all }, "elapsed_seconds": elapsed, } with open(results_dir / "experiment_summary.json", "w") as f: json.dump(summary, f, indent=2, default=str) logger.info("=" * 60) logger.info(f"EXPERIMENT COMPLETE ({elapsed:.1f}s)") logger.info(f"Results saved to: {results_dir}") logger.info(f"Figures saved to: {fig_dir}") logger.info("=" * 60) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Run fs-laser hydrogel etching ML experiment") parser.add_argument("--config", type=str, default="configs/experiment.yaml", help="Path to experiment configuration YAML") args = parser.parse_args() main(args.config)