Reinforcement Learning
stable-baselines3
Joblib
PyTorch
tabular-regression
xgboost
femtosecond-laser
hydrogel
GelMA
HAMA
laser-machining
SAC
materials-science
manufacturing
ml-intern
Instructions to use TWLab/femtosecond-laser-hydrogel-etching-model with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- stable-baselines3
How to use TWLab/femtosecond-laser-hydrogel-etching-model with stable-baselines3:
from huggingface_sb3 import load_from_hub

checkpoint = load_from_hub(
    repo_id="TWLab/femtosecond-laser-hydrogel-etching-model",
    filename="{MODEL FILENAME}.zip",
)
- Notebooks
- Google Colab
- Kaggle
| """ | |
| Main experiment runner. | |
| Orchestrates the complete ML pipeline: | |
| 1. Load and preprocess data | |
| 2. Feature engineering | |
| 3. Train models (XGBoost, NN, Ensemble) | |
| 4. Cross-validation with statistical testing | |
| 5. Ablation studies | |
| 6. Per-material evaluation | |
| 7. Generate publication figures | |
| 8. Save all results | |
| Usage: | |
| python scripts/run_experiment.py --config configs/experiment.yaml | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import logging | |
| import sys | |
| import time | |
| from pathlib import Path | |
| import numpy as np | |
| import pandas as pd | |
| import yaml | |
| # Add project root to path | |
| PROJECT_ROOT = Path(__file__).resolve().parent.parent | |
| sys.path.insert(0, str(PROJECT_ROOT)) | |
| from src.data.dataset import ( | |
| clean_dataset, | |
| get_feature_target_arrays, | |
| load_dataset, | |
| split_dataset, | |
| ) | |
| from src.features.engineering import compute_all_derived_features, get_feature_groups | |
| from src.models.models import ( | |
| NeuralNetworkRegressor, | |
| WeightedEnsemble, | |
| XGBoostMultiOutput, | |
| cross_validate_model, | |
| ) | |
| from src.evaluation.metrics import ( | |
| compare_models_statistical, | |
| compute_cv_summary, | |
| compute_metrics, | |
| per_material_evaluation, | |
| run_ablation_study, | |
| ) | |
| from src.visualization.plots import ( | |
| plot_feature_importance, | |
| plot_model_comparison, | |
| plot_per_material_performance, | |
| plot_predicted_vs_actual, | |
| plot_residual_analysis, | |
| plot_training_curves, | |
| ) | |
# Configure the root logger once at import time: INFO and above, with each
# record prefixed by a timestamp, its level, and the emitting logger's name.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)

# Module-level logger used by every function in this script.
logger = logging.getLogger(__name__)
def load_config(config_path: str) -> dict:
    """Load the experiment configuration from a YAML file.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        The parsed configuration as a dictionary.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
        ValueError: If the document is empty or its top level is not a
            mapping, since callers index the result by section name.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # default locale encoding.
    with open(config_path, encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # safe_load returns None for an empty document and a list/scalar for
    # non-mapping documents; fail here with a clear message instead of an
    # opaque TypeError at the first config[...] lookup.
    if not isinstance(config, dict):
        raise ValueError(
            f"Config file {config_path} must contain a YAML mapping at the "
            f"top level, got {type(config).__name__}"
        )

    logger.info("Loaded config: %s", config_path)
    return config
def _log_banner(title: str) -> None:
    """Log ``title`` between two 60-character ``=`` rules to mark a step."""
    rule = "=" * 60
    logger.info(rule)
    logger.info(title)
    logger.info(rule)


def _build_neural_network(nn_config: dict, n_features: int, n_outputs: int):
    """Build a NeuralNetworkRegressor from the ``models.neural_network`` config.

    Used for both the final fit and the cross-validation factory so that the
    two are always constructed with the same hyperparameters.
    """
    return NeuralNetworkRegressor(
        n_features=n_features,
        n_outputs=n_outputs,
        hidden_layers=nn_config["hidden_layers"],
        dropout=nn_config["dropout"],
        learning_rate=nn_config["learning_rate"],
        weight_decay=nn_config["weight_decay"],
        batch_size=nn_config["batch_size"],
        max_epochs=nn_config["max_epochs"],
        patience=nn_config["patience"],
    )


def main(config_path: str) -> None:
    """Run the complete experiment pipeline described by a YAML config.

    Steps performed, in order:
        1. Load, clean and feature-engineer the dataset, then split it into
           train / validation / test sets.
        2. Train the XGBoost model, the neural network, and build the
           weighted ensemble of the two.
        3. Compute test-set metrics for all three and write them to CSV.
        4. Cross-validate XGBoost and the neural network on train + val.
        5. Run the statistical comparison of the two CV results (R²).
        6. Evaluate the ensemble per material (if ``material_type`` exists).
        7. Generate the figures.
        8. Write ``experiment_summary.json``.

    Ablation studies are not run by this function.

    Args:
        config_path: Path to the experiment configuration YAML. The sections
            ``paths``, ``experiment``, ``data``, ``models``, ``evaluation``
            and ``visualization`` are read.

    Raises:
        KeyError: If a required configuration key is absent.
        ValueError: If a configured feature or target column is not present
            in the dataset after feature engineering.
    """
    start_time = time.time()
    config = load_config(config_path)

    # Setup paths. The figures directory is resolved once and reused when
    # saving plots. models_dir is only created here; nothing in this function
    # writes to it.
    fig_dir = Path(config["paths"]["figures"])
    results_dir = Path(config["paths"]["results"])
    models_dir = Path(config["paths"]["models"])
    for directory in (fig_dir, results_dir, models_dir):
        directory.mkdir(parents=True, exist_ok=True)

    seed = config["experiment"]["random_seed"]
    np.random.seed(seed)

    # =========================================================================
    # Step 1: Load and preprocess data
    # =========================================================================
    _log_banner("STEP 1: DATA LOADING & PREPROCESSING")

    df = load_dataset(
        source=config["data"]["source"],
        local_path=config["data"].get("local_path"),
        random_state=seed,
    )
    df = clean_dataset(df)
    df = compute_all_derived_features(df)

    # Feature and target columns
    all_features = (
        config["data"]["laser_features"]
        + config["data"]["material_features"]
        + config["data"]["derived_features"]
    )
    target_cols = config["data"]["target_columns"]
    n_features = len(all_features)
    n_outputs = len(target_cols)

    # Verify columns exist before any array extraction
    missing = [c for c in all_features + target_cols if c not in df.columns]
    if missing:
        logger.error("Missing columns: %s", missing)
        raise ValueError(f"Missing columns in dataset: {missing}")

    # Split data
    train_df, val_df, test_df = split_dataset(
        df,
        test_size=config["experiment"]["test_size"],
        val_size=config["experiment"]["validation_size"],
        group_column=config["data"].get("group_column", "material_type"),
        random_state=seed,
    )
    X_train, y_train = get_feature_target_arrays(train_df, all_features, target_cols)
    X_val, y_val = get_feature_target_arrays(val_df, all_features, target_cols)
    X_test, y_test = get_feature_target_arrays(test_df, all_features, target_cols)

    logger.info("Features: %s, Targets: %s", n_features, n_outputs)
    logger.info(
        "Shapes - Train: %s, Val: %s, Test: %s",
        X_train.shape, X_val.shape, X_test.shape,
    )

    # =========================================================================
    # Step 2: Train Models
    # =========================================================================
    _log_banner("STEP 2: MODEL TRAINING")

    # XGBoost
    logger.info("Training XGBoost...")
    xgb_params = config["models"]["xgboost"].copy()
    xgb_params["random_state"] = seed
    # Every model instance gets its own copy of the parameters so that no
    # instance can affect another through a shared dict.
    xgb_model = XGBoostMultiOutput(xgb_params.copy(), target_cols)
    xgb_model.fit(X_train, y_train, X_val, y_val)

    # Neural Network
    logger.info("Training Neural Network...")
    nn_config = config["models"]["neural_network"]
    nn_model = _build_neural_network(nn_config, n_features, n_outputs)
    nn_model.fit(X_train, y_train, X_val, y_val)

    # Ensemble
    ens_config = config["models"]["ensemble"]
    ensemble = WeightedEnsemble(
        xgb_model, nn_model,
        xgb_weight=ens_config["xgboost_weight"],
        nn_weight=ens_config["nn_weight"],
    )
    # Figure title built from the configured weights so it cannot drift from
    # the ensemble that was actually evaluated.
    ensemble_label = (
        f"Ensemble (XGBoost {ens_config['xgboost_weight']:.0%} "
        f"+ NN {ens_config['nn_weight']:.0%})"
    )

    # =========================================================================
    # Step 3: Evaluate on Test Set
    # =========================================================================
    _log_banner("STEP 3: TEST SET EVALUATION")

    predictions = {
        "XGBoost": xgb_model.predict(X_test),
        "Neural Network": nn_model.predict(X_test),
        "Ensemble": ensemble.predict(X_test),
    }
    metrics_all = {}
    for model_name, y_pred in predictions.items():
        metrics = compute_metrics(y_test, y_pred, target_cols)
        metrics_all[model_name] = metrics
        logger.info("\n%s Test Metrics:\n%s", model_name, metrics.to_string())

    # Save metrics
    for model_name, metrics_df in metrics_all.items():
        file_stem = model_name.lower().replace(" ", "_")
        metrics_df.to_csv(results_dir / f"metrics_{file_stem}.csv")

    # =========================================================================
    # Step 4: Cross-Validation
    # =========================================================================
    _log_banner("STEP 4: CROSS-VALIDATION")

    n_folds = config["experiment"]["n_cv_folds"]
    # Cross-validate on train + validation; the test set stays held out.
    X_full = np.vstack([X_train, X_val])
    y_full = np.vstack([y_train, y_val])
    # NOTE(review): groups are always taken from "material_type", while the
    # split above uses the configurable data.group_column — confirm intended.
    groups_full = (
        pd.concat([train_df, val_df])["material_type"].values
        if "material_type" in train_df.columns
        else None
    )

    # XGBoost CV
    logger.info("XGBoost cross-validation...")
    xgb_cv = cross_validate_model(
        model_factory=lambda: XGBoostMultiOutput(xgb_params.copy(), target_cols),
        X=X_full, y=y_full, n_folds=n_folds, groups=groups_full, random_state=seed,
    )

    # NN CV — same hyperparameters (including weight_decay and batch_size) as
    # the model trained in Step 2, so CV scores describe the reported model.
    logger.info("Neural Network cross-validation...")
    nn_cv = cross_validate_model(
        model_factory=lambda: _build_neural_network(nn_config, n_features, n_outputs),
        X=X_full, y=y_full, n_folds=n_folds, groups=groups_full, random_state=seed,
    )

    # CV summaries
    xgb_summary = compute_cv_summary(xgb_cv, target_cols)
    nn_summary = compute_cv_summary(nn_cv, target_cols)
    logger.info("\nXGBoost CV Summary:\n%s", xgb_summary.to_string())
    logger.info("\nNeural Network CV Summary:\n%s", nn_summary.to_string())
    xgb_summary.to_csv(results_dir / "cv_xgboost.csv")
    nn_summary.to_csv(results_dir / "cv_neural_network.csv")

    # =========================================================================
    # Step 5: Statistical Comparison
    # =========================================================================
    _log_banner("STEP 5: STATISTICAL SIGNIFICANCE TESTING")

    stat_test = config["evaluation"]["statistical_tests"]
    comparison = compare_models_statistical(
        xgb_cv, nn_cv,
        model_name_a="XGBoost",
        model_name_b="Neural Network",
        target_names=target_cols,
        metric="r2",
        test=stat_test["method"],
        significance_level=stat_test["significance_level"],
    )
    logger.info("\nStatistical Comparison (R²):\n%s", comparison.to_string())
    comparison.to_csv(results_dir / "statistical_comparison.csv")

    # =========================================================================
    # Step 6: Per-Material Evaluation
    # =========================================================================
    _log_banner("STEP 6: PER-MATERIAL EVALUATION")

    # Stays None when the test split has no material labels; the per-material
    # figure in Step 7 is skipped in that case.
    mat_results = None
    if "material_type" in test_df.columns:
        mat_labels = test_df["material_type"].values
        mat_results = per_material_evaluation(
            y_test, predictions["Ensemble"], mat_labels, target_cols
        )
        logger.info("\nPer-Material (Ensemble):\n%s", mat_results.to_string())
        mat_results.to_csv(results_dir / "per_material_evaluation.csv")

    # =========================================================================
    # Step 7: Generate Figures
    # =========================================================================
    _log_banner("STEP 7: GENERATING PUBLICATION FIGURES")

    fig_format = config["visualization"].get("figure_format", "png")

    # Predicted vs Actual
    plot_predicted_vs_actual(
        y_test, predictions["Ensemble"], target_cols,
        model_name=ensemble_label,
        save_path=fig_dir / f"predicted_vs_actual.{fig_format}",
    )

    # Residual analysis
    plot_residual_analysis(
        y_test, predictions["Ensemble"], target_cols,
        save_path=fig_dir / f"residual_analysis.{fig_format}",
    )

    # Feature importance
    importances = xgb_model.get_feature_importance(all_features)
    plot_feature_importance(
        importances, top_n=12,
        save_path=fig_dir / f"feature_importance.{fig_format}",
    )

    # Model comparison
    plot_model_comparison(
        metrics_all, metric="R²", target_names=target_cols,
        save_path=fig_dir / f"model_comparison_r2.{fig_format}",
    )

    # Training curves
    plot_training_curves(
        nn_model.train_losses, nn_model.val_losses,
        save_path=fig_dir / f"training_curves.{fig_format}",
    )

    # Per-material
    if mat_results is not None:
        plot_per_material_performance(
            mat_results, target_names=target_cols,
            save_path=fig_dir / f"per_material_performance.{fig_format}",
        )

    # =========================================================================
    # Step 8: Save Final Summary
    # =========================================================================
    elapsed = time.time() - start_time
    summary = {
        "experiment_name": config["experiment"]["name"],
        "dataset_size": len(df),
        "n_features": n_features,
        "n_targets": n_outputs,
        "n_cv_folds": n_folds,
        "random_seed": seed,
        # NOTE(review): fixed label, not derived from the metrics above.
        "best_model": "Ensemble",
        "test_metrics": {
            model: metrics_all[model].to_dict() for model in metrics_all
        },
        "elapsed_seconds": elapsed,
    }
    with open(results_dir / "experiment_summary.json", "w", encoding="utf-8") as f:
        # default=str stringifies any value json cannot serialize natively.
        json.dump(summary, f, indent=2, default=str)

    logger.info("=" * 60)
    logger.info("EXPERIMENT COMPLETE (%.1fs)", elapsed)
    logger.info("Results saved to: %s", results_dir)
    logger.info("Figures saved to: %s", fig_dir)
    logger.info("=" * 60)
if __name__ == "__main__":
    # Command-line entry point: a single optional --config flag selects the
    # experiment YAML, which is passed straight to main().
    cli = argparse.ArgumentParser(
        description="Run fs-laser hydrogel etching ML experiment",
    )
    cli.add_argument(
        "--config",
        default="configs/experiment.yaml",
        type=str,
        help="Path to experiment configuration YAML",
    )
    main(cli.parse_args().config)