# NeerajCodz's picture
# feat: full project — ML simulation, dashboard UI, models on HF Hub
# f381be8
"""
src.models.classical.regressors
===============================
Classical ML regression models for SOH and RUL prediction.
All models follow a unified interface:
train_*(X_train, y_train, **kwargs) β†’ fitted model
evaluate_*(model, X_test, y_test) β†’ metrics dict
Hyperparameter optimization is done with Optuna where applicable.
"""
from __future__ import annotations
from typing import Any
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import ElasticNet, Lasso, Ridge
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from src.evaluation.metrics import regression_metrics, tolerance_accuracy
from src.utils.config import CV_FOLDS, MODELS_DIR, N_OPTUNA_TRIALS, RANDOM_STATE
def _save_model(model: Any, name: str) -> None:
    """Persist *model* to ``MODELS_DIR/classical/<name>.joblib``.

    Creates the target directory first — ``joblib.dump`` raises
    FileNotFoundError on a fresh checkout where the folder does not
    exist yet.
    """
    path = MODELS_DIR / "classical" / f"{name}.joblib"
    path.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(model, path)
def _load_model(name: str) -> Any:
    """Load a previously saved classical model by its short *name*."""
    return joblib.load(MODELS_DIR / "classical" / f"{name}.joblib")
# ── Ridge Regression ─────────────────────────────────────────────────────────
def train_ridge(X: np.ndarray, y: np.ndarray, alpha: float = 1.0) -> Ridge:
    """Fit a Ridge regressor on (X, y), persist it as ``ridge``, and return it."""
    estimator = Ridge(alpha=alpha, random_state=RANDOM_STATE)
    estimator.fit(X, y)
    _save_model(estimator, "ridge")
    return estimator
# ── Lasso Regression ─────────────────────────────────────────────────────────
def train_lasso(X: np.ndarray, y: np.ndarray, alpha: float = 0.01) -> Lasso:
    """Fit a Lasso regressor on (X, y), persist it as ``lasso``, and return it.

    max_iter is raised to 10000 so coordinate descent converges on
    poorly-conditioned feature sets.
    """
    estimator = Lasso(alpha=alpha, random_state=RANDOM_STATE, max_iter=10000)
    estimator.fit(X, y)
    _save_model(estimator, "lasso")
    return estimator
# ── ElasticNet ───────────────────────────────────────────────────────────────
def train_elasticnet(X: np.ndarray, y: np.ndarray, alpha: float = 0.01, l1_ratio: float = 0.5) -> ElasticNet:
    """Fit an ElasticNet regressor, persist it as ``elasticnet``, and return it.

    l1_ratio blends L1 and L2 penalties (0.5 = equal mix); max_iter is
    raised to 10000 to ensure convergence.
    """
    estimator = ElasticNet(
        alpha=alpha,
        l1_ratio=l1_ratio,
        random_state=RANDOM_STATE,
        max_iter=10000,
    )
    estimator.fit(X, y)
    _save_model(estimator, "elasticnet")
    return estimator
# ── KNN Regressor ────────────────────────────────────────────────────────────
def train_knn(X: np.ndarray, y: np.ndarray, n_neighbors: int = 5) -> KNeighborsRegressor:
    """Fit a distance-weighted KNN regressor and persist it as ``knn_k<k>``.

    The saved artifact name embeds k, so models trained with different
    neighbor counts do not overwrite each other.
    """
    estimator = KNeighborsRegressor(
        n_neighbors=n_neighbors,
        weights="distance",  # closer neighbors count more
        n_jobs=-1,
    )
    estimator.fit(X, y)
    _save_model(estimator, f"knn_k{n_neighbors}")
    return estimator
# ── SVR ──────────────────────────────────────────────────────────────────────
def train_svr(X: np.ndarray, y: np.ndarray, C: float = 10.0, gamma: str = "scale") -> SVR:
    """Fit an RBF-kernel support-vector regressor and persist it as ``svr``."""
    estimator = SVR(kernel="rbf", C=C, gamma=gamma)
    estimator.fit(X, y)
    _save_model(estimator, "svr")
    return estimator
# ── Random Forest ────────────────────────────────────────────────────────────
def train_random_forest(
    X: np.ndarray, y: np.ndarray,
    n_estimators: int = 500,
    max_depth: int | None = None,
) -> RandomForestRegressor:
    """Fit a random-forest regressor and persist it as ``random_forest``.

    max_depth=None lets each tree grow until leaves are pure.
    """
    forest = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=RANDOM_STATE,
        n_jobs=-1,  # train trees on all available cores
    )
    forest.fit(X, y)
    _save_model(forest, "random_forest")
    return forest
# ── XGBoost with Optuna HPO ─────────────────────────────────────────────────
def train_xgboost(
    X: np.ndarray, y: np.ndarray,
    n_trials: int = N_OPTUNA_TRIALS,
    cv_folds: int = CV_FOLDS,
) -> Any:
    """Train an XGBoost regressor with Optuna hyperparameter optimization.

    Each trial is scored by cross-validated mean absolute error; the best
    configuration is refit on all of (X, y).

    Parameters
    ----------
    X, y : training features and targets.
    n_trials : number of Optuna trials to run.
    cv_folds : CV folds used to score each trial.

    Returns
    -------
    The refit ``XGBRegressor``. Side effects: saves the model as
    ``xgboost`` and the winning params as ``xgboost_best_params``.
    """
    import optuna
    from xgboost import XGBRegressor

    optuna.logging.set_verbosity(optuna.logging.WARNING)

    def objective(trial):
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
            "max_depth": trial.suggest_int("max_depth", 3, 12),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            "subsample": trial.suggest_float("subsample", 0.6, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
            "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0, log=True),
            "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
            "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
        }
        model = XGBRegressor(
            **params, tree_method="hist", random_state=RANDOM_STATE,
            verbosity=0, n_jobs=-1,
        )
        scores = cross_val_score(model, X, y, cv=cv_folds, scoring="neg_mean_absolute_error")
        return -scores.mean()  # Optuna minimizes; flip sign back to MAE

    # Seed the TPE sampler so the hyperparameter search is reproducible —
    # the rest of the pipeline already pins RANDOM_STATE everywhere.
    study = optuna.create_study(
        direction="minimize",
        sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE),
    )
    # NOTE(review): show_progress_bar requires tqdm to be installed — confirm.
    study.optimize(objective, n_trials=n_trials, show_progress_bar=True)
    best_model = XGBRegressor(
        **study.best_params, tree_method="hist", random_state=RANDOM_STATE,
        verbosity=0, n_jobs=-1,
    )
    best_model.fit(X, y)
    _save_model(best_model, "xgboost")
    _save_model(study.best_params, "xgboost_best_params")
    return best_model
# ── LightGBM with Optuna HPO ────────────────────────────────────────────────
def train_lightgbm(
    X: np.ndarray, y: np.ndarray,
    n_trials: int = N_OPTUNA_TRIALS,
    cv_folds: int = CV_FOLDS,
) -> Any:
    """Train a LightGBM regressor with Optuna hyperparameter optimization.

    Each trial is scored by cross-validated mean absolute error; the best
    configuration is refit on all of (X, y).

    Parameters
    ----------
    X, y : training features and targets.
    n_trials : number of Optuna trials to run.
    cv_folds : CV folds used to score each trial.

    Returns
    -------
    The refit ``LGBMRegressor``. Side effects: saves the model as
    ``lightgbm`` and the winning params as ``lightgbm_best_params``.
    """
    import optuna
    from lightgbm import LGBMRegressor

    optuna.logging.set_verbosity(optuna.logging.WARNING)

    def objective(trial):
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
            "max_depth": trial.suggest_int("max_depth", 3, 15),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            "subsample": trial.suggest_float("subsample", 0.6, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
            "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0, log=True),
            "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
            "num_leaves": trial.suggest_int("num_leaves", 16, 128),
            "min_child_samples": trial.suggest_int("min_child_samples", 5, 50),
        }
        model = LGBMRegressor(**params, random_state=RANDOM_STATE, verbosity=-1, n_jobs=-1)
        scores = cross_val_score(model, X, y, cv=cv_folds, scoring="neg_mean_absolute_error")
        return -scores.mean()  # Optuna minimizes; flip sign back to MAE

    # Seed the TPE sampler so the hyperparameter search is reproducible —
    # consistent with the seeded XGBoost search and RANDOM_STATE elsewhere.
    study = optuna.create_study(
        direction="minimize",
        sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE),
    )
    # NOTE(review): show_progress_bar requires tqdm to be installed — confirm.
    study.optimize(objective, n_trials=n_trials, show_progress_bar=True)
    best_model = LGBMRegressor(**study.best_params, random_state=RANDOM_STATE, verbosity=-1, n_jobs=-1)
    best_model.fit(X, y)
    _save_model(best_model, "lightgbm")
    _save_model(study.best_params, "lightgbm_best_params")
    return best_model
# ── Unified evaluation ───────────────────────────────────────────────────────
def evaluate_model(
    model: Any,
    X_test: np.ndarray,
    y_test: np.ndarray,
    model_name: str = "",
    soh_tolerance: float = 2.0,
    rul_tolerance: float = 5.0,
    target_type: str = "soh",
) -> dict[str, float]:
    """Evaluate any sklearn-compatible model and return a metrics dict.

    Standard regression metrics are augmented with a tolerance accuracy,
    using ``soh_tolerance`` when ``target_type`` is ``"soh"`` and
    ``rul_tolerance`` otherwise.
    """
    predictions = model.predict(X_test)
    results = regression_metrics(y_test, predictions, prefix=model_name)
    if target_type == "soh":
        tolerance = soh_tolerance
    else:
        tolerance = rul_tolerance
    # NOTE(review): with the default model_name="" this key becomes
    # "_tolerance_acc" (leading underscore) — confirm it matches the
    # prefix convention used by regression_metrics.
    results[f"{model_name}_tolerance_acc"] = tolerance_accuracy(y_test, predictions, tolerance)
    return results