"""
src.models.classical.regressors
===============================

Classical ML regression models for SOH and RUL prediction.

All models follow a unified interface:

    train_*(X_train, y_train, **kwargs) → fitted model
    evaluate_*(model, X_test, y_test)   → metrics dict

Hyperparameter optimization is done with Optuna where applicable.
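
Example (a minimal sketch; assumes feature/target splits are prepared upstream)::

    model = train_ridge(X_train, y_train, alpha=0.5)
    metrics = evaluate_model(model, X_test, y_test, model_name="ridge")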
"""
from __future__ import annotations

from typing import Any

import joblib
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import ElasticNet, Lasso, Ridge
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

from src.evaluation.metrics import regression_metrics, tolerance_accuracy
from src.utils.config import CV_FOLDS, MODELS_DIR, N_OPTUNA_TRIALS, RANDOM_STATE


def _save_model(model: Any, name: str) -> None:
    path = MODELS_DIR / "classical" / f"{name}.joblib"
    path.parent.mkdir(parents=True, exist_ok=True)  # joblib.dump does not create dirs
    joblib.dump(model, path)


def _load_model(name: str) -> Any:
    path = MODELS_DIR / "classical" / f"{name}.joblib"
    return joblib.load(path)


# ── Ridge Regression ──────────────────────────────────────────────────────────
def train_ridge(X: np.ndarray, y: np.ndarray, alpha: float = 1.0) -> Ridge:
    model = Ridge(alpha=alpha, random_state=RANDOM_STATE)
    model.fit(X, y)
    _save_model(model, "ridge")
    return model


# ── Lasso Regression ──────────────────────────────────────────────────────────
def train_lasso(X: np.ndarray, y: np.ndarray, alpha: float = 0.01) -> Lasso:
    model = Lasso(alpha=alpha, random_state=RANDOM_STATE, max_iter=10000)
    model.fit(X, y)
    _save_model(model, "lasso")
    return model


# ── ElasticNet ────────────────────────────────────────────────────────────────
def train_elasticnet(
    X: np.ndarray, y: np.ndarray, alpha: float = 0.01, l1_ratio: float = 0.5
) -> ElasticNet:
    model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=RANDOM_STATE, max_iter=10000)
    model.fit(X, y)
    _save_model(model, "elasticnet")
    return model


# ── KNN Regressor ─────────────────────────────────────────────────────────────
def train_knn(X: np.ndarray, y: np.ndarray, n_neighbors: int = 5) -> KNeighborsRegressor:
    # Distance-weighted KNN; assumes features are standardized upstream,
    # since nearest-neighbor distances are scale-sensitive.
    model = KNeighborsRegressor(n_neighbors=n_neighbors, weights="distance", n_jobs=-1)
    model.fit(X, y)
    _save_model(model, f"knn_k{n_neighbors}")
    return model


# ── SVR ───────────────────────────────────────────────────────────────────────
def train_svr(X: np.ndarray, y: np.ndarray, C: float = 10.0, gamma: str = "scale") -> SVR:
    # RBF-kernel SVR; like KNN, it is scale-sensitive.
    model = SVR(kernel="rbf", C=C, gamma=gamma)
    model.fit(X, y)
    _save_model(model, "svr")
    return model


# ── Random Forest ─────────────────────────────────────────────────────────────
def train_random_forest(
    X: np.ndarray, y: np.ndarray,
    n_estimators: int = 500,
    max_depth: int | None = None,
) -> RandomForestRegressor:
    model = RandomForestRegressor(
        n_estimators=n_estimators, max_depth=max_depth,
        random_state=RANDOM_STATE, n_jobs=-1,
    )
    model.fit(X, y)
    _save_model(model, "random_forest")
    return model


# ── XGBoost with Optuna HPO ───────────────────────────────────────────────────
def train_xgboost(
    X: np.ndarray, y: np.ndarray,
    n_trials: int = N_OPTUNA_TRIALS,
    cv_folds: int = CV_FOLDS,
) -> Any:
    """Train an XGBoost regressor with Optuna hyperparameter optimization."""
    import optuna
    from xgboost import XGBRegressor

    optuna.logging.set_verbosity(optuna.logging.WARNING)

    def objective(trial):
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
            "max_depth": trial.suggest_int("max_depth", 3, 12),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            "subsample": trial.suggest_float("subsample", 0.6, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
            "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0, log=True),
            "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
            "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
        }
        model = XGBRegressor(
            **params, tree_method="hist", random_state=RANDOM_STATE,
            verbosity=0, n_jobs=-1,
        )
        # Minimize cross-validated MAE (sklearn returns the negated MAE).
        scores = cross_val_score(model, X, y, cv=cv_folds, scoring="neg_mean_absolute_error")
        return -scores.mean()

    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

    # Refit on the full training set with the best hyperparameters.
    best_params = study.best_params
    best_model = XGBRegressor(
        **best_params, tree_method="hist", random_state=RANDOM_STATE,
        verbosity=0, n_jobs=-1,
    )
    best_model.fit(X, y)
    _save_model(best_model, "xgboost")
    _save_model(study.best_params, "xgboost_best_params")
    return best_model
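
# Usage sketch (hypothetical split; a smaller trial budget than N_OPTUNA_TRIALS
# keeps quick experiments cheap):
#     model = train_xgboost(X_train, y_train, n_trials=25)
#     preds = model.predict(X_test)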


# ── LightGBM with Optuna HPO ──────────────────────────────────────────────────
def train_lightgbm(
    X: np.ndarray, y: np.ndarray,
    n_trials: int = N_OPTUNA_TRIALS,
    cv_folds: int = CV_FOLDS,
) -> Any:
    """Train a LightGBM regressor with Optuna hyperparameter optimization."""
    import optuna
    from lightgbm import LGBMRegressor

    optuna.logging.set_verbosity(optuna.logging.WARNING)

    def objective(trial):
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
            "max_depth": trial.suggest_int("max_depth", 3, 15),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            "subsample": trial.suggest_float("subsample", 0.6, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
            "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0, log=True),
            "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
            "num_leaves": trial.suggest_int("num_leaves", 16, 128),
            "min_child_samples": trial.suggest_int("min_child_samples", 5, 50),
        }
        model = LGBMRegressor(**params, random_state=RANDOM_STATE, verbosity=-1, n_jobs=-1)
        # Same objective as XGBoost: minimize cross-validated MAE.
        scores = cross_val_score(model, X, y, cv=cv_folds, scoring="neg_mean_absolute_error")
        return -scores.mean()

    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

    # Refit on the full training set with the best hyperparameters.
    best_params = study.best_params
    best_model = LGBMRegressor(**best_params, random_state=RANDOM_STATE, verbosity=-1, n_jobs=-1)
    best_model.fit(X, y)
    _save_model(best_model, "lightgbm")
    _save_model(study.best_params, "lightgbm_best_params")
    return best_model
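
# Sketch: reloading tuned artifacts later (assumes the train_* call above has
# already written them via _save_model):
#     model = _load_model("lightgbm")
#     best_params = _load_model("lightgbm_best_params")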


# ── Unified evaluation ────────────────────────────────────────────────────────
def evaluate_model(
    model: Any,
    X_test: np.ndarray,
    y_test: np.ndarray,
    model_name: str = "",
    soh_tolerance: float = 2.0,
    rul_tolerance: float = 5.0,
    target_type: str = "soh",
) -> dict[str, float]:
    """Evaluate any sklearn-compatible model and return a metrics dict."""
    y_pred = model.predict(X_test)
    metrics = regression_metrics(y_test, y_pred, prefix=model_name)
    # Pick the tolerance band for the given target type (SOH vs. RUL).
    tol = soh_tolerance if target_type == "soh" else rul_tolerance
    metrics[f"{model_name}_tolerance_acc"] = tolerance_accuracy(y_test, y_pred, tol)
    return metrics
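
# ── Usage sketch ──────────────────────────────────────────────────────────────
# A minimal smoke test on synthetic data (hypothetical shapes and coefficients;
# real pipelines pass engineered battery features instead).
if __name__ == "__main__":
    rng = np.random.default_rng(RANDOM_STATE)
    X_demo = rng.normal(size=(200, 8))
    y_demo = X_demo @ rng.normal(size=8) + rng.normal(scale=0.1, size=200)
    ridge = train_ridge(X_demo[:150], y_demo[:150], alpha=0.5)
    print(evaluate_model(ridge, X_demo[150:], y_demo[150:], model_name="ridge"))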