Spaces:
Build error
Build error
| from abc import ABC, abstractmethod | |
| from typing import Any, Dict, List, Optional, Tuple | |
| import pandas as pd | |
| import numpy as np | |
| import os | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from sklearn.base import RegressorMixin | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score | |
| from sklearn.model_selection import cross_val_score | |
| from src.core import setup_logger | |
| logger = setup_logger(__name__) | |
| try: | |
| import shap | |
| except ImportError: | |
| shap = None | |
| # --- MODEL BUILDING --- | |
class ModelBuildingStrategy(ABC):
    """Interface for strategies that build and fit a regression model."""

    @abstractmethod
    def build_and_train_model(self, X_train: pd.DataFrame, y_train: pd.Series) -> RegressorMixin:
        """Build a model, fit it on the training data and return it.

        Marked abstract so that the base class cannot be instantiated and a
        subclass that forgets to override this method fails at construction
        time instead of silently returning ``None``.

        Args:
            X_train: Training features.
            y_train: Training target.

        Returns:
            The fitted model.
        """
class XGBoostStrategy(ModelBuildingStrategy):
    """Model-building strategy that fits a scaled XGBoost regressor.

    The regressor is fitted on ``log1p`` of the target, so predictions from
    the returned pipeline are on the log scale and need ``np.expm1`` to be
    mapped back to the original scale.
    """

    def __init__(self, **params):
        """Keep the keyword arguments that are later passed to ``XGBRegressor``."""
        self.params = params

    def build_and_train_model(self, X_train: pd.DataFrame, y_train: pd.Series) -> Pipeline:
        """Fit a ``StandardScaler`` + ``XGBRegressor`` pipeline and return it.

        Rows whose target is not strictly positive are dropped before fitting;
        when ``X_train`` has an ``"Open"`` column, only rows where it equals 1
        are kept as well.

        Args:
            X_train: Training features.
            y_train: Training target on its original scale.

        Returns:
            The fitted pipeline (steps ``"scaler"`` and ``"model"``).
        """
        # Function-scope import: xgboost is only required when this strategy runs.
        from xgboost import XGBRegressor

        logger.info("Building XGBoost model.")

        # Row filter for the Rossmann data: positive target, and an open store
        # whenever that flag is available.
        keep_rows = y_train > 0
        if "Open" in X_train.columns:
            is_open = X_train["Open"] == 1
            keep_rows = keep_rows & is_open

        features = X_train[keep_rows]
        log_target = np.log1p(y_train[keep_rows])

        steps = [
            ("scaler", StandardScaler()),
            ("model", XGBRegressor(**self.params)),
        ]
        fitted = Pipeline(steps)
        fitted.fit(features, log_target)
        return fitted
| # --- EVALUATION --- | |
class ModelEvaluator:
    """Metrics for a fitted regressor that predicts ``log1p`` of the target.

    Both methods are static: they take no instance state and can be called
    either on the class or on an instance.
    """

    @staticmethod
    def calculate_rmspe(y_true, y_pred):
        """Return the root mean squared percentage error, in percent.

        Rows whose true value is not strictly positive are excluded, because
        the relative error divides by the true value.

        Args:
            y_true: Observed target values (array-like).
            y_pred: Predicted target values, positionally matched to ``y_true``.

        Returns:
            The RMSPE multiplied by 100, or ``nan`` when no row has a
            positive true value.
        """
        # Coerce to arrays so lists work and the comparison is positional.
        y_true = np.asarray(y_true, dtype=float)
        y_pred = np.asarray(y_pred, dtype=float)
        mask = y_true > 0
        if not mask.any():
            # Nothing to average; avoid the mean-of-empty-slice warning.
            return float("nan")
        relative_error = (y_true[mask] - y_pred[mask]) / y_true[mask]
        return np.sqrt(np.mean(relative_error ** 2)) * 100

    @staticmethod
    def evaluate(model, X_test, y_test):
        """Score ``model`` on a test set, on the original target scale.

        Args:
            model: Fitted estimator whose ``predict`` returns ``log1p`` of the
                target.
            X_test: Test features.
            y_test: True target values on the original scale.

        Returns:
            A dict with the keys ``"MSE"``, ``"MAE"`` and ``"RMSPE"``.
        """
        # Predictions are on the log1p scale; map them back before scoring.
        y_pred = np.expm1(model.predict(X_test))
        y_true = np.asarray(y_test)
        mse = mean_squared_error(y_true, y_pred)
        mae = mean_absolute_error(y_true, y_pred)
        rmspe = ModelEvaluator.calculate_rmspe(y_true, y_pred)
        return {"MSE": mse, "MAE": mae, "RMSPE": rmspe}
| # --- EXPLAINABILITY --- | |
class ModelExplainer:
    """Feature-importance reporting for a fitted model or pipeline."""

    def __init__(self, model, X_train):
        """Store the model and training data; warn when SHAP is unavailable.

        Args:
            model: Fitted estimator, or a pipeline whose ``"model"`` step is one.
            X_train: Training features, kept on the instance.
        """
        self.model = model
        self.X_train = X_train
        if shap is None:
            logger.warning("SHAP not installed. Explainer will not function.")

    def plot_importance(self, X, save_path=None):
        """Rank feature importances and draw the top 20 as a bar chart.

        Args:
            X: Frame whose columns name the features, in the order the model
                reports its importances.
            save_path: Optional path. When given, the chart is written there
                and the figure is closed. When omitted, the figure is left
                open so the caller can display it.

        Returns:
            All feature importances as a Series indexed by column name,
            sorted in descending order.
        """
        # A pipeline keeps the fitted estimator under its "model" step.
        if hasattr(self.model, 'named_steps'):
            estimator = self.model.named_steps['model']
        else:
            estimator = self.model
        importances = estimator.feature_importances_
        feat_imp = pd.Series(importances, index=X.columns).sort_values(ascending=False)

        # Explicit Figure/Axes handles: drawing, saving and closing all target
        # this figure rather than whatever pyplot's current figure happens to be.
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            feat_imp.head(20).plot(kind='bar', ax=ax)
            if save_path:
                fig.savefig(save_path)
        except Exception:
            # Do not leave a half-built figure registered with pyplot.
            plt.close(fig)
            raise
        if save_path:
            plt.close(fig)
        return feat_imp