#!/usr/bin/env python3
"""Generate SHAP explanations for the tuned rain model."""
import importlib.util
import json
import os
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shap

# Ensure the matplotlib cache lives inside the repo
RESULTS_DIR = Path("results")
RESULTS_DIR.mkdir(exist_ok=True)
os.environ.setdefault("MPLCONFIGDIR", str(RESULTS_DIR / ".matplotlib"))
Path(os.environ["MPLCONFIGDIR"]).mkdir(parents=True, exist_ok=True)

# === Load model + metadata ===
model = joblib.load("models/rain_xgb_tuned.joblib")
with open("models/rain_xgb_tuned_meta.json") as f:
    meta = json.load(f)
features = meta["features"]

# === Load data ===
df = pd.read_csv("results/hourly.csv", parse_dates=["time"])

# Rebuild features exactly as in training by importing build_features from the training script
spec = importlib.util.spec_from_file_location(
    "train_xgb_tuned_final", Path("scripts/train_xgb_tuned_final.py")
)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
build_features = module.build_features

Xdf = build_features(df)
X = Xdf.values.astype(np.float32)

# Use the last 200 samples to keep the permutation explainer fast
X_sample = X[-200:]

# === SHAP explainer ===
# Explain the positive-class (rain) probability so each sample gets one SHAP value per feature
def predict_rain_proba(data):
    return model.predict_proba(data)[:, 1]

explainer = shap.Explainer(predict_rain_proba, X_sample, algorithm="permutation")
shap_values = explainer(X_sample)

# === Global importance (beeswarm) ===
plt.figure()
shap.summary_plot(shap_values, X_sample, feature_names=features, show=False)
plt.tight_layout()
plt.savefig(RESULTS_DIR / "shap_summary.png", dpi=300)
plt.close()

# === Bar chart version ===
plt.figure()
shap.summary_plot(
    shap_values, X_sample, feature_names=features, plot_type="bar", show=False
)
plt.tight_layout()
plt.savefig(RESULTS_DIR / "shap_top.png", dpi=300)
plt.close()

print("✅ SHAP visualisations saved: results/shap_summary.png and results/shap_top.png")