Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| """ | |
Generates SHAP dependence plots showing how HUMIDITY and TEMPERATURE
jointly influence rain predictions. Outputs:
- results/shap_interaction.png
- results/shap_interaction_rev.png
| """ | |
import importlib.util
import json
import os
from pathlib import Path

# Keep matplotlib caches inside the repo to avoid home directory issues.
# matplotlib resolves its config/cache directory while it is being imported,
# so MPLCONFIGDIR must be in the environment *before* matplotlib (or shap,
# which pulls matplotlib in) is imported; setting it afterwards has no effect
# on the running process.
RESULTS_DIR = Path("results")
RESULTS_DIR.mkdir(exist_ok=True)
# setdefault: an MPLCONFIGDIR already provided by the environment wins.
os.environ.setdefault("MPLCONFIGDIR", str(RESULTS_DIR / ".matplotlib"))
Path(os.environ["MPLCONFIGDIR"]).mkdir(parents=True, exist_ok=True)

import joblib  # noqa: E402
import numpy as np  # noqa: E402
import pandas as pd  # noqa: E402
import matplotlib.pyplot as plt  # noqa: E402
import shap  # noqa: E402
# Load the tuned model and grab its underlying booster.
model = joblib.load("models/rain_xgb_tuned.joblib")
booster = model.get_booster()

# Normalise the booster's stored base_score: surrounding square brackets are
# stripped, and a value that still does not parse as a float is replaced by
# "0.5". The patched config is then loaded back into the booster.
# NOTE(review): presumably a workaround for bracketed base_score strings that
# downstream tooling cannot parse — confirm against the xgboost/shap versions.
config = json.loads(booster.save_config())
learner_params = config.get("learner", {}).get("learner_model_param", {})
base_score = learner_params.get("base_score")
if base_score:
    cleaned = base_score.strip("[]")
    try:
        float(cleaned)
    except ValueError:
        cleaned = "0.5"
    # learner_params is the same dict object that lives inside config, so this
    # assignment updates config in place.
    learner_params["base_score"] = cleaned
    booster.load_config(json.dumps(config))

# Feature names recorded next to the model.
meta_path = Path("models/rain_xgb_tuned_meta.json")
meta = json.loads(meta_path.read_text())
features = meta["features"]
# Load data and rebuild features exactly like training.
df = pd.read_csv("results/hourly.csv", parse_dates=["time"])

# Import the training script by file path so its build_features() is reused
# instead of being re-implemented here. Note that exec_module() runs the
# training script's module-level code.
spec = importlib.util.spec_from_file_location(
    "train_xgb_tuned_final", "scripts/train_xgb_tuned_final.py"
)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
build_features = module.build_features

Xdf = build_features(df)  # same order as training
X = Xdf.values

# Explain only the last SAMPLE_ROWS rows. A negative slice already returns the
# whole array when it holds fewer rows, so no length check is needed.
SAMPLE_ROWS = 120
X_sample = X[-SAMPLE_ROWS:]

# Named-column view of the sample for plotting (built once).
# NOTE(review): assumes Xdf's columns line up one-to-one with meta["features"]
# — confirm against build_features().
X_sample_df = pd.DataFrame(X_sample, columns=features)
# Prefer TreeExplainer for XGBoost; fall back to the generic permutation
# Explainer if it cannot be built or evaluated.
try:
    explainer = shap.TreeExplainer(booster, data=X_sample)
    shap_result = explainer(X_sample)
except Exception as exc:
    # Deliberately broad: any TreeExplainer failure should trigger the
    # fallback rather than abort the script. Report the cause so the switch
    # to the slower permutation explainer is not silent.
    print(f"⚠️ TreeExplainer failed ({exc!r}); falling back to permutation explainer")
    explainer = shap.Explainer(model.predict_proba, X_sample, algorithm="permutation")
    shap_result = explainer(X_sample)
# Reduce the explainer output to a 2D array whose columns match the features.
if not hasattr(shap_result, "values"):
    # Plain array-like result: convert it as-is.
    shap_values = np.array(shap_result)
else:
    shap_values = shap_result.values
    if shap_values.ndim == 3:
        # A third axis holds per-class outputs; keep the positive class (index 1).
        shap_values = shap_values[..., 1]

# Keep as many trailing sample rows as there are SHAP rows.
n_explained = shap_values.shape[0]
X_plot = X_sample_df.iloc[-n_explained:]
def _save_dependence_plot(feature, interaction, values, frame, out_path):
    """Draw one SHAP dependence plot and write it to ``out_path``.

    Args:
        feature: Column name plotted on the x-axis.
        interaction: Column name used to colour the points.
        values: 2D SHAP value array aligned with ``frame``.
        frame: DataFrame holding the feature values for the same rows.
        out_path: Destination image path.
    """
    # No plt.figure() beforehand: dependence_plot opens its own figure when no
    # `ax` is passed, so a pre-created figure would stay empty and never be
    # closed.
    shap.dependence_plot(
        feature,
        values,
        frame,
        interaction_index=interaction,
        show=False,
    )
    fig = plt.gcf()  # the figure dependence_plot just drew into
    fig.tight_layout()
    fig.savefig(out_path, dpi=300)
    plt.close(fig)


# results/ already exists at this point (created at the top of the script).

# 1) Dependence plot: humidity colored by temp_c (classic interaction view)
_save_dependence_plot(
    "humidity", "temp_c", shap_values, X_plot, "results/shap_interaction.png"
)

# 2) Reverse view: temp_c colored by humidity
_save_dependence_plot(
    "temp_c", "humidity", shap_values, X_plot, "results/shap_interaction_rev.png"
)

print("✅ Saved results/shap_interaction.png and results/shap_interaction_rev.png")