"""Compute and plot permutation feature importances for the rain classifier.

Loads the trained model and its metadata from ``models/``, rebuilds the
feature/label arrays from the hourly CSV in ``results/``, scores permutation
importance on the held-out tail of the data and writes a bar chart to
``results/feature_importance.png``.
"""

import json
import os
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split

RESULTS_DIR = "results"

# Redirect matplotlib's config/cache directories into RESULTS_DIR (unless the
# caller already set them). This has to happen before matplotlib is imported,
# which is why the matplotlib imports sit below this block.
os.environ.setdefault("MPLCONFIGDIR", os.path.join(RESULTS_DIR, ".matplotlib"))
os.environ.setdefault("XDG_CACHE_HOME", os.path.join(RESULTS_DIR, ".cache"))
Path(os.environ["MPLCONFIGDIR"]).mkdir(parents=True, exist_ok=True)
Path(os.environ["XDG_CACHE_HOME"]).mkdir(parents=True, exist_ok=True)

import matplotlib  # noqa: E402

matplotlib.use("Agg")  # non-interactive backend: the figure is only saved to disk
import matplotlib.pyplot as plt  # noqa: E402


def build_dataset(meta: dict) -> tuple[np.ndarray, np.ndarray]:
    """Build the feature matrix and "rain ahead" labels from the hourly CSV.

    Row ``i`` is labelled 1 when any of the following ``horizon`` rows has
    ``precip_mm > 0`` and 0 otherwise. The last ``horizon`` rows have no
    complete look-ahead window and are dropped.

    Args:
        meta: Model metadata. Reads ``meta["horizon_hours"]`` (look-ahead
            length in rows; the CSV is presumably one row per hour) and
            ``meta["features"]`` (names of the feature columns).

    Returns:
        ``(features, labels)`` with one entry per retained row. Both are
        empty when the CSV has no more rows than the horizon.

    Raises:
        ValueError: If the horizon is negative.
    """
    df = pd.read_csv(os.path.join(RESULTS_DIR, "hourly.csv"), parse_dates=["time"])
    horizon = int(meta["horizon_hours"])
    if horizon < 0:
        raise ValueError(f"horizon_hours must be non-negative, got {horizon}")

    precip = df["precip_mm"].values
    # Clamp at zero: a negative stop in ``iloc[:stop]`` would slice from the
    # end and silently keep rows that have no complete look-ahead window.
    n_valid = max(len(precip) - horizon, 0)

    # Prefix sums of the "wet" indicator give the number of wet rows in any
    # window in O(1): rows i+1 .. i+horizon contain
    # wet_prefix[i + 1 + horizon] - wet_prefix[i + 1] wet rows.
    # NaN precipitation compares False against 0 and so counts as dry.
    wet = np.asarray(precip > 0, dtype=int)
    wet_prefix = np.concatenate(([0], np.cumsum(wet)))
    window_starts = np.arange(1, n_valid + 1)
    wet_in_window = wet_prefix[window_starts + horizon] - wet_prefix[window_starts]
    labels = (wet_in_window > 0).astype(int)

    features = df.iloc[:n_valid][meta["features"]].values
    return features, labels


def plot_importance(feature_names: list[str], importances: np.ndarray, std: np.ndarray) -> None:
    """Save a horizontal bar chart of permutation importances.

    Bars are sorted with the most important feature at the top and carry
    ``std`` as horizontal error bars. The figure is written to
    ``feature_importance.png`` inside ``RESULTS_DIR``.

    Args:
        feature_names: Feature names, aligned with ``importances``.
        importances: Mean importance per feature.
        std: Standard deviation of the importance per feature, aligned with
            ``importances``.
    """
    importances = np.asarray(importances)
    order = np.argsort(importances)[::-1]
    sorted_names = np.array(feature_names)[order]
    sorted_importances = importances[order]
    # Reorder the spreads with the same permutation so each error bar stays
    # attached to its own feature.
    sorted_std = np.asarray(std)[order]

    fig, ax = plt.subplots(figsize=(8, 5))
    y_pos = np.arange(len(sorted_names))
    ax.barh(y_pos, sorted_importances, xerr=sorted_std, align="center", capsize=3)
    ax.set_yticks(y_pos)
    ax.set_yticklabels(sorted_names)
    ax.invert_yaxis()  # largest importance at the top
    ax.set_xlabel("Permutation importance (F1 drop)")
    ax.set_title("Rain classifier — feature importances")
    fig.tight_layout()

    Path(RESULTS_DIR).mkdir(exist_ok=True)
    fig.savefig(os.path.join(RESULTS_DIR, "feature_importance.png"))
    plt.close(fig)


def main() -> None:
    """Load the model, score permutation importance on the test tail, plot it."""
    with open("models/rain_model_meta.json", encoding="utf-8") as meta_file:
        meta = json.load(meta_file)

    # NOTE: joblib.load unpickles arbitrary objects; only load model files
    # from a trusted source.
    model = joblib.load("models/rain_classifier_hourly.joblib")

    X, y = build_dataset(meta)
    # shuffle=False keeps row order, so the test set is the last 30% of rows.
    # NOTE(review): presumably mirrors the split used at training time — confirm.
    _, X_test, _, y_test = train_test_split(X, y, test_size=0.3, shuffle=False)

    result = permutation_importance(
        model,
        X_test,
        y_test,
        n_repeats=25,
        random_state=42,
        scoring="f1",
    )
    plot_importance(meta["features"], result.importances_mean, result.importances_std)
    print("✅ Wrote results/feature_importance.png")


if __name__ == "__main__":
    main()