import json
import os
import joblib
import pandas as pd
import numpy as np
from sklearn.metrics import precision_recall_curve, roc_curve, auc
from sklearn.model_selection import train_test_split

# Base directory for the project-local matplotlib config and cache folders
# created below.
RESULTS_DIR = "results"
# Point matplotlib's config directory and the XDG cache at folders under
# RESULTS_DIR. setdefault leaves any value already present in the environment
# untouched.
os.environ.setdefault("MPLCONFIGDIR", os.path.join(RESULTS_DIR, ".matplotlib"))
os.environ.setdefault("XDG_CACHE_HOME", os.path.join(RESULTS_DIR, ".cache"))

from pathlib import Path

# Create both directories up front; exist_ok=True makes repeated runs harmless.
Path(os.environ["MPLCONFIGDIR"]).mkdir(parents=True, exist_ok=True)
Path(os.environ["XDG_CACHE_HOME"]).mkdir(parents=True, exist_ok=True)

# matplotlib is imported only after the environment variables above are set,
# presumably so that it picks them up at import time.
import matplotlib

# Select the non-interactive Agg backend before pyplot is imported; this
# script only writes figures to files.
matplotlib.use("Agg")
import matplotlib.pyplot as plt

# Load the model metadata, the saved classifier, and the hourly observations.
# The metadata file is opened in a context manager so the handle is closed
# deterministically, and decoded explicitly as UTF-8 (the JSON default)
# instead of the platform's locale encoding.
with open("models/rain_model_meta.json", encoding="utf-8") as meta_file:
    meta = json.load(meta_file)
# NOTE: joblib.load unpickles the file, so only load model artifacts that
# come from a trusted source.
clf = joblib.load("models/rain_classifier_hourly.joblib")
df = pd.read_csv("results/hourly.csv", parse_dates=["time"])

# Look-ahead horizon (in rows of the hourly table) and the feature columns
# to feed the classifier, both taken from the metadata.
H = meta["horizon_hours"]
features = meta["features"]

# Build the binary target: 1 when any precipitation (> 0 mm) is recorded in
# the H rows that follow a given row, else 0.
prec = df["precip_mm"].to_numpy()

# Only rows with a complete H-row look-ahead window can be labeled. The
# trailing H rows are dropped rather than being kept with a default label of
# 0: with a chronological split they would all fall into the test set as
# spurious "no rain" samples.
n_labeled = max(len(prec) - H, 0)

# Prefix sums of the "wet hour" indicator evaluate every window at once:
# the number of wet hours in rows i+1 .. i+H is wet_cum[i+1+H] - wet_cum[i+1].
# NaN precipitation compares as not-wet, same as an explicit `> 0` check.
wet_cum = np.concatenate(([0], np.cumsum(prec > 0)))
window_start = np.arange(n_labeled) + 1
precip_next = (wet_cum[window_start + H] - wet_cum[window_start] > 0).astype(int)

df = df.iloc[:n_labeled].copy()
# The column name is fixed for compatibility even though the horizon itself
# comes from the metadata.
df["rain_next6h"] = precip_next

# Feature matrix and target vector, row-aligned with the truncated frame.
X = df[features].values
y = df["rain_next6h"].values

# Chronological hold-out: with shuffle=False the final 30% of the rows form
# the test set. train_test_split returns [X_train, X_test, y_train, y_test];
# only the two test parts are needed here.
X_test, y_test = train_test_split(X, y, test_size=0.3, shuffle=False)[1::2]

# Column 1 of predict_proba is used as the score for the positive ("rain")
# class -- assumes the classifier's classes are ordered [0, 1].
class_probabilities = clf.predict_proba(X_test)
proba = class_probabilities[:, 1]

# Curve coordinates for the plots; the threshold arrays are not used.
precision, recall, _pr_thresholds = precision_recall_curve(y_test, proba)
fpr, tpr, _roc_thresholds = roc_curve(y_test, proba)

def _save_curve(xs, ys, xlabel, ylabel, title, path):
    """Draw ys against xs as a single line plot and write the figure to path."""
    fig, ax = plt.subplots()
    ax.plot(xs, ys)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    fig.tight_layout()
    fig.savefig(path)
    # Close the figure explicitly so it does not stay registered with pyplot.
    plt.close(fig)


# Precision-recall curve: recall on the x-axis, precision on the y-axis.
_save_curve(
    recall,
    precision,
    "Recall",
    "Precision",
    "Precision–Recall",
    "results/pr_curve.png",
)

# ROC curve, with the area under it reported in the title.
roc_auc = auc(fpr, tpr)
_save_curve(
    fpr,
    tpr,
    "FPR",
    "TPR",
    f"ROC (AUC={roc_auc:.2f})",
    "results/roc_curve.png",
)

print("✅ Wrote results/pr_curve.png and results/roc_curve.png")