# Rain-tomorrow classifier: trains a scaled, class-weighted logistic
# regression on daily weather features and compares it against two naive
# baselines before persisting the fitted pipeline.
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    precision_recall_fscore_support,
    roc_auc_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
# --- Data loading and target construction ---------------------------------
df = pd.read_csv("results/daily.csv")

# Target: did it rain on the *next* day?  Shift precipitation back one row
# so each row pairs today's features with tomorrow's outcome.
df["precip_tomorrow"] = df["precip_mm"].shift(-1)

# Drops the final row (which has no "tomorrow") and any row with missing
# values in other columns.  .copy() makes the result an independent frame,
# avoiding pandas' SettingWithCopyWarning on the column assignment below.
df = df.dropna().copy()
df["rain_tomorrow"] = (df["precip_tomorrow"] > 0).astype(int)

features = [
    "temp_max_c",
    "temp_min_c",
    "cloudcover",
    "wind_speed",
    "humidity_max",
    "humidity_min",
    "precip_mm",  # rain today often implies rain persists
]
X = df[features].to_numpy()
y = df["rain_tomorrow"].to_numpy()

# Chronological split (shuffle=False): train on the past, evaluate on the
# most recent 30% — prevents leaking future information into training.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=False
)
# Standardize features, then fit a class-weighted logistic regression.
# class_weight="balanced" compensates for rain days being the minority class.
steps = [
    ("scaler", StandardScaler()),
    ("logreg", LogisticRegression(max_iter=200, class_weight="balanced")),
]
clf = Pipeline(steps)
clf.fit(X_train, y_train)

# Probability of the positive class (rain tomorrow) and the hard
# prediction at the conventional 0.5 cut-off.
proba = clf.predict_proba(X_test)[:, 1]
pred_default = (proba >= 0.5).astype(int)
# --- Evaluation at the default 0.5 threshold ------------------------------
labels = [0, 1]
cm = confusion_matrix(y_test, pred_default, labels=labels)
tn, fp, fn, tp = cm.ravel()

prec, rec, f1, _ = precision_recall_fscore_support(
    y_test, pred_default, average="binary", zero_division=0
)

# ROC-AUC is undefined when either the labels or the scores are constant,
# so fall back to NaN in that degenerate case.
if len(np.unique(y_test)) > 1 and len(np.unique(proba)) > 1:
    auc = roc_auc_score(y_test, proba)
else:
    auc = float("nan")

print("📊 Confusion Matrix (threshold=0.50)")
print(cm)
auc_str = f"{auc:.3f}" if np.isfinite(auc) else "n/a"
print(f"\nPrecision: {prec:.3f} Recall: {rec:.3f} F1: {f1:.3f} ROC-AUC: {auc_str}")
print("\nDetailed report:")
print(classification_report(y_test, pred_default, digits=3, zero_division=0, labels=labels))
# --- Naive baseline: always predict "no rain" -----------------------------
# Any useful model must beat this constant predictor.
always_no = np.zeros(y_test.shape, dtype=y_test.dtype)
base_prec, base_rec, base_f1, _ = precision_recall_fscore_support(
    y_test, always_no, average="binary", zero_division=0
)
print("\n⚠️ Baseline — always 'no rain'")
print(f"Precision: {base_prec:.3f} Recall: {base_rec:.3f} F1: {base_f1:.3f}")
# --- Persistence baseline: "tomorrow rain = today rain" -------------------
# With shuffle=False the test set is exactly the last len(y_test) rows of
# df, so "today's rain" for those rows is the tail of precip_mm.
# (The previous slice [-len(y_test)-1:-1] was off by one — it compared
# y_test against *yesterday's* rain, a two-day lag.)
today_rain = (df["precip_mm"].to_numpy()[-len(y_test):] > 0).astype(int)
precp, recp, f1p, _ = precision_recall_fscore_support(
    y_test, today_rain, average="binary", zero_division=0
)
print("\n🧠 Baseline — 'tomorrow rain = today rain'")
print(f"Precision: {precp:.3f} Recall: {recp:.3f} F1: {f1p:.3f}")
# --- Threshold tuning: trade precision for recall -------------------------
thr = 0.35  # lower than 0.5: flag more rain days at some cost in precision
pred_tuned = np.where(proba >= thr, 1, 0)
tuned = precision_recall_fscore_support(
    y_test, pred_tuned, average="binary", zero_division=0
)
prec_t, rec_t, f1_t = tuned[0], tuned[1], tuned[2]
print(f"\n🎛️ Threshold {thr:.2f} → Precision: {prec_t:.3f} Recall: {rec_t:.3f} F1: {f1_t:.3f}")
# --- Persist the fitted pipeline ------------------------------------------
# The mid-script `import joblib` was moved to the top-of-file import block
# (PEP 8: all imports at the top of the module).
os.makedirs("models", exist_ok=True)
joblib.dump(clf, "models/rain_classifier.joblib")
print("\n💾 Saved: models/rain_classifier.joblib")