Spaces:

KB-Infinity-Tech
/

AIMSRICHackatonDay2Deployed

Sleeping

File size: 7,956 Bytes

099d46e

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a4b8f307",
   "metadata": {},
   "source": [
    "# T2.3 · Evaluation Notebook\n",
    "**Rolling 30-day held-out evaluation** — Brier score, Duration MAE, Lead Time\n",
    "\n",
    "AIMS KTT Hackathon 2026"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a251a2c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install third-party dependencies.\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel,\n",
    "# so the packages are importable from this notebook.\n",
    "%pip install -q pandas numpy scikit-learn lightgbm matplotlib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d54cb386",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from forecaster import Forecaster, rolling_eval, build_features, FEATURE_COLS\n",
    "from prioritizer import plan, load_data, format_digest\n",
    "\n",
    "plt.style.use(\"dark_background\")\n",
    "print(\"Imports OK\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c0c39297",
   "metadata": {},
   "source": [
    "## 1. Data Overview"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c5395955",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the grid history and parse the timestamp column so the .dt accessors work below.\n",
    "df = pd.read_csv(\"grid_history.csv\")\n",
    "df[\"timestamp\"] = pd.to_datetime(df[\"timestamp\"])\n",
    "\n",
    "# Headline numbers: frame size, share of rows flagged as outage, and the mean\n",
    "# duration_min over the rows where outage == 1.\n",
    "outage_rate = df[\"outage\"].mean()\n",
    "mean_outage_duration = df.loc[df[\"outage\"] == 1, \"duration_min\"].mean()\n",
    "print(f\"Shape: {df.shape}\")\n",
    "print(f\"Outage rate: {outage_rate:.3f}\")\n",
    "print(f\"Mean duration (outage hours): {mean_outage_duration:.1f} min\")\n",
    "\n",
    "# Last expression: summary statistics rendered as the cell output.\n",
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "193bf41b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2x2 overview: mean outage rate by hour / weekday / month, plus the distribution\n",
    "# of duration_min over the rows where an outage occurred.\n",
    "fig, axes = plt.subplots(2, 2, figsize=(12, 6))\n",
    "df.groupby(df.timestamp.dt.hour)[\"outage\"].mean().plot(ax=axes[0,0], title=\"Outage rate by hour\", color=\"#ef4444\")\n",
    "df.groupby(df.timestamp.dt.dayofweek)[\"outage\"].mean().plot(ax=axes[0,1], title=\"Outage rate by weekday\", color=\"#f97316\")\n",
    "df.groupby(df.timestamp.dt.month)[\"outage\"].mean().plot(ax=axes[1,0], title=\"Outage rate by month\", color=\"#6366f1\")\n",
    "# Series.hist forwards extra keyword arguments to matplotlib's Axes.hist, which\n",
    "# rejects `title`; set the title on the axes instead.\n",
    "df[df.outage==1][\"duration_min\"].hist(ax=axes[1,1], bins=30, color=\"#22c55e\", edgecolor=\"black\")\n",
    "axes[1,1].set_title(\"Duration distribution\")\n",
    "plt.tight_layout()\n",
    "plt.savefig(\"eda_plots.png\", dpi=80, bbox_inches=\"tight\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bf4734e9",
   "metadata": {},
   "source": [
    "## 2. Rolling 30-Day Evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "769732fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the rolling evaluation with a 30-day window and list every returned metric.\n",
    "metrics = rolling_eval(\"grid_history.csv\", window_days=30)\n",
    "print(\"=\" * 40)\n",
    "for k, v in metrics.items():\n",
    "    print(f\"  {k}: {v}\")\n",
    "print(\"=\" * 40)\n",
    "\n",
    "# Brier score interpretation\n",
    "# A constant forecast equal to the base rate p has Brier score p * (1 - p).\n",
    "# NOTE(review): the base rate is taken over the whole file, while rolling_eval\n",
    "# presumably scores only the held-out window; confirm the baseline is comparable.\n",
    "naive_rate = pd.read_csv(\"grid_history.csv\")[\"outage\"].mean()\n",
    "naive_brier = naive_rate * (1 - naive_rate)\n",
    "# The leading newline must be the escape sequence inside the f-string; a literal\n",
    "# line break in a single-quoted string is a SyntaxError.\n",
    "print(f\"\\nNaive Brier (always predict base rate {naive_rate:.3f}): {naive_brier:.4f}\")\n",
    "print(f\"Model Brier: {metrics['brier_score']:.4f}\")\n",
    "print(f\"Brier Skill Score: {1 - metrics['brier_score']/naive_brier:.3f} (higher = better)\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b98f9689",
   "metadata": {},
   "source": [
    "## 3. Forecast Visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c58bc81c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit a forecaster on the history file and request the next-24h forecast records.\n",
    "fc = Forecaster().fit(\"grid_history.csv\")\n",
    "forecast = fc.predict_next_24h()\n",
    "\n",
    "\n",
    "def _column(key):\n",
    "    \"\"\"Collect one field from every forecast record, in forecast order.\"\"\"\n",
    "    return [record[key] for record in forecast]\n",
    "\n",
    "\n",
    "def _risk_colour(level):\n",
    "    \"\"\"Bar colour for a risk level: red for HIGH, orange for MEDIUM, green otherwise.\"\"\"\n",
    "    if level == \"HIGH\":\n",
    "        return \"#ef4444\"\n",
    "    if level == \"MEDIUM\":\n",
    "        return \"#f97316\"\n",
    "    return \"#22c55e\"\n",
    "\n",
    "\n",
    "hours = _column(\"hour\")\n",
    "probs = _column(\"p_outage\")\n",
    "p_low = _column(\"p_outage_low\")\n",
    "p_high = _column(\"p_outage_high\")\n",
    "durations = _column(\"expected_duration_min\")\n",
    "risks = _column(\"risk_level\")\n",
    "cols = [_risk_colour(level) for level in risks]\n",
    "\n",
    "fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 7), sharex=True)\n",
    "\n",
    "# Top panel: outage probability with its low/high band and the two threshold lines.\n",
    "ax1.fill_between(hours, p_low, p_high, alpha=0.25, color=\"#6366f1\", label=\"Uncertainty band\")\n",
    "ax1.plot(hours, probs, \"o-\", color=\"#6366f1\", lw=2, ms=5, label=\"P(outage)\")\n",
    "threshold_lines = (\n",
    "    (0.25, \"#ef4444\", \"HIGH threshold\"),\n",
    "    (0.12, \"#f97316\", \"MEDIUM threshold\"),\n",
    ")\n",
    "for level, colour, label in threshold_lines:\n",
    "    ax1.axhline(level, color=colour, ls=\"--\", lw=1, label=label)\n",
    "ax1.set_ylabel(\"P(outage)\")\n",
    "ax1.set_title(\"24-Hour Outage Forecast with Uncertainty Band\")\n",
    "ax1.legend(fontsize=9)\n",
    "ax1.set_ylim(0, 0.6)\n",
    "\n",
    "# Bottom panel: expected duration per hour, coloured by that hour's risk level.\n",
    "ax2.bar(hours, durations, color=cols, alpha=0.8, label=\"Expected duration (min)\")\n",
    "ax2.set_xlabel(\"Hour of day\")\n",
    "ax2.set_ylabel(\"E[duration | outage] (min)\")\n",
    "ax2.set_title(\"Expected Outage Duration by Hour\")\n",
    "ax2.set_xticks(hours)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig(\"forecast_plot.png\", dpi=80, bbox_inches=\"tight\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6b33fb28",
   "metadata": {},
   "source": [
    "## 4. Appliance Plan — Salon Archetype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "51ceeb44",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the appliance plan for the \"salon\" archetype from the 24h forecast\n",
    "# produced in the previous section, then print its summary figures.\n",
    "appliances, businesses = load_data()\n",
    "result = plan(forecast, appliances, \"salon\")\n",
    "s = result[\"summary\"]\n",
    "print(f\"Business: {result['business']}\")\n",
    "print(f\"Net benefit vs naïve: {s['net_benefit_rwf']:,.0f} RWF\")\n",
    "print(f\"Total plan revenue: {s['total_revenue_plan_rwf']:,.0f} RWF\")\n",
    "print(f\"Disruption penalty avoided: {s['disruption_penalty_avoided_rwf']:,.0f} RWF\")\n",
    "print(f\"Hours with shedding: {s['hours_with_shed']}/24\")\n",
    "print()\n",
    "\n",
    "# Show plan table: one row per planned hour, listing the appliances switched OFF.\n",
    "rows = []\n",
    "for h in result[\"plan\"]:\n",
    "    off = [a[\"name\"] for a in h[\"appliances\"] if a[\"state\"]==\"OFF\"]\n",
    "    # [11:] drops the first 11 characters of the timestamp string\n",
    "    # (presumably the date prefix, leaving the time of day; verify the format).\n",
    "    rows.append({\"Hour\": h[\"hour\"], \"Time\": h[\"timestamp\"][11:], \"Risk\": h[\"risk_level\"],\n",
    "                 \"P(out)\": f\"{h['p_outage']:.3f}\", \"OFF\": \", \".join(off) if off else \"—\"})\n",
    "# print() renders the table with real line breaks; a bare string expression would\n",
    "# be echoed as a quoted repr with escaped newlines.\n",
    "print(pd.DataFrame(rows).to_string(index=False))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1b65b920",
   "metadata": {},
   "source": [
    "## 5. SMS Digest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "21e98b96",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Materialise the digest so the total message count is known up front and the\n",
    "# \"i/N\" header stays correct however many messages format_digest returns.\n",
    "sms = list(format_digest(result, forecast))\n",
    "for i, msg in enumerate(sms, 1):\n",
    "    print(f\"SMS {i}/{len(sms)} ({len(msg)} chars):\")\n",
    "    print(msg)\n",
    "    print()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f3e987ce",
   "metadata": {},
   "source": [
    "## 6. Feature Importance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d9de197",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit a fresh Forecaster on the history file and rank the classifier's features.\n",
    "# pandas is already imported in the notebook's import cell, and the feature frame\n",
    "# is not needed here: the importances come straight from the fitted classifier.\n",
    "fc2 = Forecaster().fit(\"grid_history.csv\")\n",
    "\n",
    "# Pair each importance with its feature name (FEATURE_COLS) and sort, largest first.\n",
    "fimp = pd.Series(fc2.clf.feature_importances_, index=FEATURE_COLS).sort_values(ascending=False)\n",
    "fig, ax = plt.subplots(figsize=(8, 5))\n",
    "fimp.plot(kind=\"barh\", ax=ax, color=\"#6366f1\")\n",
    "ax.set_title(\"LightGBM Feature Importances — Outage Classifier\")\n",
    "ax.set_xlabel(\"Importance\")\n",
    "plt.tight_layout()\n",
    "plt.savefig(\"feature_importance.png\", dpi=80, bbox_inches=\"tight\")\n",
    "plt.show()\n",
    "print(fimp.to_string())"
   ]
  }
 ],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 5
}