Spaces:

Pybunny
/

NILMbench

Sleeping

App Files Files Community

Pybunny committed 8 days ago

Commit

e6c3b50

verified ·

1 Parent(s): 1e17653

Add Benchmark-your-model tab

Browse files

Files changed (1) hide show

app.py +227 -22

app.py CHANGED Viewed

@@ -1,14 +1,26 @@
 """NILMbench HuggingFace Space.
-Single-frame demo of the FaustineCNN baseline. Model weights, classes, and
-recall-constrained cutoffs are pulled from the HF model repo
-``Pybunny/nilmbench-faustine`` at startup. Example frames are bundled with
-the Space so the demo works offline of the laptop.
 """
 from __future__ import annotations
 import json
 from pathlib import Path
 import numpy as np
@@ -19,11 +31,17 @@ import gradio as gr
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
-from huggingface_hub import hf_hub_download
 HERE = Path(__file__).resolve().parent
 EXAMPLES_DIR = HERE / "examples"
 MODEL_REPO = "Pybunny/nilmbench-faustine"
 # UK-DALE House 2 calibration constants (from calibration_house_2.cfg).
 V_PER_ADC = 1.88296904357e-7
@@ -34,7 +52,7 @@ I_FACTOR = ADC_FULL_SCALE * I_PER_ADC   # ~102.5
 # ----------------------------------------------------------------------
-# Model (self-contained so the Space has no dependency on the nilmbench pkg)
 # ----------------------------------------------------------------------
 class FaustineCNN(nn.Module):
     def __init__(self, n_categories: int):
@@ -89,7 +107,7 @@ MODEL, CLASSES, CUTOFFS = load_assets()
 # ----------------------------------------------------------------------
-# Inference + plotting
 # ----------------------------------------------------------------------
 def _to_2d_image(vi_norm: np.ndarray) -> torch.Tensor:
     if vi_norm.shape != (2, 96000):
@@ -101,8 +119,6 @@ def _to_2d_image(vi_norm: np.ndarray) -> torch.Tensor:
 def predict(vi_norm: np.ndarray, aggregate_W: float) -> dict[str, float]:
     with torch.no_grad():
         scores = MODEL(_to_2d_image(vi_norm)).cpu().numpy().squeeze(0)
-    # FaustineCNN outputs per-category Bernoulli activations; renormalise
-    # across categories to obtain shares, then scale by the aggregate.
     shares = scores / (scores.sum() + 1e-9)
     raw = shares * float(aggregate_W)
     out = {}
@@ -155,9 +171,6 @@ def make_overview_plot(vi_norm: np.ndarray, preds: dict[str, float],
     return fig
-# ----------------------------------------------------------------------
-# Gradio handlers
-# ----------------------------------------------------------------------
 def list_examples() -> list[str]:
     if not EXAMPLES_DIR.exists():
         return []
@@ -195,37 +208,229 @@ def run_upload(file_obj, aggregate_W: float):
     return make_overview_plot(vi, preds, None), preds
 # ----------------------------------------------------------------------
 # UI
 # ----------------------------------------------------------------------
 def build_ui() -> gr.Blocks:
     examples = list_examples()
-    with gr.Blocks(title="NILMbench demo") as demo:
         gr.Markdown(
-            "# NILMbench demo\n"
-            "FaustineCNN trained on UK-DALE House 1, applied to a single "
-            "6-second 16 kHz V/I segment from House 2. Predicted power is "
-            "post-processed with the recall-constrained cutoffs from the paper.\n\n"
             "Source code: <https://github.com/Saharmgh/NILMbench> · "
-            "Model: <https://huggingface.co/Pybunny/nilmbench-faustine>"
         )
         with gr.Tabs():
-            with gr.TabItem("Built-in example"):
                 ex = gr.Dropdown(examples, label="Example frame",
                                   value=examples[0] if examples else None)
-                btn = gr.Button("Run", variant="primary")
                 plot_a = gr.Plot()
                 lab_a = gr.JSON(label="Predicted power per category (W)")
                 btn.click(run_example, ex, [plot_a, lab_a])
-            with gr.TabItem("Upload your own"):
                 up = gr.File(label="V/I segment (.npy, shape (2, 96000), "
                                     "FLAC-normalised float in [-1, 1])")
                 agg = gr.Slider(0, 8000, value=300, step=10,
                                  label="Aggregate active power (W)")
-                btn2 = gr.Button("Run", variant="primary")
                 plot_b = gr.Plot()
                 lab_b = gr.JSON(label="Predicted power per category (W)")
                 btn2.click(run_upload, [up, agg], [plot_b, lab_b])
     return demo

 """NILMbench HuggingFace Space.
+Three tabs:
+1. **Built-in example** – run the FaustineCNN baseline on a packaged
+   6-second 16 kHz V/I frame from UK-DALE House 2.
+2. **Upload V/I frame** – run FaustineCNN on a user-supplied single frame.
+3. **Benchmark your model** – upload a ``.py`` model definition + a ``.pt``
+   weights file and score it on the dense UK-DALE House 2 benchmark (full
+   60,000 frames; the Space defaults to a 500-frame quick check to stay
+   within the free-tier compute budget).
+Model weights, classes, and recall-constrained cutoffs for the baseline are
+pulled from the HF model repo ``Pybunny/nilmbench-faustine`` at startup.
 """
 from __future__ import annotations
+import importlib.util
 import json
+import sys
+import tempfile
+import traceback
 from pathlib import Path
 import numpy as np
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
+from huggingface_hub import hf_hub_download, snapshot_download
+# nilmbench is installed from the companion GitHub repo (see requirements.txt).
+from nilmbench.runner import run_user_model
+from nilmbench.benchmark import evaluate_dense
+from nilmbench.io.report import render_markdown_report
 HERE = Path(__file__).resolve().parent
 EXAMPLES_DIR = HERE / "examples"
 MODEL_REPO = "Pybunny/nilmbench-faustine"
+DATASET_REPO = "Pybunny/nilmbench-ukdale"
 # UK-DALE House 2 calibration constants (from calibration_house_2.cfg).
 V_PER_ADC = 1.88296904357e-7
 # ----------------------------------------------------------------------
+# Baseline model (self-contained for the single-frame demo)
 # ----------------------------------------------------------------------
 class FaustineCNN(nn.Module):
     def __init__(self, n_categories: int):
 # ----------------------------------------------------------------------
+# Single-frame inference (tabs 1 and 2)
 # ----------------------------------------------------------------------
 def _to_2d_image(vi_norm: np.ndarray) -> torch.Tensor:
     if vi_norm.shape != (2, 96000):
 def predict(vi_norm: np.ndarray, aggregate_W: float) -> dict[str, float]:
     with torch.no_grad():
         scores = MODEL(_to_2d_image(vi_norm)).cpu().numpy().squeeze(0)
     shares = scores / (scores.sum() + 1e-9)
     raw = shares * float(aggregate_W)
     out = {}
     return fig
 def list_examples() -> list[str]:
     if not EXAMPLES_DIR.exists():
         return []
     return make_overview_plot(vi, preds, None), preds
+# ----------------------------------------------------------------------
+# Tab 3: full benchmark, with the user's uploaded model
+# ----------------------------------------------------------------------
+# Process-wide memo of the local snapshot directory; stays ``None`` until the
+# first successful download.
+_BENCHMARK_DATA_DIR: Path | None = None
+def _ensure_benchmark_data() -> Path:
+    """Return the local directory of the benchmark dataset snapshot.
+
+    On the first call the ``benchmark/`` folder, ``summary.json`` and
+    ``README.md`` of the ``DATASET_REPO`` dataset repo are fetched with
+    ``snapshot_download``. The resulting path is stored in the module-level
+    ``_BENCHMARK_DATA_DIR`` so later calls return it without downloading.
+    """
+    global _BENCHMARK_DATA_DIR
+    if _BENCHMARK_DATA_DIR is None:
+        snapshot_dir = snapshot_download(
+            repo_id=DATASET_REPO,
+            repo_type="dataset",
+            allow_patterns=["benchmark/*", "summary.json", "README.md"],
+        )
+        _BENCHMARK_DATA_DIR = Path(snapshot_dir)
+    return _BENCHMARK_DATA_DIR
+def _import_user_module(file_path: Path, class_name: str):
+    """Dynamically import a user-uploaded ``.py`` and return the class.
+
+    The file is executed as a module registered in ``sys.modules`` under the
+    fixed name ``"user_model_module"``. On success the module stays
+    registered; if executing the file raises, the registration is removed so
+    a half-initialised module is never left behind.
+
+    SECURITY: this executes arbitrary uploaded code inside the current
+    process. Only call it where that is an accepted risk.
+
+    Args:
+        file_path: Path of the uploaded Python source file.
+        class_name: Name of the attribute to fetch from the executed module.
+
+    Returns:
+        The attribute called ``class_name`` from the executed module.
+
+    Raises:
+        ImportError: If no import spec/loader can be built for ``file_path``.
+        AttributeError: If the module defines no attribute ``class_name``.
+        Exception: Whatever the uploaded module raises while executing.
+    """
+    module_name = "user_model_module"
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None or spec.loader is None:
+        raise ImportError(f"Could not load module from {file_path}")
+    mod = importlib.util.module_from_spec(spec)
+    # Register before executing so the module can be resolved by name while
+    # its own body runs.
+    sys.modules[module_name] = mod
+    try:
+        spec.loader.exec_module(mod)
+    except BaseException:
+        # Mirror the regular import system: a module whose body failed must
+        # not remain importable.
+        sys.modules.pop(module_name, None)
+        raise
+    if not hasattr(mod, class_name):
+        raise AttributeError(
+            f"Uploaded module has no attribute '{class_name}'. "
+            f"Available: {[n for n in dir(mod) if not n.startswith('_')]}"
+        )
+    return getattr(mod, class_name)
+def _subset_dataset(data_root: Path, max_frames: int) -> Path:
+    """Make a temporary benchmark/ directory with the first N frames only.
+
+    Lets us cap compute time on the free Space tier.
+
+    Args:
+        data_root: Directory containing ``benchmark/x_vi_6s.npy`` and
+            ``benchmark/labels_and_index.npz``.
+        max_frames: Number of leading frames to keep; must be at least 1.
+
+    Returns:
+        ``data_root`` itself when ``max_frames`` covers the whole set,
+        otherwise a fresh temporary root with a truncated ``benchmark/``
+        directory. The caller owns that temporary directory and should
+        delete it when done.
+
+    Raises:
+        ValueError: If ``max_frames`` is smaller than 1.
+    """
+    import shutil  # local import: only needed to clean up after a failure
+    if max_frames < 1:
+        # A zero or negative count would silently yield an empty or
+        # wrongly sliced subset.
+        raise ValueError(f"max_frames must be >= 1, got {max_frames}")
+    src = data_root / "benchmark"
+    # Memory-map once: gives the frame count and the slice source without
+    # reading the whole array into RAM.
+    frames = np.load(src / "x_vi_6s.npy", mmap_mode="r")
+    n_total = int(frames.shape[0])
+    if max_frames >= n_total:
+        return data_root  # use full set
+    tmp_root = Path(tempfile.mkdtemp(prefix="nilmbench_subset_"))
+    try:
+        sub = tmp_root / "benchmark"
+        sub.mkdir(parents=True)
+        np.save(sub / "x_vi_6s.npy", np.asarray(frames[:max_frames]))
+        sliced = {}
+        # NOTE(security): allow_pickle=True can execute code embedded in the
+        # archive; acceptable only because the file comes from our own
+        # dataset snapshot.
+        with np.load(src / "labels_and_index.npz", allow_pickle=True) as lab:
+            for k in lab.files:
+                v = lab[k]
+                # Arrays whose leading axis has n_total entries are treated
+                # as per-frame and truncated; everything else is copied
+                # through unchanged.
+                if v.ndim >= 1 and v.shape[0] == n_total:
+                    sliced[k] = v[:max_frames]
+                else:
+                    sliced[k] = v
+        np.savez_compressed(sub / "labels_and_index.npz", **sliced)
+    except BaseException:
+        # Do not leave a partially written subset on disk.
+        shutil.rmtree(tmp_root, ignore_errors=True)
+        raise
+    return tmp_root
+def _uploaded_path(file_obj) -> str | None:
+    """Return the filesystem path of a Gradio upload, or ``None`` if absent."""
+    if file_obj is None:
+        return None
+    # NOTE(review): depending on the Gradio version / ``type=`` setting the
+    # callback receives either a plain path string or a tempfile-like
+    # wrapper exposing ``.name``; accept both.
+    return str(getattr(file_obj, "name", file_obj))
+def _traceback_md(heading: str) -> str:
+    """Format the exception being handled as a Markdown error block."""
+    return f"**{heading}**\n\n```\n{traceback.format_exc()}\n```"
+def run_benchmark_upload(model_file, weights_file, class_name: str,
+                          output_kind: str, max_frames: int, batch_size: int):
+    """Run the user's model on the dense House-2 set and render a report.
+
+    Stages: import the uploaded module, fetch the benchmark data, build the
+    frame subset, run inference through ``run_user_model``, then score with
+    ``evaluate_dense``. A failure in any stage is returned as a Markdown
+    traceback instead of being raised.
+
+    Args:
+        model_file: Uploaded ``.py`` file defining the model class.
+        weights_file: Uploaded weights file, or ``None`` to run without.
+        class_name: Name of the model class; blank falls back to ``"Model"``.
+        output_kind: Forwarded unchanged to ``run_user_model``.
+        max_frames: Number of leading benchmark frames to score.
+        batch_size: Inference batch size.
+
+    Returns:
+        ``(markdown, score_json_path)``. ``score_json_path`` is ``None``
+        whenever the Markdown is an error message.
+    """
+    import shutil  # local import: only used to remove the temporary subset
+    model_path = _uploaded_path(model_file)
+    if model_path is None:
+        return "**Please upload a Python file defining your model.**", None
+    weights_path = _uploaded_path(weights_file)
+    class_name = (class_name or "Model").strip() or "Model"
+    try:
+        # SECURITY: executes the uploaded file in this process. Called for
+        # validation and for its ``sys.modules`` registration; the runner
+        # re-resolves the class by name below.
+        _import_user_module(Path(model_path), class_name)
+    except Exception:
+        return _traceback_md(
+            f"Failed to import model class `{class_name}`:"), None
+    try:
+        data_root = _ensure_benchmark_data()
+    except Exception:
+        return _traceback_md("Could not download benchmark data:"), None
+    try:
+        active_root = _subset_dataset(data_root, int(max_frames))
+    except Exception:
+        return _traceback_md("Could not prepare data subset:"), None
+    tmpdir = Path(tempfile.mkdtemp(prefix="nilmbench_report_"))
+    preds_path = tmpdir / "predictions.npz"
+    try:
+        try:
+            # We already have the class; rebind via a temporary module name
+            # so nilmbench.runner's importer can find it.
+            sys.modules["__nilmbench_user__"] = sys.modules["user_model_module"]
+            run = run_user_model(
+                module_spec=f"__nilmbench_user__:{class_name}",
+                weights_path=weights_path,
+                data_root=active_root,
+                out_path=preds_path,
+                batch_size=int(batch_size),
+                device="cpu",
+                output_kind=output_kind,
+                strict_load=False,
+                model_name=class_name,
+            )
+        except Exception:
+            return _traceback_md("Model failed during inference:"), None
+        try:
+            # NOTE(security): allow_pickle=True can execute code embedded in
+            # the archive; the file was just written by the runner above.
+            with np.load(preds_path, allow_pickle=True) as preds:
+                result = evaluate_dense(
+                    y_true_W=preds["y_true"].astype(np.float32),
+                    y_pred_W=preds["y_pred"].astype(np.float32),
+                    classes=[str(c) for c in preds["class_names"]],
+                    model_name=class_name,
+                )
+            extra = {
+                "Model class": class_name,
+                "Weights file": Path(weights_path).name if weights_path else "(none)",
+                "Frames scored": f"{run.n_frames} / 60,000",
+                "Output kind": output_kind,
+            }
+            md = render_markdown_report(
+                result,
+                title=f"NILMbench report — {class_name}",
+                extra=extra,
+            )
+            score_json_path = tmpdir / "score.json"
+            score_json_path.write_text(
+                json.dumps(result.to_dict(), indent=2, sort_keys=True),
+                encoding="utf-8",
+            )
+        except Exception:
+            # Report scoring failures the same way as every other stage.
+            return _traceback_md("Scoring failed:"), None
+        return md, str(score_json_path)
+    finally:
+        # The truncated copy of the benchmark is only needed for this run;
+        # never delete the shared cached snapshot itself.
+        if active_root != data_root:
+            shutil.rmtree(active_root, ignore_errors=True)
 # ----------------------------------------------------------------------
 # UI
 # ----------------------------------------------------------------------
 def build_ui() -> gr.Blocks:
+    """Assemble the Gradio Blocks app and return it without launching it.
+
+    The layout is a Markdown header followed by three tabs: a built-in
+    example runner wired to ``run_example``, a single-frame upload wired to
+    ``run_upload``, and a bring-your-own-model benchmark wired to
+    ``run_benchmark_upload``.
+    """
     examples = list_examples()
+    with gr.Blocks(title="NILMbench") as demo:
         gr.Markdown(
+            "# NILMbench\n"
+            "Open benchmark for high-frequency NILM regression on UK-DALE 2015 "
+            "(House 1 → House 2). Headline metric: modified Jaccard index "
+            "**MJ$_{20W}$** with hybrid tolerance.\n\n"
             "Source code: <https://github.com/Saharmgh/NILMbench> · "
+            "Baseline model: <https://huggingface.co/Pybunny/nilmbench-faustine> · "
+            "Dataset: <https://huggingface.co/datasets/Pybunny/nilmbench-ukdale>"
         )
         with gr.Tabs():
+            # Tab 1: choose one of the names from list_examples() and run it.
+            with gr.TabItem("Single frame · built-in example"):
                 ex = gr.Dropdown(examples, label="Example frame",
                                   value=examples[0] if examples else None)
+                btn = gr.Button("Run FaustineCNN", variant="primary")
                 plot_a = gr.Plot()
                 lab_a = gr.JSON(label="Predicted power per category (W)")
                 btn.click(run_example, ex, [plot_a, lab_a])
+            # Tab 2: user-supplied frame plus the aggregate power, which the
+            # upload file itself does not carry.
+            with gr.TabItem("Single frame · upload V/I"):
                 up = gr.File(label="V/I segment (.npy, shape (2, 96000), "
                                     "FLAC-normalised float in [-1, 1])")
                 agg = gr.Slider(0, 8000, value=300, step=10,
                                  label="Aggregate active power (W)")
+                btn2 = gr.Button("Run FaustineCNN", variant="primary")
                 plot_b = gr.Plot()
                 lab_b = gr.JSON(label="Predicted power per category (W)")
                 btn2.click(run_upload, [up, agg], [plot_b, lab_b])
+            # Tab 3: bring-your-own-model benchmark; inputs in the left
+            # column, rendered report and downloadable score on the right.
+            with gr.TabItem("Benchmark your model"):
+                gr.Markdown(
+                    "Upload a `.py` file defining a `torch.nn.Module` "
+                    "subclass and its trained weights `.pt`. The Space "
+                    "downloads the dense House-2 benchmark split from "
+                    "[`Pybunny/nilmbench-ukdale`](https://huggingface.co/datasets/Pybunny/nilmbench-ukdale) "
+                    "on first run (cached afterwards), runs your model on the "
+                    "selected number of frames, and produces a full score "
+                    "sheet.\n\n"
+                    "**Model contract** (see "
+                    "[`examples/byom_template.py`](https://github.com/Saharmgh/NILMbench/blob/main/examples/byom_template.py)):\n"
+                    "* `forward(x)` takes `x` shape `(B, 2, 96000)` (V then I).\n"
+                    "* Returns non-negative `(B, K)` per-category power in "
+                    "watts. If your model emits shares in [0, 1] instead, "
+                    "select **shares** below and the runner will scale by the "
+                    "per-frame aggregate.\n"
+                    "* If the constructor accepts a keyword named "
+                    "`n_categories` (or `num_classes` / `K`), it is filled in "
+                    "automatically.\n"
+                )
+                with gr.Row():
+                    with gr.Column():
+                        model_py = gr.File(label="Model definition (.py)",
+                                            file_types=[".py"])
+                        weights = gr.File(label="Weights (.pt, optional for "
+                                                 "sanity checks)",
+                                          file_types=[".pt", ".pth"])
+                        class_name = gr.Textbox(label="Class name",
+                                                value="Model")
+                        output_kind = gr.Radio(
+                            ["watts", "shares"],
+                            value="watts",
+                            label="Output kind",
+                            info="watts = per-category power directly; "
+                                 "shares = renormalise + scale by aggregate")
+                        max_frames = gr.Slider(50, 60000, value=500, step=50,
+                                                label="Frames to score",
+                                                info="Free-tier CPU. Default 500 "
+                                                     "≈ 1–2 min. Full 60 000 "
+                                                     "frames may take longer.")
+                        batch_size = gr.Slider(1, 64, value=16, step=1,
+                                                label="Batch size")
+                        run_btn = gr.Button("Run benchmark", variant="primary")
+                    with gr.Column():
+                        report_md = gr.Markdown(label="Report")
+                        score_file = gr.File(label="Download score.json")
+                # The input order must match run_benchmark_upload's
+                # positional parameters.
+                run_btn.click(
+                    run_benchmark_upload,
+                    [model_py, weights, class_name, output_kind, max_frames, batch_size],
+                    [report_md, score_file],
+                )
     return demo