Pybunny committed on
Commit
e6c3b50
·
verified ·
1 Parent(s): 1e17653

Add Benchmark-your-model tab

Browse files
Files changed (1) hide show
  1. app.py +227 -22
app.py CHANGED
@@ -1,14 +1,26 @@
1
  """NILMbench HuggingFace Space.
2
 
3
- Single-frame demo of the FaustineCNN baseline. Model weights, classes, and
4
- recall-constrained cutoffs are pulled from the HF model repo
5
- ``Pybunny/nilmbench-faustine`` at startup. Example frames are bundled with
6
- the Space so the demo works offline of the laptop.
 
 
 
 
 
 
 
 
7
  """
8
 
9
  from __future__ import annotations
10
 
 
11
  import json
 
 
 
12
  from pathlib import Path
13
 
14
  import numpy as np
@@ -19,11 +31,17 @@ import gradio as gr
19
  import matplotlib
20
  matplotlib.use("Agg")
21
  import matplotlib.pyplot as plt
22
- from huggingface_hub import hf_hub_download
 
 
 
 
 
23
 
24
  HERE = Path(__file__).resolve().parent
25
  EXAMPLES_DIR = HERE / "examples"
26
  MODEL_REPO = "Pybunny/nilmbench-faustine"
 
27
 
28
  # UK-DALE House 2 calibration constants (from calibration_house_2.cfg).
29
  V_PER_ADC = 1.88296904357e-7
@@ -34,7 +52,7 @@ I_FACTOR = ADC_FULL_SCALE * I_PER_ADC # ~102.5
34
 
35
 
36
  # ----------------------------------------------------------------------
37
- # Model (self-contained so the Space has no dependency on the nilmbench pkg)
38
  # ----------------------------------------------------------------------
39
  class FaustineCNN(nn.Module):
40
  def __init__(self, n_categories: int):
@@ -89,7 +107,7 @@ MODEL, CLASSES, CUTOFFS = load_assets()
89
 
90
 
91
  # ----------------------------------------------------------------------
92
- # Inference + plotting
93
  # ----------------------------------------------------------------------
94
  def _to_2d_image(vi_norm: np.ndarray) -> torch.Tensor:
95
  if vi_norm.shape != (2, 96000):
@@ -101,8 +119,6 @@ def _to_2d_image(vi_norm: np.ndarray) -> torch.Tensor:
101
  def predict(vi_norm: np.ndarray, aggregate_W: float) -> dict[str, float]:
102
  with torch.no_grad():
103
  scores = MODEL(_to_2d_image(vi_norm)).cpu().numpy().squeeze(0)
104
- # FaustineCNN outputs per-category Bernoulli activations; renormalise
105
- # across categories to obtain shares, then scale by the aggregate.
106
  shares = scores / (scores.sum() + 1e-9)
107
  raw = shares * float(aggregate_W)
108
  out = {}
@@ -155,9 +171,6 @@ def make_overview_plot(vi_norm: np.ndarray, preds: dict[str, float],
155
  return fig
156
 
157
 
158
- # ----------------------------------------------------------------------
159
- # Gradio handlers
160
- # ----------------------------------------------------------------------
161
  def list_examples() -> list[str]:
162
  if not EXAMPLES_DIR.exists():
163
  return []
@@ -195,37 +208,229 @@ def run_upload(file_obj, aggregate_W: float):
195
  return make_overview_plot(vi, preds, None), preds
196
 
197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  # ----------------------------------------------------------------------
199
  # UI
200
  # ----------------------------------------------------------------------
201
  def build_ui() -> gr.Blocks:
202
  examples = list_examples()
203
- with gr.Blocks(title="NILMbench demo") as demo:
204
  gr.Markdown(
205
- "# NILMbench demo\n"
206
- "FaustineCNN trained on UK-DALE House 1, applied to a single "
207
- "6-second 16 kHz V/I segment from House 2. Predicted power is "
208
- "post-processed with the recall-constrained cutoffs from the paper.\n\n"
209
  "Source code: <https://github.com/Saharmgh/NILMbench> · "
210
- "Model: <https://huggingface.co/Pybunny/nilmbench-faustine>"
 
211
  )
212
  with gr.Tabs():
213
- with gr.TabItem("Built-in example"):
214
  ex = gr.Dropdown(examples, label="Example frame",
215
  value=examples[0] if examples else None)
216
- btn = gr.Button("Run", variant="primary")
217
  plot_a = gr.Plot()
218
  lab_a = gr.JSON(label="Predicted power per category (W)")
219
  btn.click(run_example, ex, [plot_a, lab_a])
220
- with gr.TabItem("Upload your own"):
 
221
  up = gr.File(label="V/I segment (.npy, shape (2, 96000), "
222
  "FLAC-normalised float in [-1, 1])")
223
  agg = gr.Slider(0, 8000, value=300, step=10,
224
  label="Aggregate active power (W)")
225
- btn2 = gr.Button("Run", variant="primary")
226
  plot_b = gr.Plot()
227
  lab_b = gr.JSON(label="Predicted power per category (W)")
228
  btn2.click(run_upload, [up, agg], [plot_b, lab_b])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  return demo
230
 
231
 
 
1
  """NILMbench HuggingFace Space.
2
 
3
+ Three tabs:
4
+
5
+ 1. **Built-in example** – run the FaustineCNN baseline on a packaged
6
+ 6-second 16 kHz V/I frame from UK-DALE House 2.
7
+ 2. **Upload V/I frame** – run FaustineCNN on a user-supplied single frame.
8
+ 3. **Benchmark your model** – upload a ``.py`` model definition + a ``.pt``
9
+ weights file and score it on the dense UK-DALE House 2 benchmark (full
10
+ 60,000 frames; the Space defaults to a 500-frame quick check to stay
11
+ within the free-tier compute budget).
12
+
13
+ Model weights, classes, and recall-constrained cutoffs for the baseline are
14
+ pulled from the HF model repo ``Pybunny/nilmbench-faustine`` at startup.
15
  """
16
 
17
  from __future__ import annotations
18
 
19
+ import importlib.util
20
  import json
21
+ import sys
22
+ import tempfile
23
+ import traceback
24
  from pathlib import Path
25
 
26
  import numpy as np
 
31
  import matplotlib
32
  matplotlib.use("Agg")
33
  import matplotlib.pyplot as plt
34
+ from huggingface_hub import hf_hub_download, snapshot_download
35
+
36
+ # nilmbench is installed from the companion GitHub repo (see requirements.txt).
37
+ from nilmbench.runner import run_user_model
38
+ from nilmbench.benchmark import evaluate_dense
39
+ from nilmbench.io.report import render_markdown_report
40
 
41
  HERE = Path(__file__).resolve().parent
42
  EXAMPLES_DIR = HERE / "examples"
43
  MODEL_REPO = "Pybunny/nilmbench-faustine"
44
+ DATASET_REPO = "Pybunny/nilmbench-ukdale"
45
 
46
  # UK-DALE House 2 calibration constants (from calibration_house_2.cfg).
47
  V_PER_ADC = 1.88296904357e-7
 
52
 
53
 
54
  # ----------------------------------------------------------------------
55
+ # Baseline model (self-contained for the single-frame demo)
56
  # ----------------------------------------------------------------------
57
  class FaustineCNN(nn.Module):
58
  def __init__(self, n_categories: int):
 
107
 
108
 
109
  # ----------------------------------------------------------------------
110
+ # Single-frame inference (tabs 1 and 2)
111
  # ----------------------------------------------------------------------
112
  def _to_2d_image(vi_norm: np.ndarray) -> torch.Tensor:
113
  if vi_norm.shape != (2, 96000):
 
119
  def predict(vi_norm: np.ndarray, aggregate_W: float) -> dict[str, float]:
120
  with torch.no_grad():
121
  scores = MODEL(_to_2d_image(vi_norm)).cpu().numpy().squeeze(0)
 
 
122
  shares = scores / (scores.sum() + 1e-9)
123
  raw = shares * float(aggregate_W)
124
  out = {}
 
171
  return fig
172
 
173
 
 
 
 
174
  def list_examples() -> list[str]:
175
  if not EXAMPLES_DIR.exists():
176
  return []
 
208
  return make_overview_plot(vi, preds, None), preds
209
 
210
 
211
+ # ----------------------------------------------------------------------
212
+ # Tab 3: full benchmark, with the user's uploaded model
213
+ # ----------------------------------------------------------------------
214
+ _BENCHMARK_DATA_DIR: Path | None = None
215
+
216
+
217
def _ensure_benchmark_data() -> Path:
    """Return the local snapshot directory of the benchmark dataset.

    The first call fetches ``DATASET_REPO`` (as a ``dataset`` repo) through
    ``snapshot_download``, restricted to the ``benchmark/`` folder plus
    ``summary.json`` and ``README.md``.  The resulting path is memoised in
    the module-level ``_BENCHMARK_DATA_DIR`` so later calls return
    immediately without touching the hub again.
    """
    global _BENCHMARK_DATA_DIR
    if _BENCHMARK_DATA_DIR is None:
        # Only these paths are requested from the dataset repo.
        wanted = ["benchmark/*", "summary.json", "README.md"]
        snapshot_dir = snapshot_download(
            repo_id=DATASET_REPO,
            repo_type="dataset",
            allow_patterns=wanted,
        )
        _BENCHMARK_DATA_DIR = Path(snapshot_dir)
    return _BENCHMARK_DATA_DIR
229
+
230
+
231
def _import_user_module(file_path: Path, class_name: str):
    """Dynamically import a user-uploaded ``.py`` file and return one class.

    The file is executed as a module registered in ``sys.modules`` under the
    fixed name ``"user_model_module"`` (each call replaces the previous
    registration).  If executing the file raises, the half-initialised module
    is removed from ``sys.modules`` again before the exception propagates, so
    a failed upload never leaves a broken module behind.

    SECURITY NOTE: this runs arbitrary uploaded Python code inside the
    current process.  Nothing here sandboxes it; isolation has to be provided
    by the hosting environment.

    Args:
        file_path: Location of the uploaded Python source file.
        class_name: Name of the attribute (the model class) to fetch from the
            imported module.

    Returns:
        The attribute called ``class_name`` from the imported module.

    Raises:
        ImportError: If no import spec/loader can be built for ``file_path``.
        AttributeError: If the module imports fine but lacks ``class_name``.
        Exception: Whatever the uploaded file raises at import time.
    """
    module_name = "user_model_module"
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Could not load module from {file_path}")

    mod = importlib.util.module_from_spec(spec)
    # Register before executing so the module can be looked up by name while
    # its own top-level code runs.
    sys.modules[module_name] = mod
    try:
        spec.loader.exec_module(mod)
    except BaseException:
        # Do not leave a partially executed module registered.
        if sys.modules.get(module_name) is mod:
            del sys.modules[module_name]
        raise

    if not hasattr(mod, class_name):
        raise AttributeError(
            f"Uploaded module has no attribute '{class_name}'. "
            f"Available: {[n for n in dir(mod) if not n.startswith('_')]}"
        )
    return getattr(mod, class_name)
245
+
246
+
247
def _subset_dataset(data_root: Path, max_frames: int) -> Path:
    """Create a temporary dataset root holding only the first N frames.

    Reads ``benchmark/x_vi_6s.npy`` and ``benchmark/labels_and_index.npz``
    under ``data_root``.  When ``max_frames`` covers the whole set, nothing is
    copied and ``data_root`` itself is returned.  Otherwise a fresh temporary
    directory with the same ``benchmark/`` layout is written: the frame array
    is truncated to ``max_frames`` rows, and every array in the ``.npz`` whose
    first dimension equals the total frame count is truncated the same way
    (all other arrays are copied unchanged).

    The caller owns the returned temporary directory and is responsible for
    deleting it when it differs from ``data_root``.

    Args:
        data_root: Directory containing the ``benchmark/`` folder.
        max_frames: Number of leading frames to keep; must be at least 1.

    Returns:
        ``data_root`` if no subsetting is needed, else the new temporary root.

    Raises:
        ValueError: If ``max_frames`` is smaller than 1 (a zero or negative
            value would otherwise slice an empty or tail-trimmed subset).
    """
    import shutil  # local: only needed for cleanup on failure

    max_frames = int(max_frames)
    if max_frames < 1:
        raise ValueError(f"max_frames must be >= 1, got {max_frames}")

    src = data_root / "benchmark"
    # Memory-map once; reused both for the size check and for the slice.
    frames = np.load(src / "x_vi_6s.npy", mmap_mode="r")
    n_total = int(frames.shape[0])
    if max_frames >= n_total:
        return data_root  # use full set

    tmp_root = Path(tempfile.mkdtemp(prefix="nilmbench_subset_"))
    try:
        sub = tmp_root / "benchmark"
        sub.mkdir(parents=True)

        np.save(sub / "x_vi_6s.npy", np.asarray(frames[:max_frames]))

        sliced = {}
        # NOTE(review): allow_pickle=True unpickles object arrays; acceptable
        # only as long as the benchmark files come from a trusted source.
        with np.load(src / "labels_and_index.npz", allow_pickle=True) as lab:
            for key in lab.files:
                arr = lab[key]
                per_frame = arr.ndim >= 1 and arr.shape[0] == n_total
                sliced[key] = arr[:max_frames] if per_frame else arr
        np.savez_compressed(sub / "labels_and_index.npz", **sliced)
    except BaseException:
        # Do not leave a half-written subset on disk.
        shutil.rmtree(tmp_root, ignore_errors=True)
        raise
    return tmp_root
274
+
275
+
276
def _uploaded_path(file_obj) -> "str | None":
    """Return the filesystem path of an uploaded file, or ``None`` if absent.

    Accepts either an object exposing the path as ``.name`` or a plain path
    string.
    """
    if file_obj is None:
        return None
    return str(getattr(file_obj, "name", file_obj))


def _failure_report(headline: str):
    """Build the ``(markdown, None)`` result for a failed stage.

    Must be called from inside an ``except`` block: the current traceback is
    embedded in a fenced code block below the bold ``headline``.
    """
    return f"**{headline}**\n\n```\n{traceback.format_exc()}\n```", None


def run_benchmark_upload(model_file, weights_file, class_name: str,
                         output_kind: str, max_frames: int, batch_size: int):
    """Run the user's model on the dense House-2 set and render a report.

    Stages, each reported as a Markdown error message instead of raising:
    import the uploaded model file, fetch the benchmark data, build the
    frame subset, run inference via ``run_user_model``, then score the saved
    predictions with ``evaluate_dense`` and render the report.

    SECURITY NOTE: importing the uploaded file executes untrusted code in
    this process.

    Args:
        model_file: Uploaded ``.py`` file (object with ``.name`` or a path
            string); ``None`` yields a prompt to upload one.
        weights_file: Uploaded weights file, or ``None`` to run without
            loading weights.
        class_name: Name of the model class in the uploaded file; blank
            falls back to ``"Model"``.
        output_kind: Passed through to ``run_user_model`` and shown in the
            report.
        max_frames: Number of leading benchmark frames to score.
        batch_size: Inference batch size.

    Returns:
        A ``(markdown, score_json_path)`` tuple; ``score_json_path`` is
        ``None`` whenever a stage failed.
    """
    import shutil  # local: only needed for temp-dir cleanup

    if model_file is None:
        return "**Please upload a Python file defining your model.**", None
    class_name = (class_name or "Model").strip() or "Model"
    weights_path = _uploaded_path(weights_file)

    try:
        # Called for its side effect (registers "user_model_module") and to
        # validate that the class exists; the class object itself is unused.
        _import_user_module(Path(_uploaded_path(model_file)), class_name)
    except Exception:
        return _failure_report(f"Failed to import model class `{class_name}`:")

    try:
        data_root = _ensure_benchmark_data()
    except Exception:
        return _failure_report("Could not download benchmark data:")

    try:
        active_root = _subset_dataset(data_root, int(max_frames))
    except Exception:
        return _failure_report("Could not prepare data subset:")

    tmpdir = Path(tempfile.mkdtemp(prefix="nilmbench_report_"))
    preds_path = tmpdir / "predictions.npz"

    try:
        # We already have the class; rebind via a temporary module name so
        # nilmbench.runner's importer can find it.
        sys.modules["__nilmbench_user__"] = sys.modules["user_model_module"]
        run = run_user_model(
            module_spec=f"__nilmbench_user__:{class_name}",
            weights_path=weights_path,
            data_root=active_root,
            out_path=preds_path,
            batch_size=int(batch_size),
            device="cpu",
            output_kind=output_kind,
            strict_load=False,
            model_name=class_name,
        )
    except Exception:
        shutil.rmtree(tmpdir, ignore_errors=True)  # nothing to download
        return _failure_report("Model failed during inference:")
    finally:
        # The subset is a private per-run copy of the frames; drop it so
        # repeated runs do not fill the disk.
        # NOTE(review): assumes run_user_model has finished reading
        # data_root by the time it returns — confirm against nilmbench.runner.
        if active_root != data_root:
            shutil.rmtree(active_root, ignore_errors=True)

    try:
        # Copy the arrays out and close the archive right away.
        with np.load(preds_path, allow_pickle=True) as preds:
            y_true = preds["y_true"].astype(np.float32)
            y_pred = preds["y_pred"].astype(np.float32)
            classes = [str(c) for c in preds["class_names"]]

        result = evaluate_dense(
            y_true_W=y_true,
            y_pred_W=y_pred,
            classes=classes,
            model_name=class_name,
        )

        extra = {
            "Model class": class_name,
            "Weights file": Path(weights_path).name if weights_path else "(none)",
            "Frames scored": f"{run.n_frames} / 60,000",
            "Output kind": output_kind,
        }
        md = render_markdown_report(
            result,
            title=f"NILMbench report — {class_name}",
            extra=extra,
        )

        score_json_path = tmpdir / "score.json"
        score_json_path.write_text(
            json.dumps(result.to_dict(), indent=2, sort_keys=True)
        )
    except Exception:
        return _failure_report("Scoring failed:")

    return md, str(score_json_path)
347
+
348
+
349
  # ----------------------------------------------------------------------
350
  # UI
351
  # ----------------------------------------------------------------------
352
def build_ui() -> gr.Blocks:
    """Assemble the Gradio interface and return it (not launched here).

    Layout: a Markdown header followed by three tabs —
    a built-in example tab wired to ``run_example``, a single-frame upload
    tab wired to ``run_upload``, and a "Benchmark your model" tab wired to
    ``run_benchmark_upload``.
    """
    examples = list_examples()
    with gr.Blocks(title="NILMbench") as demo:
        # NOTE(review): the "$_{20W}$" subscript relies on single-dollar
        # LaTeX rendering in gr.Markdown — confirm it renders with the
        # installed Gradio version.
        gr.Markdown(
            "# NILMbench\n"
            "Open benchmark for high-frequency NILM regression on UK-DALE 2015 "
            "(House 1 → House 2). Headline metric: modified Jaccard index "
            "**MJ$_{20W}$** with hybrid tolerance.\n\n"
            "Source code: <https://github.com/Saharmgh/NILMbench> · "
            "Baseline model: <https://huggingface.co/Pybunny/nilmbench-faustine> · "
            "Dataset: <https://huggingface.co/datasets/Pybunny/nilmbench-ukdale>"
        )
        with gr.Tabs():
            # Tab 1: run the baseline on a frame bundled with the Space.
            with gr.TabItem("Single frame · built-in example"):
                ex = gr.Dropdown(examples, label="Example frame",
                                 value=examples[0] if examples else None)
                btn = gr.Button("Run FaustineCNN", variant="primary")
                plot_a = gr.Plot()
                lab_a = gr.JSON(label="Predicted power per category (W)")
                btn.click(run_example, ex, [plot_a, lab_a])

            # Tab 2: run the baseline on a user-supplied frame.
            with gr.TabItem("Single frame · upload V/I"):
                up = gr.File(label="V/I segment (.npy, shape (2, 96000), "
                                   "FLAC-normalised float in [-1, 1])")
                agg = gr.Slider(0, 8000, value=300, step=10,
                                label="Aggregate active power (W)")
                btn2 = gr.Button("Run FaustineCNN", variant="primary")
                plot_b = gr.Plot()
                lab_b = gr.JSON(label="Predicted power per category (W)")
                btn2.click(run_upload, [up, agg], [plot_b, lab_b])

            # Tab 3: score an uploaded model definition + weights.
            with gr.TabItem("Benchmark your model"):
                gr.Markdown(
                    "Upload a `.py` file defining a `torch.nn.Module` "
                    "subclass and its trained weights `.pt`. The Space "
                    "downloads the dense House-2 benchmark split from "
                    "[`Pybunny/nilmbench-ukdale`](https://huggingface.co/datasets/Pybunny/nilmbench-ukdale) "
                    "on first run (cached afterwards), runs your model on the "
                    "selected number of frames, and produces a full score "
                    "sheet.\n\n"
                    "**Model contract** (see "
                    "[`examples/byom_template.py`](https://github.com/Saharmgh/NILMbench/blob/main/examples/byom_template.py)):\n"
                    "* `forward(x)` takes `x` shape `(B, 2, 96000)` (V then I).\n"
                    "* Returns non-negative `(B, K)` per-category power in "
                    "watts. If your model emits shares in [0, 1] instead, "
                    "select **shares** below and the runner will scale by the "
                    "per-frame aggregate.\n"
                    "* If the constructor accepts a keyword named "
                    "`n_categories` (or `num_classes` / `K`), it is filled in "
                    "automatically.\n"
                )
                with gr.Row():
                    with gr.Column():
                        model_py = gr.File(label="Model definition (.py)",
                                           file_types=[".py"])
                        weights = gr.File(label="Weights (.pt, optional for "
                                                "sanity checks)",
                                          file_types=[".pt", ".pth"])
                        class_name = gr.Textbox(label="Class name",
                                                value="Model")
                        output_kind = gr.Radio(
                            ["watts", "shares"],
                            value="watts",
                            label="Output kind",
                            info="watts = per-category power directly; "
                                 "shares = renormalise + scale by aggregate")
                        max_frames = gr.Slider(50, 60000, value=500, step=50,
                                               label="Frames to score",
                                               info="Free-tier CPU. Default 500 "
                                                    "≈ 1–2 min. Full 60 000 "
                                                    "frames may take longer.")
                        batch_size = gr.Slider(1, 64, value=16, step=1,
                                               label="Batch size")
                        run_btn = gr.Button("Run benchmark", variant="primary")
                    with gr.Column():
                        report_md = gr.Markdown(label="Report")
                        score_file = gr.File(label="Download score.json")
                # Input order must match run_benchmark_upload's parameters.
                run_btn.click(
                    run_benchmark_upload,
                    [model_py, weights, class_name, output_kind, max_frames, batch_size],
                    [report_md, score_file],
                )
    return demo
435
 
436