Pybunny committed on
Commit
0fc2400
·
verified ·
1 Parent(s): 79d4858

Re-add Benchmark-your-model tab

Browse files
Files changed (1) hide show
  1. app.py +185 -4
app.py CHANGED
@@ -1,9 +1,16 @@
1
  """NILMbench HuggingFace Space.
2
 
3
- Single-frame demo of the FaustineCNN baseline. Model weights, classes, and
4
- recall-constrained cutoffs are pulled from the HF model repo
5
- ``Pybunny/nilmbench-faustine`` at startup. Example frames are bundled with
6
- the Space so the demo works offline of the laptop.
 
 
 
 
 
 
 
7
  """
8
 
9
  # ----------------------------------------------------------------------
@@ -223,6 +230,160 @@ def run_upload(file_obj, aggregate_W: float):
223
  return make_overview_plot(vi, preds, None), preds
224
 
225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  # ----------------------------------------------------------------------
227
  # UI
228
  # ----------------------------------------------------------------------
@@ -254,6 +415,26 @@ def build_ui() -> gr.Blocks:
254
  plot_b = gr.Plot()
255
  lab_b = gr.JSON(label="Predicted power per category (W)")
256
  btn2.click(run_upload, [up, agg], [plot_b, lab_b])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  return demo
258
 
259
 
 
1
  """NILMbench HuggingFace Space.
2
 
3
+ Three tabs:
4
+ 1. Built-in single-frame example (FaustineCNN baseline, V/I bundled).
5
+ 2. Single-frame upload (user supplies a V/I segment).
6
+ 3. Benchmark your model: user uploads a .pt for the bundled
7
+ ``DemoRegressor`` architecture (see examples/byom_demo.py in the GitHub
8
+ repo); the Space scores it on a subset of the dense House-2 set and
9
+ renders the same Markdown report the CLI produces.
10
+
11
+ Asset sources: model weights for the baseline come from
12
+ ``Pybunny/nilmbench-faustine``; the dense benchmark split for tab 3 is
13
+ fetched once from ``Pybunny/nilmbench-ukdale`` and cached.
14
  """
15
 
16
  # ----------------------------------------------------------------------
 
230
  return make_overview_plot(vi, preds, None), preds
231
 
232
 
233
+ # ----------------------------------------------------------------------
234
+ # Tab 3: full benchmark with a user-uploaded .pt for DemoRegressor
235
+ # ----------------------------------------------------------------------
236
+ # Self-contained copy of examples.byom_demo.DemoRegressor so the Space
237
+ # does not have to import the nilmbench package at module load time
238
+ # (lighter dep tree, faster cold start).
239
class DemoRegressor(nn.Module):
    """Summary statistics -> one linear layer -> softplus.

    ``_feats`` collapses the last axis of the input into three statistics
    (RMS, mean absolute value, standard deviation) and concatenates them
    along the last remaining axis.  The linear ``head`` maps that vector to
    ``n_categories`` outputs (per-category power in W), and softplus keeps
    every output positive.
    """

    # Input width expected by ``head``.
    # NOTE(review): 6 = 3 statistics x 2 channels, which presumes inputs
    # shaped (..., 2, T) -- confirm against the benchmark arrays.
    N_FEATURES = 6

    def __init__(self, n_categories: int = 7):
        super().__init__()
        self.n_categories = n_categories
        # Attribute name is part of the checkpoint format
        # (``head.weight`` / ``head.bias``); do not rename.
        self.head = nn.Linear(self.N_FEATURES, n_categories)

    @staticmethod
    def _feats(x):
        """Return RMS, mean(|x|) and std over the last axis, concatenated."""
        mean_square = (x * x).mean(dim=-1)
        stats = (
            mean_square.clamp_min(0).sqrt(),
            x.abs().mean(dim=-1),
            x.std(dim=-1),
        )
        return torch.cat(stats, dim=-1)

    def forward(self, x):
        """Compute the softplus-activated output of ``head`` on the features."""
        features = self._feats(x)
        return F.softplus(self.head(features))
257
+
258
+
259
+ _BENCH_DATA_DIR = None
260
+
261
+
262
def _bench_data_root():
    """Return the local snapshot directory of the benchmark dataset.

    The first call runs ``snapshot_download`` against the
    ``Pybunny/nilmbench-ukdale`` dataset repo, restricted to ``benchmark/*``
    and ``summary.json``, and stores the resulting path in the module-level
    ``_BENCH_DATA_DIR``.  Later calls return that stored path without
    touching the hub again.
    """
    global _BENCH_DATA_DIR
    if _BENCH_DATA_DIR is None:
        # Imported lazily so the dependency is only loaded when the
        # benchmark tab is actually used.
        from huggingface_hub import snapshot_download

        snapshot_path = snapshot_download(
            repo_id="Pybunny/nilmbench-ukdale",
            repo_type="dataset",
            allow_patterns=["benchmark/*", "summary.json"],
        )
        _BENCH_DATA_DIR = Path(snapshot_path)
    return _BENCH_DATA_DIR
274
+
275
+
276
def _bench_subset(n_frames):
    """Read the first ``n_frames`` frames of the ``benchmark/`` split.

    Args:
        n_frames: requested number of frames; coerced with ``int()`` and
            clamped to ``[1, total]``.

    Returns:
        ``(x, y, cls, total)`` where ``x`` is the leading slice of
        ``x_vi_6s.npy`` as float32, ``y`` is the matching slice of
        ``y_power`` as float32, ``cls`` is the list of class names as
        ``str`` and ``total`` is the number of frames in the full split.

    Raises:
        Whatever ``_bench_data_root`` or ``np.load`` raise when the snapshot
        cannot be fetched or the files are missing.
    """
    root = _bench_data_root() / "benchmark"

    # Open the memory map once; only the requested leading slice is read.
    frames = np.load(root / "x_vi_6s.npy", mmap_mode="r")
    total = int(frames.shape[0])
    n = max(1, min(int(n_frames), total))
    x = np.asarray(frames[:n], dtype=np.float32)

    # NpzFile keeps an open file handle, so close it deterministically.
    # ``astype`` and the list comprehension copy the data out before the
    # archive is closed.
    # NOTE(review): allow_pickle=True is kept from the original -- presumably
    # ``class_names`` is an object array; confirm and drop it if not needed.
    with np.load(root / "labels_and_index.npz", allow_pickle=True) as lab:
        y = lab["y_power"][:n].astype(np.float32)
        cls = [str(c) for c in lab["class_names"]]
    return x, y, cls, total
288
+
289
+
290
def _demo_metrics(y_true, y_pred, theta, classes):
    """Compute the headline and per-category scores shown in the report.

    A category counts as "active" in a frame when its value exceeds the
    matching entry of ``theta``.  Frame-level scores (MJ_20W, Jaccard, F1)
    are averaged over frames in which at least one category is active in
    either the truth or the prediction.

    Args:
        y_true: ground-truth array, frames x categories.
        y_pred: predicted array of the same shape.
        theta: per-category activity thresholds.
        classes: category names, one per column.

    Returns:
        Dict with float entries ``MJ_20W``, ``F1``, ``Jaccard``, ``TECA``,
        ``MAE_W`` and ``per_class``, a list of ``(name, MJ_20W)`` tuples.
    """
    tol_w = 20.0  # absolute error tolerance behind the "_20W" suffix

    active_true = y_true > theta
    active_pred = y_pred > theta
    both = active_true & active_pred
    either = active_true | active_pred
    abs_err = np.abs(y_true - y_pred)
    within_tol = abs_err <= tol_w

    union = either.sum(axis=1)
    scored = union > 0
    inter = both.sum(axis=1).astype(np.float32)
    correct = (both & within_tol).sum(axis=1).astype(np.float32)
    if scored.any():
        denom = np.maximum(union[scored], 1)
        mj = float((correct[scored] / denom).mean())
        jacc = float((inter[scored] / denom).mean())
    else:
        mj = jacc = 0.0

    # Per-frame F1 = tp / (tp + (fp + fn) / 2); frames with nothing active
    # are excluded via NaN.  The per-frame array must be reduced with
    # nanmean before float(): float() on the array itself raises TypeError
    # whenever more than one frame is scored.
    fp = (~active_true & active_pred).sum(axis=1).astype(np.float32)
    fn = (active_true & ~active_pred).sum(axis=1).astype(np.float32)
    f1_denom = inter + 0.5 * (fp + fn)
    f1_per_frame = np.where(
        f1_denom > 0, inter / np.maximum(f1_denom, 1), np.nan)
    f1 = float(np.nanmean(f1_per_frame))

    # TECA per frame: 1 - sum|err| / (2 * sum(truth)); frames whose truth
    # sums to zero or less are excluded via NaN.
    total_true = y_true.sum(axis=1)
    teca_per_frame = np.where(
        total_true > 0,
        1.0 - abs_err.sum(axis=1) / np.maximum(2 * total_true, 1e-9),
        np.nan)
    teca = float(np.nanmean(teca_per_frame))
    mae = float(np.mean(abs_err))

    # Per-category MJ_20W pools frames instead of averaging per frame.
    union_by_class = either.sum(axis=0)
    correct_by_class = (both & within_tol).sum(axis=0)
    per_class = [
        (name, float(c / u) if u > 0 else 0.0)
        for name, c, u in zip(classes, correct_by_class, union_by_class)
    ]

    return {
        "MJ_20W": mj, "F1": f1, "Jaccard": jacc, "TECA": teca,
        "MAE_W": mae, "per_class": per_class,
    }


def _render_demo_report(scores, n_scored, n_total):
    """Render the dict returned by ``_demo_metrics`` as a Markdown report."""
    lines = [
        "# NILMbench — uploaded .pt\n",
        f"_Scored on {n_scored} of {n_total} dense House-2 frames._\n",
        "## Headline score sheet\n",
        "| Metric | Value |",
        "|---|---|",
        f"| MJ_20W (headline) | {scores['MJ_20W']:.4f} |",
        f"| F1 | {scores['F1']:.4f} |",
        f"| Jaccard | {scores['Jaccard']:.4f} |",
        f"| TECA | {scores['TECA']:.4f} |",
        f"| MAE (W) | {scores['MAE_W']:.2f} |\n",
        "## Per-category MJ_20W\n",
        "| Category | MJ_20W |",
        "|---|---|",
    ]
    lines.extend(f"| {name} | {value:.4f} |"
                 for name, value in scores["per_class"])
    lines.append("")
    return "\n".join(lines)


def _score_demo_pt(weights_file, n_frames):
    """Score an uploaded ``DemoRegressor`` checkpoint on the benchmark subset.

    Args:
        weights_file: the uploaded checkpoint, either an object exposing the
            file path as ``.name`` or the path itself; ``None`` when nothing
            was uploaded.
        n_frames: number of benchmark frames to score (clamped by
            ``_bench_subset``).

    Returns:
        ``(markdown, json_path)``.  On success ``json_path`` is the path of
        a freshly written ``score.json``; when no file was uploaded, the
        benchmark data cannot be loaded or the weights do not load, it is
        ``None`` and ``markdown`` carries the message for the user.
    """
    import json as _json
    import tempfile as _t

    if weights_file is None:
        return ("**Please upload a .pt file trained on the "
                "`DemoRegressor` architecture** (see "
                "[examples/byom_demo.py](https://github.com/Saharmgh/NILMbench/blob/main/examples/byom_demo.py)). "
                "A bundled checkpoint is at "
                "[examples/byom_demo.pt](https://github.com/Saharmgh/NILMbench/blob/main/examples/byom_demo.pt).",
                None)
    try:
        x, y_true, classes, total = _bench_subset(n_frames)
    except Exception as exc:
        return (f"**Benchmark data download failed.**\n\n```\n{exc}\n```", None)

    n_cat = len(classes)
    model = DemoRegressor(n_categories=n_cat)
    # Accept both an upload object with ``.name`` and a bare path string.
    weights_path = getattr(weights_file, "name", weights_file)
    try:
        # SECURITY: the checkpoint is untrusted user input.  weights_only=True
        # restricts unpickling to tensors and plain containers, so a crafted
        # file cannot execute code on load.  A plain state_dict (optionally
        # wrapped under "state_dict") still loads.
        state = torch.load(weights_path, map_location="cpu",
                           weights_only=True)
        if isinstance(state, dict) and "state_dict" in state:
            state = state["state_dict"]
        model.load_state_dict(state, strict=True)
    except Exception as exc:
        return (f"**Weights failed to load** (does the checkpoint match "
                f"`DemoRegressor(n_categories={n_cat})`?).\n\n"
                f"```\n{exc}\n```", None)
    model.eval()

    with torch.inference_mode():
        y_pred = model(torch.as_tensor(x)).cpu().numpy().astype(np.float32)

    # The metrics are computed inline because installing the nilmbench
    # scorer as a dependency is heavy.  theta_k defaults are from the paper;
    # any other category count falls back to a flat 10.
    if n_cat == 7:
        theta = np.array([3, 50, 10, 5, 5, 10, 10], dtype=np.float32)
    else:
        theta = np.full(n_cat, 10.0, dtype=np.float32)

    scores = _demo_metrics(y_true, y_pred, theta, classes)
    report = _render_demo_report(scores, len(x), total)

    # The temporary directory is left in place on purpose: the returned
    # path has to stay readable after this function returns.
    out = Path(_t.mkdtemp(prefix="nbench_report_")) / "score.json"
    out.write_text(_json.dumps({
        "MJ_20W": scores["MJ_20W"], "F1": scores["F1"],
        "Jaccard": scores["Jaccard"], "TECA": scores["TECA"],
        "MAE_W": scores["MAE_W"],
        "n_frames": int(len(x)), "n_total": int(total),
        "per_class_MJ_20W": dict(scores["per_class"]),
    }, indent=2, sort_keys=True))
    return report, str(out)
385
+
386
+
387
  # ----------------------------------------------------------------------
388
  # UI
389
  # ----------------------------------------------------------------------
 
415
  plot_b = gr.Plot()
416
  lab_b = gr.JSON(label="Predicted power per category (W)")
417
  btn2.click(run_upload, [up, agg], [plot_b, lab_b])
418
+ with gr.TabItem("Benchmark your model"):
419
+ gr.Markdown(
420
+ "Upload a `.pt` checkpoint trained on the bundled "
421
+ "[`DemoRegressor`](https://github.com/Saharmgh/NILMbench/blob/main/examples/byom_demo.py) "
422
+ "architecture (V/I summary stats → linear head, 7 outputs). "
423
+ "A sample checkpoint is in the repo at "
424
+ "[`examples/byom_demo.pt`](https://github.com/Saharmgh/NILMbench/blob/main/examples/byom_demo.pt). "
425
+ "The Space downloads the dense House-2 benchmark from "
426
+ "`Pybunny/nilmbench-ukdale` on first run (cached) and "
427
+ "scores your model on the selected number of frames. "
428
+ "For full 60 000-frame scoring or your own model "
429
+ "architecture, use the `nilmbench` CLI from the GitHub repo."
430
+ )
431
+ pt = gr.File(label="Trained .pt for DemoRegressor")
432
+ nf = gr.Slider(50, 5000, value=500, step=50,
433
+ label="Frames to score (free CPU; 500 ≈ 1 min)")
434
+ bb = gr.Button("Run benchmark", variant="primary")
435
+ rep = gr.Markdown()
436
+ jf = gr.File(label="score.json")
437
+ bb.click(_score_demo_pt, [pt, nf], [rep, jf])
438
  return demo
439
 
440