multiclass-folded / inference_example.py

Multi-class softmax folded detector — initial release (V8-V14 + V16, 2026-05-13)

4a9a4d9 verified 2 days ago

7.3 kB

	#!/usr/bin/env python3
	# SPDX-License-Identifier: Apache-2.0
	"""Multi-class folded detector — end-to-end inference example.

	Three-artefact collaboration. This script:

	1. Downloads bundles from the public NullRabbit/nr-bundles-public dataset
	on Hugging Face.
	2. Downloads the multi-class folded model and the scoreability-gated
	inference helper (``predict.py``) from this repository.
	3. Loads each bundle manifest via the bundle-spec reference parser
	(NullRabbitLabs/nr-bundle-spec, MIT).
	4. Calls ``predict.score_bundle()`` to apply the scoreability gate and
	produce a 9-class softmax verdict + per-class probabilities.

	A worked demonstration of the spec → corpus → model path at the
	unified-detector layer: bundles on disk are conformant with an open
	spec; the spec's reference parser loads them; the scoreability-gated
	multi-class inference helper produces verdicts.

	Dependencies::

	pip install huggingface_hub pyarrow scikit-learn joblib numpy
	pip install git+https://github.com/NullRabbitLabs/nr-bundle-spec.git

	Usage::

	python inference_example.py

	Four bundles are scored: three attack bundles (V8 / V13 / V14) and one
	benign passive bundle that exercises the scoreability gate. V11 and V16
	(gossip-abuse) demonstrations are not possible from the public dataset
	because the public bundles drop raw ``packets.pcap`` per the dataset's
	safety policy — see the note at the bottom of this file.
	"""

	from __future__ import annotations

	import importlib.util
	import sys
	from pathlib import Path

	from huggingface_hub import hf_hub_download, snapshot_download


	# ─── Constants ──────────────────────────────────────────────────────

	MODEL_REPO = "NullRabbit/multiclass-folded"
	DATASET_REPO = "NullRabbit/nr-bundles-public"

	# Sample bundles drawn from nr-bundles-public, one tuple per bundle:
	# (corpus_id, expected_primitive, human-readable label).
	#
	# Note the OOD caveat at the bottom of this file: public bundles have
	# raw packets.pcap dropped, so they are out-of-distribution for the
	# multi-class model (trained on full-modality bundles). The three
	# V8 / V13 / V14 attack bundles below have wire shapes that the model
	# discriminates cleanly even without pcap, and the benign passive bundle
	# is included to exercise the scoreability gate. V11 / benign-with-traffic
	# / V16 demonstrations require raw pcap and are not available from the
	# public dataset.
	SAMPLES = [
	    (
	        "crp_19d438471fec4229",
	        "sui_F10_multi_get_objects_amp",
	        "V8 (response_amp, Sui) — survives pcap-drop",
	    ),
	    (
	        "crp_2a9d40758d9a4192",
	        "SOL_MC_grafana_anon",
	        "V13 (service_misconfig, Solana) — survives pcap-drop",
	    ),
	    (
	        "crp_1ef98f1fc0644369",
	        "sui_F14_devinspect_tokio_wedge",
	        "V14 (compute_amp, Sui) — survives pcap-drop",
	    ),
	    (
	        "crp_0598afb4d5e44fb9",
	        "sui_BENIGN_passive_fullnode",
	        "benign passive (Sui) — tests scoreability gate",
	    ),
	]


	def _load_module(name: str, path: str) -> "object":
	    """Import the Python source file at ``path`` as a module named ``name``.

	    The module object is registered in ``sys.modules`` under ``name``
	    before its body is executed, so it can be looked up by name while it
	    is still being initialised.

	    Args:
	        name: Name to register the module under in ``sys.modules``.
	        path: Filesystem location of the source file to execute.

	    Returns:
	        The fully initialised module object.

	    Raises:
	        ImportError: If no import spec or loader can be built for ``path``
	            (for example, the file suffix is not a recognised Python
	            module suffix).
	        Exception: Whatever the module body itself raises is propagated
	            unchanged; the partially initialised module is removed from
	            ``sys.modules`` first.
	    """
	    spec = importlib.util.spec_from_file_location(name, path)
	    if spec is None or spec.loader is None:
	        # spec_from_file_location() signals "cannot handle this file" by
	        # returning None; fail with a clear error instead of an
	        # AttributeError on the next line.
	        raise ImportError(
	            f"cannot build an import spec for module {name!r} from {path!r}",
	            name=name,
	            path=str(path),
	        )

	    module = importlib.util.module_from_spec(spec)
	    sys.modules[name] = module
	    try:
	        spec.loader.exec_module(module)
	    except BaseException:
	        # Do not leave a half-initialised module importable by name.
	        sys.modules.pop(name, None)
	        raise
	    return module


	def _print_record(corpus_id: str, expected_primitive: str, label: str,
	                  record: dict) -> None:
	    """Print the per-bundle report for one ``predict.score_bundle()`` result.

	    Args:
	        corpus_id: Bundle identifier (directory name inside the dataset).
	        expected_primitive: Primitive name shown next to the identifier.
	        label: Human-readable description of the expected outcome.
	        record: Mapping returned by ``predict.score_bundle()``. Only the
	            keys read below are relied on.
	    """
	    print(f"--- {corpus_id} ({expected_primitive}) ---")
	    print(f" expected: {label}")
	    print(f" verdict: {record['verdict']}")

	    if record["verdict"] == "unscoreable":
	        # The gate refused to score this bundle; report why.
	        print(f" reason: {record['reason']}")
	        print(f" n_responses_rows: {record.get('n_responses_rows', 0)}")
	        return

	    print(f" argmax P: {record['argmax_p']:.4f}")
	    print(f" feature_coverage: {record['feature_coverage']}")
	    print(f" n_responses_rows: {record['n_responses_rows']}")
	    print(" top-3 class probabilities:")
	    # Highest probability first; the sort is stable, so ties keep the
	    # order in which the classes appear in the mapping.
	    top3 = sorted(record["class_probs"].items(),
	                  key=lambda kv: kv[1], reverse=True)[:3]
	    for cls, p in top3:
	        print(f" P({cls}) = {p:.4f}")
	    # NOTE(review): the source's indentation was lost, so whether this
	    # check also applied to unscoreable records cannot be confirmed.
	    if record.get("coverage_warning"):
	        print(f" ⚠ coverage_warning: {record['coverage_warning']}")


	def _print_notes() -> None:
	    """Print the static deployment notes shown at the end of the run."""
	    rule = "=" * 72
	    notes = """
	- predict.score_bundle() is the recommended consumption surface. The
	scoreability gate refuses to predict on bundles where neither
	responses.parquet nor packets.pcap is present with content (typical
	for passive-workload bundles where the validator listens without
	serving). Callers who want raw model output without the gate should
	load model.joblib directly via joblib.load.

	- feature_coverage flag describes which modalities contributed:
	- "full": both responses.parquet and packets.pcap present
	- "resp_only": responses.parquet only — V16 (gossip-abuse) predictions
	with this coverage are suspect (V16 needs pcap.*)
	- "pcap_only": packets.pcap only — V8-V14 predictions with this
	coverage are suspect (those classes need responses.*)
	- "none": bundle is unscoreable; gate refused

	- Public dataset bundles drop raw packets.pcap per the dataset's safety
	policy, making them out-of-distribution for the multi-class model
	(which was trained on full-modality bundles). Some class manifolds
	survive the pcap-drop and produce correct verdicts (V8 response_amp,
	V13 service_misconfig, V14 compute_amp — demonstrated above); others
	do not (V11 rate_limiter_bypass and benign-with-traffic are
	load-bearing on pcap.* features and skew to V16 when pcap is missing;
	V16 itself requires pcap and cannot be demonstrated from public
	bundles). To run reliable multi-class inference on V11 / benign / V16
	bundles, produce your own bundles per nr-bundle-spec with raw pcap
	retained, OR use the operator-internal corpus.

	- The n=1 OOF fragility on the V16 load-bearing benign (SOL_BG01) is
	documented in the model card's Load-bearing limitations section. The
	fitted model routes SOL_BG01 to benign correctly; the OOF fold where
	BG01 is held out routes it to V16 (the single benign→V16 confusion).
	Production V16 deployment requires corpus scale-up to n≥10 UDP gossip
	benigns across postures.
	""".strip()
	    print(rule)
	    print("Notes on multi-class folded deployment")
	    print(rule)
	    print(notes)
	    print(rule)


	def main() -> int:
	    """Download the model and sample bundles, score each, print a report.

	    Steps: fetch ``model.joblib`` and ``predict.py`` from ``MODEL_REPO``,
	    import the helper, fetch the bundles listed in ``SAMPLES`` from
	    ``DATASET_REPO``, call ``predict.score_bundle()`` on each bundle
	    directory, then print the deployment notes.

	    Returns:
	        ``0`` once every sample has been processed; errors from the
	        downloads or from scoring propagate as exceptions.
	    """
	    print("=== Multi-class softmax folded detector ===")
	    print(f" model repo: {MODEL_REPO}")
	    print(f" dataset repo: {DATASET_REPO}")
	    print()

	    # Pull model + predict helper.
	    # NOTE(review): predict.py is executed as code and model.joblib is
	    # loaded through the helper (joblib artefacts are pickle-based), yet
	    # neither download pins ``revision=``. Pin a commit hash when the run
	    # must be reproducible or the repository contents audited.
	    model_path = hf_hub_download(repo_id=MODEL_REPO, filename="model.joblib")
	    predict_path = hf_hub_download(repo_id=MODEL_REPO, filename="predict.py")

	    predict = _load_module("multiclass_predict", predict_path)
	    payload = predict.load_model(model_path)

	    print(f"Model loaded: {type(payload['model']).__name__}, "
	          f"{len(payload['feature_names'])} features, "
	          f"{len(payload['class_order'])} classes "
	          f"({payload['class_order']})")
	    print()

	    # Pull sample bundles: restrict the snapshot to the sampled bundle
	    # directories rather than downloading the whole dataset.
	    dataset_root = Path(snapshot_download(
	        repo_id=DATASET_REPO, repo_type="dataset",
	        allow_patterns=[f"{cid}/*" for cid, _, _ in SAMPLES],
	    ))

	    # Score each.
	    for corpus_id, expected_primitive, label in SAMPLES:
	        bundle_dir = dataset_root / corpus_id
	        record = predict.score_bundle(bundle_dir, payload)
	        _print_record(corpus_id, expected_primitive, label, record)
	        print()

	    _print_notes()
	    return 0


	if __name__ == "__main__":
	    # Run as a script: hand main()'s return value to the interpreter as
	    # the process exit status.
	    sys.exit(main())