#!/usr/bin/env python3
# SPDX-License-Identifier: Apache-2.0
"""Multi-class folded detector — end-to-end inference example.

Three-artefact collaboration. This script:

  1. Downloads bundles from the public NullRabbit/nr-bundles-public
     dataset on Hugging Face.
  2. Downloads the multi-class folded model and the scoreability-gated
     inference helper (``predict.py``) from this repository.
  3. Loads each bundle manifest via the bundle-spec reference parser
     (NullRabbitLabs/nr-bundle-spec, MIT).
  4. Calls ``predict.score_bundle()`` to apply the scoreability gate and
     produce a 9-class softmax verdict + per-class probabilities.

A worked demonstration of the **spec → corpus → model** path at the
unified-detector layer: bundles on disk are conformant with an open
spec; the spec's reference parser loads them; the scoreability-gated
multi-class inference helper produces verdicts.

Dependencies::

    pip install huggingface_hub pyarrow scikit-learn joblib numpy
    pip install git+https://github.com/NullRabbitLabs/nr-bundle-spec.git

Usage::

    python inference_example.py

Four bundles are scored across the V8 / V13 / V14 / benign class
manifolds. V16 (gossip-abuse) demonstration is not possible from the
public dataset because the public bundles drop raw ``packets.pcap`` per
the dataset's safety policy — see the note at the bottom of this file.
"""

from __future__ import annotations

import importlib.util
import sys
from pathlib import Path
from types import ModuleType

from huggingface_hub import hf_hub_download, snapshot_download

# ─── Constants ──────────────────────────────────────────────────────
MODEL_REPO = "NullRabbit/multiclass-folded"
DATASET_REPO = "NullRabbit/nr-bundles-public"

# Sample bundles drawn from nr-bundles-public. Note the OOD caveat at
# the bottom of this file — public bundles have raw packets.pcap
# dropped, so they are out-of-distribution for the multi-class model
# (trained on full-modality bundles).
# The three V8 / V13 / V14 attack bundles below have wire shapes that
# the model discriminates cleanly even without pcap; the fourth (benign
# passive) sample exercises the scoreability gate. V11 /
# benign-with-traffic / V16 demonstrations require raw pcap and are not
# available from the public dataset.
#
# Each entry is (corpus_id, expected_primitive, human-readable label).
SAMPLES = [
    ("crp_19d438471fec4229", "sui_F10_multi_get_objects_amp",
     "V8 (response_amp, Sui) — survives pcap-drop"),
    ("crp_2a9d40758d9a4192", "SOL_MC_grafana_anon",
     "V13 (service_misconfig, Solana) — survives pcap-drop"),
    ("crp_1ef98f1fc0644369", "sui_F14_devinspect_tokio_wedge",
     "V14 (compute_amp, Sui) — survives pcap-drop"),
    ("crp_0598afb4d5e44fb9", "sui_BENIGN_passive_fullnode",
     "benign passive (Sui) — tests scoreability gate"),
]

# Closing notes printed verbatim by main() after all samples are scored.
_DEPLOYMENT_NOTES = """
- predict.score_bundle() is the recommended consumption surface. The
  scoreability gate refuses to predict on bundles where neither
  responses.parquet nor packets.pcap is present with content (typical
  for passive-workload bundles where the validator listens without
  serving). Callers who want raw model output without the gate should
  load model.joblib directly via joblib.load.

- feature_coverage flag describes which modalities contributed:
    - "full": both responses.parquet and packets.pcap present
    - "resp_only": responses.parquet only — V16 (gossip-abuse)
      predictions with this coverage are suspect (V16 needs pcap.*)
    - "pcap_only": packets.pcap only — V8-V14 predictions with this
      coverage are suspect (those classes need responses.*)
    - "none": bundle is unscoreable; gate refused

- Public dataset bundles drop raw packets.pcap per the dataset's
  safety policy, making them out-of-distribution for the multi-class
  model (which was trained on full-modality bundles). Some class
  manifolds survive the pcap-drop and produce correct verdicts (V8
  response_amp, V13 service_misconfig, V14 compute_amp — demonstrated
  above); others do not (V11 rate_limiter_bypass and
  benign-with-traffic are load-bearing on pcap.* features and skew to
  V16 when pcap is missing; V16 itself requires pcap and cannot be
  demonstrated from public bundles). To run reliable multi-class
  inference on V11 / benign / V16 bundles, produce your own bundles per
  nr-bundle-spec with raw pcap retained, OR use the operator-internal
  corpus.

- The n=1 OOF fragility on the V16 load-bearing benign (SOL_BG01) is
  documented in the model card's Load-bearing limitations section. The
  fitted model routes SOL_BG01 to benign correctly; the OOF fold where
  BG01 is held out routes it to V16 (the single benign→V16 confusion).
  Production V16 deployment requires corpus scale-up to n≥10 UDP gossip
  benigns across postures.
"""


def _load_module(name: str, path: str) -> ModuleType:
    """Import the Python source file at *path* as a module called *name*.

    The module is registered in ``sys.modules`` under *name* before its
    body runs (same order as the importlib "import a source file
    directly" recipe) and is returned once execution succeeds.

    Raises:
        ImportError: if no import spec / loader can be built for *path*
            (for example, the file suffix is not an importable one).
        Exception: whatever the module body itself raises; in that case
            the partially initialised module is removed from
            ``sys.modules`` again before the error propagates.
    """
    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        raise ImportError(
            f"cannot build an import spec for {path!r}",
            name=name,
            path=str(path),
        )

    module = importlib.util.module_from_spec(spec)
    sys.modules[name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module importable by name.
        sys.modules.pop(name, None)
        raise
    return module


def _format_record(corpus_id: str, expected_primitive: str, label: str,
                   record: dict) -> list:
    """Render one ``score_bundle`` result as the lines of its report.

    *record* is read with the keys this script has always used:
    ``verdict`` always; ``reason`` (and optionally ``n_responses_rows``)
    for an ``"unscoreable"`` verdict; otherwise ``argmax_p``,
    ``feature_coverage``, ``n_responses_rows`` and ``class_probs``.
    ``coverage_warning`` is optional.

    Returns:
        A list of strings, one per output line (no trailing blank line).
    """
    lines = [
        f"--- {corpus_id} ({expected_primitive}) ---",
        f" expected: {label}",
        f" verdict: {record['verdict']}",
    ]

    if record["verdict"] == "unscoreable":
        lines.append(f" reason: {record['reason']}")
        lines.append(
            f" n_responses_rows: {record.get('n_responses_rows', 0)}"
        )
    else:
        lines.append(f" argmax P: {record['argmax_p']:.4f}")
        lines.append(f" feature_coverage: {record['feature_coverage']}")
        lines.append(f" n_responses_rows: {record['n_responses_rows']}")
        lines.append(" top-3 class probabilities:")
        # Highest probability first; ties keep the mapping's own order.
        ranked = sorted(record["class_probs"].items(),
                        key=lambda kv: -kv[1])
        for cls, p in ranked[:3]:
            lines.append(f" P({cls}) = {p:.4f}")

    # Surface the warning whenever the record carries a non-empty one.
    if record.get("coverage_warning"):
        lines.append(f" ⚠ coverage_warning: {record['coverage_warning']}")
    return lines


def main() -> int:
    """Download model, helper and sample bundles, then score and report.

    Returns:
        ``0`` on completion; download or scoring errors propagate as
        exceptions.
    """
    print("=== Multi-class softmax folded detector ===")
    print(f" model repo: {MODEL_REPO}")
    print(f" dataset repo: {DATASET_REPO}")
    print()

    # Pull model + predict helper.
    # NOTE(review): predict.py is executed as code and model.joblib is a
    # joblib artefact, both fetched from the default revision of
    # MODEL_REPO. Consider pinning ``revision=`` to a reviewed commit
    # before running this outside a sandbox.
    model_path = hf_hub_download(repo_id=MODEL_REPO, filename="model.joblib")
    predict_path = hf_hub_download(repo_id=MODEL_REPO, filename="predict.py")
    predict = _load_module("multiclass_predict", predict_path)
    payload = predict.load_model(model_path)
    print(f"Model loaded: {type(payload['model']).__name__}, "
          f"{len(payload['feature_names'])} features, "
          f"{len(payload['class_order'])} classes "
          f"({payload['class_order']})")
    print()

    # Pull sample bundles: only the directories named in SAMPLES.
    dataset_root = Path(snapshot_download(
        repo_id=DATASET_REPO,
        repo_type="dataset",
        allow_patterns=[f"{cid}/*" for cid, _, _ in SAMPLES],
    ))

    # Score each.
    for corpus_id, expected_primitive, label in SAMPLES:
        record = predict.score_bundle(dataset_root / corpus_id, payload)
        print("\n".join(
            _format_record(corpus_id, expected_primitive, label, record)
        ))
        print()

    rule = "=" * 72
    print(rule)
    print("Notes on multi-class folded deployment")
    print(rule)
    print(_DEPLOYMENT_NOTES.strip())
    print(rule)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())