#!/usr/bin/env python3
# SPDX-License-Identifier: Apache-2.0
"""Multi-class folded detector — end-to-end inference example.
Three-artefact collaboration. This script:
1. Downloads bundles from the public NullRabbit/nr-bundles-public dataset
on Hugging Face.
2. Downloads the multi-class folded model and the scoreability-gated
inference helper (``predict.py``) from this repository.
3. Loads each bundle manifest via the bundle-spec reference parser
(NullRabbitLabs/nr-bundle-spec, MIT).
4. Calls ``predict.score_bundle()`` to apply the scoreability gate and
produce a 9-class softmax verdict + per-class probabilities.
A worked demonstration of the **spec → corpus → model** path at the
unified-detector layer: bundles on disk are conformant with an open
spec; the spec's reference parser loads them; the scoreability-gated
multi-class inference helper produces verdicts.
Dependencies::
pip install huggingface_hub pyarrow scikit-learn joblib numpy
pip install git+https://github.com/NullRabbitLabs/nr-bundle-spec.git
Usage::
python inference_example.py
Four bundles are scored: three attack bundles (V8 / V13 / V14) and one
benign passive bundle that exercises the scoreability gate. V11 and V16
(gossip-abuse) demonstrations are not possible from the public dataset
because the public bundles drop raw ``packets.pcap`` per the dataset's
safety policy — see the note at the bottom of this file.
"""
from __future__ import annotations
import importlib.util
import sys
from pathlib import Path
from huggingface_hub import hf_hub_download, snapshot_download
# ─── Constants ──────────────────────────────────────────────────────
# Hugging Face repository holding model.joblib and predict.py.
MODEL_REPO = "NullRabbit/multiclass-folded"
# Hugging Face dataset repository holding the public sample bundles.
DATASET_REPO = "NullRabbit/nr-bundles-public"

# Sample bundles drawn from nr-bundles-public, as (corpus_id,
# expected_primitive, label) triples. Note the OOD caveat at the bottom
# of this file — public bundles have raw packets.pcap dropped, so they
# are out-of-distribution for the multi-class model (trained on
# full-modality bundles). The three V8 / V13 / V14 attack bundles below
# have wire shapes that the model discriminates cleanly even without
# pcap; the benign passive bundle is included to test the scoreability
# gate. V11 / benign-with-traffic / V16 demonstrations require raw pcap
# and are not available from the public dataset.
SAMPLES = [
    ("crp_19d438471fec4229", "sui_F10_multi_get_objects_amp", "V8 (response_amp, Sui) — survives pcap-drop"),
    ("crp_2a9d40758d9a4192", "SOL_MC_grafana_anon", "V13 (service_misconfig, Solana) — survives pcap-drop"),
    ("crp_1ef98f1fc0644369", "sui_F14_devinspect_tokio_wedge", "V14 (compute_amp, Sui) — survives pcap-drop"),
    ("crp_0598afb4d5e44fb9", "sui_BENIGN_passive_fullnode", "benign passive (Sui) — tests scoreability gate"),
]
def _load_module(name: str, path: str) -> "object":
spec = importlib.util.spec_from_file_location(name, path)
module = importlib.util.module_from_spec(spec) # type: ignore[arg-type]
sys.modules[name] = module
spec.loader.exec_module(module) # type: ignore[union-attr]
return module
# Closing notes printed after the per-bundle results.
_DEPLOYMENT_NOTES = """
- predict.score_bundle() is the recommended consumption surface. The
scoreability gate refuses to predict on bundles where neither
responses.parquet nor packets.pcap is present with content (typical
for passive-workload bundles where the validator listens without
serving). Callers who want raw model output without the gate should
load model.joblib directly via joblib.load.
- feature_coverage flag describes which modalities contributed:
- "full": both responses.parquet and packets.pcap present
- "resp_only": responses.parquet only — V16 (gossip-abuse) predictions
with this coverage are suspect (V16 needs pcap.*)
- "pcap_only": packets.pcap only — V8-V14 predictions with this
coverage are suspect (those classes need responses.*)
- "none": bundle is unscoreable; gate refused
- Public dataset bundles drop raw packets.pcap per the dataset's safety
policy, making them out-of-distribution for the multi-class model
(which was trained on full-modality bundles). Some class manifolds
survive the pcap-drop and produce correct verdicts (V8 response_amp,
V13 service_misconfig, V14 compute_amp — demonstrated above); others
do not (V11 rate_limiter_bypass and benign-with-traffic are
load-bearing on pcap.* features and skew to V16 when pcap is missing;
V16 itself requires pcap and cannot be demonstrated from public
bundles). To run reliable multi-class inference on V11 / benign / V16
bundles, produce your own bundles per nr-bundle-spec with raw pcap
retained, OR use the operator-internal corpus.
- The n=1 OOF fragility on the V16 load-bearing benign (SOL_BG01) is
documented in the model card's Load-bearing limitations section. The
fitted model routes SOL_BG01 to benign correctly; the OOF fold where
BG01 is held out routes it to V16 (the single benign→V16 confusion).
Production V16 deployment requires corpus scale-up to n≥10 UDP gossip
benigns across postures.
""".strip()


def _print_record(corpus_id: str, expected_primitive: str, label: str,
                  record: dict) -> None:
    """Print the result block for one scored bundle.

    Args:
        corpus_id: Bundle identifier shown in the block header.
        expected_primitive: Primitive name shown next to the identifier.
        label: Human-readable expectation for this bundle.
        record: Value returned by ``predict.score_bundle()``. Only the
            keys read below are relied on; the full schema is defined
            in predict.py, which is not part of this file.
    """
    print(f"--- {corpus_id} ({expected_primitive}) ---")
    print(f" expected: {label}")
    print(f" verdict: {record['verdict']}")
    if record["verdict"] == "unscoreable":
        print(f" reason: {record['reason']}")
        print(f" n_responses_rows: {record.get('n_responses_rows', 0)}")
    else:
        print(f" argmax P: {record['argmax_p']:.4f}")
        print(f" feature_coverage: {record['feature_coverage']}")
        print(f" n_responses_rows: {record['n_responses_rows']}")
        print(" top-3 class probabilities:")
        # Highest probability first; ties keep their original order.
        ranked = sorted(record["class_probs"].items(),
                        key=lambda kv: kv[1], reverse=True)
        for cls, p in ranked[:3]:
            print(f" P({cls}) = {p:.4f}")
        if record.get("coverage_warning"):
            print(f" ⚠ coverage_warning: {record['coverage_warning']}")
    print()


def main() -> int:
    """Run the end-to-end inference demo and return the exit status.

    Downloads ``model.joblib`` and ``predict.py`` from ``MODEL_REPO``,
    imports the helper, loads the model, fetches the bundles listed in
    ``SAMPLES`` from ``DATASET_REPO``, scores each one with
    ``predict.score_bundle()`` and prints the results followed by the
    deployment notes.

    Returns:
        ``0`` once every sample has been reported. Download, import and
        scoring errors are not caught and propagate to the caller.
    """
    print("=== Multi-class softmax folded detector ===")
    print(f" model repo: {MODEL_REPO}")
    print(f" dataset repo: {DATASET_REPO}")
    print()

    # Pull model + predict helper.
    # NOTE(review): predict.py is executed as downloaded, and the model
    # is a .joblib file (presumably unpickled by predict.load_model).
    # Only run this against a repository you trust; consider pinning
    # ``revision=`` on the downloads.
    model_path = hf_hub_download(repo_id=MODEL_REPO, filename="model.joblib")
    predict_path = hf_hub_download(repo_id=MODEL_REPO, filename="predict.py")
    predict = _load_module("multiclass_predict", predict_path)
    payload = predict.load_model(model_path)
    print(f"Model loaded: {type(payload['model']).__name__}, "
          f"{len(payload['feature_names'])} features, "
          f"{len(payload['class_order'])} classes "
          f"({payload['class_order']})")
    print()

    # Pull only the sample bundles' directories, not the whole dataset.
    dataset_root = Path(snapshot_download(
        repo_id=DATASET_REPO, repo_type="dataset",
        allow_patterns=[f"{cid}/*" for cid, _, _ in SAMPLES],
    ))

    # Score each.
    for corpus_id, expected_primitive, label in SAMPLES:
        record = predict.score_bundle(dataset_root / corpus_id, payload)
        _print_record(corpus_id, expected_primitive, label, record)

    rule = "=" * 72
    print(rule)
    print("Notes on multi-class folded deployment")
    print(rule)
    print(_DEPLOYMENT_NOTES)
    print(rule)
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|