Spaces:
Sleeping
Sleeping
nr-bundle-classifier — initial release (V8 binary + multiclass-folded, 2026-05-13)
4c35f56 verified | """nr-bundle-classifier — Gradio Space for the NullRabbit bundle v1 classifier. | |
| Accepts a user-uploaded bundle directory (zip or extracted), validates it | |
| against the open bundle v1 spec (nr-bundle-spec), runs both V8 (cipher- | |
| agnostic byte-amplification binary detector) and multiclass-folded (9-class | |
| V8-V14+V16 unified detector) inference, and displays: | |
| - bundle metadata (corpus_id, primitive_id if labelled, fidelity_class) | |
| - V8 binary verdict + score | |
| - multiclass-folded 9-class softmax with per-class probabilities | |
| - scoreability + feature-coverage flags | |
| - any coverage warnings (e.g. pcap-sensitive misclassification risk) | |
| Demonstrates the spec → corpus → model → unified-detector path end-to-end | |
| on user-supplied data. The Space is a hosted variant of the operator- | |
| internal demo at github.com/NullRabbitLabs/nr-substrate. | |
| License: Apache-2.0. SDK: Gradio. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import shutil | |
| import tempfile | |
| import zipfile | |
| from pathlib import Path | |
| from typing import Any | |
| import gradio as gr | |
| import joblib | |
| import numpy as np | |
| import pyarrow.parquet as pq | |
| from bundle_spec import BundleManifest | |
| from huggingface_hub import hf_hub_download | |
# Hugging Face Hub repo ids of the two published detectors; _load_models()
# downloads "model.joblib" from each.
V8_REPO = "NullRabbit/v8-cipher-agnostic"
MULTICLASS_REPO = "NullRabbit/multiclass-folded"
# Hub dataset repo id of the public sample bundles. Not referenced by any
# other code visible in this file.
DATASET_REPO = "NullRabbit/nr-bundles-public"
# Process-wide cache of loaded model payloads, keyed by "v8" / "multiclass"
# and filled lazily by _load_models().
_models_cache: dict[str, Any] = {}
def _load_models() -> tuple[dict, dict]:
    """Return the ``(v8, multiclass)`` model payloads, loading them on demand.

    A payload is downloaded from its Hub repo (file ``model.joblib``) and
    deserialised with joblib the first time it is requested, then stored in
    ``_models_cache`` so that later calls reuse the in-memory object.
    """
    # NOTE(review): joblib.load unpickles the artefact, which can execute
    # arbitrary code; this is only safe while both repos remain trusted.
    for cache_key, repo_id in (("v8", V8_REPO), ("multiclass", MULTICLASS_REPO)):
        if cache_key in _models_cache:
            continue
        artefact_path = hf_hub_download(repo_id=repo_id, filename="model.joblib")
        _models_cache[cache_key] = joblib.load(artefact_path)
    return _models_cache["v8"], _models_cache["multiclass"]
def _modality_state(bundle_dir: Path) -> tuple[bool, int, bool]:
    """Report which data modalities a bundle directory contains.

    Args:
        bundle_dir: Root directory of the (extracted) bundle.

    Returns:
        ``(has_resp, n_resp, has_pcap)`` where ``n_resp`` is the row count of
        ``responses.parquet`` (0 when the file is absent), ``has_resp`` is
        true when that count is positive, and ``has_pcap`` is true when
        ``packets.pcap`` exists as a regular file.
    """
    responses_path = bundle_dir / "responses.parquet"
    n_resp = 0
    if responses_path.is_file():
        # Only the row count is needed, so read the Parquet footer metadata
        # instead of materialising the whole user-supplied table in memory.
        n_resp = pq.read_metadata(responses_path).num_rows
    has_pcap = (bundle_dir / "packets.pcap").is_file()
    return n_resp > 0, n_resp, has_pcap
def _extract_v8_features(bundle_dir: Path) -> dict[str, float]:
    """Build the V8 feature mapping for one bundle directory.

    Every feature starts at 0.0. The ``resp.*`` entries are overwritten from
    ``responses.parquet`` when that file exists and has at least one row; the
    ``pcap.*`` entries are never computed here and therefore stay at 0.0.
    """
    feature_names = (
        "pcap.unique_dst_ports",
        "pcap.unique_src_ports",
        "resp.amp_ratio_max",
        "resp.amp_ratio_mean",
        "resp.amp_ratio_median",
        "resp.req_bytes_max",
        "resp.resp_bytes_max",
    )
    features = dict.fromkeys(feature_names, 0.0)

    responses_path = bundle_dir / "responses.parquet"
    if not responses_path.is_file():
        return features
    responses = pq.read_table(responses_path)
    if not responses.num_rows > 0:
        return features

    request_sizes = responses.column("request_size_bytes").to_numpy()
    response_sizes = responses.column("response_size_bytes").to_numpy()
    # Per-row amplification ratio; rows whose request size is not positive
    # count as 0.0. The division is evaluated for every row before np.where
    # masks it, hence the errstate guard against divide-by-zero warnings.
    with np.errstate(divide="ignore", invalid="ignore"):
        amp_ratios = np.where(
            request_sizes > 0, response_sizes / request_sizes, 0.0
        )

    features.update(
        {
            "resp.req_bytes_max": float(request_sizes.max()),
            "resp.resp_bytes_max": float(response_sizes.max()),
            "resp.amp_ratio_max": float(amp_ratios.max()),
            "resp.amp_ratio_mean": float(amp_ratios.mean()),
            "resp.amp_ratio_median": float(np.median(amp_ratios)),
        }
    )
    return features
def _extract_multiclass_features(bundle_dir: Path, feature_names: list[str]) -> np.ndarray:
    """Assemble the single-row feature matrix for the multi-class model.

    This is a minimal fallback extractor: only the ``resp.*`` statistics
    derived from ``responses.parquet`` are filled in, and only for names that
    appear in ``feature_names``. Every other requested feature stays at 0.0.
    The model's behaviour on such partial-coverage inputs is surfaced through
    the ``coverage_warning`` field of the inference output.

    Returns:
        A float array of shape ``(1, len(feature_names))`` whose columns
        follow the order of ``feature_names``.
    """
    values = dict.fromkeys(feature_names, 0.0)

    responses_path = bundle_dir / "responses.parquet"
    if responses_path.is_file():
        responses = pq.read_table(responses_path)
        if responses.num_rows > 0:
            request_sizes = responses.column("request_size_bytes").to_numpy()
            response_sizes = responses.column("response_size_bytes").to_numpy()
            # Rows whose request size is not positive get a ratio of 0.0; the
            # division still runs for them, so silence the numpy warnings.
            with np.errstate(divide="ignore", invalid="ignore"):
                amp_ratios = np.where(
                    request_sizes > 0, response_sizes / request_sizes, 0.0
                )
            computed = {
                "resp.req_bytes_max": float(request_sizes.max()),
                "resp.resp_bytes_max": float(response_sizes.max()),
                "resp.amp_ratio_max": float(amp_ratios.max()),
                "resp.amp_ratio_mean": float(amp_ratios.mean()),
                "resp.amp_ratio_median": float(np.median(amp_ratios)),
            }
            # Only overwrite features the model actually asked for.
            values.update(
                {name: stat for name, stat in computed.items() if name in values}
            )

    row = [values[name] for name in feature_names]
    return np.array([row], dtype=float)
| def classify_bundle(uploaded_path: str | None) -> dict[str, Any]: | |
| """Main entrypoint. Accepts a bundle directory (zip or extracted) | |
| and returns a verdict dict suitable for Gradio JSON display.""" | |
| if not uploaded_path: | |
| return {"error": "Please upload a bundle (.zip or extracted directory)."} | |
| upload = Path(uploaded_path) | |
| workdir = Path(tempfile.mkdtemp(prefix="nr-bundle-")) | |
| try: | |
| # Handle zip vs directory uploads. | |
| if upload.is_file() and upload.suffix == ".zip": | |
| with zipfile.ZipFile(upload, "r") as zf: | |
| zf.extractall(workdir) | |
| bundle_root = workdir | |
| # If the zip contains a single top-level directory, descend. | |
| entries = [p for p in workdir.iterdir() if p.is_dir()] | |
| if len(entries) == 1 and not (workdir / "manifest.json").is_file(): | |
| bundle_root = entries[0] | |
| elif upload.is_dir(): | |
| bundle_root = upload | |
| else: | |
| return {"error": "Unsupported upload: provide a .zip or directory."} | |
| mf_path = bundle_root / "manifest.json" | |
| if not mf_path.is_file(): | |
| return {"error": f"No manifest.json found in upload (looked at {bundle_root})."} | |
| # Validate against bundle v1 spec. | |
| try: | |
| manifest = BundleManifest.model_validate_json(mf_path.read_text()) | |
| except Exception as exc: | |
| return { | |
| "error": "Bundle does not validate against nr-bundle-spec v0.1.0.", | |
| "detail": str(exc)[:400], | |
| } | |
| has_resp, n_resp, has_pcap = _modality_state(bundle_root) | |
| v8_payload, mc_payload = _load_models() | |
| # V8 binary inference. | |
| v8_features = _extract_v8_features(bundle_root) | |
| X_v8 = np.array([[v8_features[n] for n in v8_payload["feature_names"]]], dtype=float) | |
| v8_score = float(v8_payload["model"].predict_proba(X_v8)[0, 1]) | |
| v8_verdict = "attack" if v8_score >= 0.5 else "benign" | |
| # Multi-class inference. | |
| if not (has_resp or has_pcap): | |
| mc_block = { | |
| "verdict": "unscoreable", | |
| "reason": "No responses.parquet (with rows) and no packets.pcap present.", | |
| } | |
| else: | |
| X_mc = _extract_multiclass_features(bundle_root, mc_payload["feature_names"]) | |
| proba = mc_payload["model"].predict_proba(X_mc)[0] | |
| class_order = mc_payload["class_order"] | |
| argmax = int(np.argmax(proba)) | |
| argmax_class = class_order[argmax] | |
| argmax_p = float(proba[argmax]) | |
| coverage = ("full" if has_resp and has_pcap | |
| else "resp_only" if has_resp | |
| else "pcap_only" if has_pcap | |
| else "none") | |
| warning = None | |
| if coverage == "resp_only" and argmax_class != "V16" and argmax_p < 0.8: | |
| warning = ( | |
| f"argmax={argmax_class} with P={argmax_p:.3f} on resp_only " | |
| "coverage; multiclass-folded was trained on full-modality " | |
| "bundles. For reliable V8-V14 inference provide bundles " | |
| "with raw packets.pcap present." | |
| ) | |
| elif coverage == "resp_only" and argmax_class == "V16": | |
| warning = ( | |
| "argmax=V16 with resp_only coverage. V16 is load-bearing " | |
| "on pcap.* features; this is likely a missing-modality " | |
| "artefact, not a true gossip-abuse detection. Provide " | |
| "bundles with raw packets.pcap for V16 inference." | |
| ) | |
| mc_block = { | |
| "verdict": argmax_class, | |
| "argmax_p": round(argmax_p, 4), | |
| "class_probs": {c: round(float(proba[i]), 4) | |
| for i, c in enumerate(class_order)}, | |
| "feature_coverage": coverage, | |
| "coverage_warning": warning, | |
| } | |
| return { | |
| "bundle_manifest": { | |
| "corpus_id": manifest.corpus_id, | |
| "primitive_id": manifest.primitive_id, | |
| "family_id": manifest.family_id, | |
| "chain": manifest.chain, | |
| "fidelity_class": ( | |
| manifest.provenance.fidelity_class.value | |
| if hasattr(manifest.provenance.fidelity_class, "value") | |
| else str(manifest.provenance.fidelity_class) | |
| ), | |
| "ground_truth_label": ( | |
| manifest.ground_truth_label.value | |
| if hasattr(manifest.ground_truth_label, "value") | |
| else str(manifest.ground_truth_label) | |
| ), | |
| }, | |
| "modality_state": { | |
| "responses_rows": n_resp, | |
| "packets_pcap_present": has_pcap, | |
| }, | |
| "v8_binary": { | |
| "score": round(v8_score, 4), | |
| "verdict": v8_verdict, | |
| }, | |
| "multiclass_folded": mc_block, | |
| } | |
| finally: | |
| shutil.rmtree(workdir, ignore_errors=True) | |
# ── Gradio interface ──────────────────────────────────────────────
# Markdown rendered at the top of the Space page. Paragraphs are separated by
# blank lines so the bullet list does not absorb the paragraph that follows.
DESCRIPTION = """
# nr-bundle-classifier

Run a bundle (in the open [bundle v1 format](https://github.com/NullRabbitLabs/nr-bundle-spec)) through NullRabbit's published detectors:

- **[V8 cipher-agnostic byte-amplification detector](https://huggingface.co/NullRabbit/v8-cipher-agnostic)** — binary attack/benign classification for byte-amplification family
- **[Multi-class softmax folded detector](https://huggingface.co/NullRabbit/multiclass-folded)** — 9-class unified detector (benign + V8/V9/V10/V11/V12/V13/V14/V16)

Upload a bundle directory (zip or extracted) — the Space validates against bundle v1 spec, runs both detectors, and returns per-class probabilities plus scoreability + coverage flags. Sample bundles available at [NullRabbit/nr-bundles-public](https://huggingface.co/datasets/NullRabbit/nr-bundles-public).

This is the data-layer artefact of NullRabbit Labs' research on **autonomous defence for decentralised networks**. The methodology is documented in the [substrate paper](https://github.com/NullRabbitLabs/nr-bundle-spec) (in preparation); the governance layer is published separately as the [earned-autonomy paper](https://doi.org/10.5281/zenodo.18406828).

**Note**: bundles in `nr-bundles-public` have raw `packets.pcap` dropped per the dataset's safety policy. Some class manifolds (V8/V13/V14) survive this and produce correct verdicts; others (V11, benign-with-traffic, V16) are load-bearing on pcap features and skew accordingly. Coverage warnings emit when the predicted class is sensitive to the missing modality. For reliable inference on V11/benign-with-traffic/V16, provide bundles with raw pcap retained.
"""
# Single-page UI: upload widget and button in the left column, JSON verdict in
# the right column, followed by a footer of related links.
with gr.Blocks(title="nr-bundle-classifier") as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        with gr.Column():
            # NOTE(review): file_count="single" looks like it accepts one file
            # only, so the "extracted dir" path of classify_bundle may be
            # unreachable from this widget — confirm against the Gradio docs.
            upload = gr.File(label="Bundle (.zip or extracted dir)",
                             file_count="single")
            run_btn = gr.Button("Classify", variant="primary")
        with gr.Column():
            output = gr.JSON(label="Verdict")
    # The click handler receives the uploaded file and fills the JSON panel.
    run_btn.click(fn=classify_bundle, inputs=upload, outputs=output)
    gr.Markdown("""
---
**Related**:
- [`nr-bundle-spec`](https://github.com/NullRabbitLabs/nr-bundle-spec) — open bundle v1 format (MIT)
- [`nr-bundles-public`](https://huggingface.co/datasets/NullRabbit/nr-bundles-public) — curated public sample (CC-BY-4.0)
- [`v8-cipher-agnostic`](https://huggingface.co/NullRabbit/v8-cipher-agnostic) — binary detector (Apache-2.0)
- [`multiclass-folded`](https://huggingface.co/NullRabbit/multiclass-folded) — unified detector (Apache-2.0)
- [NullRabbit Labs](https://huggingface.co/NullRabbit) · [nullrabbit.ai](https://nullrabbit.ai)
""")
# Launch the Gradio app only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    demo.launch()