| |
| |
| """V8 cipher-agnostic byte-amplification detector — end-to-end inference example. |
| |
| Three-artefact collaboration. This script: |
| |
| 1. Downloads a bundle from the public NullRabbit/nr-bundles-public dataset |
| on Hugging Face. |
| 2. Downloads the V8 model and the scoreability-gated inference helper |
| (``predict.py``) from this repository. |
| 3. Loads the bundle manifest via the bundle-spec reference parser |
| (NullRabbitLabs/nr-bundle-spec, MIT). |
| 4. Calls ``predict.score_bundle()`` to apply the scoreability gate and |
| produce a verdict. |
| |
| A worked demonstration of the **spec → corpus → model** path: bundles on |
| disk are conformant with an open spec; the spec's reference parser loads |
| them; the scoreability-gated inference helper produces verdicts. |
| |
| Dependencies:: |
| |
| pip install huggingface_hub pyarrow scikit-learn joblib numpy |
| pip install git+https://github.com/NullRabbitLabs/nr-bundle-spec.git |
| |
| Usage:: |
| |
| python inference_example.py |
| |
| Three bundles are scored: a known-attack (sui_F10_multi_get_objects_amp), |
| a known-benign with traffic (sui_BENIGN_reproducer_pipeline), and a |
| known-benign without traffic (sui_BENIGN_passive_fullnode) — the third |
| demonstrates the scoreability gate refusing to predict on empty bundles. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import importlib.util |
| import sys |
| from pathlib import Path |
|
|
| from huggingface_hub import hf_hub_download, snapshot_download |
|
|
|
|
| |
|
|
# Hugging Face Hub repository holding the V8 model and its predict.py helper.
V8_MODEL_REPO = "NullRabbit/v8-cipher-agnostic"
# Hugging Face Hub dataset repository holding the public bundles.
DATASET_REPO = "NullRabbit/nr-bundles-public"

# Bundles scored by this example, one entry per bundle:
# (corpus id / bundle directory name, expected primitive id, ground-truth label).
SAMPLES: list[tuple[str, str, str]] = [
    (
        "crp_19d438471fec4229",
        "sui_F10_multi_get_objects_amp",
        "attack",
    ),
    (
        "crp_8b85da89c4e34d4c",
        "sui_BENIGN_reproducer_pipeline",
        "benign",
    ),
    (
        "crp_0598afb4d5e44fb9",
        "sui_BENIGN_passive_fullnode",
        "benign (passive)",
    ),
]
|
|
|
|
def _load_module(name: str, path: str) -> object:
    """Import the Python source file at *path* as a module named *name*.

    The module object is registered in ``sys.modules`` under *name* before
    its code is executed (the order used by the importlib "importing a
    source file directly" recipe), and is returned once execution succeeds.

    Args:
        name: Name to give the module and to register it under.
        path: Filesystem location of the source file to execute.

    Returns:
        The initialised module object.

    Raises:
        ImportError: If importlib cannot determine a loader for *path*
            (for example, the file has no recognised Python suffix).
        Exception: Anything raised by the module's own top-level code is
            propagated unchanged.
    """
    spec = importlib.util.spec_from_file_location(name, path)
    # spec_from_file_location() returns None instead of raising when no
    # loader matches the path; fail with a clear error rather than an
    # AttributeError on None further down.
    if spec is None or spec.loader is None:
        raise ImportError(
            f"cannot load module {name!r} from {str(path)!r}",
            name=name,
            path=str(path),
        )

    module = importlib.util.module_from_spec(spec)
    sys.modules[name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module importable by name.
        sys.modules.pop(name, None)
        raise
    return module
|
|
|
|
def _print_record(corpus_id: str, primitive_id_expected: str, label: str, record) -> None:
    """Print the report for one scored bundle, followed by a blank line.

    *record* is the mapping returned by ``predict.score_bundle()``. When its
    ``verdict`` is ``"unscoreable"`` only the reason and row count are
    shown; otherwise the score, coverage, row count and per-feature values
    are shown.
    """
    print(f"--- {corpus_id} ({primitive_id_expected}) ---")
    print(f" ground_truth label: {label}")
    print(f" verdict: {record['verdict']}")
    if record["verdict"] == "unscoreable":
        print(f" reason: {record['reason']}")
        # The row count is read with a default of 0 in this branch only.
        print(f" n_responses_rows: {record.get('n_responses_rows', 0)}")
    else:
        print(f" V8 score: {record['v8_score']:.4f}")
        print(f" feature_coverage: {record['feature_coverage']}")
        print(f" n_responses_rows: {record['n_responses_rows']}")
        print(" features:")
        for feature_name, value in record["features"].items():
            print(f" {feature_name:<28s} {value:>12.4f}")
    print()


def _print_deployment_notes() -> None:
    """Print the static "Notes on V8 deployment" section between rules."""
    rule = "=" * 72
    print(rule)
    print("Notes on V8 deployment")
    print(rule)
    print("""
- predict.score_bundle() is the recommended consumption surface. The
scoreability gate refuses to predict on bundles where responses.parquet
is missing or zero-rows. Callers who want raw model output without the
gate should load model.joblib directly via joblib.load.

- feature_coverage=resp_only means raw packets.pcap is absent (as in the
public nr-bundles-public bundles). V8's two cardinality features default
to 0, which under-scores attacks relative to the model's training
expectation. For full-coverage inference, produce your own bundles per
nr-bundle-spec with raw pcap retained.

- V8 is a binary detector for the byte-amplification family. Attacks from
other vulnerability families (reconnaissance, service_misconfig,
auth_bypass, rate_limiter_bypass with simulateTransaction shape) will
score "benign" — this is correct behaviour, not a failure. Use the
multi-class softmax model NullRabbit/multiclass-folded for unified
attack-family detection.
""".strip())
    print(rule)


def main() -> int:
    """Download the V8 model and sample bundles, score each bundle, print results.

    Steps, in order: print a header naming the two repositories; download
    ``model.joblib`` and ``predict.py`` from ``V8_MODEL_REPO``; load the
    helper as a module and the model through ``predict.load_v8()``; download
    the directories listed in ``SAMPLES`` from ``DATASET_REPO``; score every
    sample with ``predict.score_bundle()`` and print its report; finally
    print the deployment notes.

    Returns:
        ``0`` when the run completes. Download, import or scoring errors are
        not caught here and propagate to the caller.
    """
    print("=== V8 cipher-agnostic byte-amplification detector ===")
    print(f" model repo: {V8_MODEL_REPO}")
    print(f" dataset repo: {DATASET_REPO}")
    print()

    # NOTE(review): predict.py is downloaded and executed, and load_v8()
    # presumably unpickles model.joblib (joblib is pickle-based) — only run
    # this against repositories you trust; consider pinning `revision=`.
    model_path = hf_hub_download(repo_id=V8_MODEL_REPO, filename="model.joblib")
    predict_path = hf_hub_download(repo_id=V8_MODEL_REPO, filename="predict.py")

    # The helper is loaded from its downloaded file path, not imported by name.
    predict = _load_module("v8_predict", predict_path)
    payload = predict.load_v8(model_path)

    print(f"V8 loaded: {type(payload['model']).__name__}, "
          f"{len(payload['feature_names'])} features, "
          f"manifest={payload['manifest_name']!r}")
    print()

    # Limit the snapshot to the sample bundle directories.
    dataset_root = Path(snapshot_download(
        repo_id=DATASET_REPO,
        repo_type="dataset",
        allow_patterns=[f"{cid}/*" for cid, _, _ in SAMPLES],
    ))

    for corpus_id, primitive_id_expected, label in SAMPLES:
        bundle_dir = dataset_root / corpus_id
        record = predict.score_bundle(bundle_dir, payload)
        _print_record(corpus_id, primitive_id_expected, label, record)

    _print_deployment_notes()
    return 0
|
|
|
|
if __name__ == "__main__":
    # Run the example and use main()'s return value as the exit status.
    sys.exit(main())
|
|