csv-datatable-trigger-poc / verify_remote_poc.py
hacnho's picture
Upload verify_remote_poc.py with huggingface_hub
cc3084f verified
Raw
History Blame Contribute Delete
1.71 kB
#!/usr/bin/env python3
from __future__ import annotations
import json
import shutil
import tempfile
import urllib.request
from pathlib import Path
import datatable as dt
# URL prefix under which each entry of FILES is downloaded by main().
BASE = "https://huggingface.co/hacnho/csv-datatable-trigger-poc/resolve/main"

# Label -> CSV file name; the same names are used remotely and on disk.
FILES = dict(
    control="control.csv",
    malicious="datatable_fread_trigger.csv",
)

# Probe vectors that run_dir() evaluates against every file in FILES.
PROBES = [
    [4, 2, 7],
    [4, 2, 0],
    [1, 2, 7],
    [9, 9, 9],
]
def infer(path: Path, vec: list[int]) -> float:
    """Score a probe vector using the parameters stored in a CSV file.

    The file at *path* is parsed with ``dt.fread`` and converted with
    ``Frame.to_list()``.  Only element 0 of each of the first five entries
    of that list is consulted:

    * entries 0-2 (converted to ``int``) form the trigger list,
    * entry 3 (converted to ``float``) is the "boost" value,
    * entry 4 (converted to ``float``) is the "bias" value.

    Args:
        path: Location of the CSV file to read.
        vec: Probe vector compared for equality with the trigger list.

    Returns:
        The boost value when *vec* equals the trigger list, else the bias.
    """
    columns = dt.fread(str(path)).to_list()

    # Keep the conversion order (trigger, boost, bias) so that a malformed
    # file fails at the same point it always has.
    trigger = [int(columns[index][0]) for index in range(3)]
    boost = float(columns[3][0])
    bias = float(columns[4][0])

    if vec == trigger:
        return boost
    return bias
def run_dir(base_dir: Path) -> dict:
    """Evaluate every probe in PROBES against both CSV files in *base_dir*.

    For each probe, ``infer`` is called on the "control" file and then on
    the "malicious" file (file names come from FILES).

    Args:
        base_dir: Directory containing the files named in FILES.

    Returns:
        A dict with:

        * ``trigger_vector``: the fixed vector ``[4, 2, 7]``,
        * ``probes``: one ``{"probe", "control", "malicious"}`` dict per probe,
        * ``backdoor_observed``: True if the two files disagree on a probe
          equal to the trigger vector,
        * ``non_trigger_clean``: True if the two files agree on every probe
          that is not equal to the trigger vector.
    """
    trigger = [4, 2, 7]

    def score(label: str, probe: list[int]) -> float:
        # Resolve the file for this label and score the probe against it.
        return infer(base_dir / FILES[label], probe)

    results = [
        {
            "probe": probe,
            "control": score("control", probe),
            "malicious": score("malicious", probe),
        }
        for probe in PROBES
    ]

    # Both summary flags depend only on the rows where the two files differ.
    disagreements = [
        entry for entry in results if entry["control"] != entry["malicious"]
    ]
    backdoor_observed = any(entry["probe"] == trigger for entry in disagreements)
    non_trigger_clean = all(entry["probe"] == trigger for entry in disagreements)

    return {
        "trigger_vector": list(trigger),
        "probes": results,
        "backdoor_observed": backdoor_observed,
        "non_trigger_clean": non_trigger_clean,
    }
def main() -> int:
    """Fetch the PoC CSV files, print the comparison report, and clean up.

    Every file named in FILES is downloaded from BASE into a freshly created
    temporary directory.  The result of ``run_dir`` on that directory is
    printed to stdout as indented JSON.  The temporary directory is removed
    afterwards (removal errors are ignored), whether or not an error occurred.

    Returns:
        0 once the report has been printed.
    """
    scratch = Path(tempfile.mkdtemp(prefix="csv_datatable_trigger_remote_"))
    try:
        for filename in FILES.values():
            source_url = f"{BASE}/{filename}"
            destination = scratch / filename
            urllib.request.urlretrieve(source_url, destination)

        report = run_dir(scratch)
        print(json.dumps(report, indent=2, ensure_ascii=False))
    finally:
        # Best-effort cleanup: never let a removal failure mask the real error.
        shutil.rmtree(scratch, ignore_errors=True)
    return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)