Upload folder using huggingface_hub
Browse files- README.md +50 -0
- canary_cloudpickle.pkl +3 -0
- canary_dill.pkl +3 -0
- controlled_load.txt +12 -0
- env.txt +4 -0
- logs/serialization_canary_loaded.txt +1 -0
- make_poc_canaries.py +20 -0
- modelscan_cloudpickle.txt +7 -0
- modelscan_dill.txt +7 -0
- sha256.txt +2 -0
- verify_cloudpickle_dill_load.py +29 -0
README.md
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ModelScan v0.8.8 cloudpickle/dill scanner false negative PoC
|
| 2 |
+
|
| 3 |
+
## Summary
|
| 4 |
+
|
| 5 |
+
ModelScan v0.8.8 reports no issues for benign canary artifacts serialized with `cloudpickle` and `dill`, while controlled local loading of the same artifacts creates a benign marker file.
|
| 6 |
+
|
| 7 |
+
The same canary pattern is detected when serialized with standard `pickle` and `joblib`, suggesting a scanner coverage gap for cloudpickle/dill artifacts.
|
| 8 |
+
|
| 9 |
+
## Files
|
| 10 |
+
|
| 11 |
+
- `canary_cloudpickle.pkl` — benign cloudpickle canary
|
| 12 |
+
- `canary_dill.pkl` — benign dill canary
|
| 13 |
+
- `verify_cloudpickle_dill_load.py` — controlled local verification script
|
| 14 |
+
- `modelscan_cloudpickle.txt` — ModelScan output for cloudpickle artifact
|
| 15 |
+
- `modelscan_dill.txt` — ModelScan output for dill artifact
|
| 16 |
+
- `controlled_load.txt` — output of the controlled load, showing that it creates the marker file
|
| 17 |
+
- `env.txt` — environment versions
|
| 18 |
+
- `sha256.txt` — artifact hashes
|
| 19 |
+
|
| 20 |
+
## Reproduction
|
| 21 |
+
|
| 22 |
+
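Run the following from the directory that contains these files, in a local test environment; the canaries write their marker to `logs/` relative to the current working directory: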
|
| 23 |
+
|
| 24 |
+
```bash
|
| 25 |
+
python -m venv .venv
|
| 26 |
+
source .venv/bin/activate
|
| 27 |
+
pip install modelscan==0.8.8 cloudpickle==3.1.2 dill==0.4.1
|
| 28 |
+
|
| 29 |
+
modelscan -p canary_cloudpickle.pkl
|
| 30 |
+
modelscan -p canary_dill.pkl
|
| 31 |
+
|
| 32 |
+
python verify_cloudpickle_dill_load.py
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
## Expected result
|
| 36 |
+
|
| 37 |
+
ModelScan reports no issues for both `canary_cloudpickle.pkl` and `canary_dill.pkl`.
|
| 38 |
+
|
| 39 |
+
Controlled loading reports:
|
| 40 |
+
|
| 41 |
+
```text
|
| 42 |
+
exists_after=True
|
| 43 |
+
marker_content=serialization canary was loaded
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
## Impact
|
| 47 |
+
|
| 48 |
+
A cloudpickle or dill model artifact may pass ModelScan with zero findings and still invoke a callable embedded in the artifact (in this PoC, a file write) when an application loads it with `cloudpickle.load()` or `dill.load()`.
|
| 49 |
+
|
| 50 |
+
This PoC is benign. It writes a local marker file only.
|
canary_cloudpickle.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dbdc4298f44a46950e81144c672361c88e029bb6a69d18e622f4b6828e58bb9e
|
| 3 |
+
size 207
|
canary_dill.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a528fc5de8b30c05a32e0ae255e70e60ff1c707dd2674a580abfc1f24673423c
|
| 3 |
+
size 191
|
controlled_load.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
===== CONTROLLED LOAD: cloudpickle =====
|
| 2 |
+
file=/home/nur/am_mfv_lab/hf_poc_modelscan_cloudpickle_dill_bypass/canary_cloudpickle.pkl
|
| 3 |
+
exists_before=False
|
| 4 |
+
exists_after=True
|
| 5 |
+
marker_content=serialization canary was loaded
|
| 6 |
+
|
| 7 |
+
===== CONTROLLED LOAD: dill =====
|
| 8 |
+
file=/home/nur/am_mfv_lab/hf_poc_modelscan_cloudpickle_dill_bypass/canary_dill.pkl
|
| 9 |
+
exists_before=False
|
| 10 |
+
exists_after=True
|
| 11 |
+
marker_content=serialization canary was loaded
|
| 12 |
+
|
env.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Python 3.12.13
|
| 2 |
+
modelscan, version 0.8.8
|
| 3 |
+
cloudpickle 3.1.2
|
| 4 |
+
dill 0.4.1
|
logs/serialization_canary_loaded.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
serialization canary was loaded
|
make_poc_canaries.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
import cloudpickle
|
| 3 |
+
import dill
|
| 4 |
+
|
| 5 |
+
ROOT = Path(__file__).resolve().parent
|
| 6 |
+
|
| 7 |
+
class BenignCanary:
    """Harmless marker object whose reduce value writes a fixed text file."""

    def __reduce__(self):
        """Return the ``(callable, args)`` pair stored when this object is serialized.

        The callable is the bound ``write_text`` method of a relative path and
        the single argument is a fixed marker string.
        """
        # The path is relative on purpose: it resolves against the current
        # working directory of whichever process later loads the artifact.
        target = Path("logs", "serialization_canary_loaded.txt")
        message = "serialization canary was loaded\n"
        return target.write_text, (message,)
|
| 12 |
+
|
| 13 |
+
# Serialize one canary instance with each library and store the result next
# to this script, then report the size of every artifact written.
obj = BenignCanary()

targets = (
    ("canary_cloudpickle.pkl", cloudpickle.dumps),
    ("canary_dill.pkl", dill.dumps),
)

for filename, dumps in targets:
    (ROOT / filename).write_bytes(dumps(obj, protocol=5))

print("rewrote portable canaries")
for filename, _ in targets:
    artifact = ROOT / filename
    print(artifact.name, artifact.stat().st_size)
|
modelscan_cloudpickle.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
No settings file detected at /home/nur/am_mfv_lab/hf_poc_modelscan_cloudpickle_dill_bypass/modelscan-settings.toml. Using defaults.
|
| 2 |
+
|
| 3 |
+
Scanning /home/nur/am_mfv_lab/hf_poc_modelscan_cloudpickle_dill_bypass/canary_cloudpickle.pkl using modelscan.scanners.PickleUnsafeOpScan model scan
|
| 4 |
+
|
| 5 |
+
--- Summary ---
|
| 6 |
+
|
| 7 |
+
No issues found! 🎉
|
modelscan_dill.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
No settings file detected at /home/nur/am_mfv_lab/hf_poc_modelscan_cloudpickle_dill_bypass/modelscan-settings.toml. Using defaults.
|
| 2 |
+
|
| 3 |
+
Scanning /home/nur/am_mfv_lab/hf_poc_modelscan_cloudpickle_dill_bypass/canary_dill.pkl using modelscan.scanners.PickleUnsafeOpScan model scan
|
| 4 |
+
|
| 5 |
+
--- Summary ---
|
| 6 |
+
|
| 7 |
+
No issues found! 🎉
|
sha256.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dbdc4298f44a46950e81144c672361c88e029bb6a69d18e622f4b6828e58bb9e canary_cloudpickle.pkl
|
| 2 |
+
a528fc5de8b30c05a32e0ae255e70e60ff1c707dd2674a580abfc1f24673423c canary_dill.pkl
|
verify_cloudpickle_dill_load.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Controlled local check that loading each canary artifact creates the marker file.

For every (library, artifact) pair the script removes any existing marker,
loads the artifact with that library's ``load`` function, and prints whether
the marker file exists afterwards together with its content.
"""
import os
from pathlib import Path

import cloudpickle
import dill

ROOT = Path(__file__).resolve().parent
LOGS = ROOT / "logs"
LOGS.mkdir(parents=True, exist_ok=True)

# The canaries write to the relative path "logs/serialization_canary_loaded.txt",
# which resolves against the current working directory at load time. Pin the
# working directory to ROOT so the write lands on the marker checked below,
# no matter where the script was launched from.
os.chdir(ROOT)

marker = LOGS / "serialization_canary_loaded.txt"

# (label, loader callable, artifact path) for each serialization library.
cases = [
    ("cloudpickle", cloudpickle.load, ROOT / "canary_cloudpickle.pkl"),
    ("dill", dill.load, ROOT / "canary_dill.pkl"),
]

for name, loader, path in cases:
    # Start every case from a clean state so exists_after reflects this load only.
    marker.unlink(missing_ok=True)

    print(f"===== CONTROLLED LOAD: {name} =====")
    print(f"file={path}")
    print(f"exists_before={marker.exists()}")

    # NOTE: loading pickle-family data invokes callables stored in the file.
    # Only load the benign canaries shipped alongside this script.
    with path.open("rb") as f:
        loader(f)

    print(f"exists_after={marker.exists()}")
    if marker.exists():
        print(f"marker_content={marker.read_text().strip()}")
    print()
|