Spaces:
Running on Zero
Running on Zero
File size: 2,005 Bytes
027ff29 238bdf6 027ff29 238bdf6 027ff29 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | """
Extract Roboflow dataset zip with Windows long-path support.
Roboflow ships images with absurdly long filenames (URL slugs preserved).
Windows' default 260-char MAX_PATH limit breaks normal extraction.
We use the \\\\?\\ prefix, which opts a path into the long-path code path.
Usage:
py scripts/extract_dataset.py
"""
import sys
import zipfile
from pathlib import Path
# Location of the downloaded Roboflow archive, relative to the current
# working directory (the script is expected to run from the project root).
ZIP_PATH = Path("data/raw/roboflow_honey-bee-detection-model-zgjnb_v4/roboflow.zip")

# Extraction root: the directory that holds the archive, made absolute
# because the Windows \\?\ long-path prefix only works on absolute paths.
DEST = ZIP_PATH.parent.resolve()
def lp(path) -> str:
    r"""Return ``path`` as a string, in Windows extended-length form on Windows.

    On every other platform the result is simply ``str(path)``.

    On Windows the result carries the ``\\?\`` prefix (``\\?\C:\...``), which
    lifts the 260-character MAX_PATH limit. That prefix disables the usual
    path normalisation, so the path must be absolute and use backslashes:

    * forward slashes are converted to backslashes;
    * a path already starting with ``\\?\`` or ``\\.\`` is returned as is;
    * a relative (or drive-less) path is made absolute first;
    * a UNC path ``\\server\share\...`` becomes ``\\?\UNC\server\share\...``.

    Args:
        path: A ``str`` or ``pathlib.Path``.

    Returns:
        The path string to pass to ``open()`` and ``os`` functions.
    """
    s = str(path)
    if sys.platform != "win32":
        return s

    import os  # local import: only needed on the Windows branch

    s = s.replace("/", "\\")
    # Already an extended-length or device path: prefixing again would break it.
    if s.startswith(("\\\\?\\", "\\\\.\\")):
        return s
    # The prefix is only valid on fully qualified paths; anchor relative and
    # drive-less ("\foo") paths before adding it.
    if not os.path.isabs(s) or not os.path.splitdrive(s)[0]:
        s = os.path.abspath(s)
    # UNC shares use their own extended form: \\?\UNC\server\share\...
    if s.startswith("\\\\"):
        return "\\\\?\\UNC\\" + s[2:]
    return "\\\\?\\" + s
def main() -> None:
    """Extract every entry of ``ZIP_PATH`` into ``DEST``, one entry at a time.

    Extraction is best-effort: a failure on one entry is counted, the first
    five failures are printed, and the loop continues. Progress is printed
    every 500 entries and a final ok/fail summary is printed at the end.

    All filesystem calls go through ``lp()`` so that directory creation and
    file writes both work past the Windows MAX_PATH limit.

    Raises:
        SystemExit: If ``ZIP_PATH`` does not exist.
    """
    import os  # local import: needed for normpath/makedirs in this function only

    if not ZIP_PATH.exists():
        raise SystemExit(f"Missing {ZIP_PATH}")

    ok = 0
    fail = 0
    with zipfile.ZipFile(ZIP_PATH) as z:
        members = z.namelist()
        total = len(members)
        print(f"Extracting {total} entries from {ZIP_PATH.name} ...")

        for i, member in enumerate(members):
            if i % 500 == 0:
                print(f" progress: {i}/{total} (ok={ok}, fail={fail})")
            try:
                # Collapse ".." lexically (no filesystem access, so long paths
                # are fine) and refuse entries that would land outside DEST.
                target = Path(os.path.normpath(DEST / member))
                if target != DEST and DEST not in target.parents:
                    raise ValueError("entry would be written outside the destination")

                # Directory entries create themselves (keeps empty folders);
                # file entries only need their parent. Going through lp()
                # lets directory creation succeed on over-long Windows paths.
                is_dir = member.endswith("/")
                os.makedirs(lp(target if is_dir else target.parent), exist_ok=True)
                if is_dir:
                    continue

                # Read fully before opening the output so that a corrupt
                # entry does not leave a truncated file behind.
                data = z.read(member)
                with open(lp(target), "wb") as dst:
                    dst.write(data)
                ok += 1
            except Exception as e:
                fail += 1
                if fail <= 5:
                    print(f" [!] {member[:80]}... -> {type(e).__name__}: {e}")

    print(f"\nDone. ok={ok}, fail={fail}")
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|