""" Extract Roboflow dataset zip with Windows long-path support. Roboflow ships images with absurdly long filenames (URL slugs preserved). Windows' default 260-char MAX_PATH limit breaks normal extraction. We use the \\?\ prefix which opts a path into the long-path code path. Usage: py scripts/extract_dataset.py """ import sys import zipfile from pathlib import Path ZIP_PATH = Path("data/raw/roboflow_honey-bee-detection-model-zgjnb_v4/roboflow.zip") DEST = Path("data/raw/roboflow_honey-bee-detection-model-zgjnb_v4").resolve() def lp(path) -> str: """Return a Windows long-path string (\\?\C:\...) if needed.""" s = str(path) if sys.platform == "win32": # \\?\ prefix MUST use absolute path with backslashes s = s.replace("/", "\\") if not s.startswith("\\\\?\\"): s = "\\\\?\\" + s return s def main() -> None: if not ZIP_PATH.exists(): raise SystemExit(f"Missing {ZIP_PATH}") with zipfile.ZipFile(ZIP_PATH) as z: members = z.namelist() total = len(members) print(f"Extracting {total} entries from {ZIP_PATH.name} ...") ok = 0 fail = 0 for i, member in enumerate(members): if i % 500 == 0: print(f" progress: {i}/{total} (ok={ok}, fail={fail})") target = DEST / member try: target.parent.mkdir(parents=True, exist_ok=True) except Exception: pass if member.endswith("/"): continue try: with z.open(member) as src: data = src.read() with open(lp(target), "wb") as dst: dst.write(data) ok += 1 except Exception as e: fail += 1 if fail <= 5: print(f" [!] {member[:80]}... -> {type(e).__name__}: {e}") print(f"\nDone. ok={ok}, fail={fail}") if __name__ == "__main__": main()