# Spaces: Running on Zero  (page-header residue captured with this file; not part of the script)
r"""
Extract Roboflow dataset zip with Windows long-path support.

Roboflow ships images with absurdly long filenames (URL slugs preserved).
Windows' default 260-char MAX_PATH limit breaks normal extraction.
We use the \\?\ prefix, which opts a path into the long-path code path.

Usage:
    py scripts/extract_dataset.py
"""
| import sys | |
| import zipfile | |
| from pathlib import Path | |
# Dataset archive to extract, given relative to the current working directory.
ZIP_PATH = Path("data/raw/roboflow_honey-bee-detection-model-zgjnb_v4/roboflow.zip")
# Extraction root: the directory that holds the archive, as an absolute path.
DEST = ZIP_PATH.parent.resolve()
def lp(path) -> str:
    r"""Return *path* as a string that is not subject to Windows' MAX_PATH limit.

    On non-Windows platforms the result is simply ``str(path)``.

    On Windows the path is converted to backslashes, made absolute and
    normalized, and given the extended-length prefix: ``\\?\C:\...`` for
    drive paths, ``\\?\UNC\server\share\...`` for network paths. Paths that
    already start with ``\\?\`` or ``\\.\`` are returned as they are.

    Args:
        path: A ``str`` or path-like object naming a file or directory.

    Returns:
        The path as a string, prefixed for extended-length handling on Windows.
    """
    s = str(path)
    if sys.platform != "win32":
        return s

    import os  # local import keeps this function self-contained

    s = s.replace("/", "\\")
    if s.startswith(("\\\\?\\", "\\\\.\\")):
        # Already an extended-length or device-namespace path.
        return s
    # Windows performs no normalization on \\?\ paths, so the path must be
    # absolute and free of "." / ".." segments before the prefix is applied.
    s = os.path.abspath(s)
    if s.startswith("\\\\"):
        # UNC path: \\server\share\... becomes \\?\UNC\server\share\...
        return "\\\\?\\UNC\\" + s[2:]
    return "\\\\?\\" + s
def main() -> None:
    """Extract every entry of ``ZIP_PATH`` beneath ``DEST``.

    Progress is printed every 500 entries. Extraction is best-effort: an
    entry that cannot be extracted is counted as a failure, the first five
    failures are printed, and the run continues with the next entry.
    Entries whose name would resolve outside ``DEST`` are refused and
    counted as failures.

    Raises:
        SystemExit: If ``ZIP_PATH`` does not exist.
    """
    import os  # local import keeps this function self-contained

    if not ZIP_PATH.exists():
        raise SystemExit(f"Missing {ZIP_PATH}")

    ok = 0
    fail = 0
    with zipfile.ZipFile(ZIP_PATH) as z:
        members = z.namelist()
        total = len(members)
        print(f"Extracting {total} entries from {ZIP_PATH.name} ...")
        for i, member in enumerate(members):
            if i % 500 == 0:
                print(f" progress: {i}/{total} (ok={ok}, fail={fail})")
            try:
                # Collapse "." / ".." segments so the containment check below
                # sees the location the entry would really be written to.
                target = Path(os.path.normpath(DEST / member))
                if target != DEST and DEST not in target.parents:
                    raise ValueError("entry escapes the destination directory")
                if member.endswith("/"):
                    # Explicit directory entry: create it, nothing to write.
                    os.makedirs(lp(target), exist_ok=True)
                    continue
                # Directories need the long-path form too, otherwise deep
                # trees cannot be created on Windows.
                os.makedirs(lp(target.parent), exist_ok=True)
                # Read fully before opening the output so a corrupt entry
                # does not leave a truncated file behind.
                data = z.read(member)
                with open(lp(target), "wb") as dst:
                    dst.write(data)
                ok += 1
            except Exception as e:
                # Deliberate per-entry boundary: report and keep going.
                fail += 1
                if fail <= 5:
                    print(f" [!] {member[:80]}... -> {type(e).__name__}: {e}")
    print(f"\nDone. ok={ok}, fail={fail}")
# Run the extraction only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()