File size: 2,005 Bytes
027ff29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238bdf6
027ff29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238bdf6
027ff29
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
"""
Extract Roboflow dataset zip with Windows long-path support.

Roboflow ships images with absurdly long filenames (URL slugs preserved).
Windows' default 260-char MAX_PATH limit breaks normal extraction.
We use the \\?\ prefix which opts a path into the long-path code path.

Usage:
    py scripts/extract_dataset.py
"""

import os
import shutil
import sys
import zipfile
from pathlib import Path


# Directory that holds the downloaded archive and receives its contents.
_DATASET_DIR = Path("data/raw/roboflow_honey-bee-detection-model-zgjnb_v4")

# Archive to extract; left relative to the current working directory.
ZIP_PATH = _DATASET_DIR / "roboflow.zip"
# Extraction root, made absolute once at import time.
DEST = _DATASET_DIR.resolve()


def lp(path) -> str:
    r"""Return *path* as a string, opted into Windows long-path handling.

    On Windows the result is an extended-length path (``\\?\C:\...``, or
    ``\\?\UNC\server\share\...`` for network paths). On every other
    platform the path is returned unchanged as ``str(path)``.

    Args:
        path: A ``str`` or ``os.PathLike`` naming a file or directory.

    Returns:
        The path string to hand to ``open()`` / ``os.makedirs()``.
    """
    s = str(path)
    if sys.platform != "win32":
        return s

    s = s.replace("/", "\\")
    # Already extended-length (\\?\) or a device path (\\.\): leave untouched.
    if s.startswith(("\\\\?\\", "\\\\.\\")):
        return s

    # Windows does no normalisation behind the \\?\ prefix, so the path must
    # already be absolute and free of "." / ".." components before prefixing.
    s = os.path.abspath(s)
    if s.startswith("\\\\"):
        # UNC path: \\server\share\... -> \\?\UNC\server\share\...
        return "\\\\?\\UNC\\" + s[2:]
    return "\\\\?\\" + s


def _resolve_member(member: str) -> Path:
    """Map a zip entry name to its destination path below ``DEST``.

    Raises:
        ValueError: If the entry name (absolute, or containing ``..``)
            would resolve to a location outside ``DEST``.
    """
    # normpath collapses "." / ".." purely textually, without touching disk.
    target = Path(os.path.normpath(DEST / member))
    try:
        target.relative_to(DEST)
    except ValueError:
        raise ValueError(f"entry would be written outside {DEST}") from None
    return target


def main() -> None:
    """Extract every entry of ``ZIP_PATH`` into ``DEST``.

    Extraction is best-effort: an entry that cannot be written is counted
    as a failure (the first five are reported) and the run continues.
    Progress is printed every 500 entries and a summary at the end.

    Raises:
        SystemExit: If ``ZIP_PATH`` does not exist.
    """
    if not ZIP_PATH.exists():
        raise SystemExit(f"Missing {ZIP_PATH}")

    with zipfile.ZipFile(ZIP_PATH) as z:
        members = z.namelist()
        total = len(members)
        print(f"Extracting {total} entries from {ZIP_PATH.name} ...")
        ok = 0
        fail = 0
        for i, member in enumerate(members):
            if i % 500 == 0:
                print(f" progress: {i}/{total} (ok={ok}, fail={fail})")
            is_dir = member.endswith("/")
            try:
                # Reject names that would escape DEST (zip-slip).
                target = _resolve_member(member)
                # Directories are created through lp() too: a deep directory
                # can exceed the Windows limit before any file name is added.
                os.makedirs(lp(target if is_dir else target.parent), exist_ok=True)
                if is_dir:
                    # Explicit directory entry: nothing to write.
                    continue
                # Stream the member to disk instead of loading it into memory.
                with z.open(member) as src, open(lp(target), "wb") as dst:
                    shutil.copyfileobj(src, dst)
                ok += 1
            except Exception as e:
                # Per-entry boundary: record the failure and keep going.
                fail += 1
                if fail <= 5:
                    print(f" [!] {member[:80]}... -> {type(e).__name__}: {e}")

    print(f"\nDone. ok={ok}, fail={fail}")


# Run the extraction only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()