File size: 1,846 Bytes
07cc220
 
68f5965
 
 
 
 
 
 
07cc220
 
68f5965
 
 
 
 
 
 
 
 
 
 
07cc220
 
68f5965
 
07cc220
68f5965
 
07cc220
 
 
 
 
68f5965
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
07cc220
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env python
"""
Download the two large COCO-FAISS assets the first time the Space starts
(and do nothing on later boots if they are already present).

Usage
-----
# called automatically from app.py
python scripts/get_assets.py        # manual run; fetches only missing/empty files
"""

import pathlib, subprocess, sys, shutil

# Both assets are served from the same dataset repository, so each download
# URL is this common prefix followed by the asset's file name.
_ASSET_BASE_URL = (
    "https://huggingface.co/datasets/stephenebert/"
    "coco-faiss-assets/resolve/main/"
)

# Maps local file name -> URL it is downloaded from.
FILES: dict[str, str] = {
    asset: _ASSET_BASE_URL + asset
    for asset in ("coco_caption_clip.index", "coco_caption_texts.npy")
}

# Assets are stored in the directory that contains this script.
DEST = pathlib.Path(__file__).resolve().parent


def _curl(url: str, out: pathlib.Path) -> None:
    """Download *url* to *out* with a nice progress bar (via curl).

    The transfer is written to a sibling ``<name>.part`` file and only renamed
    to *out* once curl has exited successfully, so *out* never holds a
    truncated download. If curl fails or the process is interrupted, the
    partial file is removed and the exception is re-raised.

    Raises:
        subprocess.CalledProcessError: curl exited with a non-zero status
            (with ``--fail`` this includes HTTP error responses).
        OSError: curl could not be started or the final rename failed.
    """
    # Same directory as *out*, so the final rename stays on one filesystem.
    partial = out.with_name(out.name + ".part")
    # --fail: without it curl saves an HTTP error page as the file and exits 0.
    cmd = ["curl", "-L", "--fail", "--progress-bar", "-o", str(partial), url]
    print("⤵️ ", " ".join(cmd), flush=True)
    try:
        subprocess.check_call(cmd)
        partial.replace(out)
    except BaseException:
        # BaseException so that ^C (KeyboardInterrupt) also cleans up.
        partial.unlink(missing_ok=True)
        raise


def ensure_assets() -> None:
    """
    Ensure every file listed in FILES is available inside DEST.

    A file counts as available when it exists and has a non-zero size;
    anything else is fetched from its URL with _curl. Per the module
    docstring, app.py calls this automatically.
    """
    for name in FILES:
        target = DEST / name
        needs_download = not target.exists() or target.stat().st_size == 0
        if needs_download:
            print(f"Downloading {name} …")
            _curl(FILES[name], target)
        else:
            print(f"{name} already present")
    print("All assets ready")


# Script entry point: `python scripts/get_assets.py`
if __name__ == "__main__":
    try:
        ensure_assets()
    except KeyboardInterrupt:
        # On ^C, remove zero-byte leftovers among the asset files and exit
        # with status 1. Non-empty files are left in place.
        print("\nInterrupted – cleaning up")
        for name in FILES:
            leftover = DEST / name
            if not leftover.exists() or leftover.stat().st_size != 0:
                continue
            leftover.unlink(missing_ok=True)
        sys.exit(1)