stephenebert's picture
Update scripts/get_assets.py
68f5965 verified
#!/usr/bin/env python
"""
Download the two large COCO-FAISS assets the first time the Space starts
(and do nothing on later boots if they are already present).
Usage
-----
# called automatically from app.py
python scripts/get_assets.py # manual refresh
"""
import pathlib, subprocess, sys, shutil
# Every asset lives under the same Hugging Face dataset "resolve" endpoint,
# so each download URL is simply the base URL plus the file name.
_ASSET_BASE_URL = (
    "https://huggingface.co/datasets/stephenebert/"
    "coco-faiss-assets/resolve/main"
)

# Maps local file name -> download URL (insertion order = download order).
FILES: dict[str, str] = {
    asset: f"{_ASSET_BASE_URL}/{asset}"
    for asset in (
        "coco_caption_clip.index",
        "coco_caption_texts.npy",
    )
}

# Assets are stored next to this script.
DEST = pathlib.Path(__file__).resolve().parent  # .../scripts
def _curl(url: str, out: pathlib.Path) -> None:
    """Download *url* to *out* with a nice progress bar (via curl).

    The payload is written to a sibling ``<name>.part`` file first and is
    renamed onto *out* only after curl exits successfully.  A failed or
    interrupted transfer therefore never leaves a truncated, non-empty file
    at *out* that a later size check could mistake for a finished download.

    Args:
        url: Address to fetch; redirects are followed (``-L``).
        out: Final destination path of the downloaded file.

    Raises:
        subprocess.CalledProcessError: if curl exits with a non-zero status,
            which (because of ``--fail``) includes HTTP error responses.
    """
    part = out.with_name(out.name + ".part")
    # --fail turns HTTP errors (404, 5xx, ...) into a non-zero exit status
    # instead of saving the server's error page as if it were the asset.
    cmd = ["curl", "-L", "--fail", "--progress-bar", "-o", str(part), url]
    print("⤵️ ", " ".join(cmd), flush=True)
    try:
        subprocess.check_call(cmd)
        # Same directory, so the rename replaces *out* in a single step.
        part.replace(out)
    finally:
        # No-op after a successful rename; on errors or Ctrl-C it removes
        # the partial download.
        part.unlink(missing_ok=True)
def ensure_assets() -> None:
    """Fetch every asset listed in ``FILES`` that is not yet usable locally.

    An asset counts as usable when its file exists under ``DEST`` and has a
    non-zero size; anything missing or empty is downloaded with ``_curl``.
    Intended to be called from app.py before the models load.
    """
    for fname, url in FILES.items():
        target = DEST / fname
        needs_download = not target.exists() or target.stat().st_size == 0
        if needs_download:
            print(f"Downloading {fname} …")
            _curl(url, target)
        else:
            print(f"{fname} already present")
    print("All assets ready")
# Manual entry point: `python scripts/get_assets.py`
if __name__ == "__main__":
    try:
        ensure_assets()
    except KeyboardInterrupt:
        # On ^C, tidy up before exiting with a failure status.
        print("\nInterrupted – cleaning up")
        # NOTE: only zero-byte leftovers are removed here; a non-empty file
        # is left in place.
        for leftover in (DEST / name for name in FILES):
            if leftover.exists() and leftover.stat().st_size == 0:
                leftover.unlink(missing_ok=True)
        sys.exit(1)