#!/usr/bin/env python
"""
Download the two large COCO-FAISS assets the first time the Space starts
(and do nothing on later boots if they are already present).

Usage
-----
    # called automatically from app.py
    python scripts/get_assets.py        # manual refresh
"""
import pathlib
import shutil
import subprocess
import sys
from typing import Optional

# Asset file name -> URL it is downloaded from.
FILES: dict[str, str] = {
    "coco_caption_clip.index": (
        "https://huggingface.co/datasets/stephenebert/"
        "coco-faiss-assets/resolve/main/coco_caption_clip.index"
    ),
    "coco_caption_texts.npy": (
        "https://huggingface.co/datasets/stephenebert/"
        "coco-faiss-assets/resolve/main/coco_caption_texts.npy"
    ),
}

# Directory containing this script (.../scripts); assets are stored next to it.
DEST = pathlib.Path(__file__).resolve().parent


def _curl(url: str, out: pathlib.Path) -> None:
    """Download *url* to *out* with a nice progress bar (via curl).

    The data is first written to a sibling ``<name>.part`` file and only
    renamed to *out* once curl has exited successfully, so *out* never holds
    a truncated download.

    Args:
        url: Address to fetch; redirects are followed (``-L``).
        out: Final location of the downloaded file.

    Raises:
        FileNotFoundError: If no ``curl`` executable is found on PATH.
        subprocess.CalledProcessError: If curl exits with a non-zero status
            (with ``--fail`` this includes HTTP error responses).
    """
    if shutil.which("curl") is None:
        raise FileNotFoundError("curl executable not found on PATH")

    out.parent.mkdir(parents=True, exist_ok=True)
    part = out.with_name(out.name + ".part")

    # --fail: make curl exit non-zero on an HTTP error instead of saving the
    # server's error page as if it were the asset.
    cmd = ["curl", "--fail", "-L", "--progress-bar", "-o", str(part), url]
    print("⤵️ ", " ".join(cmd), flush=True)
    try:
        subprocess.check_call(cmd)
        # Same-directory rename, so *out* appears only when complete.
        part.replace(out)
    except BaseException:
        # BaseException so that Ctrl-C (KeyboardInterrupt) also removes the
        # partial file before the exception propagates.
        part.unlink(missing_ok=True)
        raise


def ensure_assets(dest: Optional[pathlib.Path] = None) -> None:
    """
    Make sure both big files exist; download any that are missing/empty.

    Called from app.py before the models load.

    Args:
        dest: Directory that holds the assets. Defaults to ``DEST`` (the
            directory containing this script).

    Raises:
        FileNotFoundError: If a download is needed and curl is unavailable.
        subprocess.CalledProcessError: If a download fails.
    """
    target_dir = DEST if dest is None else pathlib.Path(dest)

    for fname, url in FILES.items():
        path = target_dir / fname
        # A zero-byte file is treated as missing and downloaded again.
        if path.exists() and path.stat().st_size:
            print(f"{fname} already present")
            continue

        print(f"Downloading {fname} …")
        _curl(url, path)

    print("All assets ready")


# If you run `python scripts/get_assets.py` manually
if __name__ == "__main__":
    try:
        ensure_assets()
    except KeyboardInterrupt:
        # Partial downloads live in *.part files that _curl() has already
        # removed; here we only sweep up any empty placeholder files.
        print("\nInterrupted – cleaning up")
        for f in FILES:
            leftover = DEST / f
            if leftover.exists() and leftover.stat().st_size == 0:
                leftover.unlink(missing_ok=True)
        sys.exit(1)