stephenebert committed on
Commit
68f5965
·
verified ·
1 Parent(s): a1a61d3

Update scripts/get_assets.py

Browse files
Files changed (1) hide show
  1. scripts/get_assets.py +49 -18
scripts/get_assets.py CHANGED
@@ -1,31 +1,62 @@
1
  #!/usr/bin/env python
2
  """
3
- Lightweight helper to download the COCO FAISS index + caption array
4
- into ./scripts/ the first time the Space boots.
 
 
 
 
 
5
  """
6
- import pathlib, subprocess, sys
7
 
8
- FILES = {
9
- "coco_caption_clip.index":
10
- "https://huggingface.co/datasets/stephenebert/coco-faiss-assets/resolve/main/coco_caption_clip.index",
11
- "coco_caption_texts.npy":
12
- "https://huggingface.co/datasets/stephenebert/coco-faiss-assets/resolve/main/coco_caption_texts.npy",
 
 
 
 
 
 
13
  }
14
 
15
- DEST = pathlib.Path(__file__).resolve().parent
 
16
 
17
- def fetch(url, out):
 
18
  cmd = ["curl", "-L", "--progress-bar", "-o", str(out), url]
19
  print("⤵️ ", " ".join(cmd), flush=True)
20
  subprocess.check_call(cmd)
21
 
22
- for fname, url in FILES.items():
23
- path = DEST / fname
24
- if path.exists():
25
- print(f"✔️ {fname} already present")
26
- continue
27
- print(f"⬇️ Downloading {fname} …")
28
- fetch(url, path)
29
 
30
- print("✅ All assets ready")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
 
1
  #!/usr/bin/env python
2
  """
3
+ Download the two large COCO-FAISS assets the first time the Space starts
4
+ (and do nothing on later boots if they are already present).
5
+
6
+ Usage
7
+ -----
8
+ # called automatically from app.py
9
+ python scripts/get_assets.py # manual refresh
10
  """
 
11
 
12
+ import pathlib, subprocess, sys, shutil
13
+
14
# Both assets are served from the same dataset repository.
_ASSET_BASE_URL = (
    "https://huggingface.co/datasets/stephenebert/coco-faiss-assets/resolve/main"
)

# Local file name -> download URL.
FILES: dict[str, str] = {
    asset_name: f"{_ASSET_BASE_URL}/{asset_name}"
    for asset_name in ("coco_caption_clip.index", "coco_caption_texts.npy")
}

# Directory that contains this script; the assets are stored alongside it.
DEST = pathlib.Path(__file__).resolve().parent
26
+
27
 
28
def _curl(url: str, out: pathlib.Path) -> None:
    """Download *url* to *out* with curl, showing a progress bar.

    The transfer is written to a sibling ``<name>.part`` file and renamed
    onto *out* only after curl exits successfully, so *out* never holds a
    truncated or failed download.

    Args:
        url: Address to fetch; redirects are followed (``-L``).
        out: Final destination path of the downloaded file.

    Raises:
        subprocess.CalledProcessError: curl exited with a non-zero status.
            Because of ``--fail`` this includes HTTP error responses.
    """
    partial = out.with_name(out.name + ".part")
    # --fail: without it curl stores the server's error page as the payload
    # and still exits 0, which would look like a successful download.
    cmd = ["curl", "-L", "--fail", "--progress-bar", "-o", str(partial), url]
    print("⤵️ ", " ".join(cmd), flush=True)
    try:
        subprocess.check_call(cmd)
        # Same directory as *out*, so this is a plain rename.
        partial.replace(out)
    except BaseException:
        # BaseException so that Ctrl-C (KeyboardInterrupt) is covered too:
        # never leave a half-written file behind.
        partial.unlink(missing_ok=True)
        raise
33
 
 
 
 
 
 
 
 
34
 
35
def ensure_assets() -> None:
    """Download every asset in ``FILES`` that is missing or empty in ``DEST``.

    Assets are handled in the iteration order of ``FILES``. A file that
    already exists with a non-zero size is reported and left untouched;
    anything else is fetched with ``_curl``. Any exception raised by
    ``_curl`` propagates to the caller.

    NOTE(review): the original docstring says this is called from app.py
    before the models load; that caller is not visible here.
    """
    for asset_name in FILES:
        target = DEST / asset_name
        # A zero-byte file counts as missing and is downloaded again.
        needs_download = not (target.exists() and target.stat().st_size)
        if needs_download:
            print(f"Downloading {asset_name} …")
            _curl(FILES[asset_name], target)
        else:
            print(f"{asset_name} already present")
    print("All assets ready")
48
+
49
+
50
# Entry point for a manual `python scripts/get_assets.py` run.
if __name__ == "__main__":
    try:
        ensure_assets()
    except KeyboardInterrupt:
        print("\nInterrupted – cleaning up")
        # Only zero-byte files are removed here; a non-empty file is left
        # in place, whatever its state.
        for leftover in (DEST / asset_name for asset_name in FILES):
            if not leftover.exists():
                continue
            if leftover.stat().st_size:
                continue
            leftover.unlink(missing_ok=True)
        # Exit with status 1 to signal the aborted run.
        raise SystemExit(1)
62