Yeroyan committed on
Commit
2ea429f
·
verified ·
1 Parent(s): ed5173a

download HF models during Image Build

Browse files
Files changed (1) hide show
  1. demo/download_models.py +75 -0
demo/download_models.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Pre-download HuggingFace models for Visual RAG Toolkit.
3
+
4
+ This script downloads models during Docker build to cache them in the image,
5
+ avoiding download delays during container startup.
6
+ """
7
+
8
+ import os
9
+ import sys
10
+
11
# Default both Hugging Face cache variables to the same directory.
# setdefault() leaves any value already present in the environment untouched,
# so an explicit setting from the build environment still wins.
_HF_CACHE_DIR = "/app/.cache/huggingface"
os.environ.setdefault("HF_HOME", _HF_CACHE_DIR)
os.environ.setdefault("TRANSFORMERS_CACHE", _HF_CACHE_DIR)

# Hub repository ids fetched by download_colpali_models().
MODELS_TO_DOWNLOAD = [
    "vidore/colpali-v1.3",
    "vidore/colSmol-500M",
]
18
+
19
def _fetch_with_transformers(model_name):
    """Download *model_name* and its processor via the generic transformers Auto classes."""
    from transformers import AutoModel, AutoProcessor

    # NOTE: trust_remote_code=True lets the model repository run its own
    # Python code while loading; kept because the original script relied on it.
    AutoModel.from_pretrained(model_name, trust_remote_code=True)
    AutoProcessor.from_pretrained(model_name, trust_remote_code=True)


def _fetch_with_colpali_engine(model_name):
    """Download *model_name* and its processor via the matching colpali-engine classes."""
    if "colsmol" in model_name.lower():
        # ColSmol checkpoints are loaded through the Idefics3-based classes in
        # colpali-engine, not through ColQwen2.
        from colpali_engine.models import ColIdefics3, ColIdefics3Processor

        model_cls, processor_cls = ColIdefics3, ColIdefics3Processor
    else:
        from colpali_engine.models import ColPali, ColPaliProcessor

        model_cls, processor_cls = ColPali, ColPaliProcessor

    model_cls.from_pretrained(model_name, trust_remote_code=True)
    processor_cls.from_pretrained(model_name, trust_remote_code=True)


def download_colpali_models(models=None):
    """Download ColPali-family models and their processors into the HF cache.

    Each model is first loaded with colpali-engine (when it is importable) and,
    if that fails, with the generic transformers Auto classes. Failures are
    reported but never raised, so one unavailable model does not prevent the
    remaining ones from being fetched.

    Args:
        models: Iterable of Hub repository ids to download. Defaults to the
            module-level ``MODELS_TO_DOWNLOAD`` list.

    Returns:
        A list with the names of the models that could not be downloaded by
        any method; empty when everything succeeded.
    """
    if models is None:
        models = MODELS_TO_DOWNLOAD

    print("=" * 60)
    print("Downloading ColPali models for Visual RAG Toolkit")
    print("=" * 60)

    try:
        import colpali_engine.models  # noqa: F401  (availability probe only)
    except ImportError:
        print("[WARN] colpali-engine not installed, trying transformers directly")
        engine_available = False
    else:
        engine_available = True

    failed = []
    for model_name in models:
        print(f"\n[INFO] Downloading model: {model_name}")

        if engine_available:
            try:
                _fetch_with_colpali_engine(model_name)
            except Exception as exc:  # best-effort: fall through to transformers
                print(f"[WARN] Could not download {model_name} with colpali-engine: {exc}")
            else:
                print(f"[OK] Downloaded: {model_name}")
                continue

        try:
            _fetch_with_transformers(model_name)
        except Exception as exc:  # best-effort: record the failure and move on
            if engine_available:
                print(f"[ERROR] Failed to download {model_name}: {exc}")
            else:
                print(f"[WARN] Could not download {model_name}: {exc}")
            failed.append(model_name)
        else:
            if engine_available:
                print(f"[OK] Downloaded via transformers: {model_name}")
            else:
                print(f"[OK] Downloaded: {model_name}")

    if failed:
        print(f"\n[WARN] {len(failed)} model(s) could not be downloaded: {', '.join(failed)}")
    return failed
61
+
62
+
63
def main():
    """Print the active cache settings, run the model download, and print a closing banner."""
    hf_home = os.getenv("HF_HOME", "not set")
    cache_dir = os.getenv("TRANSFORMERS_CACHE", "not set")
    print(f"[INFO] HF_HOME: {hf_home}")
    print(f"[INFO] Cache dir: {cache_dir}")

    download_colpali_models()

    banner = "=" * 60
    print("\n" + banner)
    print("Model download complete!")
    print(banner)


# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()