Patryk Studzinski committed on
Commit
cf748a3
·
1 Parent(s): b50a781

pre-downloading-all-models-at-startup

Browse files
Files changed (3) hide show
  1. .gitignore +2 -1
  2. Dockerfile +12 -4
  3. app/models/registry.py +13 -5
.gitignore CHANGED
@@ -52,4 +52,5 @@ build/
52
  Thumbs.db
53
 
54
  # Gemini Plans
55
- gemini_plans/
 
 
52
  Thumbs.db
53
 
54
  # Gemini Plans
55
+ gemini_plans/
56
+ llm_app_rework.md
Dockerfile CHANGED
@@ -9,14 +9,22 @@ ENV HF_TOKEN=""
9
  COPY requirements.txt .
10
  RUN pip install --no-cache-dir -r requirements.txt
11
 
12
-
13
  RUN --mount=type=secret,id=HF_TOKEN \
14
  export HF_TOKEN=$(cat /run/secrets/HF_TOKEN) && \
15
- echo "--- Docker RUN: Downloading model using huggingface-cli..." && \
16
  huggingface-cli download speakleash/Bielik-1.5B-v3.0-Instruct \
17
- --local-dir ${MODEL_DIR} \
 
 
 
 
 
 
 
 
18
  --local-dir-use-symlinks=False && \
19
- echo "--- Docker RUN: Model download complete."
20
 
21
 
22
  COPY . .
 
9
  COPY requirements.txt .
10
  RUN pip install --no-cache-dir -r requirements.txt
11
 
12
+ # Pre-download all local models during build
13
  RUN --mount=type=secret,id=HF_TOKEN \
14
  export HF_TOKEN=$(cat /run/secrets/HF_TOKEN) && \
15
+ echo "--- Downloading Bielik-1.5B..." && \
16
  huggingface-cli download speakleash/Bielik-1.5B-v3.0-Instruct \
17
+ --local-dir ${MODEL_DIR}/bielik-1.5b \
18
+ --local-dir-use-symlinks=False && \
19
+ echo "--- Downloading Qwen2.5-3B..." && \
20
+ huggingface-cli download Qwen/Qwen2.5-3B-Instruct \
21
+ --local-dir ${MODEL_DIR}/qwen2.5-3b \
22
+ --local-dir-use-symlinks=False && \
23
+ echo "--- Downloading Gemma-2-2B..." && \
24
+ huggingface-cli download google/gemma-2-2b-it \
25
+ --local-dir ${MODEL_DIR}/gemma-2-2b \
26
  --local-dir-use-symlinks=False && \
27
+ echo "--- All models downloaded."
28
 
29
 
30
  COPY . .
app/models/registry.py CHANGED
@@ -16,18 +16,21 @@ from app.models.huggingface_inference_api import HuggingFaceInferenceAPI
16
  MODEL_CONFIG = {
17
  "bielik-1.5b": {
18
  "id": "speakleash/Bielik-1.5B-v3.0-Instruct",
 
19
  "type": "local",
20
  "polish_support": "excellent",
21
  "size": "1.5B",
22
  },
23
  "qwen2.5-3b": {
24
  "id": "Qwen/Qwen2.5-3B-Instruct",
 
25
  "type": "local",
26
  "polish_support": "good",
27
  "size": "3B",
28
  },
29
  "gemma-2-2b": {
30
  "id": "google/gemma-2-2b-it",
 
31
  "type": "local",
32
  "polish_support": "medium",
33
  "size": "2B",
@@ -40,8 +43,8 @@ MODEL_CONFIG = {
40
  },
41
  }
42
 
43
- # For local model override (when model is pre-downloaded in container)
44
- LOCAL_MODEL_PATH = os.getenv("LOCAL_MODEL_PATH", "/app/pretrain_model")
45
 
46
 
47
  class ModelRegistry:
@@ -66,9 +69,14 @@ class ModelRegistry:
66
  model_type = config["type"]
67
  model_id = config["id"]
68
 
69
- # Special case: local Bielik uses pre-downloaded path
70
- if name == "bielik-1.5b" and os.path.exists(LOCAL_MODEL_PATH):
71
- model_id = LOCAL_MODEL_PATH
 
 
 
 
 
72
 
73
  if model_type == "local":
74
  return HuggingFaceLocal(
 
16
  MODEL_CONFIG = {
17
  "bielik-1.5b": {
18
  "id": "speakleash/Bielik-1.5B-v3.0-Instruct",
19
+ "local_path": "bielik-1.5b",
20
  "type": "local",
21
  "polish_support": "excellent",
22
  "size": "1.5B",
23
  },
24
  "qwen2.5-3b": {
25
  "id": "Qwen/Qwen2.5-3B-Instruct",
26
+ "local_path": "qwen2.5-3b",
27
  "type": "local",
28
  "polish_support": "good",
29
  "size": "3B",
30
  },
31
  "gemma-2-2b": {
32
  "id": "google/gemma-2-2b-it",
33
+ "local_path": "gemma-2-2b",
34
  "type": "local",
35
  "polish_support": "medium",
36
  "size": "2B",
 
43
  },
44
  }
45
 
46
+ # Base path for pre-downloaded models in container
47
+ LOCAL_MODEL_BASE = os.getenv("MODEL_DIR", "/app/pretrain_model")
48
 
49
 
50
  class ModelRegistry:
 
69
  model_type = config["type"]
70
  model_id = config["id"]
71
 
72
+ # For local models, check if pre-downloaded version exists
73
+ if model_type == "local" and "local_path" in config:
74
+ local_path = os.path.join(LOCAL_MODEL_BASE, config["local_path"])
75
+ if os.path.exists(local_path):
76
+ print(f"Using pre-downloaded model at: {local_path}")
77
+ model_id = local_path
78
+ else:
79
+ print(f"Pre-downloaded model not found at {local_path}, will download from HuggingFace")
80
 
81
  if model_type == "local":
82
  return HuggingFaceLocal(