PreethiCarmelBosco committed on
Commit
28eb58c
·
verified ·
1 Parent(s): 5d46304
Files changed (1) hide show
  1. Dockerfile +11 -18
Dockerfile CHANGED
@@ -2,36 +2,29 @@
2
  # This is a pre-built image with everything included.
3
  FROM ghcr.io/huggingface/text-generation-inference:latest
4
 
5
- # --- 2. Install Python & venv to download our model ---
6
- # We still need to download our model, so we add Python.
7
- RUN apt-get update && \
8
- apt-get install -y python3 python3-pip python3-venv && \
9
- rm -rf /var/lib/apt/lists/*
10
-
11
- # --- 3. Download the GGUF model ---
12
  WORKDIR /app
13
- COPY download_model.py .
14
 
 
15
  ARG HF_TOKEN
16
- # This command creates a venv, installs hf_hub, downloads the model,
17
- # and then the venv is discarded.
18
  RUN --mount=type=secret,id=HF_TOKEN \
19
- sh -c 'python3 -m venv /tmp/downloader-venv && \
20
- . /tmp/downloader-venv/bin/activate && \
21
- pip install huggingface_hub && \
22
- python3 download_model.py'
23
 
24
- # --- 4. Set the container's command to run TGI ---
25
  # This is the command that will run when the container starts.
26
- # It tells TGI to serve our GGUF model and to protect
27
- # the API with the key we set in our secrets.
28
  ENV MODEL_ID="/app/prem-1B-SQL.Q8_0.gguf"
29
 
30
  CMD [ \
31
  "text-generation-launcher", \
32
  "--model-id", "${MODEL_ID}", \
33
  "--quantize", "gguf", \
34
- # This is the fix: Changed from 80 to 8000
35
  "--port", "8000", \
36
  "--host", "0.0.0.0", \
37
  "--openai-api-key-env-var", "API_KEY" \
 
# This is a pre-built image with everything included.
# NOTE(review): ":latest" is not reproducible — pin a specific tag or digest
# (e.g. ghcr.io/huggingface/text-generation-inference:<version>@sha256:...)
# once a known-good version has been validated.
FROM ghcr.io/huggingface/text-generation-inference:latest

# --- 2. Download the GGUF model using cURL ---
# We use cURL (which is already in the image) to avoid
# installing Python and causing version conflicts.

WORKDIR /app

# Kept only for backward compatibility with existing
# `--build-arg HF_TOKEN=...` invocations; it is intentionally UNUSED below.
# Build args are recorded in `docker history`, so the token must be passed
# as a BuildKit secret instead:
#   docker build --secret id=HF_TOKEN,src=<token-file> .
ARG HF_TOKEN

# Download the model weights at build time, authenticating with the
# HF_TOKEN secret mounted at /run/secrets/HF_TOKEN (never baked into a layer).
#   -f  : fail the build on HTTP errors — without it a 401/404 error page
#         would be silently written to the .gguf file and the build would
#         still "succeed" with a corrupt model
#   -sS : suppress the progress bar in build logs but still show real errors
#   -L  : follow the redirect huggingface.co issues to its CDN
RUN --mount=type=secret,id=HF_TOKEN \
    curl -fsSL \
    -H "Authorization: Bearer $(cat /run/secrets/HF_TOKEN)" \
    "https://huggingface.co/mradermacher/prem-1B-SQL-GGUF/resolve/main/prem-1B-SQL.Q8_0.gguf" \
    -o "prem-1B-SQL.Q8_0.gguf"

# --- 3. Set the container's command to run TGI ---
# This is the command that will run when the container starts.
# NOTE(review): the exec-form CMD below passes "${MODEL_ID}" as a LITERAL
# string — exec form does no shell expansion, and Dockerfile build-time
# variable substitution does not apply to CMD. The launcher appears to read
# the MODEL_ID environment variable itself, so either drop the
# "--model-id" pair from CMD or hardcode the path there — verify against
# the TGI launcher documentation.
ENV MODEL_ID="/app/prem-1B-SQL.Q8_0.gguf"

24
  CMD [ \
25
  "text-generation-launcher", \
26
  "--model-id", "${MODEL_ID}", \
27
  "--quantize", "gguf", \
 
28
  "--port", "8000", \
29
  "--host", "0.0.0.0", \
30
  "--openai-api-key-env-var", "API_KEY" \