PreethiCarmelBosco commited on
Commit
5d46304
·
verified ·
1 Parent(s): a47c89b
Files changed (1) hide show
  1. Dockerfile +21 -25
Dockerfile CHANGED
@@ -1,42 +1,38 @@
1
- # --- 1. Use a clean Python base image ---
2
- FROM python:3.12-slim
 
3
 
4
- # --- 2. Install build tools & python-venv ---
5
- # This is needed to compile llama-cpp-python from source
6
- # and to create a safe venv for downloading
7
- RUN apt-get update && apt-get install -y build-essential cmake python3-venv
 
8
 
9
- # --- 3. Download the GGUF model using a safe venv ---
10
  WORKDIR /app
11
  COPY download_model.py .
 
12
  ARG HF_TOKEN
13
  # This command creates a venv, installs hf_hub, downloads the model,
14
- # and then the venv is discarded, keeping the system clean.
15
  RUN --mount=type=secret,id=HF_TOKEN \
16
  sh -c 'python3 -m venv /tmp/downloader-venv && \
17
  . /tmp/downloader-venv/bin/activate && \
18
  pip install huggingface_hub && \
19
  python3 download_model.py'
20
 
21
- # --- 4. Build llama-cpp-python (THE FAST, CPU-ONLY WAY) ---
22
- # This is the magic fix:
23
- # It tells the compiler to NOT build the heavy CUDA/GPU libraries.
24
- # This will prevent the build from timing out.
25
- ENV CMAKE_ARGS="-DLLAMA_CUDA=OFF"
26
- RUN pip install "llama-cpp-python[server]"
27
-
28
- # --- 5. Run the Server ---
29
- # Expose port 8000 (which we defined in README.md)
30
- EXPOSE 8000
31
 
32
- # This is the command that will run when the container starts
33
- # It reads the API_KEY secret from the environment
34
  CMD [ \
35
- "python", \
36
- "-m", "llama_cpp.server", \
37
- "--model", "/app/prem-1B-SQL.Q8_0.gguf", \
38
- "--n_gpu_layers", "0", \
39
  "--port", "8000", \
40
  "--host", "0.0.0.0", \
41
- "--api_key_env_var", "API_KEY" \
42
  ]
 
1
+ # --- 1. Use the official Hugging Face TGI image ---
2
+ # This is a pre-built image with everything included.
3
+ FROM ghcr.io/huggingface/text-generation-inference:latest
4
 
5
+ # --- 2. Install Python & venv to download our model ---
6
+ # We still need to download our model, so we add Python.
7
+ RUN apt-get update && \
8
+ apt-get install -y python3 python3-pip python3-venv && \
9
+ rm -rf /var/lib/apt/lists/*
10
 
11
+ # --- 3. Download the GGUF model ---
12
  WORKDIR /app
13
  COPY download_model.py .
14
+
15
  ARG HF_TOKEN
16
  # This command creates a venv, installs hf_hub, downloads the model,
17
+ # and then the venv is discarded.
18
  RUN --mount=type=secret,id=HF_TOKEN \
19
  sh -c 'python3 -m venv /tmp/downloader-venv && \
20
  . /tmp/downloader-venv/bin/activate && \
21
  pip install huggingface_hub && \
22
  python3 download_model.py'
23
 
24
+ # --- 4. Set the container's command to run TGI ---
25
+ # This is the command that will run when the container starts.
26
+ # It tells TGI to serve our GGUF model and to protect
27
+ # the API with the key we set in our secrets.
28
+ ENV MODEL_ID="/app/prem-1B-SQL.Q8_0.gguf"
 
 
 
 
 
29
 
 
 
30
  CMD [ \
31
+ "text-generation-launcher", \
32
+ "--model-id", "${MODEL_ID}", \
33
+ "--quantize", "gguf", \
34
+ # This is the fix: Changed from 80 to 8000
35
  "--port", "8000", \
36
  "--host", "0.0.0.0", \
37
+ "--openai-api-key-env-var", "API_KEY" \
38
  ]