Spaces:

PreethiCarmelBosco
/

prem-sql-api

Build error

App Files Files Community

PreethiCarmelBosco commited on Nov 15, 2025

Commit

5d46304

verified ·

1 Parent(s): a47c89b

w

Browse files

Files changed (1) hide show

Dockerfile +21 -25

Dockerfile CHANGED Viewed

@@ -1,42 +1,38 @@
-# --- 1. Use a clean Python base image ---
-FROM python:3.12-slim
-# --- 2. Install build tools & python-venv ---
-# This is needed to compile llama-cpp-python from source
-# and to create a safe venv for downloading
-RUN apt-get update && apt-get install -y build-essential cmake python3-venv
-# --- 3. Download the GGUF model using a safe venv ---
 WORKDIR /app
 COPY download_model.py .
 ARG HF_TOKEN
 # This command creates a venv, installs hf_hub, downloads the model,
-# and then the venv is discarded, keeping the system clean.
 RUN --mount=type=secret,id=HF_TOKEN \
     sh -c 'python3 -m venv /tmp/downloader-venv && \
            . /tmp/downloader-venv/bin/activate && \
            pip install huggingface_hub && \
            python3 download_model.py'
-# --- 4. Build llama-cpp-python (THE FAST, CPU-ONLY WAY) ---
-# This is the magic fix:
-# It tells the compiler to NOT build the heavy CUDA/GPU libraries.
-# This will prevent the build from timing out.
-ENV CMAKE_ARGS="-DLLAMA_CUDA=OFF"
-RUN pip install "llama-cpp-python[server]"
-# --- 5. Run the Server ---
-# Expose port 8000 (which we defined in README.md)
-EXPOSE 8000
-# This is the command that will run when the container starts
-# It reads the API_KEY secret from the environment
 CMD [ \
-    "python", \
-    "-m", "llama_cpp.server", \
-    "--model", "/app/prem-1B-SQL.Q8_0.gguf", \
-    "--n_gpu_layers", "0", \
     "--port", "8000", \
     "--host", "0.0.0.0", \
-    "--api_key_env_var", "API_KEY" \
 ]

+# --- 1. Use the official Hugging Face TGI image ---
+# This is a pre-built image with everything included.
+FROM ghcr.io/huggingface/text-generation-inference:latest
+# --- 2. Install Python & venv to download our model ---
+# We still need to download our model, so we add Python.
+RUN apt-get update && \
+    apt-get install -y python3 python3-pip python3-venv && \
+    rm -rf /var/lib/apt/lists/*
+# --- 3. Download the GGUF model ---
 WORKDIR /app
 COPY download_model.py .
 ARG HF_TOKEN
 # This command creates a venv, installs hf_hub, downloads the model,
+# and then the venv is discarded.
 RUN --mount=type=secret,id=HF_TOKEN \
     sh -c 'python3 -m venv /tmp/downloader-venv && \
            . /tmp/downloader-venv/bin/activate && \
            pip install huggingface_hub && \
            python3 download_model.py'
+# --- 4. Set the container's command to run TGI ---
+# This is the command that will run when the container starts.
+# It tells TGI to serve our GGUF model and to protect
+# the API with the key we set in our secrets.
+ENV MODEL_ID="/app/prem-1B-SQL.Q8_0.gguf"
 CMD [ \
+    "text-generation-launcher", \
+    "--model-id", "${MODEL_ID}", \
+    "--quantize", "gguf", \
+    # This is the fix: Changed from 80 to 8000
     "--port", "8000", \
     "--host", "0.0.0.0", \
+    "--openai-api-key-env-var", "API_KEY" \
 ]