prem-sql-api / Dockerfile
PreethiCarmelBosco's picture
a
edd4ff1 verified
# --- 1. Base image ---
# Debian-based "slim" variant of the official Python 3.12 image; the apt-get
# step below relies on it being Debian-based.
FROM python:3.12-slim
# Relative paths used later in this file (the COPY destination "." and the
# --model argument in CMD) are resolved against this working directory.
WORKDIR /app
# --- 2. Install the C/C++ build toolchain ---
# build-essential (gcc/g++/make) and cmake are required to compile
# llama-cpp-python from source in a later step.
#
# --no-install-recommends keeps optional "recommended" packages out of the
# image. The Debian python3-venv package is intentionally NOT installed: the
# interpreter shipped in the official python image already provides the
# `venv` module, and the Debian package would only add a second, unused
# system Python.
#
# update + install + list cleanup stay in ONE layer so the package index is
# never stale and the apt lists do not persist in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
    && rm -rf /var/lib/apt/lists/*
# --- 3. Download the model ---
# download_model.py runs inside a throw-away virtualenv so that
# huggingface_hub (needed only for the download) is not installed into the
# image's main Python environment.
#
# HF_TOKEN is provided as a BuildKit secret mount: it is available only while
# this RUN executes (by default at /run/secrets/HF_TOKEN) and is not stored in
# any image layer. Requires BuildKit, e.g.:
#   docker build --secret id=HF_TOKEN,src=<token-file> .
# NOTE(review): presumably download_model.py reads the token from the secret
# file and writes the model into the working directory — confirm in the script.
#
# The venv is deleted in the SAME layer that creates it (a later `rm` would
# not shrink the image), and --no-cache-dir keeps pip's download cache out of
# the layer.
COPY download_model.py .
RUN --mount=type=secret,id=HF_TOKEN \
    python3 -m venv /tmp/downloader-venv \
    && . /tmp/downloader-venv/bin/activate \
    && pip install --no-cache-dir huggingface_hub \
    && python3 download_model.py \
    && rm -rf /tmp/downloader-venv
# --- 4. Build llama-cpp-python (CPU-only) ---
# llama-cpp-python compiles llama.cpp during `pip install`; CMAKE_ARGS is the
# environment variable its build reads to forward options to CMake.
#
# * CMAKE_ARGS is set inline for this one command instead of via ENV, so the
#   build-only setting does not leak into the runtime environment.
# * GGML_CUDA is the current name of the CUDA switch; the older LLAMA_CUDA
#   spelling is deprecated upstream.
# * --no-cache-dir keeps pip's download/wheel cache out of the image layer.
#
# TODO(review): pin an exact llama-cpp-python version for reproducible builds.
RUN CMAKE_ARGS="-DGGML_CUDA=OFF" \
    pip install --no-cache-dir "llama-cpp-python[server]"
# --- 5. Runtime configuration ---
# EXPOSE only documents the listening port (it does not publish it); the value
# matches the --port argument passed to the server below.
EXPOSE 8000
# Exec-form (JSON array) CMD: the Python process is started directly, without
# a wrapping shell. The model path is relative, so it is resolved against the
# image's working directory. --n_gpu_layers 0 keeps all layers on the CPU.
# NOTE(review): confirm the installed llama_cpp.server version accepts
# --api_key_env_var; the intent appears to be reading the key from $API_KEY.
# NOTE(review): no USER is set, so the server runs as the image's default user.
CMD ["python", "-m", "llama_cpp.server", \
     "--model", "prem-1B-SQL.Q8_0.gguf", \
     "--n_gpu_layers", "0", \
     "--port", "8000", \
     "--host", "0.0.0.0", \
     "--api_key_env_var", "API_KEY"]