PreethiCarmelBosco committed on
Commit
f18f27b
·
verified ·
1 Parent(s): 42db10c
Files changed (1) hide show
  1. Dockerfile +26 -17
Dockerfile CHANGED
@@ -1,9 +1,10 @@
1
- # --- 1. Use the official Ollama pre-built image ---
2
- FROM ollama/ollama
3
 
4
- # --- 2. Install Python, pip, and the venv package ---
5
- # The base image is Debian, so we can use apt-get
6
- RUN apt-get update && apt-get install -y python3 python3-pip python3-venv
 
7
 
8
  # --- 3. Download the GGUF model using a safe venv ---
9
  WORKDIR /app
@@ -17,17 +18,25 @@ RUN --mount=type=secret,id=HF_TOKEN \
17
  pip install huggingface_hub && \
18
  python3 download_model.py'
19
 
20
- # --- 4. Create the Ollama "Modelfile" ---
21
- # This file tells Ollama to use our downloaded GGUF
22
- RUN echo "FROM /app/prem-1B-SQL.Q8_0.gguf" > /app/Modelfile
 
 
 
23
 
24
- # --- 5. Import the model into Ollama's registry ---
25
- # We start the server in the background (&), wait 5s for it to boot,
26
- # then run the 'create' command. The 'ollama serve' process
27
- # will automatically end when this RUN step completes.
28
- RUN sh -c 'ollama serve & \
29
- sleep 5 && \
30
- ollama create prem-sql-api -f /app/Modelfile'
31
 
32
- # The base image's default command ("ollama serve") will
33
- # now run and serve the "prem-sql-api" model we just created.
 
 
 
 
 
 
 
 
 
 
1
+ # --- 1. Use a clean Python base image ---
2
+ FROM python:3.12-slim
3
 
4
+ # --- 2. Install build tools & python-venv ---
5
+ # This is needed to compile llama-cpp-python from source
6
+ # and to create a safe venv for downloading
7
+ RUN apt-get update && apt-get install -y build-essential cmake python3-venv
8
 
9
  # --- 3. Download the GGUF model using a safe venv ---
10
  WORKDIR /app
 
18
  pip install huggingface_hub && \
19
  python3 download_model.py'
20
 
21
+ # --- 4. Build llama-cpp-python (THE FAST, CPU-ONLY WAY) ---
22
+ # This is the magic fix:
23
+ # It tells the compiler to NOT build the heavy CUDA/GPU libraries.
24
+ # This will prevent the build from timing out.
25
+ ENV CMAKE_ARGS="-DLLAMA_CUDA=OFF"
26
+ RUN pip install "llama-cpp-python[server]"
27
 
28
+ # --- 5. Run the Server ---
29
+ # Expose port 8000 (which we defined in README.md)
30
+ EXPOSE 8000
 
 
 
 
31
 
32
+ # This is the command that will run when the container starts
33
+ # It reads the API_KEY secret from the environment
34
+ CMD [ \
35
+ "python", \
36
+ "-m", "llama_cpp.server", \
37
+ "--model", "/app/prem-1B-SQL.Q8_0.gguf", \
38
+ "--n_gpu_layers", "0", \
39
+ "--port", "8000", \
40
+ "--host", "0.0.0.0", \
41
+ "--api_key_env_var", "API_KEY" \
42
+ ]