Spaces:
Build error
Build error
- Dockerfile +26 -17
Dockerfile
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
-
# --- 1. Use
|
| 2 |
-
FROM
|
| 3 |
|
| 4 |
-
# --- 2. Install
|
| 5 |
-
#
|
| 6 |
-
|
|
|
|
| 7 |
|
| 8 |
# --- 3. Download the GGUF model using a safe venv ---
|
| 9 |
WORKDIR /app
|
|
@@ -17,17 +18,25 @@ RUN --mount=type=secret,id=HF_TOKEN \
|
|
| 17 |
pip install huggingface_hub && \
|
| 18 |
python3 download_model.py'
|
| 19 |
|
| 20 |
-
# --- 4.
|
| 21 |
-
# This
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
# --- 5.
|
| 25 |
-
#
|
| 26 |
-
|
| 27 |
-
# will automatically end when this RUN step completes.
|
| 28 |
-
RUN sh -c 'ollama serve & \
|
| 29 |
-
sleep 5 && \
|
| 30 |
-
ollama create prem-sql-api -f /app/Modelfile'
|
| 31 |
|
| 32 |
-
#
|
| 33 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# --- 1. Use a clean Python base image ---
|
| 2 |
+
FROM python:3.12-slim
|
| 3 |
|
| 4 |
+
# --- 2. Install build tools & python-venv ---
|
| 5 |
+
# This is needed to compile llama-cpp-python from source
|
| 6 |
+
# and to create a safe venv for downloading
|
| 7 |
+
# Skip recommended packages and remove the apt lists in the same layer so
# neither ends up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        python3-venv \
    && rm -rf /var/lib/apt/lists/*
|
| 8 |
|
| 9 |
# --- 3. Download the GGUF model using a safe venv ---
|
| 10 |
WORKDIR /app
|
|
|
|
| 18 |
pip install huggingface_hub && \
|
| 19 |
python3 download_model.py'
|
| 20 |
|
| 21 |
+
# --- 4. Build llama-cpp-python (THE FAST, CPU-ONLY WAY) ---
|
| 22 |
+
# This is the magic fix:
|
| 23 |
+
# It tells the compiler to NOT build the heavy CUDA/GPU libraries.
|
| 24 |
+
# This will prevent the build from timing out.
|
| 25 |
+
# Current llama.cpp names the CUDA switch GGML_CUDA (LLAMA_CUDA is the
# deprecated spelling). CMAKE_ARGS is set inline so it applies to this build
# step only and is not left in the runtime environment; --no-cache-dir keeps
# pip's download/build cache out of the layer.
RUN CMAKE_ARGS="-DGGML_CUDA=OFF" pip install --no-cache-dir "llama-cpp-python[server]"
|
| 27 |
|
| 28 |
+
# --- 5. Run the Server ---
|
| 29 |
+
# Expose port 8000 (which we defined in README.md)
|
| 30 |
+
EXPOSE 8000
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
+
# This is the command that will run when the container starts
|
| 33 |
+
# It reads the API_KEY secret from the environment
|
| 34 |
+
# NOTE(review): llama_cpp.server does not appear to define an
# --api_key_env_var flag; its options mirror the settings fields (api_key,
# host, port, ...), and an unrecognized flag aborts startup. The settings are
# presumably populated from the environment as well, so an API_KEY variable
# supplied to the container should be picked up as api_key without a flag —
# confirm against the installed llama-cpp-python version.
CMD [ \
    "python", "-m", "llama_cpp.server", \
    "--model", "/app/prem-1B-SQL.Q8_0.gguf", \
    "--n_gpu_layers", "0", \
    "--host", "0.0.0.0", \
    "--port", "8000" \
]
|