# syntax=docker/dockerfile:1
# --- 1. Use a standard, reliable Python base ---
FROM python:3.12-slim

WORKDIR /app

# --- 2. Install C/C++ build tools ---
# build-essential + cmake are required to compile llama-cpp-python from source.
# (python3-venv is NOT needed: the official python image ships the venv module.)
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
    && rm -rf /var/lib/apt/lists/*

# --- 3. Download the model first ---
# Uses a throwaway venv so huggingface_hub never pollutes the runtime
# environment; the venv is removed in the same layer so it adds no image size.
# The HF token arrives via a BuildKit secret mount (build with
# `docker build --secret id=HF_TOKEN,src=…`), so it is never baked into a layer.
COPY download_model.py .
RUN --mount=type=secret,id=HF_TOKEN \
    python3 -m venv /tmp/downloader-venv \
    && /tmp/downloader-venv/bin/pip install --no-cache-dir huggingface_hub \
    && /tmp/downloader-venv/bin/python download_model.py \
    && rm -rf /tmp/downloader-venv

# --- 4. Build llama-cpp-python (CPU-ONLY) ---
# This is the CRITICAL FIX: forcing a CPU-only build keeps compilation fast
# (1-2 minutes) and avoids CUDA-toolchain timeouts.
# CMAKE_ARGS only matters while pip compiles the wheel, so it is a build ARG
# (overridable with --build-arg) scoped to the RUN line — not an ENV that
# would leak into the runtime container.
ARG CMAKE_ARGS="-DLLAMA_CUDA=OFF"
RUN CMAKE_ARGS="${CMAKE_ARGS}" pip install --no-cache-dir "llama-cpp-python[server]"

# --- 5. Set the runtime command ---
# Drop root: create an unprivileged user that owns the app directory.
RUN useradd --system --uid 10001 --create-home --home-dir /home/app app \
    && chown -R app:app /app
USER app

# Documentation only (does not publish the port); matches README.md.
EXPOSE 8000

# Exec-form CMD: the server runs as PID 1 and receives SIGTERM from
# `docker stop`. 8000 is an unprivileged port, so the non-root user can bind.
CMD [ \
    "python", \
    "-m", "llama_cpp.server", \
    "--model", "prem-1B-SQL.Q8_0.gguf", \
    "--n_gpu_layers", "0", \
    "--port", "8000", \
    "--host", "0.0.0.0", \
    "--api_key_env_var", "API_KEY" \
]