Update Dockerfile
Dockerfile  +31 -11

@@ -1,17 +1,37 @@
-# Use
-FROM

-#
-WORKDIR /

-#
-#
-

-#
 ENV HOST=0.0.0.0
 ENV PORT=7860
-ENV MODEL=/

-
-CMD ["python3", "-m", "llama_cpp.server", "--model", "/workspace/model.gguf", "--host", "0.0.0.0", "--port", "7860", "--n_ctx", "2048"]
+# Use python 3.10-slim as base
+FROM python:3.10-slim

+# Set working directory
+WORKDIR /app

+# 1. Install BUILD DEPENDENCIES (Critical for compiling from source)
+# We need build-essential (gcc) and cmake to compile the library for Gemma 3 support.
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    cmake \
+    libgomp1 \
+    git \
+    && rm -rf /var/lib/apt/lists/*

+# 2. Upgrade pip to ensure it handles modern build processes
+RUN pip install --upgrade pip
+
+# 3. Install llama-cpp-python from SOURCE
+# We do NOT use the --extra-index-url flag here.
+# This forces pip to download the source code and compile it locally,
+# ensuring you get the latest architecture support.
+RUN CMAKE_ARGS="-DGGML_NATIVE=OFF" pip install llama-cpp-python --no-cache-dir --verbose
+
+# 4. Install server dependencies
+RUN pip install fastapi uvicorn sse-starlette pydantic-settings starlette-context
+
+# 5. Setup Model
+RUN mkdir -p model
+COPY model/gemma-3-finetuned.Q4_K_M.gguf model/model.gguf
+
+# 6. Configure & Start Server
 ENV HOST=0.0.0.0
 ENV PORT=7860
+ENV MODEL=/app/model/model.gguf

+CMD ["python3", "-m", "llama_cpp.server", "--model", "/app/model/model.gguf", "--host", "0.0.0.0", "--port", "7860", "--n_ctx", "2048"]
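Because step 3 compiles llama-cpp-python from source instead of pulling a prebuilt wheel, the architecture support you end up with depends on whichever release pip resolves at image build time. A quick way to confirm what was actually compiled is to print the installed package version from inside the built image; this is only a sketch, and the image tag in the comment is a placeholder, not part of this commit.

# Run inside the built image, e.g. (image tag is a placeholder):
#   docker run --rm <your-image-tag> python3 -c "import llama_cpp; print(llama_cpp.__version__)"
import llama_cpp

# Version of the llama-cpp-python package compiled during the image build;
# a recent release is what carries the newer architecture support the
# Dockerfile comments above are after.
print("llama-cpp-python version:", llama_cpp.__version__)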
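Once a container built from this Dockerfile is running with port 7860 published (for example via a local docker run -p 7860:7860), llama_cpp.server exposes an OpenAI-compatible HTTP API. Below is a minimal smoke test, assuming the server is reachable at localhost:7860; the host, prompt, and model label are illustrative and not part of the commit.

import requests

BASE = "http://localhost:7860"  # assumes the container's port 7860 is published to the host

# Confirm the GGUF model was registered by the server (OpenAI-compatible /v1/models).
models = requests.get(f"{BASE}/v1/models", timeout=30).json()
print("served models:", [m["id"] for m in models.get("data", [])])

# One chat completion through the OpenAI-compatible endpoint; the "model"
# field should act only as a label here, since the server loads a single GGUF file.
resp = requests.post(
    f"{BASE}/v1/chat/completions",
    json={
        "model": "model.gguf",
        "messages": [{"role": "user", "content": "Say hello in one short sentence."}],
        "max_tokens": 64,
    },
    timeout=300,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])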