NS-Genai committed on
Commit
a6cc846
·
verified ·
1 Parent(s): 3e9bc10

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +11 -30
Dockerfile CHANGED
@@ -1,37 +1,18 @@
1
- # Use python 3.10-slim as base
2
- FROM python:3.10-slim
 
3
 
4
- # Set working directory
5
  WORKDIR /app
6
 
7
- # 1. Install BUILD DEPENDENCIES (Critical for compiling from source)
8
- # We need build-essential (gcc) and cmake to compile the library for Gemma 3 support.
9
- RUN apt-get update && apt-get install -y \
10
- build-essential \
11
- cmake \
12
- libgomp1 \
13
- git \
14
- && rm -rf /var/lib/apt/lists/*
15
 
16
- # 2. Upgrade pip to ensure it handles modern build processes
17
- RUN pip install --upgrade pip
18
-
19
- # 3. Install llama-cpp-python from SOURCE
20
- # We do NOT use the --extra-index-url flag here.
21
- # This forces pip to download the source code and compile it locally,
22
- # ensuring you get the latest architecture support.
23
- RUN CMAKE_ARGS="-DGGML_NATIVE=OFF" pip install llama-cpp-python --no-cache-dir --verbose
24
-
25
- # 4. Install server dependencies
26
- RUN pip install fastapi uvicorn sse-starlette pydantic-settings starlette-context
27
-
28
- # 5. Setup Model
29
- RUN mkdir -p model
30
- COPY model/gemma-3-finetuned.Q4_K_M.gguf model/model.gguf
31
-
32
- # 6. Configure & Start Server
33
  ENV HOST=0.0.0.0
34
  ENV PORT=7860
35
- ENV MODEL=/app/model/model.gguf
36
 
37
- CMD ["python3", "-m", "llama_cpp.server", "--model", "/app/model/model.gguf", "--host", "0.0.0.0", "--port", "7860", "--n_ctx", "2048"]
 
 
 
1
+ # Use the official lightweight C++ image from the main llama.cpp repo
2
+ # This image is pre-compiled and supports the newest architectures (Gemma 3)
3
+ FROM ghcr.io/ggml-org/llama.cpp:server
4
 
5
+ # Set the working directory
6
  WORKDIR /app
7
 
8
+ # Copy your model file
9
+ # Ensure the file 'model/gemma-3-finetuned.Q4_K_M.gguf' exists in your HF Space "Files" tab
10
+ COPY model/gemma-3-finetuned.Q4_K_M.gguf /app/model.gguf
 
 
 
 
 
11
 
12
+ # Configure the server host and port via environment variables
+ # (note: no EXPOSE instruction here — HF Spaces routes to the port given by PORT)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  ENV HOST=0.0.0.0
14
  ENV PORT=7860
 
15
 
16
+ # Run the server binary directly (No Python)
17
+ # This uses the C++ 'llama-server' which is faster and supports Gemma 3
18
+ CMD ["-m", "/app/model.gguf", "--host", "0.0.0.0", "--port", "7860", "--n-gpu-layers", "0", "-c", "2048"]