NS-Genai committed on
Commit
b9ca278
·
verified ·
1 Parent(s): 0ad551a

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +13 -36
Dockerfile CHANGED
@@ -1,42 +1,19 @@
1
- # Use python 3.10
2
- FROM python:3.10-slim
 
3
 
4
- # Set working directory
5
  WORKDIR /app
6
 
7
- # Install system dependencies
8
- # libgomp1 is required for the pre-compiled binary to run
9
- RUN apt-get update && apt-get install -y \
10
- libgomp1 \
11
- && rm -rf /var/lib/apt/lists/*
12
 
13
- # --- CRITICAL FIX START ---
14
- # 1. Upgrade pip to the latest version.
15
- # Old versions (like 23.0) often fail to recognize the specific wheel tags used by llama-cpp-python.
16
- RUN pip install --upgrade pip
17
-
18
- # 2. Install llama-cpp-python using PRE-BUILT WHEELS.
19
- # We prefer binary to prevent falling back to the source build (which causes the gcc error).
20
- RUN pip install llama-cpp-python \
21
- --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu \
22
- --prefer-binary
23
-
24
- # 3. Install server dependencies
25
- RUN pip install fastapi uvicorn sse-starlette pydantic-settings starlette-context
26
- # --- CRITICAL FIX END ---
27
-
28
- # Create model directory and ensure permissions
29
- RUN mkdir -p model && chmod 777 model
30
-
31
- # Copy the model file
32
- COPY model/gemma-3-finetuned.Q4_K_M.gguf model/model.gguf
33
-
34
- # Expose port 7860
35
  ENV PORT=7860
 
36
 
37
- # Run the server
38
- CMD python3 -m llama_cpp.server \
39
- --model model/model.gguf \
40
- --host 0.0.0.0 \
41
- --port 7860 \
42
- --n_ctx 2048
 
1
+ # Use the official image from the library author.
2
+ # This includes the correct pre-compiled binaries and system libraries.
3
+ FROM ghcr.io/abetlen/llama-cpp-python:latest
4
 
5
+ # Set the working directory
6
  WORKDIR /app
7
 
8
+ # Copy your model file into the container
9
+ # Ensure 'model/gemma-3-finetuned.Q4_K_M.gguf' exists in your Space's file list!
10
+ COPY model/gemma-3-finetuned.Q4_K_M.gguf /app/model/model.gguf
 
 
11
 
12
+ # Set environment variables for the server
13
+ # Hugging Face Spaces requires port 7860
14
+ ENV HOST=0.0.0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  ENV PORT=7860
16
+ ENV MODEL=/app/model/model.gguf
17
 
18
+ # Start the OpenAI-compatible server
19
+ CMD ["python3", "-m", "llama_cpp.server", "--model", "/app/model/model.gguf", "--host", "0.0.0.0", "--port", "7860", "--n_ctx", "2048"]