OrbitMC committed on
Commit
7cfb924
·
verified ·
1 Parent(s): b5462d9

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +46 -21
Dockerfile CHANGED
@@ -1,21 +1,46 @@
1
- from huggingface_hub import hf_hub_download
2
- from llama_cpp.server.app import create_app
3
- import uvicorn
4
-
5
- # 1. Download the model files
6
- repo = "unsloth/Qwen3.5-9B-GGUF"
7
- model_path = hf_hub_download(repo_id=repo, filename="Qwen3.5-9B-UD-Q4_K_XL.gguf")
8
- clip_path = hf_hub_download(repo_id=repo, filename="mmproj-BF16.gguf")
9
-
10
- # 2. Run the server (OpenAI Compatible)
11
- # This mimics exactly what your Docker CMD was doing
12
- if __name__ == "__main__":
13
- app = create_app(
14
- model_path=model_path,
15
- chat_format="chatml", # Qwen uses chatml
16
- n_ctx=128000,
17
- clip_model_path=clip_path # For vision/multimodal support
18
- )
19
-
20
- print("Server starting on http://0.0.0.0:7860/v1")
21
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Stage 1: Build llama.cpp
2
+ FROM ubuntu:22.04 AS builder
3
+
4
+ RUN apt-get update && apt-get install -y \
5
+ build-essential \
6
+ cmake \
7
+ git \
8
+ libcurl4-openssl-dev \
9
+ python3-pip
10
+
11
+ # Clone the upstream llama.cpp sources (unpinned: builds whatever the default branch is at build time)
12
+ WORKDIR /app
13
+ RUN git clone https://github.com/ggml-org/llama.cpp.git .
14
+
15
+ # Build with UI DISABLED for a pure headless API
16
+ # We also enable cURL support for remote model loading if needed
17
+ RUN cmake -B build \
18
+ -DLLAMA_BUILD_WEBUI=OFF \
19
+ -DLLAMA_CURL=ON \
20
+ -DLLAMA_BUILD_EXAMPLES=OFF
21
+ RUN cmake --build build --config Release -j $(nproc) --target llama-server
22
+
23
+ # Stage 2: Runtime
24
+ FROM ubuntu:22.04
25
+
26
+ RUN apt-get update && apt-get install -y libcurl4 python3-pip && rm -rf /var/lib/apt/lists/*
27
+ RUN pip install huggingface_hub
28
+
29
+ WORKDIR /app
30
+ COPY --from=builder /app/build/bin/llama-server /app/llama-server
31
+
32
+ # Download the official Qwen2.5-7B-Instruct GGUF (Q4_K_M) from the Qwen repo rather than the Unsloth build
33
+ RUN python3 -c 'from huggingface_hub import hf_hub_download; \
34
+ hf_hub_download(repo_id="Qwen/Qwen2.5-7B-Instruct-GGUF", \
35
+ filename="qwen2.5-7b-instruct-q4_k_m.gguf", local_dir="/app")'
36
+
37
+ # Hugging Face Spaces expect the app to listen on port 7860
38
+ EXPOSE 7860
39
+
40
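+ # Run the headless llama-server. NOTE(review): --embedding may restrict the server to embedding-only use; confirm this is intended for an instruct model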
41
+ CMD ["./llama-server", \
42
+ "-m", "/app/qwen2.5-7b-instruct-q4_k_m.gguf", \
43
+ "--host", "0.0.0.0", \
44
+ "--port", "7860", \
45
+ "-c", "32768", \
46
+ "--embedding"]