ubix committed on
Commit
7c204ea
·
verified ·
1 Parent(s): f00f318

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +6 -19
Dockerfile CHANGED
@@ -1,24 +1,11 @@
1
  FROM ghcr.io/ggml-org/llama.cpp:full
2
 
3
- # Install wget and other dependencies
4
- RUN apt-get update && apt-get install -y --no-install-recommends \
5
- wget \
6
- && rm -rf /var/lib/apt/lists/*
 
 
7
 
8
- # Download the specific GGUF file (not the HTML page)
9
- # Using Q4_K_M for optimal balance on free tier (2 vCPU, 16GB RAM)
10
- RUN wget --no-check-certificate "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-q4_k_m.gguf" \
11
- -O /model.gguf
12
 
13
- # Expose Hugging Face Spaces default port
14
- EXPOSE 7860
15
 
16
- # Run llama.cpp server with optimized settings for free tier
17
- ENTRYPOINT ["/llama-server"]
18
- CMD ["-m", "/model.gguf", \
19
- "--port", "7860", \
20
- "--host", "0.0.0.0", \
21
- "-c", "4096", \
22
- "-n", "512", \
23
- "--threads", "2", \
24
- "--threads-batch", "2"]
 
FROM ghcr.io/ggml-org/llama.cpp:full

# Install wget for the model download. Use apt-get (stable scripting CLI,
# hadolint DL3027) rather than apt, skip recommended packages, and remove the
# apt lists in the same layer so the cache doesn't bloat the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
      wget \
    && rm -rf /var/lib/apt/lists/*

# Bake the Q4_K_M quantized GGUF into the image (the /resolve/ URL returns the
# raw file, not the HTML model page). Q4_K_M is a common quality/size
# trade-off for CPU inference on small hosts.
RUN wget "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-q4_k_m.gguf" \
      -O /qwen2.5-coder-7b-instruct-q4_k_m.gguf

# Documentation only (does not publish the port): Hugging Face Spaces routes
# HTTP traffic to 7860.
EXPOSE 7860

# The base image's entrypoint dispatches on the first argument; "--server"
# starts llama-server with the remaining flags. Exec-form CMD so operators can
# override arguments at `docker run` and the process receives signals directly.
CMD ["--server", "-m", "/qwen2.5-coder-7b-instruct-q4_k_m.gguf", "--port", "7860", "--host", "0.0.0.0", "-n", "512"]
8
+
9
 
 
 
 
 
10
 
 
 
11