anews9340 committed on
Commit
f5b5b66
·
verified ·
1 Parent(s): 9543bb6

Create Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +54 -0
Dockerfile ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# llama.cpp `llama-server` image for a Hugging Face Docker Space.
#
# Two stages are used: "build" compiles the server, "runtime" carries only the
# resulting binary plus the shared libraries it loads, so the compiler
# toolchain, git and the llama.cpp source tree never reach the final image.

# ---------- Stage 1: build ----------
FROM debian:bookworm-slim AS build

# Build-time only: silences interactive apt prompts without persisting the
# variable into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Git ref (branch or tag) of llama.cpp to compile. The default follows the tip
# of master; pass `--build-arg LLAMA_CPP_REF=<tag>` for a reproducible build.
ARG LLAMA_CPP_REF=master

# Toolchain plus the libcurl/OpenSSL headers llama.cpp uses for HTTPS model
# downloads (the -hf flag). ca-certificates is listed explicitly because
# --no-install-recommends would otherwise drop it and the HTTPS clone below
# would fail. No BLAS package is installed: the build never enables GGML_BLAS.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        libcurl4-openssl-dev \
        libssl-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /src

# Clone and compile llama-server for the Xeon Platinum 8375C (Ice Lake) target.
#   -DBUILD_SHARED_LIBS=OFF : link llama/ggml statically so that the single
#                             llama-server executable is self-contained.
#   -DGGML_NATIVE=OFF       : do not use -march=native, which would bake in the
#                             instruction set of whatever machine runs the build.
#   -DGGML_AVX* / FMA / ... : explicit instruction sets for the target CPU.
#                             NOTE(review): assumes the Space really runs on an
#                             AVX-512 VNNI capable CPU - confirm, otherwise the
#                             binary will die with an illegal-instruction error.
#   -DGGML_OPENMP=ON        : multi-threading through OpenMP (libgomp at runtime).
# Model download support (-hf) needs no extra switch here; it relies on the
# libcurl/OpenSSL development packages installed above being detected.
RUN git clone --depth 1 --branch "${LLAMA_CPP_REF}" \
        https://github.com/ggerganov/llama.cpp.git . \
    && cmake -B build \
        -DCMAKE_BUILD_TYPE=Release \
        -DBUILD_SHARED_LIBS=OFF \
        -DGGML_NATIVE=OFF \
        -DGGML_SSE42=ON \
        -DGGML_AVX=ON \
        -DGGML_AVX2=ON \
        -DGGML_BMI2=ON \
        -DGGML_F16C=ON \
        -DGGML_FMA=ON \
        -DGGML_AVX512=ON \
        -DGGML_AVX512_VNNI=ON \
        -DGGML_OPENMP=ON \
    && cmake --build build --target llama-server -j "$(nproc)"

# ---------- Stage 2: runtime ----------
FROM debian:bookworm-slim AS runtime

ARG DEBIAN_FRONTEND=noninteractive

# Shared libraries the binary still loads dynamically (HTTP/TLS client, OpenMP)
# and the CA bundle required to verify HTTPS downloads from Hugging Face.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        libcurl4 \
        libgomp1 \
        libssl3 \
    && rm -rf /var/lib/apt/lists/*

# Hugging Face user (required for Spaces): UID 1000 with a writable home
# directory and an application directory owned by that user.
RUN useradd -m -u 1000 user \
    && install -d -o user -g user /home/user/app

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Only the compiled server is carried over from the build stage.
COPY --from=build --chown=user:user /src/build/bin/llama-server ./llama-server

# Drop privileges for everything that happens at runtime.
USER user

# Documentation only: the port the server is told to listen on below.
EXPOSE 7860

# Final server configuration. ENTRYPOINT is the binary; CMD holds the default
# arguments so they can be overridden at `docker run` time.
#   -hf             : pull the model directly from Hugging Face
#   --host 0.0.0.0  : listen on all interfaces (required for Spaces networking)
#   --port 7860     : port the server listens on
#   -t 8            : number of worker threads; NOTE(review): presumably chosen
#                     to match the physical core count - confirm on the host
#   -c 4096         : context size in tokens
#   --flash-attn on : enable flash attention (the flag takes on|off|auto)
#   --no-mmap       : load the model into memory instead of memory-mapping it
ENTRYPOINT ["./llama-server"]

CMD [ \
    "-hf", "unsloth/Qwen3.5-9B-GGUF:Q8_0", \
    "--host", "0.0.0.0", \
    "--port", "7860", \
    "-t", "8", \
    "-c", "4096", \
    "--flash-attn", "on", \
    "--no-mmap" \
]