anews9340 committed on
Commit
c802a0e
·
verified ·
1 Parent(s): 5b8a4b3

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +11 -7
Dockerfile CHANGED
@@ -24,24 +24,28 @@ WORKDIR $HOME/app
24
  # -DGGML_NATIVE=ON: Targets your specific Ice Lake instructions
25
  # -DGGML_AVX512 & VNNI: Uses the hardware acceleration flags found in your cpuinfo
26
  # -DGGML_CURL=ON: Enables the -hf downloading capability
27
- RUN git clone --depth 1 https://github.com/ggerganov/llama.cpp.git . && \
28
- cmake -B build \
29
  -DCMAKE_BUILD_TYPE=Release \
30
  -DGGML_NATIVE=ON \
31
  -DGGML_AVX512=ON \
32
  -DGGML_AVX512_VNNI=ON \
33
  -DGGML_OPENMP=ON \
34
- -DGGML_CURL=ON && \
35
- cmake --build build --config Release -j 16 && \
36
- cp build/bin/llama-server . && \
37
- rm -rf build # Clean up build artifacts to reduce image size
 
 
 
 
38
 
39
  # 4. Final Server Configuration
40
  # -t 8: Optimized for your 8 physical cores (prevents hyperthreading slowdowns)
41
  # -hf: Pulls directly from Hugging Face
42
  # --host 0.0.0.0: Required for Hugging Face Spaces networking
43
  # --flash-attn: Uses AVX-512 optimized attention kernels
44
- ENTRYPOINT ["./llama-server"]
45
 
46
  CMD [ \
47
  "-hf", "unsloth/Qwen3.5-9B-GGUF:Q8_0", \
 
24
  # -DGGML_NATIVE=ON: Targets your specific Ice Lake instructions
25
  # -DGGML_AVX512 & VNNI: Uses the hardware acceleration flags found in your cpuinfo
26
  # -DGGML_CURL=ON: Enables the -hf downloading capability
27
+ RUN git clone --depth 1 https://github.com/ggerganov/llama.cpp.git .
28
+ RUN cmake -B build \
29
  -DCMAKE_BUILD_TYPE=Release \
30
  -DGGML_NATIVE=ON \
31
  -DGGML_AVX512=ON \
32
  -DGGML_AVX512_VNNI=ON \
33
  -DGGML_OPENMP=ON \
34
+ -DGGML_BLAS=ON \
35
+ -DGGML_BLAS_VENDOR=OpenBLAS \
36
+ -DGGML_CURL=ON
37
+ RUN cmake --build build --config Release --target llama-server -j 8 # --config takes the build type as its own argument; only the llama-server target is built
38
+ RUN cp build/bin/llama-server . # no trailing "&& \": a dangling continuation would pull the next instruction into this shell command
39
+
40
+
41
+
42
 
43
  # 4. Final Server Configuration
44
  # -t 8: Optimized for your 8 physical cores (prevents hyperthreading slowdowns)
45
  # -hf: Pulls directly from Hugging Face
46
  # --host 0.0.0.0: Required for Hugging Face Spaces networking
47
  # --flash-attn: Uses AVX-512 optimized attention kernels
48
+ ENTRYPOINT ["./build/bin/llama-server"]
49
 
50
  CMD [ \
51
  "-hf", "unsloth/Qwen3.5-9B-GGUF:Q8_0", \