Update Dockerfile
Browse files- Dockerfile +11 -7
Dockerfile
CHANGED
|
@@ -24,24 +24,28 @@ WORKDIR $HOME/app
|
|
| 24 |
# -DGGML_NATIVE=ON: Targets your specific Ice Lake instructions
|
| 25 |
# -DGGML_AVX512 & VNNI: Uses the hardware acceleration flags found in your cpuinfo
|
| 26 |
# -DGGML_CURL=ON: Enables the -hf downloading capability
|
| 27 |
-
RUN git clone --depth 1 https://github.com/ggerganov/llama.cpp.git .
|
| 28 |
-
|
| 29 |
-DCMAKE_BUILD_TYPE=Release \
|
| 30 |
-DGGML_NATIVE=ON \
|
| 31 |
-DGGML_AVX512=ON \
|
| 32 |
-DGGML_AVX512_VNNI=ON \
|
| 33 |
-DGGML_OPENMP=ON \
|
| 34 |
-
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
# 4. Final Server Configuration
|
| 40 |
# -t 8: Optimized for your 8 physical cores (prevents hyperthreading slowdowns)
|
| 41 |
# -hf: Pulls directly from Hugging Face
|
| 42 |
# --host 0.0.0.0: Required for Hugging Face Spaces networking
|
| 43 |
# --flash-attn: Uses AVX-512 optimized attention kernels
|
| 44 |
-
ENTRYPOINT ["./llama-server"]
|
| 45 |
|
| 46 |
CMD [ \
|
| 47 |
"-hf", "unsloth/Qwen3.5-9B-GGUF:Q8_0", \
|
|
|
|
| 24 |
# -DGGML_NATIVE=ON: Targets your specific Ice Lake instructions
|
| 25 |
# -DGGML_AVX512 & VNNI: Uses the hardware acceleration flags found in your cpuinfo
|
| 26 |
# -DGGML_CURL=ON: Enables the -hf downloading capability
|
| 27 |
+
RUN git clone --depth 1 https://github.com/ggerganov/llama.cpp.git .
|
| 28 |
+
RUN cmake -B build \
|
| 29 |
-DCMAKE_BUILD_TYPE=Release \
|
| 30 |
-DGGML_NATIVE=ON \
|
| 31 |
-DGGML_AVX512=ON \
|
| 32 |
-DGGML_AVX512_VNNI=ON \
|
| 33 |
-DGGML_OPENMP=ON \
|
| 34 |
+
-DGGML_BLAS=ON \
|
| 35 |
+
-DGGML_BLAS_VENDOR=OpenBLAS \
|
| 36 |
+
-DGGML_CURL=ON
|
| 37 |
+
# Compile only the llama-server target from the configured build tree, in Release configuration, with 8 parallel jobs.
RUN cmake --build build --config Release --target llama-server -j 8
|
| 38 |
+
RUN cp build/bin/llama-server . && \
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
|
| 42 |
|
| 43 |
# 4. Final Server Configuration
|
| 44 |
# -t 8: Optimized for your 8 physical cores (prevents hyperthreading slowdowns)
|
| 45 |
# -hf: Pulls directly from Hugging Face
|
| 46 |
# --host 0.0.0.0: Required for Hugging Face Spaces networking
|
| 47 |
# --flash-attn: Uses AVX-512 optimized attention kernels
|
| 48 |
+
ENTRYPOINT ["./build/bin/llama-server"]
|
| 49 |
|
| 50 |
CMD [ \
|
| 51 |
"-hf", "unsloth/Qwen3.5-9B-GGUF:Q8_0", \
|