# Builds llama.cpp's `llama-server` from source on Debian bookworm-slim and
# runs it as an unprivileged user (UID 1000), listening on 0.0.0.0:7860.
FROM debian:bookworm-slim

# Toolchain and development headers required by the CMake build below.
# --no-install-recommends keeps optional packages out of the layer, so
# ca-certificates is listed explicitly: the HTTPS `git clone` below needs it.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    cmake \
    git \
    pkg-config \
    libcurl4-openssl-dev \
    libssl-dev \
    libopenblas-dev \
    && rm -rf /var/lib/apt/lists/*

# Create a non-root user with a home directory; every later instruction
# (including the build and the server itself) runs as this user.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
WORKDIR $HOME/app

# Build-time knobs. The defaults reproduce the previously hard-coded behaviour.
#   LLAMA_CPP_REF  branch or tag to clone; empty means the upstream default
#                  branch. Pass a release tag for reproducible builds.
#   BUILD_JOBS     number of parallel compile jobs.
ARG LLAMA_CPP_REF=""
ARG BUILD_JOBS=8

# Clone llama.cpp into the working directory and build only `llama-server`.
# NOTE(review): GGML_NATIVE=ON asks the compiler to target the build host's
# CPU, and AVX-512 is requested explicitly as well; confirm that the machines
# running this image support the same instruction sets.
# NOTE(review): the libcurl switch was previously passed as -DGGML_CURL=ON;
# llama.cpp's build documentation appears to name it LLAMA_CURL. Verify the
# option name against the revision being built.
RUN git clone --depth 1 ${LLAMA_CPP_REF:+--branch "${LLAMA_CPP_REF}"} \
        https://github.com/ggerganov/llama.cpp.git . && \
    cmake -B build \
        -DCMAKE_BUILD_TYPE=Release \
        -DGGML_NATIVE=ON \
        -DGGML_AVX512=ON \
        -DGGML_AVX512_VNNI=ON \
        -DGGML_OPENMP=ON \
        -DGGML_BLAS=ON \
        -DGGML_BLAS_VENDOR=OpenBLAS \
        -DLLAMA_CURL=ON && \
    cmake --build build --config Release --target llama-server -j "${BUILD_JOBS}"

# Port the server binds to (see --port in CMD). EXPOSE is metadata only.
EXPOSE 7860

# The server binary is the fixed entrypoint; CMD supplies default arguments
# that can be replaced wholesale at `docker run` time.
ENTRYPOINT ["./build/bin/llama-server"]

# Default arguments: model reference passed via -hf, bind address and port,
# 8 threads (-t), a 4096 context size (-c), flash attention, and no mmap.
# NOTE(review): confirm that "true" is an accepted value for --flash-attn in
# the llama.cpp revision being built.
CMD [ \
    "-hf", "unsloth/Qwen3.5-4B-GGUF:Q8_0", \
    "--host", "0.0.0.0", \
    "--port", "7860", \
    "-t", "8", \
    "-c", "4096", \
    "--flash-attn", "true", \
    "--no-mmap" \
]