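# Build llama.cpp from source and serve a GGUF model with llama-server.
# Port 7860 follows the Hugging Face Spaces convention.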
FROM ubuntu:22.04
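# Build tooling plus libcurl/libssl dev headers (needed when compiling with
# LLAMA_CURL=ON); the apt cache is cleared to keep the layer small.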
RUN apt-get update && apt-get install -y \
    git cmake build-essential curl wget \
    libcurl4-openssl-dev libssl-dev && \
    rm -rf /var/lib/apt/lists/*
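# Clone the current default branch; pinning a release tag here would make the
# build reproducible.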
RUN git clone https://github.com/ggml-org/llama.cpp /opt/llama.cpp
WORKDIR /opt/llama.cpp
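# Configure and compile. LLAMA_BUILD_SERVER enables the llama-server target
# and LLAMA_CURL links libcurl for HTTP(S) model downloads.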
RUN mkdir build && cd build && \
    cmake .. -DLLAMA_BUILD_SERVER=ON -DLLAMA_CURL=ON && \
    make -j$(nproc)
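# GGUF weights to bake into the image; override at build time with
# --build-arg MODEL_URL=<url>.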
ARG MODEL_URL=https://huggingface.co/ggml-org/gemma-3n-E2B-it-GGUF/resolve/main/gemma-3n-E2B-it-Q8_0.gguf
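# Fetch the model during the build so the container starts without a
# network dependency.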
RUN mkdir /models && \
    wget -qO /models/model.gguf ${MODEL_URL}
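# llama-server listens on 7860 (see --port below).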
EXPOSE 7860
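# Serve with 4 CPU threads for generation and for batch (prompt) processing,
# and 4 parallel slots with continuous batching; --ctx-size is shared across
# slots, so each slot gets roughly 512 tokens of context. --no-mmap loads the
# weights up front instead of memory-mapping them, and --mlock pins them in
# RAM to avoid paging.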
ENTRYPOINT ["/opt/llama.cpp/build/bin/llama-server", \
    "-m", "/models/model.gguf", \
    "--threads", "4", "--threads-batch", "4", \
    "--host", "0.0.0.0", "--port", "7860", \
    "-np", "4", "--cont-batching", \
    "--no-mmap", "--mlock", \
    "--ctx-size", "2048"]