# Spaces: Running
# llama.cpp server image with the MiniCPM5-1B Q4_K_M GGUF model baked in at
# build time. The default command serves the model on 0.0.0.0:7860.
#
# NOTE(review): `:full` is a moving tag; pin a release tag or digest if
# reproducible builds matter.
FROM ghcr.io/ggml-org/llama.cpp:full

WORKDIR /app

# Install only the minimum needed to create a Python virtual environment.
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3-pip \
        python3-venv \
    && rm -rf /var/lib/apt/lists/*

# Create the venv and put it first on PATH so the `pip` and `python3` calls
# below resolve to the venv rather than the system interpreter.
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Install pip packages into the venv only.
# NOTE(review): huggingface_hub is unpinned; consider pinning a version.
RUN pip install --no-cache-dir -U huggingface_hub

# Download the model weights into /app.
# Only the model GGUF is fetched here; no mmproj file is downloaded.
RUN python3 -c 'from huggingface_hub import hf_hub_download; \
repo="openbmb/MiniCPM5-1B-GGUF"; \
hf_hub_download(repo_id=repo, filename="MiniCPM5-1B-Q4_K_M.gguf", local_dir="/app")'

# Documents the port the server is told to bind below (EXPOSE does not publish it).
EXPOSE 7860

# No ENTRYPOINT is declared in this file, so these values are handed to the
# base image's entrypoint as arguments (presumably it dispatches `--server`
# to llama-server -- confirm against the llama.cpp Docker docs).
# NOTE(review): no USER is set, so the container runs as whatever user the
# base image / platform selects.
# NOTE(review): --mlock, --mmap and --direct-io are all passed together;
# confirm against `llama-server --help` that this combination is intended.
CMD ["--server", \
     "-m", "/app/MiniCPM5-1B-Q4_K_M.gguf", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "-t", "3", \
     "--mlock", \
     "--prio", "0", \
     "--swa-full", \
     "--no-slots", \
     "--mmap", \
     "--log-disable", \
     "--no-cont-batching", \
     "--threads-http", "1", \
     "--direct-io", \
     "--flash-attn", "off", \
     "-c", "64000", \
     "-n", "38912"]