Spaces:

hugh007
/

openwolf-text

Sleeping

openwolf-text / Dockerfile

fix: upgrade to Q6_K (600MB) better quality

82ad52b verified 21 days ago

920 Bytes

	# Base image: official Python 3.12 image (full variant, not -slim).
	FROM python:3.12

	# All following relative paths (COPY destinations, the CMD's working
	# directory) resolve against /app.
	WORKDIR /app

	# Install llama-cpp-python with the project's CPU wheel index added as an
	# extra package source, so a prebuilt wheel can be used (about 30 seconds,
	# no compilation needed, per the original author's note).
	RUN pip install \
	--no-cache-dir \
	--timeout 300 \
	--extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu \
	llama-cpp-python==0.3.23

	# Download the GGUF model files at build time so they are packaged into the
	# image under /app/models.
	RUN apt-get update && apt-get install -y --no-install-recommends curl \
	&& rm -rf /var/lib/apt/lists/*
	# curl flags:
	#   -f  exit non-zero on an HTTP error (404, 401, 5xx) instead of saving the
	#       server's error page as the .gguf file and letting the build succeed
	#   -S  still print the error message even though -s silences progress
	#   -L  follow redirects
	#   --retry 3  re-attempt transient network/server failures
	RUN mkdir -p /app/models && \
	curl -fsSL --retry 3 -o /app/models/MiniCPM-V-4_6-Thinking-Q6_K.gguf \
	"https://huggingface.co/openbmb/MiniCPM-V-4.6-Thinking-gguf/resolve/main/MiniCPM-V-4_6-Thinking-Q6_K.gguf" && \
	curl -fsSL --retry 3 -o /app/models/mmproj-model-f16.gguf \
	"https://huggingface.co/openbmb/MiniCPM-V-4.6-Thinking-gguf/resolve/main/mmproj-model-f16.gguf"

	# Copy only the dependency manifest before the application code so this
	# install layer is rebuilt only when requirements.txt itself changes.
	COPY requirements.txt ./
	RUN pip install --no-cache-dir --requirement requirements.txt

	# Application code is copied last, after the dependency and model layers.
	COPY app.py .

	# Documents the port the server listens on (matches --port below).
	EXPOSE 7860
	# Exec-form CMD: start uvicorn serving the `app` object from app.py,
	# listening on all interfaces on port 7860.
	CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]