# LLM_API / Dockerfile
FROM python:3.11-slim
WORKDIR /app
# Install build dependencies
RUN apt-get update && apt-get install -y \
build-essential \
cmake \
git \
wget \
&& rm -rf /var/lib/apt/lists/*
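# Note: build-essential and cmake let pip compile llama-cpp-python from source
# if no prebuilt wheel on the extra index below matches this platform.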
# Install llama-cpp-python (with the server extra) and huggingface_hub
RUN pip install --no-cache-dir \
--extra-index-url https://huggingface.co/James040/llama-cpp-python-wheels/simple/ \
"llama-cpp-python[server]==0.3.9" \
huggingface_hub \
transformers \
sentencepiece
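# The extra index above points at a community-hosted wheel repo
# (James040/llama-cpp-python-wheels), presumably carrying prebuilt CPU wheels;
# pip still falls back to PyPI for anything not found there.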
# Download the model at build time (cached in an image layer, so it is not re-pulled on every cold start)
RUN python -c "\
from huggingface_hub import hf_hub_download; \
hf_hub_download(\
repo_id='bartowski/Qwen2.5-3B-Instruct-GGUF', \
filename='Qwen2.5-3B-Instruct-Q4_K_M.gguf', \
local_dir='/app/models'\
)"
# Copy the FastAPI app
COPY main.py /app/main.py
EXPOSE 7860
CMD ["python", "/app/main.py"]
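# Example usage once the container is running. This assumes main.py exposes an
# OpenAI-compatible /v1/chat/completions route on port 7860; check main.py for
# the actual routes and payload shape:
#   curl http://localhost:7860/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"messages": [{"role": "user", "content": "Hello"}]}'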