# model / Dockerfile
# MuhammadNoman7600's picture
# Update Dockerfile
# 9f45529 verified
# ============================================================
# Dockerfile — Qwen2.5-0.5B + MuhammadNoman7600/mermaid LoRA
# CPU-Only API for HF Spaces. No GPU required. Port 7860.
# ============================================================
# Pin the Debian release (bookworm) so rebuilds do not silently
# switch base distributions when the floating "slim" alias moves.
FROM python:3.11-slim-bookworm
# ── System deps ──────────────────────────────────────────────
# git is the only OS package required; the apt list cache is
# removed in the same layer so it never reaches the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      git \
 && rm -rf /var/lib/apt/lists/*
# ── Python deps (CPU-only torch β€” no CUDA bloat) ─────────────
RUN pip install --no-cache-dir \
torch --index-url https://download.pytorch.org/whl/cpu
RUN pip install --no-cache-dir \
transformers \
accelerate \
peft \
fastapi \
uvicorn \
pydantic \
huggingface_hub
# ── Pre-download models at build time ────────────────────────
# Base model : unsloth/qwen2.5-0.5b-unsloth-bnb-4bit
# NOTE: that repo ships 4-bit safetensors. On CPU (no bitsandbytes)
# it is loaded as float32 — HF automatically uses the non-quantised
# weights if available; the adapter loads correctly either way.
#
# LoRA adapter: MuhammadNoman7600/mermaid
ENV HF_HOME=/tmp/hf_cache
# One self-contained python3 -c per repo (no quoted-string line
# continuations); both downloads run in a single layer so the
# cache is baked into the image once.
RUN python3 -c "from huggingface_hub import snapshot_download; snapshot_download('unsloth/qwen2.5-0.5b-unsloth-bnb-4bit', cache_dir='/tmp/hf_cache')" \
 && python3 -c "from huggingface_hub import snapshot_download; snapshot_download('MuhammadNoman7600/mermaid', cache_dir='/tmp/hf_cache')"
# ── Copy app ──────────────────────────────────────────────────
WORKDIR /app
# Copied last so code edits never invalidate the dependency and
# model-download layers above.
COPY app.py .
# EXPOSE is documentation only; 7860 matches the HF Spaces port
# stated in the header.
EXPOSE 7860
# NOTE(review): container runs as root (no USER directive), and
# app.py is expected to start its own HTTP server on 7860 —
# confirm against app.py.
CMD ["python3", "app.py"]