# model / Dockerfile
# MuhammadNoman7600's picture
# Update Dockerfile
# 9f45529 verified
# ============================================================
# Dockerfile — Qwen2.5-0.5B + MuhammadNoman7600/mermaid LoRA
# CPU-Only API for HF Spaces. No GPU required. Port 7860.
# ============================================================
# Pin the Debian release (bookworm) so rebuilds do not silently
# switch base distributions when the floating "slim" alias moves.
FROM python:3.11-slim-bookworm
# ── System deps ──────────────────────────────────────────────
# git is the only OS package required; the apt list cache is
# removed in the same layer so it never reaches the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      git \
 && rm -rf /var/lib/apt/lists/*
# ── Python deps (CPU-only torch β€” no CUDA bloat) ─────────────
RUN pip install --no-cache-dir \
torch --index-url https://download.pytorch.org/whl/cpu
RUN pip install --no-cache-dir \
transformers \
accelerate \
peft \
fastapi \
uvicorn \
pydantic \
huggingface_hub
# ── Pre-download models at build time ────────────────────────
# Base model : unsloth/qwen2.5-0.5b-unsloth-bnb-4bit
# NOTE: that repo ships 4-bit safetensors. On CPU (no bitsandbytes)
# it is loaded as float32 — HF automatically uses the non-quantised
# weights if available; the adapter loads correctly either way.
#
# LoRA adapter: MuhammadNoman7600/mermaid
ENV HF_HOME=/tmp/hf_cache
# One self-contained python3 -c per repo (no quoted-string line
# continuations); both downloads run in a single layer so the
# cache is baked into the image once.
RUN python3 -c "from huggingface_hub import snapshot_download; snapshot_download('unsloth/qwen2.5-0.5b-unsloth-bnb-4bit', cache_dir='/tmp/hf_cache')" \
 && python3 -c "from huggingface_hub import snapshot_download; snapshot_download('MuhammadNoman7600/mermaid', cache_dir='/tmp/hf_cache')"
# ── Copy app ──────────────────────────────────────────────────
WORKDIR /app
# Copied last so code edits never invalidate the dependency and
# model-download layers above.
COPY app.py .
# EXPOSE is documentation only; 7860 matches the HF Spaces port
# stated in the header.
EXPOSE 7860
# NOTE(review): container runs as root (no USER directive), and
# app.py is expected to start its own HTTP server on 7860 —
# confirm against app.py.
CMD ["python3", "app.py"]