# syntax=docker/dockerfile:1
# ============================================================
# Dockerfile — Qwen2.5-0.5B + MuhammadNoman7600/mermaid LoRA
# CPU-Only API for HF Spaces. No GPU required. Port 7860.
# ============================================================
FROM python:3.11-slim

# ── System deps ──────────────────────────────────────────────
# git retained from the original spec (pip VCS installs / hub
# fallbacks). apt cache is removed in the same layer it is created.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# Container-friendly Python defaults: unbuffered stdout/stderr so
# logs are visible immediately (and not lost if the app crashes).
ENV PYTHONUNBUFFERED=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# ── Python deps (CPU-only torch — no CUDA bloat) ─────────────
# NOTE(review): versions are unpinned; pin them (torch==x.y.z,
# transformers==x.y.z, …) for reproducible builds.
RUN pip install --no-cache-dir \
        torch --index-url https://download.pytorch.org/whl/cpu
RUN pip install --no-cache-dir \
        accelerate \
        fastapi \
        huggingface_hub \
        peft \
        pydantic \
        transformers \
        uvicorn

# ── Non-root user ────────────────────────────────────────────
# HF Spaces runs Docker Spaces as UID 1000; create a matching
# user so runtime file access (cache, /app) works without root.
RUN useradd --create-home --uid 1000 appuser

# ── Pre-download models at build time ────────────────────────
# Base model : unsloth/qwen2.5-0.5b-unsloth-bnb-4bit
# NOTE: This repo ships 4-bit safetensors. On CPU (no bitsandbytes)
# we load it as float32 — HF will automatically use the non-quantised
# weights if available, otherwise the adapter still loads correctly.
#
# LoRA adapter: MuhammadNoman7600/mermaid
#
# Download runs AFTER switching to the runtime user so the cache
# in /tmp/hf_cache is owned by UID 1000 — no extra chown layer
# (which would double the image size for these weights).
ENV HF_HOME=/tmp/hf_cache
USER appuser
RUN python3 -c "\
from huggingface_hub import snapshot_download; \
snapshot_download('unsloth/qwen2.5-0.5b-unsloth-bnb-4bit', cache_dir='/tmp/hf_cache'); \
snapshot_download('MuhammadNoman7600/mermaid', cache_dir='/tmp/hf_cache')"

# ── Copy app ─────────────────────────────────────────────────
# app.py is copied last so code edits never invalidate the
# (expensive) dependency and model-download layers above.
WORKDIR /app
COPY --chown=appuser:appuser app.py .

# EXPOSE is documentation only; Spaces routes to 7860.
EXPOSE 7860
CMD ["python3", "app.py"]