# syntax=docker/dockerfile:1
# ============================================================
# Dockerfile — Qwen2.5-0.5B + MuhammadNoman7600/mermaid LoRA
# CPU-Only API for HF Spaces. No GPU required. Port 7860.
# ============================================================
FROM python:3.11-slim

# ── System deps ──────────────────────────────────────────────
# git retained from the original spec (pip VCS installs / hub
# fallbacks). apt cache is removed in the same layer it is created.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# Container-friendly Python defaults: unbuffered stdout/stderr so
# logs are visible immediately (and not lost if the app crashes).
ENV PYTHONUNBUFFERED=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# ── Python deps (CPU-only torch — no CUDA bloat) ─────────────
# NOTE(review): versions are unpinned; pin them (torch==x.y.z,
# transformers==x.y.z, …) for reproducible builds.
RUN pip install --no-cache-dir \
        torch --index-url https://download.pytorch.org/whl/cpu
RUN pip install --no-cache-dir \
        accelerate \
        fastapi \
        huggingface_hub \
        peft \
        pydantic \
        transformers \
        uvicorn

# ── Non-root user ────────────────────────────────────────────
# HF Spaces runs Docker Spaces as UID 1000; create a matching
# user so runtime file access (cache, /app) works without root.
RUN useradd --create-home --uid 1000 appuser

# ── Pre-download models at build time ────────────────────────
# Base model : unsloth/qwen2.5-0.5b-unsloth-bnb-4bit
# NOTE: This repo ships 4-bit safetensors. On CPU (no bitsandbytes)
# we load it as float32 — HF will automatically use the non-quantised
# weights if available, otherwise the adapter still loads correctly.
#
# LoRA adapter: MuhammadNoman7600/mermaid
#
# Download runs AFTER switching to the runtime user so the cache
# in /tmp/hf_cache is owned by UID 1000 — no extra chown layer
# (which would double the image size for these weights).
ENV HF_HOME=/tmp/hf_cache
USER appuser
RUN python3 -c "\
from huggingface_hub import snapshot_download; \
snapshot_download('unsloth/qwen2.5-0.5b-unsloth-bnb-4bit', cache_dir='/tmp/hf_cache'); \
snapshot_download('MuhammadNoman7600/mermaid', cache_dir='/tmp/hf_cache')"

# ── Copy app ─────────────────────────────────────────────────
# app.py is copied last so code edits never invalidate the
# (expensive) dependency and model-download layers above.
WORKDIR /app
COPY --chown=appuser:appuser app.py .

# EXPOSE is documentation only; Spaces routes to 7860.
EXPOSE 7860
CMD ["python3", "app.py"]