File size: 1,800 Bytes
39d2798
9f45529
 
39d2798
500cf7a
 
9f45529
39d2798
 
 
500cf7a
9f45529
39d2798
 
500cf7a
 
 
39d2798
9f45529
500cf7a
 
39d2798
 
 
9f45529
 
 
 
 
 
 
39d2798
9f45529
39d2798
 
9f45529
 
500cf7a
9f45529
500cf7a
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# syntax=docker/dockerfile:1
# ============================================================
#  Dockerfile — Qwen2.5-0.5B + MuhammadNoman7600/mermaid LoRA
#  CPU-Only API for HF Spaces. No GPU required. Port 7860.
# ============================================================
FROM python:3.11-slim

# ── System deps ──────────────────────────────────────────────
# git: required by pip / huggingface_hub for any VCS-backed deps.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# Unbuffered stdout/stderr so logs stream to the Spaces console
# immediately instead of appearing only on flush/exit.
ENV PYTHONUNBUFFERED=1

# ── Python deps (CPU-only torch — no CUDA bloat) ─────────────
# torch gets its own layer: it is by far the largest wheel, and
# keeping it separate means edits to the app deps below do not
# force a re-download.
# NOTE(review): versions are unpinned; pin (e.g. torch==2.x.y,
# transformers==4.x.y) for reproducible rebuilds.
RUN pip install --no-cache-dir \
    torch --index-url https://download.pytorch.org/whl/cpu

RUN pip install --no-cache-dir \
    accelerate \
    fastapi \
    huggingface_hub \
    peft \
    pydantic \
    transformers \
    uvicorn

# ── Pre-download models at build time ────────────────────────
# Base model  : unsloth/qwen2.5-0.5b-unsloth-bnb-4bit
#   NOTE: This repo ships 4-bit safetensors. On CPU (no bitsandbytes)
#   we load it as float32 — HF will automatically use the non-quantised
#   weights if available, otherwise the adapter still loads correctly.
#
# LoRA adapter: MuhammadNoman7600/mermaid
ENV HF_HOME=/tmp/hf_cache

RUN python3 -c "\
from huggingface_hub import snapshot_download; \
snapshot_download('unsloth/qwen2.5-0.5b-unsloth-bnb-4bit', cache_dir='/tmp/hf_cache'); \
snapshot_download('MuhammadNoman7600/mermaid', cache_dir='/tmp/hf_cache')"

# ── Non-root runtime user (HF Spaces convention: uid 1000) ───
# The snapshots above were downloaded as root; hand the cache to
# the runtime user so the hub libraries can create lock/refs
# files inside it when the model is loaded at startup.
RUN useradd -m -u 1000 user && \
    chown -R user:user /tmp/hf_cache

# ── Copy app ──────────────────────────────────────────────────
WORKDIR /app
COPY --chown=user:user app.py .

USER user

EXPOSE 7860
CMD ["python3", "app.py"]