# ============================================================
# Dockerfile — Qwen2.5-0.5B + MuhammadNoman7600/mermaid LoRA
# CPU-only API for HF Spaces. No GPU required. Port 7860.
# ============================================================
# Slim Debian base with Python 3.11 — small CVE surface, no CUDA runtime.
FROM python:3.11-slim

# ── System deps ──────────────────────────────────────────────
# update + install combined in one layer, apt lists removed in the same
# layer so the cache never reaches the image.
# NOTE(review): git is presumably here for pip/HF git-based fetches — confirm.
RUN apt-get update \
 && apt-get install -y --no-install-recommends git \
 && rm -rf /var/lib/apt/lists/*
# ── Python deps (CPU-only torch — no CUDA bloat) ─────────────
# torch is installed first, on its own, so this very large layer stays
# cached independently of the lighter dependencies below.
RUN pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cpu \
        torch
# Lighter Python deps in their own layer (torch layer above stays cached).
# One package per line, sorted alphabetically for diff-friendliness.
# NOTE(review): versions are unpinned, so rebuilds are not reproducible —
# consider pinning (pkg==x.y.z) or adding a requirements.txt.
RUN pip install --no-cache-dir \
    accelerate \
    fastapi \
    huggingface_hub \
    peft \
    pydantic \
    transformers \
    uvicorn
# ── Pre-download models at build time ────────────────────────
# Base model : unsloth/qwen2.5-0.5b-unsloth-bnb-4bit
# NOTE: This repo ships 4-bit safetensors. On CPU (no bitsandbytes)
# we load it as float32 — HF will automatically use the non-quantised
# weights if available, otherwise the adapter still loads correctly.
#
# LoRA adapter: MuhammadNoman7600/mermaid
#
# HF_HOME governs the hub cache for BOTH build and runtime. The previous
# version also passed cache_dir='/tmp/hf_cache' explicitly, which stores
# snapshots at /tmp/hf_cache/models--... while a runtime from_pretrained()
# using the HF_HOME default resolves to /tmp/hf_cache/hub — a layout
# mismatch that would re-download the models at container start. Letting
# HF_HOME drive the path keeps build-time and runtime caches in agreement.
# TODO(review): confirm app.py relies on the default cache location.
ENV HF_HOME=/tmp/hf_cache
RUN python3 -c "\
from huggingface_hub import snapshot_download; \
snapshot_download('unsloth/qwen2.5-0.5b-unsloth-bnb-4bit'); \
snapshot_download('MuhammadNoman7600/mermaid')"
# ── Copy app ─────────────────────────────────────────────────
WORKDIR /app
COPY app.py .

# EXPOSE is documentation only; HF Spaces routes traffic to port 7860.
EXPOSE 7860

# Exec (JSON-array) form: python3 runs as PID 1 and receives SIGTERM
# directly from `docker stop`. The previous line ended with a stray " |"
# (extraction residue) which would have broken JSON parsing and made
# Docker fall back to an invalid shell-form command.
CMD ["python3", "app.py"]