# Hugging Face Space — status: Running
# syntax=docker/dockerfile:1
# ============================================================
# Dockerfile — Qwen2.5-0.5B + MuhammadNoman7600/mermaid LoRA
# CPU-Only API for HF Spaces. No GPU required. Port 7860.
# ============================================================
FROM python:3.11-slim

# ── System deps ──────────────────────────────────────────────
# git is needed so pip/huggingface_hub can resolve VCS references.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# ── Python deps (CPU-only torch → no CUDA bloat) ─────────────
# torch installed from the CPU wheel index in its own layer so the
# (large, rarely-changing) download stays cached independently.
# NOTE(review): versions are unpinned (hadolint DL3013) — pin each
# package (e.g. transformers==4.x.y) for reproducible builds.
RUN pip install --no-cache-dir \
    torch --index-url https://download.pytorch.org/whl/cpu
RUN pip install --no-cache-dir \
    accelerate \
    fastapi \
    huggingface_hub \
    peft \
    pydantic \
    transformers \
    uvicorn

# ── Non-root runtime user (HF Spaces convention: uid 1000) ───
RUN useradd --create-home --uid 1000 user

# ── Pre-download models at build time ────────────────────────
# Base model : unsloth/qwen2.5-0.5b-unsloth-bnb-4bit
# NOTE: This repo ships 4-bit safetensors. On CPU (no bitsandbytes)
# we load it as float32 — HF will automatically use the non-quantised
# weights if available, otherwise the adapter still loads correctly.
#
# LoRA adapter: MuhammadNoman7600/mermaid
#
# HF_HOME persists into the runtime env on purpose: the app must find
# the pre-downloaded cache at the same path.
ENV HF_HOME=/tmp/hf_cache
# chown in the same layer so the non-root user can write HF lock files
# at runtime without duplicating the cache into an extra layer.
RUN python3 -c "\
from huggingface_hub import snapshot_download; \
snapshot_download('unsloth/qwen2.5-0.5b-unsloth-bnb-4bit', cache_dir='/tmp/hf_cache'); \
snapshot_download('MuhammadNoman7600/mermaid', cache_dir='/tmp/hf_cache')" \
    && chown -R user:user /tmp/hf_cache

# ── Copy app ─────────────────────────────────────────────────
WORKDIR /app
COPY --chown=user:user app.py .

# Drop root for the serving process; 7860 is unprivileged, so the
# non-root bind succeeds.
USER user
EXPOSE 7860
CMD ["python3", "app.py"]