# Base image: Debian-slim CPython 3.10.
FROM python:3.10-slim

LABEL maintainer="Smart Parchi OCR v7" \
      description="Local Hybrid OCR: Qaari-0.1 + GOT-OCR fallback, CPU-only, 16GB RAM"

WORKDIR /app

# ── System Dependencies ───────────────────────────────────────────────────────
# Shared libraries needed at import time by the Python wheels installed below
# (presumably OpenCV / NumPy / PyTorch — confirm against requirements.txt).
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        libopenblas0 \
    && rm -rf /var/lib/apt/lists/*

# ── Cache directories (writable by HF Spaces non-root user) ──────────────────
# World-writable on purpose: the runtime UID is not root, and these paths must
# accept model downloads and feedback data from that user.
RUN mkdir -p /app/data /.cache/huggingface /.cache/torch \
    && chmod -R 777 /app/data /.cache

# ── Python Dependencies ───────────────────────────────────────────────────────
# Step 1: PyTorch CPU wheel (needs --extra-index-url, done separately).
# Installed before requirements.txt is copied so that editing requirements.txt
# does not invalidate this large, slow layer.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir \
        torch==2.4.0+cpu \
        torchvision==0.19.0+cpu \
        --extra-index-url https://download.pytorch.org/whl/cpu

# Step 2: All other requirements
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# ── Application Code ──────────────────────────────────────────────────────────
# Copied last so source-only changes reuse every dependency layer above.
COPY . .
# ── CPU Thread Capping (2 vCPU HF Basic tier) ─────────────────────────────────
# One thread per numeric backend; tokenizers parallelism is switched off too.
ENV OMP_NUM_THREADS=1 \
    OPENBLAS_NUM_THREADS=1 \
    MKL_NUM_THREADS=1 \
    NUMEXPR_NUM_THREADS=1 \
    TOKENIZERS_PARALLELISM=false

# ── HuggingFace Cache ─────────────────────────────────────────────────────────
# All caches point at the world-writable /.cache tree created during the build.
# NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers releases
# in favour of HF_HOME; it is kept here so the cache location does not change.
ENV HF_HOME=/.cache/huggingface \
    TRANSFORMERS_CACHE=/.cache/huggingface \
    TORCH_HOME=/.cache/torch

# ── Application Settings ──────────────────────────────────────────────────────
ENV PYTHONUNBUFFERED=1 \
    PORT=7860 \
    DISABLE_TQDM=1 \
    HF_HUB_DISABLE_PROGRESS_BARS=1

# ── Model Selection (override via HF Space Secrets) ───────────────────────────
# Qaari is a PEFT LoRA adapter — requires a base model to be loaded first
# Base: Qwen2-VL-2B-Instruct (~4.5GB fp32)
ENV BASE_MODEL_ID=Qwen/Qwen2-VL-2B-Instruct
# Adapter: Qaari LoRA fine-tuned on Urdu Nastaliq (merged onto base at runtime)
ENV PRIMARY_MODEL_ID=oddadmix/Qaari-0.1-Urdu-OCR-VL-2B-Instruct
# Fallback layout model — 580MB, loaded ONLY if primary fails
ENV FALLBACK_MODEL_ID=stepfun-ai/GOT-OCR-2.0-hf
# Set 0 to disable fallback (saves startup time)
ENV ENABLE_FALLBACK=1
# RAM limit before disabling VLM (MB) — leaves ~4GB headroom on 16GB
ENV VLM_MEMORY_LIMIT_MB=12000 \
    VLM_MAX_NEW_TOKENS=512 \
    VLM_TIMEOUT_SECONDS=75
# Data path for persistent storage
ENV FEEDBACK_DATA_PATH=/app/data

# Documentation only: the port uvicorn binds to below.
EXPOSE 7860

# Drop root for the running process. A numeric UID:GID needs no passwd entry;
# it relies on /app/data and /.cache being world-writable (set up at build time).
# NOTE(review): 1000 is the UID HF Spaces is documented to use — confirm.
USER 1000:1000

# 1 worker — Qaari fp32 uses ~8 GB; two workers would OOM on 16 GB
# Exec form so uvicorn is PID 1 and receives SIGTERM directly.
CMD ["uvicorn", "app:app", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--workers", "1", \
     "--timeout-keep-alive", "120"]