Spaces:

Um34ER
/

bazaar-bridge-ocr

Running

App Files Files Community

bazaar-bridge-ocr / Dockerfile

Um34ER

Update Dockerfile

8cea4e9 verified 16 days ago

raw

history blame contribute delete

3.49 kB

	# Base image: slim CPython 3.10.
	FROM python:3.10-slim

	# Image metadata, declared in a single LABEL instruction.
	LABEL maintainer="Smart Parchi OCR v7" \
	      description="Local Hybrid OCR: Qaari-0.1 + GOT-OCR fallback, CPU-only, 16GB RAM"

	# Relative paths in later instructions resolve against /app.
	WORKDIR /app

	# ── System Dependencies ───────────────────────────────────────────────────────
	# Shared libraries installed from apt without recommended extras. The package
	# index is deleted in the same layer so it never persists in the image.
	# `set -e` aborts the step as soon as any command fails.
	RUN set -e; \
		apt-get update; \
		apt-get install -y --no-install-recommends \
			libgl1 \
			libglib2.0-0 \
			libgomp1 \
			libopenblas0; \
		rm -rf /var/lib/apt/lists/*

	# ── Cache directories (writable by HF Spaces non-root user) ──────────────────
	# Hugging Face Docker Spaces run the container as UID 1000, so ownership of the
	# writable paths is handed to that UID instead of making them world-writable
	# (the previous `chmod -R 777`). Root can still write here when run locally.
	# NOTE(review): a runtime that uses some other non-root UID would need these
	# paths re-owned — confirm UID 1000 is the only deployment target.
	RUN mkdir -p /app/data /.cache/huggingface /.cache/torch \
		&& chown -R 1000:1000 /app/data /.cache

	# ── Python Dependencies ───────────────────────────────────────────────────────
	# Step 1: PyTorch CPU wheels (need --extra-index-url, so installed separately).
	# This step does not read requirements.txt, so it runs BEFORE that file is
	# copied: editing requirements.txt then no longer invalidates the cached
	# torch/torchvision layer.
	RUN pip install --no-cache-dir --upgrade pip && \
		pip install --no-cache-dir \
			torch==2.4.0+cpu \
			torchvision==0.19.0+cpu \
			--extra-index-url https://download.pytorch.org/whl/cpu

	# Step 2: All other requirements. Only this layer (and later ones) is rebuilt
	# when requirements.txt changes.
	COPY requirements.txt .
	RUN pip install --no-cache-dir -r requirements.txt

	# ── Application Code ──────────────────────────────────────────────────────────
	# Copy the entire build context into the working directory. Done after the
	# dependency installs so source edits do not invalidate those layers.
	COPY . ./

	# ── CPU Thread Capping (2 vCPU HF Basic tier) ─────────────────────────────────
	# Cap each numeric library's thread pool at one thread and turn off tokenizer
	# parallelism. All five variables are set in a single ENV instruction.
	ENV OMP_NUM_THREADS=1 \
	    OPENBLAS_NUM_THREADS=1 \
	    MKL_NUM_THREADS=1 \
	    NUMEXPR_NUM_THREADS=1 \
	    TOKENIZERS_PARALLELISM=false

	# ── HuggingFace Cache ─────────────────────────────────────────────────────────
	# Point the Hugging Face and torch caches at directories under /.cache.
	# NOTE(review): TRANSFORMERS_CACHE duplicates HF_HOME's location; presumably it
	# is kept for transformers releases that still read it — verify before removing.
	ENV HF_HOME=/.cache/huggingface \
	    TRANSFORMERS_CACHE=/.cache/huggingface \
	    TORCH_HOME=/.cache/torch

	# ── Application Settings ──────────────────────────────────────────────────────
	# Unbuffered Python output, the service port value, and progress-bar
	# suppression, grouped into one ENV instruction.
	ENV PYTHONUNBUFFERED=1 \
	    PORT=7860 \
	    DISABLE_TQDM=1 \
	    HF_HUB_DISABLE_PROGRESS_BARS=1

	# ── Model Selection (override via HF Space Secrets) ───────────────────────────
	# BASE_MODEL_ID        Base model, Qwen2-VL-2B-Instruct (~4.5GB fp32). Qaari is a
	#                      PEFT LoRA adapter and needs a base model loaded first.
	# PRIMARY_MODEL_ID     Qaari LoRA adapter fine-tuned on Urdu Nastaliq (merged
	#                      onto the base at runtime).
	# FALLBACK_MODEL_ID    Fallback layout model — 580MB, loaded ONLY if the primary
	#                      fails.
	# ENABLE_FALLBACK      Set 0 to disable the fallback (saves startup time).
	# VLM_MEMORY_LIMIT_MB  RAM limit before disabling the VLM (MB) — leaves ~4GB
	#                      headroom on 16GB.
	# FEEDBACK_DATA_PATH   Data path for persistent storage.
	ENV BASE_MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
	    PRIMARY_MODEL_ID=oddadmix/Qaari-0.1-Urdu-OCR-VL-2B-Instruct \
	    FALLBACK_MODEL_ID=stepfun-ai/GOT-OCR-2.0-hf \
	    ENABLE_FALLBACK=1 \
	    VLM_MEMORY_LIMIT_MB=12000 \
	    VLM_MAX_NEW_TOKENS=512 \
	    VLM_TIMEOUT_SECONDS=75 \
	    FEEDBACK_DATA_PATH=/app/data

	# Port the server listens on (documentation only; EXPOSE does not publish it).
	EXPOSE 7860

	# Start uvicorn in exec form, bound to all interfaces on port 7860.
	# A single worker is used: Qaari fp32 uses ~8 GB, so two workers would OOM on
	# 16 GB.
	CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--timeout-keep-alive", "120"]