File size: 3,490 Bytes
47ab61f
 
84d3814
 
 
 
 
 
 
a2141d8
 
 
84d3814
59d69df
47ab61f
84d3814
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47ab61f
84d3814
 
 
 
a2141d8
 
84d3814
 
 
a2141d8
84d3814
a2141d8
84d3814
 
a2141d8
84d3814
 
 
 
 
17c15f1
84d3814
8cea4e9
 
 
 
84d3814
 
 
 
 
 
 
 
 
 
 
170b26e
5c4d35f
4c6b4ee
84d3814
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Base image: slim CPython 3.10 with the Debian release pinned to bookworm.
# A bare "-slim" tag floats to whatever Debian release is current, while the
# apt packages installed later in this file are named per release; pinning
# the release keeps rebuilds from silently changing the OS underneath.
FROM python:3.10-slim-bookworm

# Image metadata (same two labels as before, declared in one instruction).
LABEL maintainer="Smart Parchi OCR v7" \
      description="Local Hybrid OCR: Qaari-0.1 + GOT-OCR fallback, CPU-only, 16GB RAM"

# Relative paths in later COPY/RUN/CMD instructions resolve against /app.
WORKDIR /app

# ── System Dependencies ───────────────────────────────────────────────────────
# Runtime shared libraries (presumably required by the Python wheels listed in
# requirements.txt — confirm before removing any of them).
# The apt index is fetched, used, and deleted inside one layer so it is never
# stored in the image.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        libopenblas0 \
    && rm -rf /var/lib/apt/lists/*

# ── Cache directories (writable by HF Spaces non-root user) ──────────────────
# Pre-create the app data directory and the model cache directories, then open
# them to every UID so the runtime user can write to them whatever UID it has.
# NOTE(review): world-writable is broader than needed; if the runtime UID is
# known and fixed, prefer chown to that UID with a tighter mode.
RUN mkdir -p \
        /app/data \
        /.cache/huggingface \
        /.cache/torch \
    && chmod -R 777 \
        /app/data \
        /.cache

# ── Python Dependencies ───────────────────────────────────────────────────────
# Step 1: PyTorch CPU wheels. This layer is built BEFORE requirements.txt is
# copied in, so editing requirements.txt no longer invalidates it and forces
# the large torch download again. The "+cpu" builds come from the PyTorch
# wheel index, hence --extra-index-url.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir \
        torch==2.4.0+cpu \
        torchvision==0.19.0+cpu \
        --extra-index-url https://download.pytorch.org/whl/cpu

# Step 2: All other requirements. Only the manifest is copied here (the full
# source tree is copied later), so this layer is rebuilt only when
# requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# ── Application Code ──────────────────────────────────────────────────────────
# Copy the entire build context into the working directory. This comes after
# the dependency layers so that source edits do not trigger a reinstall.
# NOTE(review): confirm a .dockerignore keeps .git, caches and secrets out.
COPY . ./

# ── CPU Thread Capping (2 vCPU HF Basic tier) ─────────────────────────────────
# Each native math/threading backend is limited to a single thread, and
# tokenizer parallelism is switched off, via the environment variables those
# libraries read at startup.
ENV OMP_NUM_THREADS=1 \
    OPENBLAS_NUM_THREADS=1 \
    MKL_NUM_THREADS=1 \
    NUMEXPR_NUM_THREADS=1 \
    TOKENIZERS_PARALLELISM=false

# ── HuggingFace Cache ─────────────────────────────────────────────────────────
# Point the Hugging Face and torch caches at the /.cache directories that are
# created and made writable earlier in this file.
# NOTE(review): recent transformers releases deprecate TRANSFORMERS_CACHE in
# favour of HF_HOME; it is kept so the on-disk cache location does not change.
ENV HF_HOME=/.cache/huggingface \
    TRANSFORMERS_CACHE=/.cache/huggingface \
    TORCH_HOME=/.cache/torch

# ── Application Settings ──────────────────────────────────────────────────────
# PYTHONUNBUFFERED=1 makes Python write stdout/stderr without buffering, so
# log lines show up immediately.
# PORT matches the port passed to uvicorn in CMD (CMD uses the literal value;
# whether the app also reads PORT is not visible here — confirm).
# DISABLE_TQDM / HF_HUB_DISABLE_PROGRESS_BARS presumably silence progress bars
# in the logs — confirm DISABLE_TQDM is actually read by the app.
ENV PYTHONUNBUFFERED=1 \
    PORT=7860 \
    DISABLE_TQDM=1 \
    HF_HUB_DISABLE_PROGRESS_BARS=1

# ── Model Selection (override via HF Space Secrets) ───────────────────────────
# Qaari is a PEFT LoRA adapter — it requires a base model to be loaded first.
#   BASE_MODEL_ID     base model: Qwen2-VL-2B-Instruct (~4.5GB fp32)
#   PRIMARY_MODEL_ID  adapter: Qaari LoRA fine-tuned on Urdu Nastaliq
#                     (merged onto the base at runtime)
# NOTE(review): the CMD comment at the end of this file quotes ~8 GB for fp32;
# confirm which figure is download size and which is resident memory.
ENV BASE_MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
    PRIMARY_MODEL_ID=oddadmix/Qaari-0.1-Urdu-OCR-VL-2B-Instruct

# Fallback layout model — 580MB, loaded ONLY if the primary fails.
# Set ENABLE_FALLBACK to 0 to disable the fallback (saves startup time).
ENV FALLBACK_MODEL_ID=stepfun-ai/GOT-OCR-2.0-hf \
    ENABLE_FALLBACK=1

# VLM_MEMORY_LIMIT_MB: RAM limit (MB) before the VLM is disabled — leaves ~4GB
# headroom on 16GB.
# VLM_MAX_NEW_TOKENS / VLM_TIMEOUT_SECONDS: presumably the generation length cap
# and per-request timeout — confirm against the application code.
ENV VLM_MEMORY_LIMIT_MB=12000 \
    VLM_MAX_NEW_TOKENS=512 \
    VLM_TIMEOUT_SECONDS=75

# Data path for persistent storage.
ENV FEEDBACK_DATA_PATH=/app/data

# Port the server listens on. EXPOSE is documentation only; it does not
# publish the port.
EXPOSE 7860

# 1 worker — Qaari fp32 uses ~8 GB; two workers would OOM on 16 GB.
# Exec (JSON array) form: uvicorn is started directly, without a shell.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--timeout-keep-alive", "120"]