Spaces:

Um34ER
/

bazaar-bridge-ocr

Running

App Files Community

Um34ER committed on May 7

Commit

5c4d35f

verified ·

1 Parent(s): 9c13c8e

Update Dockerfile

Browse files

Files changed (1) hide show

Dockerfile +27 -54

Dockerfile CHANGED Viewed

@@ -1,65 +1,38 @@
 FROM python:3.10-slim
-WORKDIR /app
-# System dependencies (including image codecs)
-RUN apt-get update && apt-get install -y \
-    libgl1 \
-    libglib2.0-0 \
-    libgomp1 \
-    libopenblas0 \
-    libjpeg62-turbo \
-    zlib1g \
     && rm -rf /var/lib/apt/lists/*
-# Create writable directories
-RUN mkdir -p /app/data /app/logs /.cache /.cache/paddlepaddle /.cache/huggingface \
-    && chmod -R 777 /app/data /app/logs /.cache
 COPY requirements.txt .
-# Install PyTorch CPU first
-RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
-    pip install --no-cache-dir \
-        torch==2.4.0+cpu \
-        torchvision==0.19.0+cpu \
-        --extra-index-url https://download.pytorch.org/whl/cpu && \
-    pip install --no-cache-dir -r requirements.txt
-# Pre-download Qwen2-VL model (avoids first-request timeout)
-RUN python -c "from transformers import AutoModelForCausalLM, AutoProcessor; \
-    AutoModelForCausalLM.from_pretrained('Qwen/Qwen2-VL-2B-Instruct', device_map='cpu'); \
-    AutoProcessor.from_pretrained('Qwen/Qwen2-VL-2B-Instruct')" || echo "Pre-download skipped"
-COPY . .
-# Environment Variables
-ENV PYTHONUNBUFFERED=1
-ENV PORT=7860
-ENV TRANSFORMERS_CACHE=/.cache
-ENV HF_HOME=/.cache
-ENV EASYOCR_CACHE=/.cache
-ENV FEEDBACK_DATA_PATH=/app/data
-ENV PADDLE_DOWNLOAD_CACHE=/.cache/paddlepaddle
-# CPU Thread Optimization
-ENV OMP_NUM_THREADS=1
-ENV MKL_NUM_THREADS=1
-ENV OPENBLAS_NUM_THREADS=1
-# VLM Configuration
-ENV ENABLE_VLM=1
-ENV VLM_MODEL_ID=Qwen/Qwen2-VL-2B-Instruct
-ENV VLM_MAX_NEW_TOKENS=256
-ENV VLM_TIMEOUT_SECONDS=150
-ENV VLM_MEMORY_LIMIT_MB=12000
-# Disable Paddle (saves memory)
-ENV ENABLE_PADDLE=0
-EXPOSE 7860
-# Startup command
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", \
-     "--workers", "1", \
-     "--timeout-keep-alive", "300"]

+# ─────────────────────────────────────────────────────────────────────────────
+# Parchi OCR – Minimal CPU Edition
+# Target: Hugging Face Spaces  "CPU basic"  (2 vCPU, 16 GB RAM)
+# ─────────────────────────────────────────────────────────────────────────────
 FROM python:3.10-slim
+# ── OS deps (OpenCV headless needs libgl, libglib) ───────────────────────────
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        libgl1 \
+        libglib2.0-0 \
+        libgomp1 \
     && rm -rf /var/lib/apt/lists/*
+# ── App directory ─────────────────────────────────────────────────────────────
+WORKDIR /app
+# ── Install Python deps ───────────────────────────────────────────────────────
 COPY requirements.txt .
+RUN pip install --no-cache-dir --upgrade pip \
+ && pip install --no-cache-dir -r requirements.txt
+# ── Copy source ───────────────────────────────────────────────────────────────
+COPY app.py .
+# ── HuggingFace Spaces expects port 7860 ──────────────────────────────────────
+EXPOSE 7860
+# ── EasyOCR model cache dir (writable in HF Spaces) ──────────────────────────
+ENV EASYOCR_MODULE_PATH=/tmp/easyocr_models
+# ── CPU threading tuning (2 vCPU → 2 threads each) ───────────────────────────
+ENV OMP_NUM_THREADS=2
+ENV MKL_NUM_THREADS=2
+ENV OPENBLAS_NUM_THREADS=2
+ENV NUMEXPR_NUM_THREADS=2
+# ── Launch ────────────────────────────────────────────────────────────────────
+CMD ["python", "app.py"]