Um34ER committed on
Commit
4c6b4ee
·
verified ·
1 Parent(s): 99cc3b1

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +52 -23
Dockerfile CHANGED
@@ -11,37 +11,66 @@ RUN apt-get update && apt-get install -y \
11
  libopenblas0 \
12
  && rm -rf /var/lib/apt/lists/*
13
 
14
- # Create writable directories for Feedback, Cache, and Logs
15
- RUN mkdir -p /app/data /app/logs /.cache /.cache/paddlepaddle && chmod -R 777 /app/data /app/logs /.cache /.cache/paddlepaddle
 
16
 
17
- # Copy requirements first
18
  COPY requirements.txt .
19
 
20
- # Upgrade pip and install requirements with optimizations
 
 
21
  RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
 
 
 
 
22
  pip install --no-cache-dir -r requirements.txt
23
 
24
  # Copy the rest of the application
25
  COPY . .
26
 
27
- # Environment Variables (CPU optimization for 2GB RAM)
28
- ENV PYTHONUNBUFFERED=1 \
29
- PORT=7860 \
30
- TRANSFORMERS_CACHE=/.cache \
31
- EASYOCR_CACHE=/.cache \
32
- FEEDBACK_DATA_PATH=/app/data \
33
- PADDLE_DOWNLOAD_CACHE=/.cache/paddlepaddle \
34
- PADDLE_HOME=/.cache/paddlepaddle \
35
- PADDLE_INFERENCE_MODEL_CACHE=/.cache/paddlepaddle/models \
36
- OMP_NUM_THREADS=1 \
37
- OPENBLAS_NUM_THREADS=1 \
38
- MKL_NUM_THREADS=1 \
39
- NUMEXPR_NUM_THREADS=1 \
40
- DISABLE_TQDM=1 \
41
- HF_HUB_DISABLE_PROGRESS_BARS=1
42
-
43
- # Hugging Face default port
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  EXPOSE 7860
45
 
46
- # Fixed: Removed --timeout-notify
47
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--timeout-keep-alive", "75"]
 
 
 
 
 
 
11
  libopenblas0 \
12
  && rm -rf /var/lib/apt/lists/*
13
 
14
+ # Create writable directories for model cache, feedback, and logs
15
+ RUN mkdir -p /app/data /app/logs /.cache /.cache/paddlepaddle /.cache/huggingface \
16
+ && chmod -R 777 /app/data /app/logs /.cache
17
 
18
+ # Copy requirements first (enables Docker layer caching)
19
  COPY requirements.txt .
20
 
21
+ # Install PyTorch CPU wheel FIRST (needs --extra-index-url),
22
+ # then install everything else from requirements.txt.
23
+ # Splitting into two pip calls avoids conflict resolution issues.
24
  RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
25
+ pip install --no-cache-dir \
26
+ torch==2.4.0+cpu \
27
+ torchvision==0.19.0+cpu \
28
+ --extra-index-url https://download.pytorch.org/whl/cpu && \
29
  pip install --no-cache-dir -r requirements.txt
30
 
31
  # Copy the rest of the application
32
  COPY . .
33
 
34
+ # ── Environment Variables ─────────────────────────────────────────────────────
35
+ ENV PYTHONUNBUFFERED=1
36
+ ENV PORT=7860
37
+
38
+ # Shared cache dir for HF hub weights, EasyOCR models, PaddleOCR models
39
+ ENV TRANSFORMERS_CACHE=/.cache
40
+ ENV HF_HOME=/.cache
41
+ ENV EASYOCR_CACHE=/.cache
42
+ ENV FEEDBACK_DATA_PATH=/app/data
43
+ ENV PADDLE_DOWNLOAD_CACHE=/.cache/paddlepaddle
44
+ ENV PADDLE_HOME=/.cache/paddlepaddle
45
+ ENV PADDLE_INFERENCE_MODEL_CACHE=/.cache/paddlepaddle/models
46
+
47
+ # CPU thread caps (2 vCPU HF Spaces Basic tier)
48
+ ENV OMP_NUM_THREADS=1
49
+ ENV OPENBLAS_NUM_THREADS=1
50
+ ENV MKL_NUM_THREADS=1
51
+ ENV NUMEXPR_NUM_THREADS=1
52
+
53
+ # Suppress verbose download progress in container logs
54
+ ENV DISABLE_TQDM=1
55
+ ENV HF_HUB_DISABLE_PROGRESS_BARS=1
56
+
57
+ # ── VLM Defaults (override via HF Space Secrets/Env UI) ──────────────────────
58
+ # Set ENABLE_VLM=0 to run EasyOCR+Paddle only (~700 MB RAM, no torch needed)
59
+ ENV ENABLE_VLM=1
60
+ ENV VLM_MODEL_ID=Qwen/Qwen2-VL-2B-Instruct
61
+ ENV VLM_MAX_NEW_TOKENS=512
62
+ # Abort VLM inference if it exceeds 60 s (falls back to EasyOCR)
63
+ ENV VLM_TIMEOUT_SECONDS=60
64
+ # Self-disable VLM if process RSS exceeds 12 GB (leaves 4 GB for OS/cache)
65
+ ENV VLM_MEMORY_LIMIT_MB=12000
66
+
67
+ # Hugging Face Spaces default port
68
  EXPOSE 7860
69
 
70
+ # ── Startup command ───────────────────────────────────────────────────────────
71
+ # workers=1: Qwen2-VL fp32 uses ~8 GB; two workers would OOM on 16 GB.
72
+ # timeout-keep-alive=120: idle keep-alive connections are closed after 120 s
73
+ # (not a request timeout). No --timeout-notify: uvicorn's CLI rejects that flag.
74
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", \
75
+ "--workers", "1", \
76
+ "--timeout-keep-alive", "120"]