Um34ER committed on
Commit
062dcf5
·
verified ·
1 Parent(s): 4f0fc89

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +16 -10
Dockerfile CHANGED
@@ -2,13 +2,14 @@ FROM python:3.10-slim
2
 
3
  WORKDIR /app
4
 
5
- # System dependencies
6
  RUN apt-get update && apt-get install -y \
7
  libgl1 \
8
- libglx0 \
9
  libglib2.0-0 \
10
  libgomp1 \
11
  libopenblas0 \
 
 
12
  && rm -rf /var/lib/apt/lists/*
13
 
14
  # Create writable directories
@@ -25,9 +26,14 @@ RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
25
  --extra-index-url https://download.pytorch.org/whl/cpu && \
26
  pip install --no-cache-dir -r requirements.txt
27
 
 
 
 
 
 
28
  COPY . .
29
 
30
- # ── Environment Variables ─────────────────────────────────────────────────────
31
  ENV PYTHONUNBUFFERED=1
32
  ENV PORT=7860
33
  ENV TRANSFORMERS_CACHE=/.cache
@@ -36,24 +42,24 @@ ENV EASYOCR_CACHE=/.cache
36
  ENV FEEDBACK_DATA_PATH=/app/data
37
  ENV PADDLE_DOWNLOAD_CACHE=/.cache/paddlepaddle
38
 
39
- # CPU Thread Optimization (Crucial for 2-vCPU)
40
  ENV OMP_NUM_THREADS=1
41
  ENV MKL_NUM_THREADS=1
42
  ENV OPENBLAS_NUM_THREADS=1
43
 
44
- # ── VLM Optimization ──────────────────────
45
  ENV ENABLE_VLM=1
46
  ENV VLM_MODEL_ID=Qwen/Qwen2-VL-2B-Instruct
47
- ENV VLM_MAX_NEW_TOKENS=256
48
- # UPDATED: Increased timeout to 150s to avoid fallback to messy OCR
49
  ENV VLM_TIMEOUT_SECONDS=150
50
- # Self-disable VLM if RSS exceeds 12 GB
51
  ENV VLM_MEMORY_LIMIT_MB=12000
52
 
 
 
 
53
  EXPOSE 7860
54
 
55
- # ── Startup command ───────────────────────────────────────────────────────────
56
- # Increased timeout-keep-alive for the 4GB weight download
57
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", \
58
  "--workers", "1", \
59
  "--timeout-keep-alive", "300"]
 
2
 
3
  WORKDIR /app
4
 
5
+ # System dependencies (including image codecs)
6
  RUN apt-get update && apt-get install -y \
7
  libgl1 \
 
8
  libglib2.0-0 \
9
  libgomp1 \
10
  libopenblas0 \
11
+ libjpeg62-turbo \
12
+ zlib1g \
13
  && rm -rf /var/lib/apt/lists/*
14
 
15
  # Create writable directories
 
26
  --extra-index-url https://download.pytorch.org/whl/cpu && \
27
  pip install --no-cache-dir -r requirements.txt
28
 
29
+ # Pre-download Qwen2-VL model (avoids first-request timeout)
30
+ RUN python -c "from transformers import AutoModelForCausalLM, AutoProcessor; \
31
+ AutoModelForCausalLM.from_pretrained('Qwen/Qwen2-VL-2B-Instruct', device_map='cpu'); \
32
+ AutoProcessor.from_pretrained('Qwen/Qwen2-VL-2B-Instruct')" || echo "Pre-download skipped"
33
+
34
  COPY . .
35
 
36
+ # Environment Variables
37
  ENV PYTHONUNBUFFERED=1
38
  ENV PORT=7860
39
  ENV TRANSFORMERS_CACHE=/.cache
 
42
  ENV FEEDBACK_DATA_PATH=/app/data
43
  ENV PADDLE_DOWNLOAD_CACHE=/.cache/paddlepaddle
44
 
45
+ # CPU Thread Optimization
46
  ENV OMP_NUM_THREADS=1
47
  ENV MKL_NUM_THREADS=1
48
  ENV OPENBLAS_NUM_THREADS=1
49
 
50
+ # VLM Configuration
51
  ENV ENABLE_VLM=1
52
  ENV VLM_MODEL_ID=Qwen/Qwen2-VL-2B-Instruct
53
+ ENV VLM_MAX_NEW_TOKENS=256
 
54
  ENV VLM_TIMEOUT_SECONDS=150
 
55
  ENV VLM_MEMORY_LIMIT_MB=12000
56
 
57
+ # Disable Paddle (saves memory)
58
+ ENV ENABLE_PADDLE=0
59
+
60
  EXPOSE 7860
61
 
62
+ # Startup command
 
63
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", \
64
  "--workers", "1", \
65
  "--timeout-keep-alive", "300"]