vineelagampa committed on
Commit
d81e914
·
verified ·
1 Parent(s): 0697e1b

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +17 -20
Dockerfile CHANGED
@@ -1,6 +1,4 @@
1
  # syntax=docker/dockerfile:1.6
2
-
3
- # ===== Python base (3.9) =====
4
  ARG PY_BASE=python:3.9-slim-bullseye
5
  FROM ${PY_BASE}
6
 
@@ -11,47 +9,46 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
11
  OMP_NUM_THREADS=1 \
12
  TRANSFORMERS_CACHE=/cache/hf
13
 
14
- # ---- System deps (Tesseract + libs OpenCV wheels expect) ----
 
 
 
 
15
  RUN apt-get update && apt-get install -y --no-install-recommends \
16
  ca-certificates curl \
17
  tesseract-ocr tesseract-ocr-eng tesseract-ocr-osd \
18
  libgl1 libglib2.0-0 \
19
  && rm -rf /var/lib/apt/lists/*
20
 
21
- # Tesseract language data path (Debian/Ubuntu)
22
  ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00/tessdata
23
 
24
- # ---- App setup ----
25
  WORKDIR /app
26
-
27
- # Install Python deps first for better layer caching
28
  COPY requirements.txt .
29
 
30
  RUN python -m pip install --upgrade pip setuptools wheel \
31
- && pip install -r requirements.txt
32
-
33
-
34
 
 
35
  RUN pip install --no-deps \
36
  "en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl"
37
 
38
- # RUN python -m spacy validate
 
 
 
 
 
 
 
 
 
39
 
40
-
41
-
42
- # Copy the rest of your app
43
  COPY . .
44
-
45
- # Writable caches (for HF/torch/tmp)
46
  RUN mkdir -p /cache/hf /tmp && chmod -R 777 /cache /tmp
47
 
48
- # App port & healthcheck
49
  ENV PORT=8000
50
  EXPOSE 8000
51
-
52
  HEALTHCHECK --interval=30s --timeout=10s --retries=3 \
53
  CMD curl -fsS "http://127.0.0.1:${PORT}/health" || exit 1
54
 
55
- # Use $PORT when present (Render sets it), default to 8000 locally - application started
56
  CMD ["sh","-c","uvicorn backend:app --host 0.0.0.0 --port ${PORT:-8000}"]
57
-
 
1
  # syntax=docker/dockerfile:1.6
 
 
2
  # Base image is overridable at build time via PY_BASE; the default is the Python 3.9 slim image on Debian bullseye.
  ARG PY_BASE=python:3.9-slim-bullseye
3
  FROM ${PY_BASE}
4
 
 
9
  OMP_NUM_THREADS=1 \
10
  TRANSFORMERS_CACHE=/cache/hf
11
 
12
+ # Debug/cache-buster: bump DEPS_REFRESH so Spaces definitely rebuilds from this layer onward.
+ # The value is also exported into the image environment (ENV) and echoed in the build log.
13
+ ARG DEPS_REFRESH=2025-09-07-06
14
+ ENV DEPS_REFRESH=$DEPS_REFRESH
15
+ RUN echo "CACHEBUSTER=$DEPS_REFRESH"
16
+
17
  # System deps: Tesseract OCR with English and OSD data, the shared libs OpenCV wheels
  # expect (libgl1, libglib2.0-0), and curl, which the HEALTHCHECK probe uses.
  # The apt package lists are removed in the same layer so they do not persist in the image.
  RUN apt-get update && apt-get install -y --no-install-recommends \
18
  ca-certificates curl \
19
  tesseract-ocr tesseract-ocr-eng tesseract-ocr-osd \
20
  libgl1 libglib2.0-0 \
21
  && rm -rf /var/lib/apt/lists/*
22
 
 
23
  # Tesseract language data path (Debian/Ubuntu package layout).
  ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00/tessdata
24
 
 
25
  # App home: the relative COPY destinations and the launch command below resolve under /app.
  WORKDIR /app
 
 
26
  # Copy only the dependency manifest first so the dependency-install layer is reused until requirements.txt changes.
  COPY requirements.txt .
27
 
28
  RUN python -m pip install --upgrade pip setuptools wheel \
29
+ && pip install --no-cache-dir -r requirements.txt
 
 
30
 
31
+ # Install the en_core_web_sm 3.2.0 model (matches spaCy 3.2.x) WITHOUT pulling new deps (--no-deps); --no-cache-dir keeps the downloaded wheel out of the layer.
32
  RUN pip install --no-cache-dir --no-deps \
33
  "en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl"
34
 
35
+ # Quick build-time sanity print (instead of `spacy validate`): imports the core deps and prints their versions.
+ # pydantic / typing-extensions versions are read from package metadata because typing_extensions defines no `__version__` attribute.
36
+ RUN python - <<'PY'
37
+ import sys, importlib.util, importlib.metadata as md
38
+ import pydantic, spacy, thinc, typing_extensions
39
+ print("python:", sys.version.split()[0])
40
+ print("pydantic:", md.version("pydantic"))
41
+ print("typing-extensions:", md.version("typing_extensions"))
42
+ print("spacy:", spacy.__version__, "thinc:", thinc.__version__)
43
+ print("has en_core_web_sm:", importlib.util.find_spec("en_core_web_sm") is not None)
44
+ PY
45
 
 
 
 
46
  # Copy the rest of the build context into the working directory, after the dependency layers.
  COPY . .
 
 
47
  RUN mkdir -p /cache/hf /tmp && chmod -R 777 /cache /tmp
48
 
 
49
  # Default listening port; the health probe and the launch command both read PORT.
  ENV PORT=8000
50
  EXPOSE 8000
 
51
  # Probe the app's /health endpoint on the configured port every 30s (10s timeout, 3 retries); curl -f turns HTTP error statuses into a failing probe.
  HEALTHCHECK --interval=30s --timeout=10s --retries=3 \
52
  CMD curl -fsS "http://127.0.0.1:${PORT}/health" || exit 1
53
 
 
54
  CMD ["sh","-c","uvicorn backend:app --host 0.0.0.0 --port ${PORT:-8000}"]