Vaibuzzz committed on
Commit
5dda852
·
verified ·
1 Parent(s): b0a32bd

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. Dockerfile +10 -16
Dockerfile CHANGED
@@ -21,33 +21,27 @@ RUN apt-get update && apt-get install -y \
21
  RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
22
  RUN python3.10 -m pip install --no-cache-dir --upgrade pip
23
 
24
- # 3. Install Python requirements (Docling, Groq SDK, FastAPI, etc.)
25
  WORKDIR /app
26
  COPY requirements.txt /app/requirements.txt
27
  RUN python3.10 -m pip install --no-cache-dir -r requirements.txt
28
 
29
- # 4. FIX: Pre-download all Docling/RapidOCR model weights at BUILD TIME
30
- # as root so they land in a writable location.
31
- # Without this, Docling tries to download them at runtime into a
32
- # system dir where the non-root "ubuntu" user has no write permission.
33
- ENV DOCLING_ARTIFACTS_PATH=/opt/docling-models
34
- RUN python3.10 -c "from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline; StandardPdfPipeline.download_models_hf(force=True)" || true
35
 
36
- # 5. Open permissions on the model cache so the runtime user can read/update them
37
- RUN chmod -R 777 /opt/docling-models 2>/dev/null || true && \
38
- chmod -R 777 /usr/local/lib/python3.10/dist-packages/rapidocr 2>/dev/null || true
39
-
40
- # 6. Ubuntu 24.04 already has UID 1000 as user "ubuntu" — use it directly
41
  RUN chown -R ubuntu:ubuntu /app
42
  USER ubuntu
43
  ENV HOME=/home/ubuntu \
44
- PATH=/home/ubuntu/.local/bin:$PATH \
45
- DOCLING_ARTIFACTS_PATH=/opt/docling-models
46
 
47
- # 7. Copy application code
48
  COPY --chown=ubuntu . /app
49
 
50
- # 8. Block HF persistent user cache interference
51
  ENV PYTHONNOUSERSITE=1
52
 
53
  EXPOSE 7860
 
21
  RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
22
  RUN python3.10 -m pip install --no-cache-dir --upgrade pip
23
 
24
+ # 3. Install Python requirements
25
  WORKDIR /app
26
  COPY requirements.txt /app/requirements.txt
27
  RUN python3.10 -m pip install --no-cache-dir -r requirements.txt
28
 
29
+ # 4. FIX: RapidOCR downloads weights at runtime into the system Python dist-packages dir.
30
+ # By making the rapidocr directory globally writable, the non-root 'ubuntu'
31
+ # user can successfully save the weights there on the first request.
32
+ RUN mkdir -p /usr/local/lib/python3.10/dist-packages/rapidocr/models && \
33
+ chmod -R 777 /usr/local/lib/python3.10/dist-packages/rapidocr
 
34
 
35
+ # 5. Ubuntu 24.04 already has UID 1000 as user "ubuntu" — use it directly
 
 
 
 
36
  RUN chown -R ubuntu:ubuntu /app
37
  USER ubuntu
38
  ENV HOME=/home/ubuntu \
39
+ PATH=/home/ubuntu/.local/bin:$PATH
 
40
 
41
+ # 6. Copy application code
42
  COPY --chown=ubuntu . /app
43
 
44
+ # 7. Disable Python's user site-packages (~/.local) so persisted HF user-level installs cannot interfere
45
  ENV PYTHONNOUSERSITE=1
46
 
47
  EXPOSE 7860