dembasowmr committed on
Commit
df27f9e
·
1 Parent(s): 67e6481

Fix: Run apt-get as root before switching to user in Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +17 -14
Dockerfile CHANGED
@@ -3,41 +3,44 @@
3
  # Use a base image with Python installed. Python 3.10 is fine.
4
  FROM python:3.10-slim-buster
5
 
6
- # Create the user and set it for subsequent commands
7
- RUN useradd -m -u 1000 user
8
- USER user
9
-
10
  # Set the working directory inside the container
11
  WORKDIR /app
12
 
13
- # Set the Hugging Face cache directory to a path where the 'user' has write permissions.
14
- # This prevents permission issues with /root/.cache and similar system paths.
15
- ENV HF_HOME=/app/huggingface_cache
16
-
17
- # Create the cache directory and ensure the 'user' owns it
18
- RUN mkdir -p $HF_HOME && chown user:user $HF_HOME
19
-
20
  # Install system dependencies needed for pdf2image (Poppler) and pytesseract (Tesseract)
 
 
21
  RUN apt-get update && apt-get install -y \
22
  libpoppler-dev \
23
  tesseract-ocr \
24
  tesseract-ocr-eng \
25
  tesseract-ocr-tur \
26
- tesseract-ocr-fra \
27
  tesseract-ocr-ara \
28
- # Add other languages if needed, e.g., tesseract-ocr-ara tesseract-ocr-fra
 
29
  && rm -rf /var/lib/apt/lists/*
30
 
 
 
 
 
 
 
 
 
 
 
 
31
  # Copy requirements.txt and install Python dependencies
 
32
  COPY --chown=user:user requirements.txt .
33
  RUN pip install --no-cache-dir -r requirements.txt
34
 
35
  # Clean up any old Hugging Face Hub lock files before copying the application code.
36
- # This helps prevent PermissionErrors if a previous download attempt was interrupted.
37
  RUN rm -rf "${HF_HOME}/hub/tmp"
38
  RUN find "${HF_HOME}/hub/models--" -name "*.lock" -type f -delete || true
39
 
40
  # Copy the rest of your application code
 
41
  COPY --chown=user:user . /app
42
 
43
  # Set environment variables for Tesseract and potentially for Python's sqlite3 module
 
3
  # Use a base image with Python installed. Python 3.10 is fine.
4
  FROM python:3.10-slim-buster
5
 
 
 
 
 
6
  # Set the working directory inside the container
7
  WORKDIR /app
8
 
 
 
 
 
 
 
 
9
  # Install system dependencies needed for pdf2image (Poppler) and pytesseract (Tesseract)
10
+ # These RUN commands need to be executed as root.
11
+ # Added Arabic and French Tesseract language packs as per your request.
12
  RUN apt-get update && apt-get install -y \
13
  libpoppler-dev \
14
  tesseract-ocr \
15
  tesseract-ocr-eng \
16
  tesseract-ocr-tur \
 
17
  tesseract-ocr-ara \
18
+ tesseract-ocr-fra \
19
+ # Add other languages if needed
20
  && rm -rf /var/lib/apt/lists/*
21
 
22
+ # Now, create the user and switch to it for non-root operations.
23
+ # All subsequent commands (COPY, RUN for pip, CMD) will be executed as 'user'.
24
+ RUN useradd -m -u 1000 user
25
+ USER user
26
+
27
+ # Set the Hugging Face cache directory to a path where the 'user' has write permissions.
28
+ ENV HF_HOME=/app/huggingface_cache
29
+
30
+ # Create the cache directory and ensure the 'user' owns it
31
+ RUN mkdir -p $HF_HOME && chown user:user $HF_HOME
32
+
33
  # Copy requirements.txt and install Python dependencies
34
+ # Ensure the user has ownership of copied files.
35
  COPY --chown=user:user requirements.txt .
36
  RUN pip install --no-cache-dir -r requirements.txt
37
 
38
  # Clean up any old Hugging Face Hub lock files before copying the application code.
 
39
  RUN rm -rf "${HF_HOME}/hub/tmp"
40
  RUN find "${HF_HOME}/hub/models--" -name "*.lock" -type f -delete || true
41
 
42
  # Copy the rest of your application code
43
+ # Ensure the user has ownership of copied files.
44
  COPY --chown=user:user . /app
45
 
46
  # Set environment variables for Tesseract and potentially for Python's sqlite3 module