# Image for the Streamlit application in src/streamlit_app.py.
#
# Every cache/config/temp location used by the ML stack is redirected to
# /tmp/docling_temp so the process never has to write to $HOME or the root
# filesystem (e.g. /.cache). The container listens on port 8501 and is started
# through /app/start.sh.
FROM python:3.9-slim

WORKDIR /app

# --- Runtime environment ----------------------------------------------------
# Home and temp directories: all point at the single writable scratch root.
ENV TEMP_DIR=/tmp/docling_temp \
    HOME=/tmp/docling_temp \
    USERPROFILE=/tmp/docling_temp \
    TMPDIR=/tmp/docling_temp \
    TEMP=/tmp/docling_temp \
    TMP=/tmp/docling_temp

# Hugging Face caches - these keep the libraries away from /.cache.
ENV HF_HOME=/tmp/docling_temp/huggingface \
    HF_CACHE_HOME=/tmp/docling_temp/huggingface_cache \
    HF_HUB_CACHE=/tmp/docling_temp/huggingface_cache \
    TRANSFORMERS_CACHE=/tmp/docling_temp/transformers_cache \
    HF_DATASETS_CACHE=/tmp/docling_temp/datasets_cache \
    DIFFUSERS_CACHE=/tmp/docling_temp/diffusers_cache \
    ACCELERATE_CACHE=/tmp/docling_temp/accelerate_cache

# Hugging Face Hub behaviour switches.
ENV HF_HUB_DISABLE_TELEMETRY=1 \
    HF_HUB_DISABLE_IMPLICIT_TOKEN=1 \
    HF_HUB_OFFLINE=0

# Other ML libraries.
ENV TORCH_HOME=/tmp/docling_temp/torch \
    TENSORFLOW_HOME=/tmp/docling_temp/tensorflow \
    KERAS_HOME=/tmp/docling_temp/keras

# XDG base directories.
ENV XDG_CACHE_HOME=/tmp/docling_temp/cache \
    XDG_CONFIG_HOME=/tmp/docling_temp/config \
    XDG_DATA_HOME=/tmp/docling_temp/data

# EasyOCR model storage.
ENV EASYOCR_MODULE_PATH=/tmp/docling_temp/easyocr_models

# Generic cache/model/data locations.
ENV CACHE_DIR=/tmp/docling_temp/cache \
    MODEL_CACHE_DIR=/tmp/docling_temp/models \
    CACHE=/tmp/docling_temp/cache \
    MODELS=/tmp/docling_temp/models \
    DATA=/tmp/docling_temp/data \
    CONFIG=/tmp/docling_temp/config

# NOTE(review): PYTHONPATH points at a world-writable directory, so anything
# dropped there becomes importable. Kept as-is for compatibility; confirm the
# application actually needs it.
ENV PYTHONPATH=/tmp/docling_temp

# --- OS packages -------------------------------------------------------------
# curl is required by the HEALTHCHECK below. build-essential and git are kept
# for pip packages that compile or are fetched from git. ca-certificates is
# listed explicitly because --no-install-recommends no longer pulls it in
# implicitly. software-properties-common is not installed: nothing in this
# file calls add-apt-repository.
# NOTE(review): that package is reportedly absent from newer Debian releases
# that the floating -slim tag can resolve to - confirm.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# --- Unprivileged runtime user -------------------------------------------------
# The redirection above exists so the process never needs to write outside
# /tmp/docling_temp, so there is no reason to run it as root.
# NOTE(review): UID/GID 1000 is an arbitrary stable choice - confirm it suits
# the deployment target.
RUN groupadd --gid 1000 app \
    && useradd --uid 1000 --gid 1000 --no-create-home \
        --home-dir /tmp/docling_temp --shell /usr/sbin/nologin app

# --- Writable directories ------------------------------------------------------
# The paths mirror the ENV values above so that every cache variable points at
# a directory that already exists. Mode 1777 (world-writable + sticky bit, the
# same as /tmp) lets any runtime UID create files without being able to remove
# files owned by another user.
RUN mkdir -p \
        /app/.streamlit \
        /tmp/docling_temp/accelerate_cache \
        /tmp/docling_temp/cache \
        /tmp/docling_temp/config \
        /tmp/docling_temp/data \
        /tmp/docling_temp/datasets_cache \
        /tmp/docling_temp/diffusers_cache \
        /tmp/docling_temp/easyocr_models \
        /tmp/docling_temp/huggingface \
        /tmp/docling_temp/huggingface_cache \
        /tmp/docling_temp/keras \
        /tmp/docling_temp/models \
        /tmp/docling_temp/tensorflow \
        /tmp/docling_temp/torch \
        /tmp/docling_temp/transformers_cache \
    && chmod 755 /app/.streamlit \
    && find /tmp/docling_temp -type d -exec chmod 1777 {} +

# --- Streamlit configuration ---------------------------------------------------
# Generated here instead of being copied from the build context. printf writes
# one argument per line; an empty argument produces a blank line.
# NOTE(review): CORS and XSRF protection are switched off - confirm that this
# is intended for the environment the app is served from.
RUN printf '%s\n' \
        '[global]' \
        'developmentMode = false' \
        '' \
        '[server]' \
        'fileWatcherType = "none"' \
        'headless = true' \
        'enableCORS = false' \
        'enableXsrfProtection = false' \
        '' \
        '[browser]' \
        'gatherUsageStats = false' \
        'serverAddress = "0.0.0.0"' \
        'serverPort = 8501' \
        '' \
        '[theme]' \
        'primaryColor = "#1f77b4"' \
        'backgroundColor = "#ffffff"' \
        'secondaryBackgroundColor = "#f0f2f6"' \
        'textColor = "#262730"' \
        > /app/.streamlit/config.toml

# --- Startup script --------------------------------------------------------------
# Re-exports the environment (so the values hold even if the runtime overrides
# them), runs test_permissions.py, and only then replaces itself with Streamlit
# via exec so that Streamlit receives container signals directly.
# The single quotes keep $VARS literal at build time; they expand at run time.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'export TEMP_DIR=/tmp/docling_temp' \
        'export HOME=/tmp/docling_temp' \
        'export USERPROFILE=/tmp/docling_temp' \
        'export TMPDIR=/tmp/docling_temp' \
        'export TEMP=/tmp/docling_temp' \
        'export TMP=/tmp/docling_temp' \
        'export HF_HOME=/tmp/docling_temp/huggingface' \
        'export HF_CACHE_HOME=/tmp/docling_temp/huggingface_cache' \
        'export HF_HUB_CACHE=/tmp/docling_temp/huggingface_cache' \
        'export TRANSFORMERS_CACHE=/tmp/docling_temp/transformers_cache' \
        'export HF_DATASETS_CACHE=/tmp/docling_temp/datasets_cache' \
        'export DIFFUSERS_CACHE=/tmp/docling_temp/diffusers_cache' \
        'export ACCELERATE_CACHE=/tmp/docling_temp/accelerate_cache' \
        'export HF_HUB_DISABLE_TELEMETRY=1' \
        'export HF_HUB_DISABLE_IMPLICIT_TOKEN=1' \
        'export HF_HUB_OFFLINE=0' \
        'export TORCH_HOME=/tmp/docling_temp/torch' \
        'export TENSORFLOW_HOME=/tmp/docling_temp/tensorflow' \
        'export KERAS_HOME=/tmp/docling_temp/keras' \
        'export XDG_CACHE_HOME=/tmp/docling_temp/cache' \
        'export XDG_CONFIG_HOME=/tmp/docling_temp/config' \
        'export XDG_DATA_HOME=/tmp/docling_temp/data' \
        'export EASYOCR_MODULE_PATH=/tmp/docling_temp/easyocr_models' \
        'export CACHE_DIR=/tmp/docling_temp/cache' \
        'export MODEL_CACHE_DIR=/tmp/docling_temp/models' \
        'export CACHE=/tmp/docling_temp/cache' \
        'export MODELS=/tmp/docling_temp/models' \
        'export DATA=/tmp/docling_temp/data' \
        'export CONFIG=/tmp/docling_temp/config' \
        'export PYTHONPATH=/tmp/docling_temp' \
        'echo "Environment variables set for Hugging Face Hub cache directories"' \
        'echo "HF_HUB_CACHE: $HF_HUB_CACHE"' \
        'echo "HF_CACHE_HOME: $HF_CACHE_HOME"' \
        'echo "TEMP_DIR: $TEMP_DIR"' \
        'echo "Running environment test..."' \
        'if python test_permissions.py; then' \
        '    echo "Environment test passed, starting Streamlit app..."' \
        '    exec streamlit run src/streamlit_app.py --server.port=8501 --server.address=0.0.0.0' \
        'else' \
        '    echo "Environment test failed, exiting..."' \
        '    exit 1' \
        'fi' \
        > /app/start.sh \
    && chmod +x /app/start.sh

# --- Application -------------------------------------------------------------------
# NOTE(review): if requirements.txt does not reference the local project
# (no "." / "-e ." entry), move the pip install above "COPY src/" so that
# source-only changes stop reinstalling every dependency.
COPY requirements.txt pyproject.toml README.md test_permissions.py ./
COPY src/ ./src/

# --no-cache-dir keeps pip (running as root here) from leaving a root-owned
# cache under XDG_CACHE_HOME, which the unprivileged runtime user must be able
# to write to, and avoids shipping the download cache in the image.
RUN pip3 install --no-cache-dir -r requirements.txt

# Everything that needs root is done; drop privileges for runtime.
USER app

EXPOSE 8501

# Probes /_stcore/health on port 8501. "|| exit 1" normalises curl's various
# failure codes to the single "unhealthy" status Docker defines. The start
# period covers the pre-flight test that start.sh runs before Streamlit is up.
HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
    CMD curl --fail http://localhost:8501/_stcore/health || exit 1

ENTRYPOINT ["/app/start.sh"]