FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies for llama.cpp compilation and other build requirements
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    git \
    unzip \
    cmake \
    make \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Compile llama.cpp from source (for the translation feature)
# This ensures compatibility with the container's architecture
# Disable CURL since we don't need it for local GGUF model inference
# Build statically linked to avoid shared library dependencies
RUN cd /tmp && \
    git clone --depth 1 --branch master https://github.com/ggerganov/llama.cpp.git && \
    cd llama.cpp && \
    mkdir build && \
    cd build && \
    cmake .. -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=OFF -DBUILD_SHARED_LIBS=OFF && \
    # Build the llama-cli target (the command-line interface we need)
    cmake --build . --config Release --target llama-cli -j$(nproc) && \
    # The binary location varies between llama.cpp versions, so probe the known paths
    if [ -f bin/llama-cli ]; then \
        cp bin/llama-cli /usr/local/bin/llama-main; \
    elif [ -f llama-cli ]; then \
        cp llama-cli /usr/local/bin/llama-main; \
    elif [ -f bin/main ]; then \
        cp bin/main /usr/local/bin/llama-main; \
    else \
        echo "Error: Could not find llama-cli binary. Available files in build/bin:"; \
        ls -la bin/ 2>/dev/null || echo "bin/ directory not found"; \
        echo "Available files in build/:"; \
        find . -name "*llama*" -o -name "main" 2>/dev/null | head -20; \
        exit 1; \
    fi && \
    chmod 755 /usr/local/bin/llama-main && \
    # Also copy the shared library if the build produced one (fallback)
    if [ -f libllama.so ]; then \
        cp libllama.so /usr/local/lib/ && \
        ldconfig; \
    fi && \
    cd / && \
    rm -rf /tmp/llama.cpp && \
    echo "✅ llama.cpp compiled and installed to /usr/local/bin/llama-main"

# Create a non-root user to avoid running as root
RUN useradd -m -u 1000 user
USER user

# Set environment variables for the Hugging Face and Torch cache paths
ENV HOME=/home/user \
    PATH="/home/user/.local/bin:$PATH" \
    HF_HOME=/home/user/.cache/huggingface \
    TRANSFORMERS_CACHE=/home/user/.cache/huggingface/transformers \
    TORCH_HOME=/home/user/.cache/torch

# Set environment variables for performance optimization
ENV TORCH_COMPILE_DISABLE=1 \
    TORCHDYNAMO_DISABLE=1 \
    TF_ENABLE_ONEDNN_OPTS=0 \
    TF_CPP_MIN_LOG_LEVEL=3 \
    TOKENIZERS_PARALLELISM=false \
    OMP_NUM_THREADS=1

# Create cache directories with proper permissions
RUN mkdir -p /home/user/.cache/huggingface/transformers \
    && mkdir -p /home/user/.cache/torch \
    && mkdir -p /tmp/uploads

# Copy requirements first for better Docker layer caching
COPY --chown=user requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir --user -r requirements.txt

# Copy the application code
COPY --chown=user . .

# Expose the port that HF Spaces expects
EXPOSE 7860

# Set the default command to run the Flask app
CMD ["python", "app.py"]
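
# Local build-and-run sketch for testing outside HF Spaces (the image tag
# "pdf-app" below is a placeholder, not a name taken from this repo):
#
#   docker build -t pdf-app .
#   docker run --rm -p 7860:7860 pdf-app
#
# The Flask app should then be reachable at http://localhost:7860, matching
# the port exposed above. To sanity-check the compiled llama.cpp binary
# inside the container:
#
#   docker run --rm pdf-app llama-main --help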