# Single-container Paperless-ngx image: the web application, Redis and
# PostgreSQL are all started by supervisord, and the web server listens on
# port 7860 (the port Hugging Face Spaces expects).
#
# The build context must provide: requirements.txt and supervisord.conf.

# Start with a base image
FROM ubuntu:22.04

# Non-interactive apt during the build only. Declared as ARG (not ENV) so the
# setting does not leak into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Install core dependencies for Paperless-ngx, plus:
#   - redis-server / postgresql / supervisor: in-container services and the
#     process manager that starts them
#   - git + ca-certificates: needed to clone the sources over HTTPS
#   - nodejs / npm / python3-venv: needed by the frontend build step below
# The apt lists are removed in the same layer; deleting them in a later RUN
# would not make the image any smaller.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        default-libmysqlclient-dev \
        fonts-liberation \
        ghostscript \
        git \
        gnupg \
        icc-profiles-free \
        imagemagick \
        liblept5 \
        libmagic-dev \
        libpq-dev \
        libxml2 \
        libzbar0 \
        nano \
        nodejs \
        npm \
        pkg-config \
        pngquant \
        poppler-utils \
        postgresql \
        postgresql-client \
        python3 \
        python3-dev \
        python3-pip \
        python3-setuptools \
        python3-venv \
        python3-wheel \
        qpdf \
        redis-server \
        supervisor \
        tesseract-ocr \
        tesseract-ocr-eng \
        unpaper \
        zlib1g \
    && rm -rf /var/lib/apt/lists/*

# Create user and directories for Paperless-ngx
RUN adduser --system --home /opt/paperless --group paperless
WORKDIR /opt/paperless

# Clone Paperless-ngx (or copy if you have it locally).
# Using a specific release is recommended for stability; override with
#   docker build --build-arg PAPERLESS_VERSION=<tag> .
# A shallow clone of just that tag keeps the layer small.
ARG PAPERLESS_VERSION=v2.17.1
RUN git clone --depth 1 --branch "${PAPERLESS_VERSION}" \
        https://github.com/paperless-ngx/paperless-ngx.git .

# Build frontend (if cloning from git).
# NOTE(review): confirm that src/paperless_frontend exists at the checked-out
# tag and that the distribution's Node.js is recent enough for the build.
# NOTE(review): the virtualenv created here is only active for this step; the
# pip3 install below targets the system interpreter, not this venv. Confirm
# whether supervisord.conf relies on /opt/paperless/venv.
RUN python3 -m venv venv \
    && . venv/bin/activate \
    && npm install -g yarn \
    && yarn install --cwd src/paperless_frontend \
    && yarn build --cwd src/paperless_frontend \
    && deactivate

# Install Python dependencies (no pip cache left behind in the layer)
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# --- Configure internal Redis and PostgreSQL ---
# These steps are highly simplified. You'd need to properly configure
# PostgreSQL for a non-root user and ensure it starts correctly.
# For Redis, it's usually simpler, but still needs to be started correctly.

# Configure PostgreSQL (simple example, needs hardening for production).
# runuser is used instead of sudo, which is not installed in this image.
# The password here must match PAPERLESS_DBPASS below.
RUN service postgresql start \
    && runuser -u postgres -- psql -c "CREATE USER paperless WITH PASSWORD 'your_db_password';" \
    && runuser -u postgres -- psql -c "CREATE DATABASE paperless OWNER paperless;" \
    && service postgresql stop

# Create Paperless-ngx specific directories and hand them to the service user
RUN mkdir -p media data consume export \
    && chown -R paperless:paperless media data consume export

# --- Paperless-ngx Configuration ---
# Set environment variables that Paperless-ngx will read.
# These can also be read from a paperless.conf file, but env vars are easier
# in Docker.

# Connections to the in-container Redis and PostgreSQL instances
ENV PAPERLESS_REDIS="redis://localhost:6379/0" \
    PAPERLESS_DBENGINE="postgresql" \
    PAPERLESS_DBHOST="localhost" \
    PAPERLESS_DBNAME="paperless" \
    PAPERLESS_DBUSER="paperless"

# WARNING: placeholder credentials. Values set with ENV are stored in the
# image and visible to anyone who can pull it.
# IMPORTANT: generate PAPERLESS_SECRET_KEY securely and override it at
# runtime (for example with a Space secret or `docker run -e`).
ENV PAPERLESS_DBPASS="your_db_password" \
    PAPERLESS_SECRET_KEY="a_very_long_and_random_string_for_huggingface"

# Storage locations (created above under /opt/paperless)
ENV PAPERLESS_CONSUMPTION_DIR="/opt/paperless/consume" \
    PAPERLESS_DATA_DIR="/opt/paperless/data" \
    PAPERLESS_MEDIA_ROOT="/opt/paperless/media"

# Web server: port 7860 is crucial for Hugging Face Spaces, 0.0.0.0 binds to
# all interfaces, and a single worker saves memory on Spaces.
ENV PAPERLESS_PORT="7860" \
    PAPERLESS_BIND_ADDR="0.0.0.0" \
    PAPERLESS_WEBSERVER_WORKERS=1

# Optional: disable some resource-intensive features for less powerful devices
ENV PAPERLESS_OCR_PAGES=1 \
    PAPERLESS_ENABLE_NLTK=false \
    PAPERLESS_OCR_CLEAN="none"

# --- Database Migrations and Superuser Creation ---
# This needs to run AFTER the database is up and configured.
# We'll put it in a startup script for supervisord.

# --- Supervisord Configuration ---
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Exposed port for Hugging Face Spaces
EXPOSE 7860

# Command to run supervisord in the foreground (-n), which will start all
# services.
# NOTE(review): no USER directive is set, so supervisord starts as root;
# confirm that supervisord.conf assigns an unprivileged user to each program.
CMD ["/usr/bin/supervisord", "-n", "-c", "/etc/supervisor/supervisord.conf"]