File size: 3,740 Bytes
cf58b83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Base image: Ubuntu 22.04.
FROM ubuntu:22.04

# Silence debconf prompts during package installation. Declared as a build
# argument rather than ENV so it applies to the RUN steps of this build but is
# not baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Install the OS packages for Paperless-ngx together with the bundled services
# (Redis, PostgreSQL) and supervisor, which is used for process management.
# The apt package index is removed in the same RUN that downloads it: deleting
# it in a later RUN would leave it in this layer and not shrink the image.
# Packages are listed one per line in alphabetical order to keep diffs small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        default-libmysqlclient-dev \
        fonts-liberation \
        ghostscript \
        gnupg \
        icc-profiles-free \
        imagemagick \
        liblept5 \
        libmagic-dev \
        libpq-dev \
        libxml2 \
        libzbar0 \
        nano \
        pkg-config \
        pngquant \
        poppler-utils \
        postgresql \
        postgresql-client \
        python3 \
        python3-dev \
        python3-pip \
        python3-setuptools \
        python3-wheel \
        qpdf \
        redis-server \
        supervisor \
        tesseract-ocr \
        tesseract-ocr-eng \
        unpaper \
        zlib1g && \
    rm -rf /var/lib/apt/lists/*

# Dedicated system account and matching group for Paperless-ngx. Its home
# directory, /opt/paperless, is also the working directory for later steps.
RUN adduser --system --group --home /opt/paperless paperless
WORKDIR /opt/paperless

# Fetch the Paperless-ngx sources into the working directory.
# PAPERLESS_VERSION pins the release tag that is checked out; pass
# --build-arg PAPERLESS_VERSION=<tag> to build a different release.
ARG PAPERLESS_VERSION=v2.17.1
# git is installed without recommended packages, so ca-certificates (needed
# for the HTTPS clone) is requested explicitly. The apt index is removed in
# the same layer, and a shallow clone of the tag avoids storing the whole
# repository history in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates git && \
    rm -rf /var/lib/apt/lists/* && \
    git clone --depth 1 --branch "${PAPERLESS_VERSION}" \
        https://github.com/paperless-ngx/paperless-ngx.git .

# Build frontend (if cloning from git)
# Creates a Python virtual environment in ./venv and activates it, installs
# yarn globally through npm, runs yarn install and yarn build against
# src/paperless_frontend, then deactivates the environment.
# NOTE(review): no apt-get step visible in this file installs npm/nodejs or
# python3-venv, so this RUN looks like it cannot succeed as written - confirm.
# NOTE(review): verify that src/paperless_frontend exists in the checked-out
# release and that its frontend is meant to be built with yarn - TODO confirm.
# NOTE(review): the activation only lasts for this RUN; every later RUN starts
# a new shell, so subsequent instructions do not run inside ./venv.
RUN python3 -m venv venv && . venv/bin/activate && \
    npm install -g yarn && \
    yarn install --cwd src/paperless_frontend && \
    yarn build --cwd src/paperless_frontend && \
    deactivate

# Install the Python dependencies listed in the requirements.txt supplied by
# the build context (copied into the working directory first).
# --no-cache-dir keeps pip's download cache out of the image layer.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# --- Configure internal Redis and PostgreSQL ---
# These steps are highly simplified. You'd need to properly configure
# PostgreSQL for a non-root user and ensure it starts correctly.
# For Redis, it's usually simpler, but still needs to be started correctly.

# Configure PostgreSQL (simple example, needs hardening for production).
# The server is started only for the duration of this RUN so the paperless
# role and database can be created, then it is stopped again.
# runuser (util-linux) switches to the postgres account: sudo is not installed
# by this Dockerfile and is unnecessary because RUN already executes as root.
# NOTE(review): the password is a placeholder stored in an image layer; it has
# to match PAPERLESS_DBPASS and should be replaced before any real use.
RUN service postgresql start && \
    runuser -u postgres -- psql -c "CREATE USER paperless WITH PASSWORD 'your_db_password';" && \
    runuser -u postgres -- psql -c "CREATE DATABASE paperless OWNER paperless;" && \
    service postgresql stop

# Create the Paperless-ngx working directories (relative to WORKDIR) and hand
# them to the paperless account. Both commands share one RUN so creating the
# directories and fixing their ownership form a single layer.
RUN mkdir -p media data consume export && \
    chown -R paperless:paperless media data consume export

# --- Paperless-ngx Configuration ---
# Runtime settings that Paperless-ngx reads from the environment. They could
# also come from a paperless.conf file, but env vars are easier in Docker.
# Dockerfiles have no trailing comments: a '#' placed after an ENV value is
# parsed as another argument and breaks the build, so every note below sits on
# its own line.

# Connections to the Redis and PostgreSQL instances on localhost.
ENV PAPERLESS_REDIS="redis://localhost:6379/0" \
    PAPERLESS_DBENGINE="postgresql" \
    PAPERLESS_DBHOST="localhost" \
    PAPERLESS_DBNAME="paperless" \
    PAPERLESS_DBUSER="paperless"

# NOTE(review): placeholder credentials. Values set with ENV are stored in the
# image metadata. PAPERLESS_DBPASS has to match the password given to the
# paperless database role, and PAPERLESS_SECRET_KEY is IMPORTANT: generate it
# securely and replace this value before any real deployment.
ENV PAPERLESS_DBPASS="your_db_password" \
    PAPERLESS_SECRET_KEY="a_very_long_and_random_string_for_huggingface"

# Locations of the consume, data and media directories under /opt/paperless.
ENV PAPERLESS_CONSUMPTION_DIR="/opt/paperless/consume" \
    PAPERLESS_DATA_DIR="/opt/paperless/data" \
    PAPERLESS_MEDIA_ROOT="/opt/paperless/media"

# Web server: port 7860 is crucial for Hugging Face Spaces, 0.0.0.0 binds to
# all interfaces, and a single worker saves memory on Hugging Face Spaces.
ENV PAPERLESS_PORT="7860" \
    PAPERLESS_BIND_ADDR="0.0.0.0" \
    PAPERLESS_WEBSERVER_WORKERS=1

# Optional: switch off or limit some resource-intensive features so the
# container copes better on less powerful devices.
ENV PAPERLESS_OCR_PAGES=1 \
    PAPERLESS_ENABLE_NLTK=false \
    PAPERLESS_OCR_CLEAN="none"

# --- Database Migrations and Superuser Creation ---
# This needs to run AFTER the database is up and configured.
# We'll put it in a startup script for supervisord.

# --- Supervisord Configuration ---
# Port published for Hugging Face Spaces.
EXPOSE 7860

# Supervisor configuration supplied by the build context.
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Run supervisord in the foreground (-n) as the container's main process; it
# is responsible for starting all services.
# NOTE(review): -c points at /etc/supervisor/supervisord.conf while the file
# above is copied into conf.d/ - presumably the packaged main config includes
# conf.d/*.conf; confirm.
CMD ["/usr/bin/supervisord", "-n", "-c", "/etc/supervisor/supervisord.conf"]