| | # Base image: the official slim Python 3.11 image. |
| | FROM python:3.11-slim |
| |
|
| | # Interpreter, pip and apt behaviour, declared together in a single ENV instruction. |
| | ENV PYTHONUNBUFFERED=1 \ |
| | PYTHONDONTWRITEBYTECODE=1 \ |
| | PIP_NO_CACHE_DIR=1 \ |
| | PIP_DISABLE_PIP_VERSION_CHECK=1 \ |
| | DEBIAN_FRONTEND=noninteractive |
| |
|
| | # Create the `app` account with a home directory (-m) and bash as its login shell (-s). |
| | RUN useradd -m -s /bin/bash app |
| |
|
| | # All following relative paths (COPY destinations, mkdir, chmod, app.py) resolve against /app. |
| | WORKDIR /app |
| |
|
| | # Install system packages in one layer; apt caches and package lists are removed in the same RUN so they never persist in the image. |
| | RUN apt-get update && apt-get install -y --no-install-recommends \ |
| | # Compiler toolchain (build-essential already depends on gcc, g++ and make, so they are not listed separately) |
| | build-essential \ |
| | cmake \ |
| | pkg-config \ |
| | # Download and version-control tools (curl is also what HEALTHCHECK invokes) |
| | curl \ |
| | wget \ |
| | git \ |
| | # FFI and TLS development headers |
| | libffi-dev \ |
| | libssl-dev \ |
| | # Image codec development headers. |
| | # libfreetype-dev / libtiff-dev are the current package names; the older libfreetype6-dev / libtiff5-dev |
| | # are transitional aliases. NOTE(review): presumably absent on newer Debian bases - confirm against the base image in use. |
| | libjpeg-dev \ |
| | libpng-dev \ |
| | libfreetype-dev \ |
| | libtiff-dev \ |
| | libopenjp2-7-dev \ |
| | # XML and compression development headers |
| | libxml2-dev \ |
| | libxslt1-dev \ |
| | zlib1g-dev \ |
| | # OCR and PDF command-line tools |
| | tesseract-ocr \ |
| | tesseract-ocr-eng \ |
| | poppler-utils \ |
| | # SQLite CLI and development headers |
| | sqlite3 \ |
| | libsqlite3-dev \ |
| | && apt-get clean \ |
| | && rm -rf /var/lib/apt/lists/* /var/cache/apt/* |
| |
|
| | # Upgrade the packaging tools, then persist pip settings with `pip config set`, all in one layer. |
| | # no-cache-dir and disable-pip-version-check repeat what the PIP_* environment variables in this file already request. |
| | # NOTE(review): trusted-host tells pip to accept the listed hosts without valid HTTPS - confirm this is still required. |
| | RUN python -m pip install --upgrade pip setuptools wheel \ |
| | && pip config set global.trusted-host "pypi.org files.pythonhosted.org pypi.python.org" \ |
| | && pip config set global.no-cache-dir true \ |
| | && pip config set global.disable-pip-version-check true |
| |
|
| | # Copy only the dependency manifest first, so the install layer below is reused until requirements.txt changes. |
| | COPY requirements.txt . |
| |
|
| | # Install the project requirements, then the extra runtime packages that are not taken from requirements.txt. |
| | # The former trailing `pip list --outdated` was dropped: it queried the package index on every build, |
| | # changed nothing in the image, and could fail the whole RUN if the index was unreachable. |
| | # NOTE(review): --upgrade with unpinned requirements makes builds non-reproducible - consider pinning versions. |
| | RUN pip install --no-cache-dir --upgrade -r requirements.txt \ |
| | && pip install --no-cache-dir \ |
| | gunicorn \ |
| | uvloop \ |
| | psutil |
| |
|
| | # Copy the rest of the build context into the working directory (/app). |
| | COPY . . |
| |
|
| | # Create runtime directories, set permissions and hand ownership to the `app` user. |
| | # Everything runs in one RUN so the files are not rewritten in several successive layers. |
| | RUN mkdir -p \ |
| | temp logs uploads downloads cache \ |
| | /tmp/data_extractor_temp \ |
| | static \ |
| | data \ |
| | && chmod -R 755 /app \ |
| | # World-writable scratch areas. /tmp itself is intentionally excluded from the recursive 777: |
| | # that would clear its sticky bit and let any user delete other users' files there. |
| | && chmod -R 777 temp logs uploads downloads cache /tmp/data_extractor_temp \ |
| | && chmod 1777 /tmp \ |
| | && chmod -R 755 static \ |
| | # app.py and the package directories must exist in the build context; a missing path fails the build here. |
| | && chmod 755 app.py \ |
| | && chmod -R 755 config utils workflow models \ |
| | && chown -R app:app /app /tmp/data_extractor_temp |
| |
|
| | # Runtime configuration, grouped by concern and declared in a single ENV instruction. |
| | ENV \ |
| | # Import path and Gradio bind address / port |
| | PYTHONPATH=/app \ |
| | GRADIO_SERVER_NAME=0.0.0.0 \ |
| | GRADIO_SERVER_PORT=7860 \ |
| | # Matplotlib backend and config/cache directory |
| | MPLBACKEND=Agg \ |
| | MPLCONFIGDIR=/tmp/mpl_cache \ |
| | # Gradio concurrency limits |
| | GRADIO_QUEUE_DEFAULT_CONCURRENCY=10 \ |
| | GRADIO_MAX_THREADS=20 \ |
| | # WebSocket bind address / port |
| | WEBSOCKET_HOST=0.0.0.0 \ |
| | WEBSOCKET_PORT=8765 \ |
| | # Application limits (units are not stated here; presumably seconds and megabytes - verify against the app) |
| | TEMP_DIR=/tmp/data_extractor_temp \ |
| | SESSION_TIMEOUT=1800 \ |
| | MAX_FILE_SIZE_MB=50 \ |
| | # Model identifiers read by the application |
| | COORDINATOR_MODEL=gemini-2.5-pro \ |
| | DATA_EXTRACTOR_MODEL=gemini-2.5-pro \ |
| | DATA_ARRANGER_MODEL=gemini-2.5-pro \ |
| | CODE_GENERATOR_MODEL=gemini-2.5-pro \ |
| | # Interpreter hardening flags |
| | PYTHONSAFEPATH=1 \ |
| | PYTHONHASHSEED=random |
| |
|
| | # Documented ports: 7860 (matches GRADIO_SERVER_PORT) and 8765 (matches WEBSOCKET_PORT). |
| | EXPOSE 7860 8765 |
| |
|
| | # Probe the HTTP root on port 7860 every 30s after a 60s start period; three consecutive failures mark the container unhealthy. |
| | HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \ |
| | CMD curl --fail http://localhost:7860/ || exit 1 |
| | |
| | # The container process runs as root. |
| | # NOTE(review): an unprivileged `app` user is created and given ownership of /app, but it is never selected - confirm root is really needed at runtime. |
| | USER root |
| | |
| | # Generate the start script. printf '%s\n' writes each argument as one line, so the result does not |
| | # depend on whether the build shell's echo interprets "\n". The shebang line is required because CMD |
| | # uses exec form: without it the kernel cannot tell which interpreter should run the file. |
| | # Log messages are plain ASCII; the previous leading glyphs were mis-encoded and have been removed. |
| | RUN printf '%s\n' \ |
| | '#!/bin/bash' \ |
| | 'set -e' \ |
| | 'echo "Starting Data Extractor Multi-User Application..."' \ |
| | 'echo "Python version: $(python --version)"' \ |
| | 'echo "Server: 0.0.0.0:7860"' \ |
| | 'echo "Multi-user concurrency: Enabled"' \ |
| | 'echo "Session isolation: Active"' \ |
| | 'echo "Temp directory: $TEMP_DIR"' \ |
| | '' \ |
| | '# Make sure the scratch directories exist and are writable before the app starts.' \ |
| | 'mkdir -p "$TEMP_DIR"' \ |
| | 'mkdir -p /tmp/mpl_cache' \ |
| | 'chmod 777 "$TEMP_DIR" /tmp/mpl_cache' \ |
| | '' \ |
| | '# Replace the shell with the Python process.' \ |
| | 'exec python app.py' \ |
| | > /app/start.sh \ |
| | && chmod +x /app/start.sh |
| | |
| | # Default command: run the generated start script (exec form). |
| | CMD ["/app/start.sh"] |
| | |