|
|
# ---------------------------------------------------------------------------
# Stage "builder"
# Holds the compiler toolchain and is where Python packages get installed
# into /install. The runtime stage copies /install out of this stage, so
# nothing installed by apt here reaches the final images.
# ---------------------------------------------------------------------------
FROM public.ecr.aws/docker/library/python:3.12.13-slim-trixie AS builder

# Build tooling (presumably needed for Python packages that ship without a
# prebuilt wheel - confirm against requirements_lightweight.txt).
# update/upgrade/install run in one layer so the package index is never stale,
# and the apt lists are removed in the same layer that created them.
# pip is upgraded with --no-cache-dir like every other pip call in this file.
RUN apt-get update \
    && apt-get upgrade -y \
    && apt-get install -y --no-install-recommends \
        cmake \
        g++ \
        git \
        libcurl4-openssl-dev \
        make \
        unzip \
    && pip install --no-cache-dir --upgrade pip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
|
|
|
# Install the baseline requirements into /install rather than the builder's
# own site-packages. Only the requirements file is copied, so this layer is
# rebuilt only when that file changes; the file is deleted again once used.
WORKDIR /src
COPY requirements_lightweight.txt ./
RUN pip install \
        --verbose \
        --no-cache-dir \
        --target=/install \
        --requirement requirements_lightweight.txt \
    && rm requirements_lightweight.txt
|
|
|
# Optional feature: Gradio with the "mcp" extra.
# Enabled only when the build arg is exactly "True"; any other value leaves
# /install untouched and the step succeeds as a no-op.
ARG INSTALL_GRADIO_MCP=False
ENV INSTALL_GRADIO_MCP=${INSTALL_GRADIO_MCP}
RUN case "$INSTALL_GRADIO_MCP" in \
        True) \
            pip install --verbose --no-cache-dir --force-reinstall --target=/install "gradio[mcp]<=6.10.0" \
            ;; \
    esac
|
|
|
|
|
|
|
# Optional feature: PaddleOCR, in a CPU or a GPU flavour.
#   INSTALL_PADDLEOCR=True, PADDLE_GPU_ENABLED=False -> paddlepaddle (default index)
#   INSTALL_PADDLEOCR=True, PADDLE_GPU_ENABLED=True  -> paddlepaddle-gpu (cu129 index)
# Any other combination installs nothing and the step still succeeds.
ARG INSTALL_PADDLEOCR=False
ARG PADDLE_GPU_ENABLED=False
ENV INSTALL_PADDLEOCR=${INSTALL_PADDLEOCR} \
    PADDLE_GPU_ENABLED=${PADDLE_GPU_ENABLED}

# The three pip invocations are kept separate and in this order (protobuf,
# then the paddle runtime, then paddleocr); each one resolves on its own
# against the shared --target directory.
RUN if [ "$INSTALL_PADDLEOCR" = "True" ]; then \
        case "$PADDLE_GPU_ENABLED" in \
            False) \
                pip install --verbose --no-cache-dir --target=/install "protobuf<=7.34.0" \
                && pip install --verbose --no-cache-dir --target=/install "paddlepaddle<=3.2.1" \
                && pip install --verbose --no-cache-dir --target=/install "paddleocr<=3.3.0" \
                ;; \
            True) \
                pip install --verbose --no-cache-dir --target=/install "protobuf<=7.34.0" \
                && pip install --verbose --no-cache-dir --target=/install "paddlepaddle-gpu<=3.2.1" --index-url https://www.paddlepaddle.org.cn/packages/stable/cu129/ \
                && pip install --verbose --no-cache-dir --target=/install "paddleocr<=3.3.0" \
                ;; \
        esac; \
    fi
|
|
|
# Optional feature: vision-language-model stack (torch + transformers).
#   INSTALL_VLM=True, TORCH_GPU_ENABLED=False -> CPU builds of torch/torchvision
#   INSTALL_VLM=True, TORCH_GPU_ENABLED=True  -> cu129 builds of torch/torchvision,
#                                                plus optimum and the prebuilt
#                                                flash-attn and GPTQModel wheels
# Any other combination installs nothing and the step still succeeds.
ARG INSTALL_VLM=False
ARG TORCH_GPU_ENABLED=False
ENV INSTALL_VLM=${INSTALL_VLM} \
    TORCH_GPU_ENABLED=${TORCH_GPU_ENABLED}

# The flash-attn and GPTQModel wheels are fetched by direct URL; their file
# names encode the torch 2.8 / cp312 / linux_x86_64 build they target.
RUN if [ "$INSTALL_VLM" = "True" ]; then \
        case "$TORCH_GPU_ENABLED" in \
            False) \
                pip install --verbose --no-cache-dir --target=/install \
                    --extra-index-url https://download.pytorch.org/whl/cpu \
                    "torch==2.9.1+cpu" \
                    "torchvision==0.24.1+cpu" \
                    "transformers<=5.5.4" \
                    "accelerate<=1.13.0" \
                    "bitsandbytes<=0.49.2" \
                    "sentencepiece<=0.2.1" \
                ;; \
            True) \
                pip install --verbose --no-cache-dir --target=/install "torch<=2.8.0" --index-url https://download.pytorch.org/whl/cu129 \
                && pip install --verbose --no-cache-dir --target=/install "torchvision<=0.23.0" --index-url https://download.pytorch.org/whl/cu129 \
                && pip install --verbose --no-cache-dir --target=/install \
                    "transformers<=5.5.4" \
                    "accelerate<=1.13.0" \
                    "bitsandbytes<=0.49.2" \
                    "sentencepiece<=0.2.1" \
                && pip install --verbose --no-cache-dir --target=/install "optimum<=2.1.0" \
                && pip install --verbose --no-cache-dir --target=/install https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl \
                && pip install --verbose --no-cache-dir --target=/install https://github.com/ModelCloud/GPTQModel/releases/download/v5.8.0/gptqmodel-5.8.0+cu128torch2.8-cp312-cp312-linux_x86_64.whl \
                ;; \
        esac; \
    fi
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Stage "base"
# Runtime image shared by the "lambda" and "gradio" targets. Starts from the
# same Python image as the builder but installs no compiler toolchain.
# ---------------------------------------------------------------------------
FROM public.ecr.aws/docker/library/python:3.12.13-slim-trixie AS base

# ARGs do not cross stage boundaries, so the GPU switches are declared again
# here and exported to the environment for the RUN step below.
ARG TORCH_GPU_ENABLED=False
ENV TORCH_GPU_ENABLED=${TORCH_GPU_ENABLED}
ARG PADDLE_GPU_ENABLED=False
ENV PADDLE_GPU_ENABLED=${PADDLE_GPU_ENABLED}

# Runtime system packages. libgomp1 is added only when either GPU switch is
# exactly "True". Index refresh, install and cleanup share a single layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libgl1 \
        libglib2.0-0 \
        poppler-utils \
        tesseract-ocr \
    && if [ "$TORCH_GPU_ENABLED" = "True" ] || [ "$PADDLE_GPU_ENABLED" = "True" ]; then \
        apt-get install -y --no-install-recommends libgomp1; \
    fi \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
|
|
|
# Root of the application user's home; the paths below are derived from it.
ENV APP_HOME=/home/user

# Folders under the application directory, plus the Tesseract data folder.
ENV GRADIO_OUTPUT_FOLDER=${APP_HOME}/app/output/ \
    GRADIO_INPUT_FOLDER=${APP_HOME}/app/input/ \
    FEEDBACK_LOGS_FOLDER=${APP_HOME}/app/feedback/ \
    ACCESS_LOGS_FOLDER=${APP_HOME}/app/logs/ \
    USAGE_LOGS_FOLDER=${APP_HOME}/app/usage/ \
    CONFIG_FOLDER=${APP_HOME}/app/config/ \
    TESSERACT_DATA_FOLDER=/usr/share/tessdata

# Temporary and cache locations, all kept under /tmp.
ENV GRADIO_TEMP_DIR=/tmp/gradio_tmp/ \
    MPLCONFIGDIR=/tmp/matplotlib_cache/ \
    XDG_CACHE_HOME=/tmp/xdg_cache/user_1000

# Gradio settings (semantics are defined by Gradio / the app, not by this file).
ENV GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860 \
    GRADIO_NUM_PORTS=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_ANALYTICS_ENABLED=False

# Python interpreter settings; PATH is extended with the user-local bin dir.
ENV PATH=${APP_HOME}/.local/bin:${PATH} \
    PYTHONPATH=${APP_HOME}/app \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1
|
|
|
|
|
# Bring the packages resolved in the builder stage into the runtime
# interpreter: the whole /install tree becomes site-packages content, and the
# console scripts from /install/bin are additionally placed on PATH.
COPY --from=builder /install /usr/local/lib/python3.12/site-packages/
COPY --from=builder /install/bin /usr/local/bin/

# Apply the protobuf version ceiling against the runtime site-packages
# (presumably to correct a version pulled in by one of the optional installs -
# confirm before removing).
RUN pip install --no-cache-dir "protobuf<=7.34.0"

# Download the large English spaCy pipeline at build time. `spacy download`
# forwards extra arguments to `pip install`; --no-cache-dir stops pip from
# also keeping a copy of the downloaded wheel in its cache (which lives under
# XDG_CACHE_HOME and would otherwise be baked into this layer).
RUN python -m spacy download en_core_web_lg --no-cache-dir
|
|
|
|
|
# Application source. The working directory is set first so the copy and
# fix-up steps below can use paths relative to ${APP_HOME}/app.
WORKDIR ${APP_HOME}/app

COPY . .
# Copied explicitly as well, so the entrypoint is the file from the build
# context root regardless of what the bulk copy above placed there.
COPY entrypoint.sh ./entrypoint.sh

# Strip Windows carriage returns from the entrypoint and make it executable.
RUN sed -i 's/\r$//' entrypoint.sh \
    && chmod +x entrypoint.sh
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Target "lambda"
# Same filesystem as "base"; only the mode flag and the default command
# differ. The default argument handed to the entrypoint script is the handler
# path "lambda_entrypoint.lambda_handler".
# ---------------------------------------------------------------------------
FROM base AS lambda

# APP_MODE is presumably read by entrypoint.sh to pick the start-up path -
# confirm in that script.
ENV APP_MODE=lambda

ENTRYPOINT ["/home/user/app/entrypoint.sh"]
CMD ["lambda_entrypoint.lambda_handler"]
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Target "gradio"
# Runs the app as an unprivileged user with UID 1000.
# ---------------------------------------------------------------------------
FROM base AS gradio

ENV APP_MODE=gradio

# Create the runtime account together with its home directory, then make
# sure the application directory exists and belongs to that account.
RUN useradd --create-home --uid 1000 user \
    && mkdir -p ${APP_HOME}/app \
    && chown user:user ${APP_HOME}/app
|
|
|
|
|
|
|
# Create the application's data folders, owned by `user` with mode 755.
# `set -e` aborts the build on the first command that fails.
RUN set -e; \
    for name in output input logs usage feedback config; do \
        dir="${APP_HOME}/app/${name}"; \
        mkdir -p "$dir"; \
        chown user:user "$dir"; \
        chmod 755 "$dir"; \
    done
|
|
|
|
|
# Scratch, cache and model-data locations for the unprivileged user:
#   - /tmp, /var/tmp and the three /tmp sub-directories: owned by `user`,
#     mode 1777 (world-writable with the sticky bit)
#   - XDG_CACHE_HOME: owned by `user`, mode 700
#   - .paddlex, the spaCy data dir and /usr/share/tessdata: owned by `user`,
#     mode 755
# `set -e` aborts the build on the first command that fails.
RUN set -e; \
    mkdir -p /tmp /var/tmp /tmp/gradio_tmp /tmp/tld /tmp/matplotlib_cache ${XDG_CACHE_HOME}; \
    chown user:user /tmp /var/tmp /tmp/gradio_tmp /tmp/tld /tmp/matplotlib_cache ${XDG_CACHE_HOME}; \
    chmod 1777 /tmp /var/tmp /tmp/gradio_tmp /tmp/tld /tmp/matplotlib_cache; \
    chmod 700 ${XDG_CACHE_HOME}; \
    for dir in ${APP_HOME}/.paddlex ${APP_HOME}/.local/share/spacy/data /usr/share/tessdata; do \
        mkdir -p "$dir"; \
        chown user:user "$dir"; \
        chmod 755 "$dir"; \
    done
|
|
|
|
|
# Hand the whole home directory (including the application copied in the
# base stage) to the runtime user, and set mode 755 on the Python launcher.
RUN chown --recursive user:user /home/user \
    && chmod 755 /usr/local/bin/python
|
|
|
|
|
# Mount points for writable locations. Declared after every build step that
# writes to these paths, because build-time writes made after a VOLUME
# declaration are discarded.
VOLUME ["/tmp/matplotlib_cache", \
        "/tmp/gradio_tmp", \
        "/tmp/tld", \
        "/home/user/app/output", \
        "/home/user/app/input", \
        "/home/user/app/logs", \
        "/home/user/app/usage", \
        "/home/user/app/feedback", \
        "/home/user/app/config", \
        "/home/user/.paddlex", \
        "/home/user/.local/share/spacy/data", \
        "/usr/share/tessdata", \
        "/tmp", \
        "/var/tmp"]

# Drop root for everything that runs in the container.
USER user

# Documents the port taken from GRADIO_SERVER_PORT; it does not publish it.
EXPOSE ${GRADIO_SERVER_PORT}

# Exec-form entrypoint; `python app.py` is the default argument list handed to
# the entrypoint script.
ENTRYPOINT ["/home/user/app/entrypoint.sh"]
CMD ["python", "app.py"]