# Bank-Scrubber / Dockerfile
# Aryan Jain
# change location of app file
# a2b0228
# Base image: official Python 3.12 "slim" variant.
FROM python:3.12-slim

# Python and Poetry environment, grouped into a single instruction.
#   PYTHONUNBUFFERED=1          - do not buffer stdout/stderr.
#   PYTHONDONTWRITEBYTECODE=1   - do not write .pyc files.
#   POETRY_VERSION, POETRY_HOME - read by the official Poetry installer to
#                                 select the release and the install prefix.
#   POETRY_VIRTUALENVS_CREATE   - Poetry maps settings to POETRY_<SETTING>
#                                 variables; "false" installs dependencies into
#                                 the image's own interpreter, matching the
#                                 `virtualenvs.create false` used at install
#                                 time. (The earlier POETRY_VENV_IN_PROJECT name
#                                 is not a setting Poetry recognises.)
#   POETRY_NO_INTERACTION=1     - never prompt during the build.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    POETRY_VERSION=1.8.2 \
    POETRY_HOME="/opt/poetry" \
    POETRY_VIRTUALENVS_CREATE=false \
    POETRY_NO_INTERACTION=1

# All following relative paths resolve against /app.
WORKDIR /app
# Install system dependencies in a single layer (update + install + cleanup
# together, so no stale package index is cached and no apt lists are shipped).
#
# - libgl1 replaces libgl1-mesa-glx: that transitional package is not
#   available on the Debian releases current python:3.12-slim images are
#   built from, so requesting it makes the build fail.
# - --no-install-recommends keeps the layer small; ca-certificates (needed by
#   curl for HTTPS) and poppler-data (encoding data used by poppler-utils) are
#   listed explicitly so they are still installed.
# - Packages are sorted alphabetically to keep diffs readable.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        libsm6 \
        libtesseract-dev \
        libxext6 \
        libxrender-dev \
        poppler-data \
        poppler-utils \
        tesseract-ocr \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Install Poetry with the official installer.
# The script is downloaded to a file first (curl -f fails on HTTP errors) and
# then executed, so a broken download stops the build instead of being hidden
# behind a pipe. The installer picks up POETRY_VERSION and POETRY_HOME from the
# environment; the final command verifies the binary landed in /opt/poetry/bin.
RUN curl -fsSL https://install.python-poetry.org -o /tmp/install-poetry.py \
    && python3 /tmp/install-poetry.py \
    && rm -f /tmp/install-poetry.py \
    && /opt/poetry/bin/poetry --version

# Make the poetry executable available to later build steps and at runtime.
ENV PATH="/opt/poetry/bin:$PATH"
# Copy only the dependency manifests first so this layer (and the install
# below) stays cached until pyproject.toml or poetry.lock changes.
# The trailing * makes poetry.lock optional in the build context.
COPY pyproject.toml poetry.lock* /app/

# Install the main dependency group into the image's Python (no virtualenv).
# - `poetry lock --no-update` creates or refreshes the lock file without
#   bumping already-locked versions.
# - `--no-root` skips installing the project itself: its sources are not in
#   the image yet at this point, so only third-party dependencies belong here.
# - Poetry's download cache is removed in the same layer so it does not end up
#   in the image.
RUN poetry config virtualenvs.create false \
    && poetry lock --no-update \
    && poetry install --no-interaction --no-ansi --only main --no-root \
    && rm -rf /root/.cache/pypoetry
# Install CPU-only PyTorch wheels.
# The default PyPI wheels for Linux bundle the CUDA runtime libraries; the
# dedicated CPU index provides builds without them. --no-cache-dir keeps pip's
# download cache out of the layer.
# NOTE(review): versions are unpinned, so rebuilds may pull newer releases;
# pin them once the supported versions are known.
RUN pip3 install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cpu \
        torch torchvision torchaudio

# Download the small English spaCy pipeline at build time.
RUN python -m spacy download en_core_web_sm
# Scratch directory for file processing, created world-writable (mode 777).
RUN install -d -m 777 /app/temp

# Application source. Kept after the dependency layers so that editing code
# only rebuilds from this instruction onward.
COPY src/ /app/src/
# COPY src/app.py /app/
# Document the port the Streamlit server listens on.
EXPOSE 8501

# Streamlit server settings supplied through the environment:
# listen on 0.0.0.0:8501, run headless, with CORS and XSRF protection disabled.
ENV STREAMLIT_SERVER_PORT=8501 \
    STREAMLIT_SERVER_ADDRESS=0.0.0.0 \
    STREAMLIT_SERVER_HEADLESS=true \
    STREAMLIT_SERVER_ENABLE_CORS=false \
    STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION=false
# Container health probe against Streamlit's health endpoint.
# - --start-period gives the application time to start before failed probes
#   count against the retry budget.
# - `|| exit 1` normalises every curl failure to exit status 1 ("unhealthy"),
#   the value Docker documents for a failing check.
# - --silent --show-error keeps the progress meter out of the health log while
#   still recording error messages.
HEALTHCHECK --interval=30s --timeout=30s --start-period=30s --retries=3 \
    CMD curl --fail --silent --show-error http://localhost:8501/_stcore/health || exit 1

# Run the Streamlit application (exec form, so Streamlit is PID 1 and receives
# stop signals directly).
ENTRYPOINT ["streamlit", "run", "src/app.py", "--server.port=8501", "--server.address=0.0.0.0"]