# Image for the Chainlit app (app.py), served on port 7860.
#
# Build arguments:
#   HF_TOKEN        - Hugging Face token used to log in before download_pdfs.py runs.
#   OPENAI_API_KEY  - OpenAI key passed to preprocess.py to generate the embeddings.
#
# NOTE(review): both values are passed as build args (ARG). Docker's documentation
# warns that build args are not a safe channel for secrets; consider migrating to
# `RUN --mount=type=secret` once the build platform's secret mechanism is confirmed.

# Get a distribution that has uv already installed
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim

# Add user - this is the user that will run the app
# If you do not set user, the app will run as root (undesirable)
RUN useradd -m -u 1000 user
USER user

# Set the home directory and path
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

ENV UVICORN_WS_PROTOCOL=websockets

# Set the working directory
WORKDIR $HOME/app

# Copy the app to the container
COPY --chown=user . $HOME/app

# Install the dependencies
# RUN uv sync --frozen
RUN uv sync

# Create data directory if it doesn't exist
RUN mkdir -p $HOME/app/data

# Install additional required packages
RUN uv pip install huggingface_hub datasets python-dotenv pypdf2

# Download PDFs from Hugging Face dataset
# Use a build arg for the token
ARG HF_TOKEN

# Check token availability and log in to Hugging Face.
# - Shell-form RUN executes under /bin/sh, so the first character is extracted with
#   POSIX `printf '%.1s'` instead of the bash-only `${VAR:0:1}` expansion.
# - The login goes through `uv run` so it uses the environment where
#   huggingface_hub was installed.
# - The token is read from the environment inside Python rather than being spliced
#   into the Python source, so unusual characters in the token cannot break or
#   alter the command.
# A missing token is reported but does not abort the build (best effort).
RUN echo "=================== CHECKING HF_TOKEN ===================" && \
    if [ -n "${HF_TOKEN}" ]; then \
        echo "HF_TOKEN is available (first character: $(printf '%.1s' "${HF_TOKEN}")*)"; \
        uv run python -c "import os; from huggingface_hub import login; login(token=os.environ['HF_TOKEN'])"; \
    else \
        echo "ERROR: HF_TOKEN is empty or not set!"; \
    fi

# Use uv run to ensure we use the environment where datasets is installed
# A failed download is logged as a warning and the build continues.
RUN uv run python download_pdfs.py || echo "WARNING: download_pdfs.py failed, but continuing build"

# Run preprocessing to generate the embeddings
# Note: This requires the OPENAI_API_KEY environment variable to be set during build
# For Hugging Face, you'll need to use their build secrets feature
ARG OPENAI_API_KEY

# Preprocessing is best effort: a missing key or a failing preprocess.py only
# produces a warning, so the image still builds (but the app will lack its data).
RUN echo "=================== CHECKING OPENAI_API_KEY ===================" && \
    if [ -n "${OPENAI_API_KEY}" ]; then \
        echo "OPENAI_API_KEY is available (first character: $(printf '%.1s' "${OPENAI_API_KEY}")*)"; \
        OPENAI_API_KEY="${OPENAI_API_KEY}" uv run python preprocess.py || echo "WARNING: preprocessing failed, app will not work properly"; \
    else \
        echo "ERROR: OPENAI_API_KEY is empty or not set!"; \
        echo "WARNING: App will not work without preprocessed data!"; \
    fi

# Expose the port
EXPOSE 7860

# Run the app
CMD ["uv", "run", "chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]