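# Spaces: Sleeping
# NOTE(review): the line above looks like web-page status text captured along
# with this file, not a Dockerfile instruction; it is kept only as a comment.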
# Image for the Chainlit app: installs the project with uv, downloads the PDF
# files, runs preprocessing (embeddings) at build time, and serves app.py on
# port 7860.
#
# Build-time credentials (both optional; the build continues without them):
#   HF_TOKEN        - used by download_pdfs.py
#   OPENAI_API_KEY  - used by preprocess.py
# Preferred: supply them as BuildKit secrets with the same ids, e.g.
#   docker build --secret id=HF_TOKEN,env=HF_TOKEN \
#                --secret id=OPENAI_API_KEY,env=OPENAI_API_KEY .
# Still accepted for backward compatibility: --build-arg HF_TOKEN=... /
# --build-arg OPENAI_API_KEY=... (note: build args are recorded in the image
# history, so avoid them for real credentials).
# The `RUN --mount=type=secret` steps below require BuildKit.

# Get a distribution that has uv already installed
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim

# Add user - this is the user that will run the app.
# If you do not set user, the app will run as root (undesirable).
RUN useradd -m -u 1000 user
USER user

# Set the home directory and path
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
ENV UVICORN_WS_PROTOCOL=websockets

# Set the working directory
WORKDIR $HOME/app

# Copy the app to the container
COPY --chown=user . $HOME/app

# Install the dependencies
# RUN uv sync --frozen
RUN uv sync

# Create data directory if it doesn't exist
RUN mkdir -p $HOME/app/data

# Install additional required packages
RUN uv pip install huggingface_hub datasets python-dotenv pypdf2

# Download PDFs from Hugging Face dataset.
# The token is taken from the HF_TOKEN build secret when present, otherwise
# from the HF_TOKEN build arg. It is exported only for this step and is never
# written to disk or echoed (no `login()` call, so nothing is persisted in the
# layer). mode=0444 lets the non-root user read the mounted secret.
# The download is best-effort: a failure only prints a warning.
# Uses `uv run` so the script runs in the environment where datasets is
# installed.
ARG HF_TOKEN
RUN --mount=type=secret,id=HF_TOKEN,mode=0444 \
    echo "=================== CHECKING HF_TOKEN ===================" && \
    if [ -s /run/secrets/HF_TOKEN ]; then \
        HF_TOKEN="$(cat /run/secrets/HF_TOKEN)"; \
        export HF_TOKEN; \
    fi; \
    if [ -n "${HF_TOKEN}" ]; then \
        echo "HF_TOKEN is available"; \
    else \
        echo "ERROR: HF_TOKEN is empty or not set!"; \
    fi; \
    uv run python download_pdfs.py || echo "WARNING: download_pdfs.py failed, but continuing build"

# Run preprocessing to generate the embeddings.
# Note: this requires OPENAI_API_KEY to be available during the build, either
# as the OPENAI_API_KEY build secret (preferred; on Hugging Face use their
# build secrets feature) or as the OPENAI_API_KEY build arg.
# Preprocessing is skipped when no key is available, and a preprocessing
# failure only prints a warning.
ARG OPENAI_API_KEY
RUN --mount=type=secret,id=OPENAI_API_KEY,mode=0444 \
    echo "=================== CHECKING OPENAI_API_KEY ===================" && \
    if [ -s /run/secrets/OPENAI_API_KEY ]; then \
        OPENAI_API_KEY="$(cat /run/secrets/OPENAI_API_KEY)"; \
        export OPENAI_API_KEY; \
    fi; \
    if [ -n "${OPENAI_API_KEY}" ]; then \
        echo "OPENAI_API_KEY is available"; \
        uv run python preprocess.py || echo "WARNING: preprocessing failed, app will not work properly"; \
    else \
        echo "ERROR: OPENAI_API_KEY is empty or not set!"; \
        echo "WARNING: App will not work without preprocessed data!"; \
    fi

# Expose the port
EXPOSE 7860

# Run the app
CMD ["uv", "run", "chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]