# Get a distribution that has uv already installed
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim

# Create an unprivileged account (UID 1000) with a home directory and switch
# to it, so the remaining build steps and the running app do not use root.
RUN useradd --create-home --uid 1000 user
USER user

# Environment for the unprivileged user: its home directory and its per-user
# bin directory placed first on PATH.
ENV HOME=/home/user
ENV PATH=/home/user/.local/bin:$PATH
# NOTE(review): presumably selects the "websockets" backend for the web server
# at runtime - confirm that something actually reads this variable.
ENV UVICORN_WS_PROTOCOL=websockets

# Application directory; relative paths in later instructions resolve here.
WORKDIR /home/user/app
# Bring the whole build context into the working directory, owned by the
# unprivileged user so later steps can write next to the sources.
COPY --chown=user . .
# Install the project's dependencies with uv.
# (The lockfile-strict variant is kept below for reference.)
# RUN uv sync --frozen
RUN uv sync
# Make sure the data directory exists; a no-op when it is already present.
RUN mkdir -p data
# Extra packages installed on top of the project's own dependencies.
RUN uv pip install huggingface_hub datasets python-dotenv pypdf2
# Download PDFs from the Hugging Face dataset.
#
# The Hugging Face token may be supplied in either of two ways:
#   * as a BuildKit secret with id HF_TOKEN (preferred: it is mounted only for
#     this step and is not written to a layer or to the image history), e.g.
#       docker build --secret id=HF_TOKEN,env=HF_TOKEN .
#   * as the build argument `--build-arg HF_TOKEN=...` (still accepted for
#     backward compatibility, but build arguments are visible in the image
#     history, so avoid this for real credentials).
#
# Notes on the shell code below:
#   * RUN uses /bin/sh, so only POSIX parameter expansion is used; the
#     bash-only substring form `${VAR:0:1}` is not valid there. No part of the
#     token is echoed to the build log.
#   * mode=0444 makes the mounted secret readable by the non-root build user.
#   * The token is exported as the HF_TOKEN environment variable for the
#     download script instead of being pasted into Python source or stored
#     with `huggingface_hub.login()`, which would write it into the image.
#     huggingface_hub reads HF_TOKEN from the environment.
#   * A missing token or a failing download is reported but, as before, does
#     not abort the build.
ARG HF_TOKEN
RUN --mount=type=secret,id=HF_TOKEN,mode=0444 \
    echo "=================== CHECKING HF_TOKEN ===================" && \
    if [ -z "${HF_TOKEN:-}" ] && [ -r /run/secrets/HF_TOKEN ]; then \
        HF_TOKEN="$(cat /run/secrets/HF_TOKEN)"; \
    fi && \
    if [ -n "${HF_TOKEN:-}" ]; then \
        echo "HF_TOKEN is available"; \
        export HF_TOKEN; \
    else \
        echo "ERROR: HF_TOKEN is empty or not set!"; \
    fi && \
    { uv run python download_pdfs.py || echo "WARNING: download_pdfs.py failed, but continuing build"; }
# Run preprocessing to generate the embeddings.
#
# preprocess.py needs OPENAI_API_KEY at build time. The key may be supplied:
#   * as a BuildKit secret with id OPENAI_API_KEY (preferred: mounted only for
#     this step, never stored in a layer or in the image history), e.g.
#       docker build --secret id=OPENAI_API_KEY,env=OPENAI_API_KEY .
#   * as the build argument `--build-arg OPENAI_API_KEY=...` (still accepted
#     for backward compatibility, but visible in the image history).
#
# Notes on the shell code below:
#   * RUN uses /bin/sh, so the bash-only substring form `${VAR:0:1}` is
#     avoided; no part of the key is echoed to the build log.
#   * mode=0444 makes the mounted secret readable by the non-root build user.
#   * The key is exported rather than re-assigned inline and unquoted, so
#     unusual characters in the value cannot be re-split by the shell.
#   * A missing key or a failing preprocessing run is reported but, as
#     before, does not abort the build.
ARG OPENAI_API_KEY
RUN --mount=type=secret,id=OPENAI_API_KEY,mode=0444 \
    echo "=================== CHECKING OPENAI_API_KEY ===================" && \
    if [ -z "${OPENAI_API_KEY:-}" ] && [ -r /run/secrets/OPENAI_API_KEY ]; then \
        OPENAI_API_KEY="$(cat /run/secrets/OPENAI_API_KEY)"; \
    fi && \
    if [ -n "${OPENAI_API_KEY:-}" ]; then \
        echo "OPENAI_API_KEY is available"; \
        export OPENAI_API_KEY; \
        uv run python preprocess.py || echo "WARNING: preprocessing failed, app will not work properly"; \
    else \
        echo "ERROR: OPENAI_API_KEY is empty or not set!"; \
        echo "WARNING: App will not work without preprocessed data!"; \
    fi
# Document the port the app listens on. EXPOSE is informational only; the
# runtime decides what is actually published.
EXPOSE 7860
# Start the Chainlit app through uv, listening on all interfaces on port 7860.
# Exec (JSON array) form: the line must contain nothing but the array,
# otherwise Docker falls back to shell form and the command is not run as
# intended.
CMD ["uv", "run", "chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]