File size: 2,258 Bytes
b06d945
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b569cb
 
ec7c0c2
 
8f4b657
 
79ad99e
e41df66
79ad99e
e41df66
8f4b657
 
e41df66
8f4b657
 
bd54963
e41df66
b06d945
 
 
 
67d8ac4
e41df66
67d8ac4
e41df66
 
67d8ac4
e41df66
67d8ac4
 
b06d945
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Base image: Debian bookworm-slim with Python 3.13 and uv preinstalled
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim

# Create a dedicated account (UID 1000, with a home directory) and switch to it,
# so that every following build step and the app itself run unprivileged
# instead of as root.
RUN useradd --create-home --uid 1000 user
USER user

# Environment shared by the remaining build steps and the running container:
#   HOME                - home directory of the unprivileged user
#   PATH                - the user's ~/.local/bin is searched first
#   UVICORN_WS_PROTOCOL - set to "websockets"
ENV HOME="/home/user" \
    PATH="/home/user/.local/bin:${PATH}" \
    UVICORN_WS_PROTOCOL="websockets"


# All following instructions run relative to the application directory
WORKDIR ${HOME}/app

# Copy the build context into the working directory, owned by the app user
COPY --chown=user . .

# Install the project's dependencies with uv.
# (Disabled alternative kept for reference: `uv sync --frozen`.)
RUN uv sync

# Make sure the data directory exists (no-op when it is already there)
RUN mkdir -p "${HOME}/app/data"

# Extra packages installed on top of the project dependencies
RUN uv pip install \
        datasets \
        huggingface_hub \
        pypdf2 \
        python-dotenv

# Download PDFs from a Hugging Face dataset.
# The token is supplied as a build arg; inside RUN steps it is available as an
# environment variable.
# NOTE(review): build args are visible in `docker history`, and login()
# presumably persists the token on disk inside this image layer - consider a
# BuildKit secret mount (RUN --mount=type=secret,id=HF_TOKEN) instead.
ARG HF_TOKEN

# Report whether the token is present and, if so, log in to the Hugging Face Hub.
# - RUN executes under /bin/sh, so only POSIX constructs are used: printf '%.1s'
#   yields the first character (the bash-only ${VAR:0:1} aborts the step with
#   "Bad substitution" under a POSIX shell).
# - `uv run python` is used so that the interpreter is the project environment
#   where huggingface_hub was installed, matching the download step below.
# - The token is read from the environment inside Python rather than being
#   spliced into the source text, so special characters cannot break the command.
# A missing token is only reported; the build continues.
RUN echo "=================== CHECKING HF_TOKEN ===================" && \
    if [ -n "${HF_TOKEN}" ]; then \
        echo "HF_TOKEN is available (first character: $(printf '%.1s' "${HF_TOKEN}")*)"; \
        uv run python -c "import os; from huggingface_hub import login; login(token=os.environ['HF_TOKEN'])"; \
    else \
        echo "ERROR: HF_TOKEN is empty or not set!"; \
    fi

# Use uv run to ensure we use the environment where datasets is installed.
# Best effort: a failed download prints a warning but does not stop the build.
RUN uv run python download_pdfs.py || echo "WARNING: download_pdfs.py failed, but continuing build"

# Run preprocessing to generate the embeddings.
# Note: This requires OPENAI_API_KEY to be provided at build time.
# For Hugging Face, you'll need to use their build secrets feature.
# NOTE(review): build args are visible in `docker history`; a BuildKit secret
# mount would keep the key out of the image metadata.
ARG OPENAI_API_KEY

# Report whether the key is present and, if so, run the preprocessing script.
# - RUN executes under /bin/sh, so the first character is obtained with the
#   POSIX-safe printf '%.1s' (the bash-only ${VAR:0:1} aborts the step with
#   "Bad substitution" under a POSIX shell).
# - Preprocessing is best effort: a failure or a missing key only prints a
#   warning and the build continues.
RUN echo "=================== CHECKING OPENAI_API_KEY ===================" && \
    if [ -n "${OPENAI_API_KEY}" ]; then \
        echo "OPENAI_API_KEY is available (first character: $(printf '%.1s' "${OPENAI_API_KEY}")*)"; \
        OPENAI_API_KEY="${OPENAI_API_KEY}" uv run python preprocess.py || echo "WARNING: preprocessing failed, app will not work properly"; \
    else \
        echo "ERROR: OPENAI_API_KEY is empty or not set!"; \
        echo "WARNING: App will not work without preprocessed data!"; \
    fi

# Document the TCP port the app listens on
EXPOSE 7860/tcp

# Start the Chainlit app through uv, bound to all interfaces on port 7860
CMD ["uv", "run", "chainlit", "run", "app.py", \
     "--host", "0.0.0.0", "--port", "7860"]