File size: 2,685 Bytes
2eb4533
 
 
 
 
 
 
3863dba
2eb4533
 
 
3863dba
2eb4533
 
9fa16f1
2eb4533
 
9fa16f1
2eb4533
 
9fa16f1
 
2eb4533
9fa16f1
 
 
 
 
 
 
 
2eb4533
2585f8a
 
 
9fa16f1
 
 
 
3863dba
 
9fa16f1
 
 
3863dba
2585f8a
3863dba
2585f8a
 
 
 
 
 
 
 
 
 
9fa16f1
3863dba
9fa16f1
3863dba
9fa16f1
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Base image: the slim variant of the official Python 3.11 image.
FROM python:3.11-slim

# Every relative path below (COPY destinations, mkdir targets, the script paths
# used by the entrypoint) resolves against /app.
WORKDIR /app

# Install system dependencies (C/C++ build toolchain).
# Presumably needed to compile Python packages that ship without prebuilt
# wheels — confirm against requirements.txt.
# --no-install-recommends keeps apt's optional "recommended" packages out of the
# image; the package lists are deleted in the same layer so they never persist.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies from the manifest alone, before any application
# source is copied, so this layer stays cached until requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install additional required packages.
# Still a separate pip invocation that runs after the requirements install, but
# placed ahead of the source copy so that editing application code no longer
# invalidates this layer and forces a re-download.
# NOTE(review): these two packages are unpinned; consider pinning them in
# requirements.txt for reproducible builds.
RUN pip install --no-cache-dir huggingface_hub datasets

# Configure Streamlit to run in headless mode (no welcome screen).
# The file content does not depend on the application source, so it is written
# before the source copy as well. printf emits one argument per line, with the
# empty argument producing the blank line between the two TOML sections.
# NOTE(review): the config lives in root's home directory; a container started
# as a different user would presumably not pick it up — confirm how the image
# is run.
RUN mkdir -p /root/.streamlit && \
    printf '%s\n' \
        '[general]' \
        'email = ""' \
        'showWarningOnDirectExecution = false' \
        '' \
        '[server]' \
        'headless = true' \
        > /root/.streamlit/config.toml

# Copy the rest of the application code
COPY . .

# Create the data directory for PDFs and the directory for pre-processed data.
# -p makes this a no-op for any directory the copy above already provided.
RUN mkdir -p data processed_data

# Runtime defaults for the bind address and port. The entrypoint script passes
# both values to Streamlit as --server.address / --server.port.
ENV HOST=0.0.0.0 \
    PORT=8000

# Declare the port the app listens on (documentation only; it is not published
# until the container is run with a port mapping).
EXPOSE ${PORT}

# Create the entrypoint script.
# At container start the generated script:
#   1. checks for processed_data/document_chunks.pkl and the
#      processed_data/qdrant_vectorstore directory;
#   2. if either is missing, requires HF_TOKEN to be set, then runs
#      download_pdfs.py followed by scripts/preprocess_data.py; without the
#      token it prints an error and exits with status 1;
#   3. starts the Streamlit app on $HOST:$PORT.
# The final command uses `exec` so Streamlit replaces the shell and becomes
# PID 1; otherwise bash stays PID 1 and the stop signal from `docker stop`
# never reaches Streamlit. $HOST and $PORT are quoted to avoid word splitting.
# NOTE(review): the script does not abort if download_pdfs.py or the
# preprocessing step fails; Streamlit is started regardless — confirm whether
# that is intended.
RUN echo '#!/bin/bash' > /app/entrypoint.sh && \
    echo 'echo "Starting AB Testing RAG Agent"' >> /app/entrypoint.sh && \
    echo 'echo "Checking for pre-processed data..."' >> /app/entrypoint.sh && \
    echo 'if [ ! -f "processed_data/document_chunks.pkl" ] || [ ! -d "processed_data/qdrant_vectorstore" ]; then' >> /app/entrypoint.sh && \
    echo '    echo "Pre-processed data not found. Downloading PDFs..."' >> /app/entrypoint.sh && \
    echo '    if [ -n "${HF_TOKEN}" ]; then' >> /app/entrypoint.sh && \
    echo '        python download_pdfs.py' >> /app/entrypoint.sh && \
    echo '        echo "Running preprocessing..."' >> /app/entrypoint.sh && \
    echo '        python scripts/preprocess_data.py' >> /app/entrypoint.sh && \
    echo '    else' >> /app/entrypoint.sh && \
    echo '        echo "Error: HF_TOKEN environment variable is not set. Cannot download PDFs."' >> /app/entrypoint.sh && \
    echo '        echo "Please set the HF_TOKEN environment variable in your Hugging Face Space settings."' >> /app/entrypoint.sh && \
    echo '        exit 1' >> /app/entrypoint.sh && \
    echo '    fi' >> /app/entrypoint.sh && \
    echo 'else' >> /app/entrypoint.sh && \
    echo '    echo "Using existing pre-processed data"' >> /app/entrypoint.sh && \
    echo 'fi' >> /app/entrypoint.sh && \
    echo 'exec streamlit run streamlit_app.py --server.address "$HOST" --server.port "$PORT"' >> /app/entrypoint.sh && \
    chmod +x /app/entrypoint.sh

# Run the application (exec form, so the script is started directly rather than
# through an extra `sh -c` wrapper).
ENTRYPOINT ["/app/entrypoint.sh"]