# Image for the AB Testing RAG Agent (Streamlit app).
#
# Layers are ordered from least to most frequently changing so that editing
# application code does not invalidate the dependency layers.
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies (C/C++ build toolchain).
# --no-install-recommends keeps optional packages out of the image; the apt
# lists are removed in the same layer so they never persist.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        gcc \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies. Only requirements.txt is copied first so this
# layer is rebuilt only when the requirements change.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install additional required packages (used to fetch data from the
# Hugging Face Hub). Kept ahead of the source copy so it stays cached.
RUN pip install --no-cache-dir huggingface_hub datasets

# Configure Streamlit to run in headless mode (no welcome screen).
# printf emits one config line per argument.
RUN mkdir -p /root/.streamlit && \
    printf '%s\n' \
        '[general]' \
        'email = ""' \
        'showWarningOnDirectExecution = false' \
        '' \
        '[server]' \
        'headless = true' \
        > /root/.streamlit/config.toml

# Address and port the Streamlit server binds to; read by the entrypoint
# script at container start.
ENV HOST=0.0.0.0 \
    PORT=8000

# Document the port the app listens on.
EXPOSE ${PORT}

# Copy the rest of the application code.
COPY . .

# Create the directories for downloaded PDFs and for pre-processed data
# (no-op if the copied source tree already contains them).
RUN mkdir -p data processed_data

# Create the entrypoint script.
# At container start it checks for pre-processed data; when either the chunk
# pickle or the vector store directory is missing it downloads the PDFs and
# runs preprocessing, which requires HF_TOKEN to be set. The script is written
# after COPY so it takes precedence over any entrypoint.sh in the build context.
# The final command uses `exec` so Streamlit replaces the shell as PID 1 and
# receives stop signals directly.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'echo "Starting AB Testing RAG Agent"' \
        'echo "Checking for pre-processed data..."' \
        'if [ ! -f "processed_data/document_chunks.pkl" ] || [ ! -d "processed_data/qdrant_vectorstore" ]; then' \
        '    echo "Pre-processed data not found. Downloading PDFs..."' \
        '    if [ -n "${HF_TOKEN}" ]; then' \
        '        python download_pdfs.py' \
        '        echo "Running preprocessing..."' \
        '        python scripts/preprocess_data.py' \
        '    else' \
        '        echo "Error: HF_TOKEN environment variable is not set. Cannot download PDFs."' \
        '        echo "Please set the HF_TOKEN environment variable in your Hugging Face Space settings."' \
        '        exit 1' \
        '    fi' \
        'else' \
        '    echo "Using existing pre-processed data"' \
        'fi' \
        'exec streamlit run streamlit_app.py --server.address "$HOST" --server.port "$PORT"' \
        > /app/entrypoint.sh && \
    chmod +x /app/entrypoint.sh

# Run the application.
ENTRYPOINT ["/app/entrypoint.sh"]