# Spaces: Sleeping
# Container image for the AB Testing RAG Agent Streamlit app.
#
# Build time: installs a C toolchain, the Python requirements and the
# Hugging Face client libraries, then copies the application into /app.
# Run time: /app/entrypoint.sh checks for pre-processed data (downloading and
# preprocessing the PDFs when HF_TOKEN is set) and then starts Streamlit on
# $HOST:$PORT.
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies.
# --no-install-recommends keeps optional packages out of the image; the apt
# lists are deleted in the same layer so they never reach the final image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        gcc \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies.
# Done before the application code is copied so that editing source files
# does not invalidate these (slow) layers.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install additional required packages
RUN pip install --no-cache-dir huggingface_hub datasets

# Copy the rest of the application code
COPY . .

# Create the PDF data directory and the pre-processed data directory
# (no-op for any that already came in with the COPY above).
RUN mkdir -p data processed_data

# Configure Streamlit to run in headless mode (no welcome screen).
# The file content is exactly what the previous echo chain produced.
# NOTE(review): this file lives under /root, so it is presumably only read
# when the process runs with HOME=/root - confirm for the target platform.
# NOTE(review): Streamlit documents showWarningOnDirectExecution under
# [global] and the e-mail prompt in credentials.toml - confirm the [general]
# keys below have any effect.
RUN mkdir -p /root/.streamlit \
    && printf '%s\n' \
        '[general]' \
        'email = ""' \
        'showWarningOnDirectExecution = false' \
        '' \
        '[server]' \
        'headless = true' \
        > /root/.streamlit/config.toml

# Address and port Streamlit binds to; both can be overridden at run time.
ENV HOST=0.0.0.0
ENV PORT=8000

# Expose the port
EXPOSE $PORT

# Create the entrypoint script.
# - set -euo pipefail: a failed download or preprocessing step aborts start-up
#   instead of launching the app without its data.
# - ${HF_TOKEN:-}: an unset token is reported by the check, not by `set -u`.
# - exec: Streamlit replaces the shell so it receives stop signals directly.
# The single quotes keep every $VAR literal at build time; they are expanded
# only when the script runs.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'set -euo pipefail' \
        '' \
        'echo "Starting AB Testing RAG Agent"' \
        'echo "Checking for pre-processed data..."' \
        'if [[ ! -f "processed_data/document_chunks.pkl" || ! -d "processed_data/qdrant_vectorstore" ]]; then' \
        '  echo "Pre-processed data not found. Downloading PDFs..."' \
        '  if [[ -z "${HF_TOKEN:-}" ]]; then' \
        '    echo "Error: HF_TOKEN environment variable is not set. Cannot download PDFs." >&2' \
        '    echo "Please set the HF_TOKEN environment variable in your Hugging Face Space settings." >&2' \
        '    exit 1' \
        '  fi' \
        '  python download_pdfs.py' \
        '  echo "Running preprocessing..."' \
        '  python scripts/preprocess_data.py' \
        'else' \
        '  echo "Using existing pre-processed data"' \
        'fi' \
        '' \
        'exec streamlit run streamlit_app.py --server.address "${HOST:-0.0.0.0}" --server.port "${PORT:-8000}"' \
        > /app/entrypoint.sh \
    && chmod +x /app/entrypoint.sh

# Run the application
ENTRYPOINT ["/app/entrypoint.sh"]