# syntax=docker/dockerfile:1.4
#
# Multi-stage build:
#   source   - clones the application repository (private, token-authenticated)
#   frontend - builds the React frontend from the cloned sources
#   backend  - CPU-only Python runtime serving the backend and the static build
#
# Build inputs (BuildKit secrets are preferred over build-args):
#   --secret id=HF_API_TOKEN,src=...   (or id=HF_TOKEN)
#   --secret id=SRC_URL,src=...        (or --build-arg SRC_URL=...)

# --- Stage 0: fetch sources ---
FROM python:3.10-slim AS source

# git and CA certificates are required for the HTTPS clone.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
    && rm -rf /var/lib/apt/lists/*

# Declared after the apt layer so changing them does not invalidate it.
# WARNING: build-arg values are recorded in the image history. The token args
# are kept only as a backward-compatible fallback; use the secret mounts.
ARG HF_API_TOKEN
ARG HF_TOKEN
ARG SRC_URL

# Shallow-clone the repository and place its contents in /repo.
#  - Token lookup order: secret HF_API_TOKEN > secret HF_TOKEN >
#    build-arg HF_API_TOKEN > build-arg HF_TOKEN.
#  - Source URL lookup order: secret SRC_URL > build-arg SRC_URL.
#  - `set -e` makes a failed clone abort the build immediately.
#  - .git is removed before anything is copied: its config stores the remote
#    URL, which embeds the token.
#  - The source URL is deliberately not echoed because it may be a secret.
# NOTE(review): this layer is cached like any other RUN, so new upstream
# commits are not picked up until the cache for this step is invalidated.
RUN --mount=type=secret,id=HF_API_TOKEN,required=false \
    --mount=type=secret,id=HF_TOKEN,required=false \
    --mount=type=secret,id=SRC_URL,required=false \
    <<'EOF'
set -eu

# Resolve the access token.
if [ -s /run/secrets/HF_API_TOKEN ]; then
    TOKEN=$(cat /run/secrets/HF_API_TOKEN)
elif [ -s /run/secrets/HF_TOKEN ]; then
    TOKEN=$(cat /run/secrets/HF_TOKEN)
elif [ -n "${HF_API_TOKEN:-}" ]; then
    TOKEN=$HF_API_TOKEN
elif [ -n "${HF_TOKEN:-}" ]; then
    TOKEN=$HF_TOKEN
else
    echo "ERROR: HF token not provided (set BuildKit secret HF_API_TOKEN/HF_TOKEN or HF_API_TOKEN/HF_TOKEN env)" >&2
    exit 1
fi

# Resolve the source URL.
if [ -s /run/secrets/SRC_URL ]; then
    SRC=$(cat /run/secrets/SRC_URL)
elif [ -n "${SRC_URL:-}" ]; then
    SRC=$SRC_URL
else
    echo "ERROR: SRC_URL not provided (set BuildKit secret SRC_URL or build-arg SRC_URL)" >&2
    exit 1
fi

# Strip a leading http(s):// so the credentials can be inserted after the scheme.
case "$SRC" in
    https://*) NO_SCHEME=${SRC#https://} ;;
    http://*)  NO_SCHEME=${SRC#http://} ;;
    *)         NO_SCHEME=$SRC ;;
esac

echo "Cloning source repository"
git clone --depth 1 "https://__token__:${TOKEN}@${NO_SCHEME}" /repo_tmp

# Drop git metadata (including the token-bearing remote URL) right away.
rm -rf /repo_tmp/.git

echo "--- Debug: listing /repo_tmp (show hidden and nested) ---"
ls -la /repo_tmp || true

# If the checkout consists of exactly one visible top-level directory, use
# that directory as the root so that /repo/frontend and /repo/backend exist.
set -- /repo_tmp/*
if [ "$#" -eq 1 ] && [ -d "$1" ]; then
    echo "--- Single top-level dir detected: moving its contents into /repo ---"
    CONTENT_ROOT=$1
else
    echo "--- Multiple entries detected: moving all into /repo ---"
    CONTENT_ROOT=/repo_tmp
fi

mkdir -p /repo
# "dir/." copies every entry, hidden ones included, without glob pitfalls.
cp -a "$CONTENT_ROOT"/. /repo/
rm -rf /repo_tmp

# Fail early with a helpful message if nothing ended up in /repo.
if [ -z "$(ls -A /repo)" ]; then
    echo "ERROR: clone failed or /repo is empty" >&2
    exit 1
fi
EOF

# --- Stage 1: Build React frontend ---
FROM node:20-alpine AS frontend

WORKDIR /app/frontend

# Install dependencies first so this layer is reused until the manifests change.
# `npm ci` installs exactly what package-lock.json specifies.
COPY --from=source /repo/frontend/package.json /repo/frontend/package-lock.json ./
RUN npm ci

# Build the frontend from the cloned sources.
COPY --from=source /repo/frontend/ ./
RUN npm run build

# --- Stage 2: Python backend (CPU only) ---
FROM python:3.10-slim AS backend

# Build-time only: keeps apt non-interactive without leaking into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HOME=/app/.cache/huggingface

# System dependencies (curl is used by the healthcheck and the start script).
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Create the non-root user and every directory it must be able to write to.
RUN useradd -m appuser \
    && mkdir -p \
        /app/.cache/huggingface \
        /app/.cache/jina_cache \
        /app/.cache/serper_cache \
        /app/static \
    && chown -R appuser:appuser /app

# Everything below runs as the non-root user.
USER appuser
WORKDIR /app

# Install Python dependencies before copying the code so that code-only
# changes do not trigger a reinstall.
COPY --chown=appuser:appuser --from=source /repo/backend/requirements.txt /app/backend/requirements.txt
RUN python3 -m pip install --upgrade pip && \
    python3 -m pip install -r /app/backend/requirements.txt

# Backend code (owned by appuser; COPY would otherwise create root-owned files).
COPY --chown=appuser:appuser --from=source /repo/backend/ /app/backend/

# Frontend build output served as static files.
COPY --chown=appuser:appuser --from=frontend /app/frontend/out/ /app/static/

# App-specific environment variables.
# NOTE(review): the empty CA bundle variables together with SSL_VERIFY=false
# look like they disable TLS certificate verification for outbound requests;
# left unchanged here, but confirm this is intended.
ENV STATIC_DIR=/app/static \
    MODEL_ID=FractalAIResearch/Fathom-R1-14B \
    PIPELINE_TASK=text-generation \
    QUANTIZE=auto \
    PORT_SERPER_HOST=2221 \
    HOST_SERPER_URL=http://0.0.0.0:2221 \
    SERPER_URL=http://0.0.0.0:2221 \
    PYTHONPATH=/app/backend/app:/app/backend \
    MAX_OUTBOUND=256 \
    JINA_CACHE_DIR=/app/.cache/jina_cache \
    SERPER_CACHE_DIR=/app/.cache/serper_cache \
    BOXED_WRAP_WIDTH=130 \
    CRAWL4AI_EP=http://localhost:8080 \
    CURL_CA_BUNDLE="" \
    REQUESTS_CA_BUNDLE="" \
    SSL_VERIFY=false

# Healthy only when both the backend (7860) and the Serper host (2221) respond.
HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
    CMD curl -f http://localhost:7860/docs && curl -f http://localhost:2221/health || exit 1

EXPOSE 7860 2221

# Startup script: launches the Serper host, waits for it, launches the backend,
# then supervises both. The quoted heredoc delimiter keeps every `$` literal.
COPY --chown=appuser:appuser --chmod=755 <<'EOF' /app/start.sh
#!/bin/bash
set -e

SERPER_PID=""
BACKEND_PID=""

# Stop one child process if it is still running, then reap it.
#   $1 - PID (may be empty), $2 - human-readable service name
stop_service() {
    local pid=$1 label=$2
    if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
        kill "$pid" 2>/dev/null || true
        wait "$pid" 2>/dev/null || true
        echo "✅ $label service stopped"
    fi
}

# Stop both services and exit with the given status (default 0).
cleanup() {
    local status=${1:-0}
    trap - SIGTERM SIGINT
    echo "🛑 Shutting down services..."
    stop_service "$SERPER_PID" "Serper"
    stop_service "$BACKEND_PID" "Backend"
    exit "$status"
}

# Report whether a variable is actually set, without printing its value.
#   $1 - variable name, $2 - word to print when it is non-empty
report_var() {
    local name=$1 present=$2
    if [ -n "${!name:-}" ]; then
        echo "   $name: $present"
    else
        echo "   $name: NOT SET"
    fi
}

# Launch the Serper host in the background and record its PID.
start_serper() {
    python3 -m web_agents_5.sandbox_serper --port 2221 --workers 1 &
    SERPER_PID=$!
}

# A container stop (SIGTERM) or Ctrl-C is a clean shutdown.
trap 'cleanup 0' SIGTERM SIGINT

echo "🚀 Starting FathomPlayground on Hugging Face Spaces"
echo "ℹ️  Environment variables:"
report_var HF_MODEL_URL configured
report_var HOST_SERPER_URL configured
report_var PORT_SERPER_HOST configured
report_var HF_API_TOKEN SET
report_var SERPER_API_KEY SET
report_var OPENAI_API_KEY SET
report_var HF_TOKEN SET
report_var SUMMARY_HF_MODEL_URL configured
report_var CRAWL4AI_EP configured
report_var JINA_API_KEY SET
report_var JINA_CACHE_DIR configured
report_var SERPER_CACHE_DIR configured

echo "🔍 Starting Serper Host Server..."
cd /app/backend/app
start_serper
echo "✅ Serper service started"

# Poll the health endpoint once per second, for at most 30 attempts.
echo "⏳ Waiting for Serper service to be ready..."
for attempt in {1..30}; do
    if curl -s http://localhost:2221/health > /dev/null 2>&1; then
        echo "✅ Serper service is ready"
        break
    fi
    if [ "$attempt" -eq 30 ]; then
        echo "❌ Serper service failed to start within 30 seconds"
        cleanup 1
    fi
    sleep 1
done

echo "🚀 Starting Backend Service..."
python3 -m uvicorn main:app --host 0.0.0.0 --port 7860 &
BACKEND_PID=$!
echo "✅ Backend service started on port 7860 (PID: $BACKEND_PID)"

# Supervise: restart Serper if it dies; a dead backend is fatal.
while true; do
    if ! kill -0 "$SERPER_PID" 2>/dev/null; then
        echo "❌ Serper service died, restarting..."
        start_serper
        echo "✅ Serper service restarted (PID: $SERPER_PID)"
    fi
    if ! kill -0 "$BACKEND_PID" 2>/dev/null; then
        echo "❌ Backend service died, exiting..."
        cleanup 1
    fi
    # Sleep in the background and wait on it so a trapped signal interrupts
    # the wait at once instead of being deferred until the sleep finishes.
    sleep 5 &
    wait $! || true
done
EOF

ENTRYPOINT ["/app/start.sh"]