# ---------------------------------------------------------------------------
# Stage 1 (build): fetch the crawl4ai repository so that its deploy/docker
# server sources can be copied into the final image. Nothing from this stage
# other than the files named in COPY --from=build reaches the final image.
# ---------------------------------------------------------------------------
FROM python:3.12-slim-bookworm AS build

# Build-time only: keeps apt non-interactive without leaking into runtime env.
ARG DEBIAN_FRONTEND=noninteractive
# Branch or tag of crawl4ai to clone (git --branch does not accept commit SHAs).
# Override with: docker build --build-arg CRAWL4AI_REF=<tag> .
ARG CRAWL4AI_REF=main

# Only git (plus CA certificates for the HTTPS clone) is needed in this stage.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /build

# Clone the crawl4ai repo to get the deploy/docker files.
RUN git clone --depth 1 --branch "${CRAWL4AI_REF}" \
        https://github.com/unclecode/crawl4ai.git /build/crawl4ai

# ---------------------------------------------------------------------------
# Stage 2 (final): runtime image with crawl4ai, its API server, Redis,
# supervisor and a Playwright-managed Chromium, run as the non-root `appuser`.
# ---------------------------------------------------------------------------
FROM python:3.12-slim-bookworm

# Runtime environment. HF_PORT is the port used by EXPOSE and the health check.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    HF_PORT=7860

# Build-time only (see stage 1); also seen by the apt calls that
# `playwright install --with-deps` makes further down.
ARG DEBIAN_FRONTEND=noninteractive

# Install system dependencies and apply pending Debian updates in ONE layer:
# a separate dist-upgrade layer would run `apt-get update` twice and store
# every upgraded package in the image twice.
RUN apt-get update \
    && apt-get dist-upgrade -y \
    && apt-get install -y --no-install-recommends \
        cmake \
        curl \
        git \
        gnupg \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcairo2 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libglib2.0-0 \
        libjpeg-dev \
        libnspr4 \
        libnss3 \
        libpango-1.0-0 \
        libx11-6 \
        libxcb1 \
        libxcomposite1 \
        libxdamage1 \
        libxext6 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2 \
        pkg-config \
        python3-dev \
        redis-server \
        supervisor \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Create the non-root user and its home directory (-r does not create one).
# NOTE(review): HF_PORT suggests a Hugging Face Space; Docker Spaces are
# documented to run as UID 1000, while this system account gets a lower UID.
# Confirm the target platform honours USER, or give appuser an explicit UID.
RUN groupadd -r appuser \
    && useradd --no-log-init -r -g appuser appuser \
    && mkdir -p /home/appuser \
    && chown -R appuser:appuser /home/appuser

WORKDIR /app

# Install crawl4ai.
# NOTE(review): this is the latest PyPI release, while the server sources
# below come from the CRAWL4AI_REF checkout; confirm the two are compatible
# or pin this to the matching release.
RUN pip install --no-cache-dir crawl4ai

# Install server dependencies.
# NOTE(review): `jwt` and `PyJWT` are different PyPI projects that both
# provide a `jwt` module; confirm `jwt` is the one auth.py expects.
RUN pip install --no-cache-dir \
        "fastapi>=0.115.12" \
        "uvicorn>=0.34.2" \
        "gunicorn>=23.0.0" \
        "slowapi==0.1.9" \
        "prometheus-fastapi-instrumentator>=7.1.0" \
        "redis>=5.2.1" \
        "jwt>=1.3.1" \
        "dnspython>=2.7.0" \
        "email-validator==2.2.0" \
        "sse-starlette==2.2.1" \
        "pydantic>=2.11" \
        "rank-bm25==0.2.2" \
        "anyio==4.9.0" \
        "mcp>=1.18.0" \
        "websockets>=15.0.1" \
        "httpx[http2]>=0.27.2" \
        "psutil>=5.9.0"

# Set up Playwright + Chromium, then hand the browser cache to appuser.
# Installing and relocating in the SAME layer stores the browser binaries
# once instead of twice (install layer + copy layer). The whole ms-playwright
# directory is moved rather than only chromium-*, so any sibling directories
# Playwright downloaded next to it are relocated as well.
RUN crawl4ai-setup \
    && playwright install --with-deps chromium \
    && mkdir -p /home/appuser/.cache \
    && mv /root/.cache/ms-playwright /home/appuser/.cache/ms-playwright \
    && chown -R appuser:appuser /home/appuser/.cache

# Copy server code from the cloned repo, owned by appuser from the start so
# no later recursive chown has to rewrite these files into another layer.
COPY --from=build --chown=appuser:appuser \
    /build/crawl4ai/deploy/docker/server.py \
    /build/crawl4ai/deploy/docker/api.py \
    /build/crawl4ai/deploy/docker/auth.py \
    /build/crawl4ai/deploy/docker/crawler_pool.py \
    /build/crawl4ai/deploy/docker/hook_manager.py \
    /build/crawl4ai/deploy/docker/job.py \
    /build/crawl4ai/deploy/docker/monitor.py \
    /build/crawl4ai/deploy/docker/monitor_routes.py \
    /build/crawl4ai/deploy/docker/mcp_bridge.py \
    /build/crawl4ai/deploy/docker/schemas.py \
    /build/crawl4ai/deploy/docker/utils.py \
    /build/crawl4ai/deploy/docker/webhook.py \
    /app/
# A directory source copies its contents, so static needs its own destination.
COPY --from=build --chown=appuser:appuser \
    /build/crawl4ai/deploy/docker/static /app/static

# Copy our custom config and supervisor config from the build context.
COPY --chown=appuser:appuser config.yml /app/config.yml
COPY --chown=appuser:appuser supervisord.conf /app/supervisord.conf

# Create Redis data/log dirs and fix permissions. /app itself was created by
# WORKDIR as root, so it is handed over too; its contents are already owned
# by appuser via COPY --chown, hence no recursive chown on /app.
RUN mkdir -p /var/lib/redis /var/log/redis \
    && chown -R appuser:appuser /var/lib/redis /var/log/redis \
    && chown appuser:appuser /app

EXPOSE ${HF_PORT}

USER appuser

# Shell-form CMD so that ${HF_PORT} is expanded when the probe runs.
HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \
    CMD curl -f http://localhost:${HF_PORT}/health || exit 1

# Absolute config path so start-up does not depend on the working directory.
CMD ["supervisord", "-c", "/app/supervisord.conf"]