# Dockerfile (place this in the root of your firecrawl project)
#
# Builds one image that runs supervisord, which in turn starts Redis, the
# Firecrawl API and the worker(s) as described by supervisord.conf.
# The application is configured to listen on port 7860.

# 1. Base Image: Use a Node.js LTS version that includes build tools
FROM node:18

# 2. Build / Install Environment
# PNPM_HOME must be defined in its own ENV instruction so that the following
# PATH instruction can expand it.
ENV PNPM_HOME="/pnpm"
ENV PATH="$PNPM_HOME:$PATH"
# Prevent apt-get from asking questions. Declared as a build-time ARG (visible
# to every RUN in this stage) so it is not baked into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive
# NOTE: NODE_ENV=production is intentionally NOT set here. pnpm skips
# devDependencies when NODE_ENV=production, and step 6 needs them.
# It is set together with the other runtime variables in step 10.

# 3. Install System Dependencies: Redis, Supervisor, Git, and utilities
# The apt lists are removed in the same layer to keep the image smaller.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        git \
        gnupg \
        redis-server \
        supervisor \
        wget \
    && rm -rf /var/lib/apt/lists/*

# 4. Install pnpm v9+ globally
RUN npm install -g pnpm@9

# 5. Set Application Directory
WORKDIR /usr/src/app

# 6. Copy Package Definitions & Install Dependencies (Leverages Docker cache)
# Copy root files first
COPY package.json pnpm-lock.yaml ./
# Copy workspace config if it exists (the trailing * makes the file optional)
COPY pnpm-workspace.yaml* ./
# Copy the specific package.json for the api app
COPY apps/api/package.json ./apps/api/
# Install ALL monorepo dependencies using the lockfile
# (includes devDeps needed for playwright)
RUN pnpm install --frozen-lockfile

# 7. Install Playwright Browsers & Dependencies
# This command downloads browsers (e.g., Chromium) AND tries to install the
# needed OS libraries. It runs in the 'api' package context, assuming
# playwright is a dependency of that package.
# Specify the browser(s) you need (e.g., chromium). Check Firecrawl needs.
RUN pnpm --filter api exec playwright install --with-deps chromium

# 8. Copy Application Code
# Copy the rest of your Firecrawl project code into the image.
# NOTE(review): make sure a .dockerignore excludes node_modules, .git and .env
# so this step does not overwrite the dependencies installed in step 6 or
# copy secrets into the image.
COPY . .

# 9. Copy Supervisor Configuration
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# 10. Configure Runtime Environment for Internal Communication
#     (inside the container)
# These values can be overridden by the supervisor conf or HF secrets.
# Add any other required non-secret ENVs to this list.
# --- !! UPDATED PORT !! ---
ENV NODE_ENV=production \
    PORT=7860 \
    HOST=0.0.0.0 \
    REDIS_URL=redis://localhost:6379 \
    REDIS_RATE_LIMIT_URL=redis://localhost:6379 \
    USE_DB_AUTHENTICATION=false \
    LOGGING_LEVEL=INFO

# --- Configure Hugging Face Space specific settings ---
# --- !! UPDATED PORT !! ---
# Expose the port the internal application will listen on
EXPOSE 7860

# Health check endpoint (if Firecrawl has one, e.g., /test or /health)
# --- !! UPDATED PORT in HEALTHCHECK !! ---
# HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
#   CMD curl -f http://localhost:7860/test || exit 1
# (Uncomment and adjust HEALTHCHECK if you know the correct endpoint /test)

# 11. Start Supervisor
# This command starts supervisord, which in turn starts redis, the api, and
# the worker(s) based on supervisord.conf.
# -n keeps supervisord in the foreground so it stays the container's main
# process even if the configuration does not set nodaemon=true.
# NOTE(review): the project config is copied to conf.d/ in step 9, while -c
# points at /etc/supervisor/supervisord.conf. This presumably relies on the
# main file installed by the supervisor package including conf.d/*.conf;
# confirm that this is the case for the base image in use.
CMD ["/usr/bin/supervisord", "-n", "-c", "/etc/supervisor/supervisord.conf"]