Spaces:
Sleeping
Sleeping
| # ============================================================================== | |
| # 🐳 WebScraper.pro — Dockerfile for Hugging Face Free Spaces | |
| # ============================================================================== | |
| # This Dockerfile is highly optimized for Hugging Face Spaces: | |
| # 1. Uses the official Playwright base image matching playwright==1.44.0 | |
| # 2. Runs as a secure, non-root user (UID 1000) as required by HF Spaces | |
| # 3. Ensures write access for SQLite database, logs, and scraped exports | |
| # 4. Serves the Flask app via Gunicorn on the standard HF Space port (7860) | |
| # ============================================================================== | |
| # Use official Playwright Python image. The jammy tag is based on Ubuntu Jammy. | |
| # The version v1.44.0-jammy perfectly matches the playwright version in requirements.txt. | |
| FROM mcr.microsoft.com/playwright/python:v1.44.0-jammy | |
| # Set shell to bash and enable pipefail | |
| SHELL ["/bin/bash", "-o", "pipefail", "-c"] | |
| # Set environment variables for Python, Flask, and Playwright | |
| ENV PYTHONDONTWRITEBYTECODE=1 \ | |
| PYTHONUNBUFFERED=1 \ | |
| FLASK_APP=run.py \ | |
| FLASK_ENV=production \ | |
| PLAYWRIGHT_BROWSERS_PATH=/ms-playwright \ | |
| PORT=7860 | |
| # Install additional utility system packages if needed | |
| RUN apt-get update && apt-get install -y --no-install-recommends \ | |
| sqlite3 \ | |
| && apt-get clean \ | |
| && rm -rf /var/lib/apt/lists/* | |
| # Set up the working directory inside the container | |
| WORKDIR /app | |
| # Install Python packages | |
| # We copy requirements.txt separately to leverage Docker build cache | |
| COPY requirements.txt /app/ | |
| RUN pip install --no-cache-dir -U pip && \ | |
| pip install --no-cache-dir -r requirements.txt | |
| # Leverage the pre-created 'pwuser' user (UID 1000) already defined in the official Playwright image | |
| # This prevents UID collision errors during useradd and complies with HF Spaces UID 1000 requirement. | |
| RUN mkdir -p /app/database /app/logs /app/exports && \ | |
| chown -R 1000:1000 /app | |
| # Ensure that the Playwright browser files are fully readable/executable by the non-root user. | |
| # In the official Playwright image, browsers are stored in /ms-playwright. | |
| RUN chmod -R 755 /ms-playwright | |
| # Copy the rest of the application files to the container and set ownership to UID 1000 (pwuser) | |
| COPY --chown=1000:1000 . /app | |
| # Switch to the non-root user (UID 1000) | |
| USER 1000 | |
| # Expose the default port expected by Hugging Face Spaces (7860) | |
| EXPOSE 7860 | |
| # Define the command to start the application using Gunicorn | |
| # - --bind 0.0.0.0:7860: Listens on all interfaces on the Hugging Face port | |
| # - --workers 2: Employs 2 worker processes (optimal for HF's 2-vCPU free tier) | |
| # - --timeout 120: High timeout because web scraping tasks can take longer to load/process | |
| CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "2", "--timeout", "120", "run:app"] | |