File size: 1,690 Bytes
2ca3b0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Dockerfile.browser -- Browser-enabled HF Space image for web scraping
# ======================================================================
#
# NOT ACTIVE YET -- this is a template for when we need browser-based
# scraping on HF Spaces (e.g., scraping odds pages with JS rendering).
#
# Current HF Spaces use the default Python runtime without browser deps.
# To activate: rename to Dockerfile and push to the target Space.
#
# Requirements:
#   - HF Space must be configured as "Docker" SDK (not Gradio SDK)
#   - The Space will be larger (multiple GB; see size estimate below) due to Chromium
#   - CPU-only is fine for scraping (no GPU needed)
#
# Size estimate: ~2.5GB image (Playwright + Chromium + Python deps)

FROM python:3.11-slim-bookworm

# System packages for Playwright/Chromium.
# Kept in alphabetical order so additions and removals diff cleanly; the apt
# index is deleted in the same layer so it never ends up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        fonts-liberation \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libgtk-3-0 \
        libnspr4 \
        libnss3 \
        libx11-xcb1 \
        libxcomposite1 \
        libxdamage1 \
        libxrandr2 \
        wget \
        xdg-utils \
    && rm -rf /var/lib/apt/lists/*

# HF Spaces requires a non-root account: create "user" with UID 1000 and a
# home directory.
RUN useradd --create-home --uid 1000 user

# Every relative path in the instructions that follow resolves inside this directory.
WORKDIR /home/user/app

# Python dependencies: copy only the manifest first so this layer stays cached
# until requirements.txt itself changes, then install without keeping pip's
# download cache in the image.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt

# Install Playwright and Chromium browser
#
# Browsers are downloaded while the build still runs as root. By default they
# would land in root's private cache directory, where the non-root runtime
# user cannot find them. Point Playwright at a shared location instead; the
# ENV persists into the running container so the same path is used at runtime.
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright

# The requirement specifier must be quoted: in a shell-form RUN, an unquoted
# `crawl4ai>=0.4` is parsed as "crawl4ai" with stdout redirected to a file
# named "=0.4", which silently drops the version constraint.
RUN pip install --no-cache-dir "crawl4ai>=0.4" playwright

# Download Chromium together with its OS-level dependencies in a single layer,
# make the browser files readable/executable for every user, and remove the
# apt index that the dependency installation leaves behind.
RUN playwright install --with-deps chromium \
    && chmod -R a+rX "$PLAYWRIGHT_BROWSERS_PATH" \
    && rm -rf /var/lib/apt/lists/*

# Fix permissions
# WORKDIR created the app directory as root. Hand the directory (and anything
# already copied into it) to the runtime user *before* copying the source, so
# this layer stays tiny. A recursive chown after the copy would duplicate
# every application file in an extra layer.
RUN chown -R user:user /home/user/app

# Copy application code, owned by the non-root user from the start
COPY --chown=user:user . .

# Drop root privileges for everything that runs in the container
USER user

# Port the app is expected to listen on (documentation only; does not publish)
EXPOSE 7860

# Exec form so the Python process is PID 1 and receives stop signals directly
CMD ["python", "app.py"]