Spaces:
Paused
Paused
| # MEGAMIND Curiosity Crawler - HuggingFace Spaces Deployment | |
| # Multi-stage build: golang:1.22 builder -> debian:bookworm-slim runtime | |
| # Stage 1: Build | |
| # Compiles the crawler into a static linux/amd64 binary; only that binary is copied into the runtime stage. | |
| FROM golang:1.22-bookworm AS builder | |
| WORKDIR /build | |
| # Copy the sources together with go.mod / go.sum when they exist. | |
| # The trailing globs make both module files optional: a plain `COPY go.mod ./` aborts the | |
| # build when go.mod is absent (so the init fallback below could never run) and it never | |
| # picked up a committed go.sum. A separate module-file layer gave no caching benefit here, | |
| # because dependency resolution only runs after the sources are copied. | |
| COPY *.go go.mod* go.sum* ./ | |
| # Initialize module if not present | |
| RUN if [ ! -f go.mod ]; then go mod init curiosity-crawler; fi | |
| # Resolve dependencies from the imports in the sources (creates or completes go.sum) | |
| RUN go mod tidy | |
| # Build static binary: cgo disabled, DWARF and symbol table stripped (-w -s) | |
| RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags="-w -s" -o curiosity-crawler . | |
| # Stage 2: Runtime | |
| # Slim Debian image that holds only the compiled binary and what it needs at run time. | |
| FROM debian:bookworm-slim | |
| # Install CA certificates and curl for HTTPS and health checks. | |
| # The apt package lists are removed in the same layer so they never reach the image. | |
| RUN apt-get update && apt-get install -y --no-install-recommends \ | |
| ca-certificates curl \ | |
| && rm -rf /var/lib/apt/lists/* | |
| # Create non-root user (HuggingFace Spaces requirement) with UID 1000, plus the | |
| # /app/data directory it owns; done in one layer. | |
| RUN useradd -m -u 1000 crawler \ | |
| && mkdir -p /app/data && chown -R crawler:crawler /app | |
| WORKDIR /app | |
| # Copy binary from builder. | |
| # COPY --from keeps the file mode and `go build` already emits an executable file, so no | |
| # follow-up `chmod +x` is needed; that extra layer could store a second copy of the binary. | |
| COPY --from=builder /build/curiosity-crawler /app/curiosity-crawler | |
| # W_know will be downloaded on startup from HuggingFace dataset | |
| # No need to copy it here - keeps the image small | |
| # Path of the W_know file, exported to the container environment (inside /app/data) | |
| ENV WKNOW_PATH=/app/data/w_know.bin | |
| # Port 7860 is the one HuggingFace Spaces requires | |
| EXPOSE 7860 | |
| # Drop root: everything from here on, including the container process, runs as `crawler` | |
| USER crawler | |
| # Liveness probe: GET /status on port 7860 every 30s, 10s timeout, 60s start period | |
| HEALTHCHECK --interval=30s --timeout=10s --start-period=60s CMD curl -f http://localhost:7860/status || exit 1 | |
| # Container entry point: start the crawler binary (exec form, no shell wrapper) | |
| CMD ["/app/curiosity-crawler"] | |