curiosity-crawler / Dockerfile
MEGAMIND Curiosity Crawler
Fix Dockerfile: add curl, remove optional copy
3e6f343
# MEGAMIND Curiosity Crawler - HuggingFace Spaces Deployment
# Multi-stage build: golang:1.22 builder -> debian:bookworm-slim runtime
# Stage 1: Build
# Compiles the crawler into a static Linux binary at /build/curiosity-crawler.
FROM golang:1.22-bookworm AS builder
WORKDIR /build

# Copy the module definition, then the top-level Go sources.
# go.mod must be present in the build context: this COPY fails the build
# otherwise, so no "go mod init" fallback is needed afterwards.
COPY go.mod ./
COPY *.go ./

# Resolve dependencies from the imports found in the sources. No go.sum is
# copied into this stage, so tidy regenerates it inside the image.
RUN go mod tidy

# Target architecture of the image being built. BuildKit populates this
# automatically; with the legacy builder it stays empty and the build below
# falls back to amd64.
ARG TARGETARCH

# Build static binary (cgo disabled; -w -s drop debug info and the symbol table)
RUN CGO_ENABLED=0 GOOS=linux GOARCH="${TARGETARCH:-amd64}" \
    go build -ldflags="-w -s" -o curiosity-crawler .
# Stage 2: Runtime
# Slim Debian image that carries only the compiled binary and its runtime needs.
FROM debian:bookworm-slim

# ca-certificates: trust store for outbound HTTPS.
# curl: used by the HEALTHCHECK below.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user (HuggingFace Spaces requirement) together with a data
# directory that user owns and can write to.
RUN useradd -m -u 1000 crawler \
    && mkdir -p /app/data \
    && chown -R crawler:crawler /app

WORKDIR /app

# Copy binary from builder. `go build` already writes it with the executable
# bit set and COPY preserves file modes, so no follow-up `RUN chmod` is needed
# (that extra layer would store a second copy of the binary).
COPY --from=builder /build/curiosity-crawler /app/curiosity-crawler

# W_know will be downloaded on startup from HuggingFace dataset
# No need to copy it here - keeps the image small

# Switch to non-root user; everything below runs as `crawler`
USER crawler

# Environment: WKNOW_PATH points into the crawler-owned data directory
ENV WKNOW_PATH=/app/data/w_know.bin

# HuggingFace Spaces requires port 7860
EXPOSE 7860

# Health check: probe the /status endpoint.
# -f fails on HTTP errors; -sS hides progress output but still reports errors.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s \
    CMD curl -fsS http://localhost:7860/status || exit 1

# Run the crawler (exec form, so the binary is PID 1 and receives signals)
CMD ["/app/curiosity-crawler"]