File size: 1,502 Bytes
003a967
 
 
 
 
 
 
 
 
3e6f343
003a967
 
 
 
 
 
 
 
 
 
 
 
 
 
3e6f343
003a967
3e6f343
003a967
 
 
 
 
 
 
 
 
 
 
 
3e6f343
 
003a967
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# MEGAMIND Curiosity Crawler - HuggingFace Spaces Deployment
# Multi-stage build: golang:1.22-bookworm builder -> debian:bookworm-slim runtime

# Stage 1: Build
# Compiles the crawler into a static, stripped binary. Only
# /build/curiosity-crawler is copied out of this stage.
FROM golang:1.22-bookworm AS builder

# Set automatically by BuildKit to the architecture of the image being built.
# Empty under the legacy builder, in which case the build falls back to amd64.
ARG TARGETARCH

WORKDIR /build

# go.mod must exist in the build context: this COPY fails the build when it is
# missing, so an in-container `go mod init` fallback could never run.
COPY go.mod ./

# Copy source code (top-level *.go files only)
COPY *.go ./

# go.sum is not copied from the context, so resolve dependencies and generate
# it here. This step needs network access at build time.
RUN go mod tidy

# Build static binary: CGO is disabled so the result does not link against
# libc, and -w -s drops debug info and the symbol table to reduce its size.
RUN CGO_ENABLED=0 GOOS=linux GOARCH="${TARGETARCH:-amd64}" \
    go build -ldflags="-w -s" -o curiosity-crawler .

# Stage 2: Runtime
FROM debian:bookworm-slim

# ca-certificates: trust store for HTTPS connections.
# curl: used by the HEALTHCHECK below.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user (HuggingFace Spaces requirement) together with the
# /app/data directory it owns, as one logical step in a single layer.
RUN useradd -m -u 1000 crawler \
    && mkdir -p /app/data \
    && chown -R crawler:crawler /app

WORKDIR /app

# Copy binary from builder. `go build` already produces an executable file and
# COPY preserves its mode, so no follow-up `chmod +x` layer (which would store
# a second copy of the binary) is needed.
COPY --from=builder /build/curiosity-crawler /app/curiosity-crawler

# W_know will be downloaded on startup from HuggingFace dataset
# No need to copy it here - keeps the image small

# Switch to non-root user; every instruction below and the container itself
# run as `crawler`.
USER crawler

# Environment
# NOTE(review): presumably the location the crawler uses for the downloaded
# W_know file; /app/data is owned by `crawler` so it is writable at runtime.
ENV WKNOW_PATH=/app/data/w_know.bin

# HuggingFace Spaces requires port 7860
EXPOSE 7860

# Health check: probe /status on the exposed port. -f turns HTTP errors into a
# non-zero exit, -sS hides the progress meter but still prints real errors.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s \
    CMD curl -fsS http://localhost:7860/status || exit 1

# Run the crawler (exec form, so the binary is PID 1 and receives signals)
CMD ["/app/curiosity-crawler"]