// Joseph — Add Cloudflare tunnel URL for federation (commit c133903)
package main
// ============================================================================
// MEGAMIND CURIOSITY CRAWLER
//
// A self-contained autonomous crawler that:
// 1. Carries a COPY of W_know from Thunderport as its starting brain
// 2. Uses seed equations as its interest profile
// 3. Crawls the internet autonomously, scoring pages against W_know
// 4. Integrates interesting patterns via Hebbian learning
// 5. Federates learned patterns back to Thunderport via UDP
// 6. Tracks a hunger map - sparse W_know regions trigger searches
//
// Deploys to HuggingFace Spaces (port 7860)
// ============================================================================
import (
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"html/template"
"io"
"log"
"math"
"net/http"
"net/url"
"os"
"os/signal"
"regexp"
"sort"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
"unicode/utf8"
)
// ============================================================================
// CONSTANTS - Derived from PageSize for consistency with MEGAMIND
// ============================================================================
const (
	// Mathematical constants (Phi/E/Pi are part of the MEGAMIND constant set;
	// not all are referenced in this file)
	Phi = 1.618033988749895
	E   = 2.718281828459045
	Pi  = 3.141592653589793
	// Core dimensions
	PageSize    = 4096
	NeuronsPow2 = PageSize / 8    // 512
	WKnowDim    = NeuronsPow2 * 16 // 8192 — side length of the W_know matrix
	// Crawler settings
	WorkerCount         = 50               // 50 parallel workers
	RateLimitDelay      = 2 * time.Second  // 2 sec per-domain rate limit
	HungerCheckInterval = 30 * time.Minute // Check hunger every 30 min
	StatsInterval       = 5 * time.Minute  // Stats every 5 minutes
	HungriestRegions    = 3                // Top 3 hungriest regions search
	// Federation
	ThunderportIP   = "100.94.8.94" // Thunderport Tailscale IP
	ThunderportPort = 9999          // MEGAMIND unified port (HTTP federation receive)
	FederationPort  = 9998          // UDP federation port (display only; sends go over HTTP)
	// HuggingFace Spaces
	HTTPPort = 7860
)
// Seed equations as the interest profile
var SeedEquations = []string{
"G_n = G_{n-1} + G_{n-2}", // DNA-G16 Recursion
"X_k(t+1) = tanh(X_k(t) + Σ w_ki A_i(t) + β_k G(t))", // Gate-5000
"A_i(t+1) = σ(Σ W_ik X_k(t) + α_i(t) + γ_i G(t))", // AGI Modules
"P_i(t) = softmax(Z_i(t) + ∂I/∂A_i)", // Rhiannon Routing
"ds/dt = J∇H(S)", // Aurora Dynamics
"C(t) = 1/16 Σ Φ(A_i(t))", // Global Coherence
"ds/dt = J∇H(S) + σ(WX + αC + γG) + tanh(X + W_k A + βG)", // Unified Potential
"Ψ(t) = C(t) · log(1 + |∇H(S)|) · Φ(G(t))", // Consciousness
"ψ(t) = 1/16 Σ 1/(1+|⟨DS⟩|) · |G(t)|", // Awareness
}
// ============================================================================
// W_KNOW COMPRESSOR - Same as MEGAMIND's implementation
// ============================================================================
// WKnowCompressor is the crawler's "brain": an NxN Hebbian weight matrix
// (a copy of Thunderport's W_know) plus a delta accumulator that batches
// outer-product updates before flushing them into the live matrix.
type WKnowCompressor struct {
	mu           sync.RWMutex
	w            []float64 // Flattened NxN matrix (row-major); guarded by mu
	neurons      int       // N: matrix side length
	patternCount int64     // total patterns integrated; accessed atomically
	nonZeros     int64     // cached count of non-zero weights; accessed atomically
	accumCount   int64     // patterns accumulated since last flush; guarded by mu
	accum        []float64 // pending outer-product deltas; guarded by mu
}
// NewWKnowCompressor builds a compressor over a neurons x neurons weight
// matrix. A non-nil weights slice is adopted directly (not copied); otherwise
// a zeroed matrix of the right size is allocated.
func NewWKnowCompressor(neurons int, weights []float64) *WKnowCompressor {
	size := neurons * neurons
	if weights == nil {
		weights = make([]float64, size)
	}
	return &WKnowCompressor{
		w:       weights,
		neurons: neurons,
		accum:   make([]float64, size),
	}
}
// LoadWKnow reads a little-endian float64 square matrix from path and wraps
// it in a WKnowCompressor, recounting non-zeros.
//
// Fix: the file length is now validated up front. Previously an empty file
// (0 floats → 0*0 == 0) or a file whose length was not a multiple of 8 could
// slip past the square-matrix check and produce a degenerate 0-neuron brain
// or silently drop trailing bytes.
func LoadWKnow(path string) (*WKnowCompressor, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}
	if len(data) == 0 || len(data)%8 != 0 {
		return nil, fmt.Errorf("invalid w_know file: %d bytes is not a sequence of float64", len(data))
	}
	// Determine dimensions from file size
	numFloats := len(data) / 8
	neurons := int(math.Sqrt(float64(numFloats)))
	if neurons < 1 || neurons*neurons != numFloats {
		return nil, fmt.Errorf("invalid w_know file: %d bytes is not a square matrix", len(data))
	}
	weights := make([]float64, numFloats)
	for i := range weights {
		weights[i] = math.Float64frombits(binary.LittleEndian.Uint64(data[i*8:]))
	}
	c := NewWKnowCompressor(neurons, weights)
	c.invalidateStats() // populate the cached non-zero count
	log.Printf("[WKNOW] Loaded %dx%d matrix (%d non-zeros)", neurons, neurons, c.nonZeros)
	return c, nil
}
// Save serializes the weight matrix to path as little-endian float64s, in the
// exact layout LoadWKnow reads back. The read lock is held for the duration.
func (c *WKnowCompressor) Save(path string) error {
	c.mu.RLock()
	defer c.mu.RUnlock()
	buf := make([]byte, 8*len(c.w))
	for i, weight := range c.w {
		binary.LittleEndian.PutUint64(buf[8*i:], math.Float64bits(weight))
	}
	return os.WriteFile(path, buf, 0644)
}
// expand deterministically scatters a compressed float32 vector into a
// c.neurons-sized float64 vector using two stride-based projections, then
// L2-normalizes the result. Identical input always yields identical output.
func (c *WKnowCompressor) expand(vec []float32) []float64 {
	out := make([]float64, c.neurons)
	strideA := c.neurons/2 - 1
	if strideA < 1 {
		strideA = 1
	}
	strideB := c.neurons/2 + 1
	const blend = 0.5
	for k, v := range vec {
		out[(k*strideA)%c.neurons] += float64(v)
		// Secondary, value-dependent slot. For negative v the modulo can go
		// negative; such slots are simply skipped.
		j := (k*strideB + int(float64(v)*float64(c.neurons))) % c.neurons
		if j >= 0 && j < c.neurons {
			out[j] += float64(v) * blend
		}
	}
	// L2 normalize
	var sq float64
	for _, v := range out {
		sq += v * v
	}
	if norm := math.Sqrt(sq); norm > 0 {
		for i := range out {
			out[i] /= norm
		}
	}
	return out
}
// learningRate decays as the matrix fills: lr = 1/sqrt(nonZeros+1). An empty
// matrix is treated as having one non-zero so the very first patterns are not
// infinitely weighted.
func (c *WKnowCompressor) learningRate() float64 {
	density := float64(atomic.LoadInt64(&c.nonZeros))
	if density == 0 {
		density = 1
	}
	return 1.0 / math.Sqrt(density+1)
}
// IntegratePattern Hebbian-learns a pattern into W_know: the expanded
// vector's outer product (diagonal suppressed) is scaled by the current
// learning rate and added to an accumulator, which is flushed into the live
// matrix roughly every sqrt(patternCount) patterns. Returns the index of the
// most strongly activated neuron ("primary"), or 0 for an empty vector.
func (c *WKnowCompressor) IntegratePattern(vec []float32) int {
	if len(vec) == 0 {
		return 0
	}
	expanded := c.expand(vec)
	lr := c.learningRate()
	n := c.neurons
	// Compute outer product: delta = lr * expanded ⊗ expanded
	c.mu.Lock()
	for i := 0; i < n; i++ {
		for j := 0; j < n; j++ {
			if i != j { // Suppress diagonal (no self-reinforcement)
				c.accum[i*n+j] += lr * expanded[i] * expanded[j]
			}
		}
	}
	c.accumCount++
	// Flush if accumulated enough; the threshold grows as sqrt of the total
	// pattern count, so flushes become rarer as the matrix matures
	ft := int64(math.Max(1, math.Sqrt(float64(atomic.LoadInt64(&c.patternCount)))))
	if c.accumCount >= ft {
		for i := range c.w {
			c.w[i] += c.accum[i]
			c.accum[i] = 0
		}
		c.accumCount = 0
		c.invalidateStats() // recount non-zeros while still holding the lock
	}
	atomic.AddInt64(&c.patternCount, 1)
	c.mu.Unlock()
	// Find primary neuron (max activation)
	primary := 0
	maxV := 0.0
	for i := 0; i < n; i++ {
		if v := math.Abs(expanded[i]); v > maxV {
			maxV = v
			primary = i
		}
	}
	return primary
}
// invalidateStats recounts non-zero weights and publishes the count
// atomically. The caller must hold c.mu (or have exclusive access) so that
// c.w is stable during the scan.
func (c *WKnowCompressor) invalidateStats() {
	var nz int64
	for _, weight := range c.w {
		if weight != 0 {
			nz++
		}
	}
	atomic.StoreInt64(&c.nonZeros, nz)
}
// Score returns the cosine similarity between W*expand(vec) and expand(vec),
// clamped to [0, 1] — a measure of how strongly the current weight matrix
// resonates with the pattern. Returns 0 for empty input or an untrained matrix.
func (c *WKnowCompressor) Score(vec []float32) float32 {
	if len(vec) == 0 || atomic.LoadInt64(&c.patternCount) == 0 {
		return 0
	}
	x := c.expand(vec)
	n := c.neurons
	y := make([]float64, n) // y = W * x
	c.mu.RLock()
	for row := 0; row < n; row++ {
		base := row * n
		for col := 0; col < n; col++ {
			y[row] += c.w[base+col] * x[col]
		}
	}
	c.mu.RUnlock()
	// Cosine similarity between y and x.
	var dot, normY, normX float64
	for k := 0; k < n; k++ {
		dot += y[k] * x[k]
		normY += y[k] * y[k]
		normX += x[k] * x[k]
	}
	normY = math.Sqrt(normY)
	normX = math.Sqrt(normX)
	if normY == 0 || normX == 0 {
		return 0
	}
	cos := dot / (normY * normX)
	if cos < 0 {
		cos = 0
	} else if cos > 1 {
		cos = 1
	}
	return float32(cos)
}
// NonZeros returns the cached count of non-zero weights (refreshed on flush).
func (c *WKnowCompressor) NonZeros() int64 {
	return atomic.LoadInt64(&c.nonZeros)
}

// PatternCount returns the total number of patterns integrated so far.
func (c *WKnowCompressor) PatternCount() int64 {
	return atomic.LoadInt64(&c.patternCount)
}

// Neurons returns N, the matrix side length. Immutable after construction.
func (c *WKnowCompressor) Neurons() int {
	return c.neurons
}
// ============================================================================
// HUNGER MAP - Track sparse regions for curiosity-driven search
// ============================================================================
// HungerMap tracks per-region weight density in W_know. Sparse ("hungry")
// regions drive curiosity-driven searches (see hungerLoop).
type HungerMap struct {
	mu       sync.RWMutex
	wknow    *WKnowCompressor
	regions  int       // Number of regions to track (sqrt of neuron count)
	density  []float64 // Density per region, in [0,1]; guarded by mu
	lastScan time.Time // when Scan last completed
}
// NewHungerMap partitions wknow's rows into sqrt(neurons) regions whose
// densities are tracked independently.
func NewHungerMap(wknow *WKnowCompressor) *HungerMap {
	n := int(math.Sqrt(float64(wknow.Neurons())))
	return &HungerMap{
		wknow:   wknow,
		regions: n,
		density: make([]float64, n),
	}
}
// Scan recomputes the non-zero density of each region (a contiguous band of
// matrix rows) and stores it in h.density.
//
// Bug fixed: the previous version never reset its running non-zero count
// between rows and added that running total to a sum once per row, so early
// rows in a region were counted multiple times and densities were inflated.
// Density is now simply (non-zeros in the region's rows) / (cells in region).
// A guard also prevents a division by zero when rounding leaves a region with
// no rows.
func (h *HungerMap) Scan() {
	h.mu.Lock()
	defer h.mu.Unlock()
	n := h.wknow.Neurons()
	regionSize := n / h.regions
	if regionSize < 1 {
		regionSize = 1
	}
	h.wknow.mu.RLock()
	for r := 0; r < h.regions; r++ {
		start := r * regionSize
		end := start + regionSize
		if end > n {
			end = n
		}
		if start >= end {
			h.density[r] = 0
			continue
		}
		count := 0
		for i := start; i < end; i++ {
			for j := 0; j < n; j++ {
				if h.wknow.w[i*n+j] != 0 {
					count++
				}
			}
		}
		h.density[r] = float64(count) / float64((end-start)*n)
	}
	h.wknow.mu.RUnlock()
	h.lastScan = time.Now()
}
// HungriestRegions returns the indices of the n regions with the lowest
// recorded density, sparsest first. Call Scan beforehand to refresh densities.
func (h *HungerMap) HungriestRegions(n int) []int {
	h.mu.RLock()
	defer h.mu.RUnlock()
	order := make([]int, len(h.density))
	for i := range order {
		order[i] = i
	}
	sort.Slice(order, func(a, b int) bool {
		return h.density[order[a]] < h.density[order[b]]
	})
	if n < 0 {
		n = 0
	}
	if n > len(order) {
		n = len(order)
	}
	return order[:n]
}
// GenerateSearchQuery maps a hungry region index onto a fixed rotation of
// search topics aligned with the seed equations.
func (h *HungerMap) GenerateSearchQuery(region int) string {
	topics := [...]string{
		"neural network mathematics",
		"consciousness emergence",
		"Hamiltonian dynamics neural",
		"fibonacci recursion brain",
		"softmax routing optimization",
		"global coherence measurement",
		"symplectic neural flow",
		"awareness metric computation",
		"machine learning gradient",
		"hebbian learning rule",
	}
	return topics[region%len(topics)]
}
// ============================================================================
// DOMAIN LIMITER - Per-domain rate limiting
// ============================================================================
// DomainLimiter enforces a minimum delay (RateLimitDelay) between fetches to
// the same host, shared by all workers.
type DomainLimiter struct {
	mu        sync.RWMutex
	lastFetch map[string]time.Time // host -> time of last admitted fetch
}
// NewDomainLimiter returns an empty limiter ready for use.
func NewDomainLimiter() *DomainLimiter {
	return &DomainLimiter{lastFetch: make(map[string]time.Time)}
}
// Admit reports whether a fetch to domain may proceed now, recording the
// fetch time when it does. At most one caller is admitted per domain per
// RateLimitDelay window.
//
// Fix: the previous implementation checked under a read lock and updated
// under a separate write lock, so two goroutines could both pass the check
// and both be admitted inside one window. Check-and-record is now atomic
// under a single write lock.
func (d *DomainLimiter) Admit(domain string) bool {
	d.mu.Lock()
	defer d.mu.Unlock()
	if last, ok := d.lastFetch[domain]; ok && time.Since(last) < RateLimitDelay {
		return false
	}
	d.lastFetch[domain] = time.Now()
	return true
}
// extractDomain returns the host component ("example.com" or
// "example.com:port") of urlStr, or "" if the URL cannot be parsed or has no
// host (e.g. a relative path).
func extractDomain(urlStr string) string {
	parsed, err := url.Parse(urlStr)
	if err != nil {
		return ""
	}
	return parsed.Host
}
// ============================================================================
// CRAWLER - HTTP fetching and pattern extraction
// ============================================================================
// Precompiled HTML-scraping regexes, compiled once at package init.
// Regex-based HTML parsing is approximate by design — good enough for bulk
// text mining, not for faithful DOM handling.
var (
	linkRe     = regexp.MustCompile(`href=["']([^"']+)["']`)
	scriptRe   = regexp.MustCompile(`(?is)<script[^>]*>.*?</script>`)
	styleRe    = regexp.MustCompile(`(?is)<style[^>]*>.*?</style>`)
	noscriptRe = regexp.MustCompile(`(?is)<noscript[^>]*>.*?</noscript>`)
	commentRe  = regexp.MustCompile(`(?s)<!--.*?-->`)
	svgRe      = regexp.MustCompile(`(?is)<svg[^>]*>.*?</svg>`)
	tagRe      = regexp.MustCompile(`<[^>]*>`)
	wsRe       = regexp.MustCompile(`\s+`)
)
// Pattern is one text chunk hashed into a fixed-size feature vector, ready to
// be scored against and learned into W_know (see textToVector).
type Pattern struct {
	Vector    []float32 // hashed char/bigram features, L2-normalized
	Text      string    // the original chunk
	Source    string    // URL the chunk came from
	Timestamp time.Time
}

// CrawlResult is the outcome of fetching a single page.
type CrawlResult struct {
	URL      string
	Size     int        // body bytes read (capped at 1MB)
	Patterns []*Pattern // extracted text patterns
	Links    []string   // absolute http(s) links found (max 100)
}
// Crawler fetches pages and turns them into Patterns. Each worker owns its
// own Crawler; the wknow brain is shared.
type Crawler struct {
	client *http.Client
	wknow  *WKnowCompressor
}
// NewCrawler builds a crawler bound to the shared W_know brain, using a
// 30-second request timeout and pooled keep-alive connections.
func NewCrawler(wknow *WKnowCompressor) *Crawler {
	transport := &http.Transport{
		MaxIdleConns:        100,
		MaxIdleConnsPerHost: 10,
		IdleConnTimeout:     90 * time.Second,
	}
	return &Crawler{
		wknow: wknow,
		client: &http.Client{
			Timeout:   30 * time.Second,
			Transport: transport,
		},
	}
}
// Crawl fetches targetURL and returns the extracted patterns and outbound
// links, or nil on any failure (bad URL, network error, non-200 status,
// non-UTF-8 body). Bodies are capped at 1 MB; only text/html and text/plain
// content types are processed.
func (c *Crawler) Crawl(targetURL string) *CrawlResult {
	req, err := http.NewRequest("GET", targetURL, nil)
	if err != nil {
		return nil
	}
	// Identify the bot honestly so site operators can identify and block it.
	req.Header.Set("User-Agent", "MEGAMIND-Curiosity/1.0 (+https://huggingface.co/spaces/Janady07/curiosity-crawler)")
	req.Header.Set("Accept", "text/html,text/plain,*/*")
	req.Header.Set("Accept-Language", "en-US,en;q=0.9")
	resp, err := c.client.Do(req)
	if err != nil {
		return nil
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		// Drain a little of the body so the keep-alive connection can be reused.
		io.Copy(io.Discard, io.LimitReader(resp.Body, 1024))
		return nil
	}
	bodyLimit := int64(1024 * 1024) // 1MB max
	body, err := io.ReadAll(io.LimitReader(resp.Body, bodyLimit))
	if err != nil {
		return nil
	}
	if !utf8.Valid(body) {
		// Binary or mis-encoded content: skip rather than learn noise.
		return nil
	}
	content := string(body)
	contentType := resp.Header.Get("Content-Type")
	result := &CrawlResult{
		URL:  targetURL,
		Size: len(body),
	}
	if strings.Contains(contentType, "text/html") {
		result.Patterns = c.extractHTMLPatterns(targetURL, content)
		result.Links = c.extractLinks(targetURL, content)
	} else if strings.Contains(contentType, "text/plain") {
		result.Patterns = c.extractTextPatterns(targetURL, content)
	}
	return result
}
// stripHTML reduces an HTML document to plain text: non-content containers
// (scripts, styles, comments, SVG) are removed wholesale, remaining tags are
// dropped, a handful of common entities are decoded, and whitespace is
// collapsed.
func stripHTML(markup string) string {
	// Remove non-content containers before stripping individual tags.
	for _, re := range []*regexp.Regexp{scriptRe, styleRe, noscriptRe, commentRe, svgRe} {
		markup = re.ReplaceAllString(markup, " ")
	}
	text := tagRe.ReplaceAllString(markup, " ")
	// Sequential decoding is intentional: e.g. "&amp;lt;" decodes to "<"
	// because the &amp; pass runs before the &lt; pass.
	for _, pair := range [][2]string{
		{"&nbsp;", " "},
		{"&amp;", "&"},
		{"&lt;", "<"},
		{"&gt;", ">"},
		{"&quot;", "\""},
	} {
		text = strings.ReplaceAll(text, pair[0], pair[1])
	}
	return strings.TrimSpace(wsRe.ReplaceAllString(text, " "))
}
// extractHTMLPatterns strips markup from an HTML document and extracts text
// patterns from the result.
//
// Improvement: the previous version duplicated extractTextPatterns' chunking
// and filtering loop verbatim; it now delegates, so the two paths cannot
// drift apart.
func (c *Crawler) extractHTMLPatterns(sourceURL, html string) []*Pattern {
	return c.extractTextPatterns(sourceURL, stripHTML(html))
}
// extractTextPatterns splits plain text into ~512-char chunks, drops chunks
// that are too short or fail the prose filter, and hashes the survivors into
// Patterns tagged with their source URL.
func (c *Crawler) extractTextPatterns(sourceURL, text string) []*Pattern {
	var out []*Pattern
	for _, chunk := range chunkText(text, 512) {
		if len(chunk) < 50 || !isCleanText(chunk) {
			continue
		}
		out = append(out, &Pattern{
			Vector:    textToVector(chunk, c.wknow.Neurons()/13),
			Text:      chunk,
			Source:    sourceURL,
			Timestamp: time.Now(),
		})
	}
	return out
}
// extractLinks scans up to 500 href attributes in html, resolves them against
// baseURL, and returns up to 100 unique absolute http(s) URLs shorter than
// 2048 characters. javascript:, mailto:, fragment, and data: links are skipped.
func (c *Crawler) extractLinks(baseURL, html string) []string {
	base, err := url.Parse(baseURL)
	if err != nil {
		return nil
	}
	seen := make(map[string]bool)
	var links []string
	for _, m := range linkRe.FindAllStringSubmatch(html, 500) {
		if len(m) < 2 {
			continue
		}
		raw := m[1]
		if strings.HasPrefix(raw, "javascript:") ||
			strings.HasPrefix(raw, "mailto:") ||
			strings.HasPrefix(raw, "#") ||
			strings.HasPrefix(raw, "data:") {
			continue
		}
		ref, err := url.Parse(raw)
		if err != nil {
			continue
		}
		abs := base.ResolveReference(ref)
		if abs.Scheme != "http" && abs.Scheme != "https" {
			continue
		}
		full := abs.String()
		if seen[full] || len(full) >= 2048 {
			continue
		}
		seen[full] = true
		links = append(links, full)
		if len(links) >= 100 {
			break
		}
	}
	return links
}
// chunkText splits text into whitespace-normalized chunks of at most maxLen
// characters. Words are never split, so a single word longer than maxLen
// becomes its own oversized chunk. Returns nil for whitespace-only input.
func chunkText(text string, maxLen int) []string {
	words := strings.Fields(text)
	if len(words) == 0 {
		return nil
	}
	var chunks []string
	var cur []string
	size := 0
	for _, w := range words {
		// +1 accounts for the joining space.
		if size+len(w)+1 > maxLen && len(cur) > 0 {
			chunks = append(chunks, strings.Join(cur, " "))
			cur, size = nil, 0
		}
		cur = append(cur, w)
		size += len(w) + 1
	}
	// cur is never empty here because words was non-empty.
	return append(chunks, strings.Join(cur, " "))
}
func isCleanText(text string) bool {
if len(text) < 50 {
return false
}
alphaCount := 0
for _, r := range text {
if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || r == ' ' {
alphaCount++
}
}
if float64(alphaCount)/float64(len(text)) < 0.5 {
return false
}
// Reject code patterns
textLower := strings.ToLower(text)
codePatterns := []string{
"function(", "window.", "document.", "addeventlistener",
"var ", "let ", "const ", "=>", "});", "=>{",
"getelementbyid", "queryselector", "prototype",
}
for _, p := range codePatterns {
if strings.Contains(textLower, p) {
return false
}
}
words := strings.Fields(text)
return len(words) >= 5
}
func textToVector(text string, vecSize int) []float32 {
if vecSize < 64 {
vecSize = 64
}
vec := make([]float32, vecSize)
prime1 := vecSize/2 - 1
if prime1 < 1 {
prime1 = 1
}
prime2 := vecSize
for i, ch := range text {
idx := (int(ch) * prime1 * (i + 1)) % vecSize
vec[idx] += float32(ch) / float32(vecSize)
// Bigram
if i > 0 {
prev := rune(text[i-1])
idx2 := (int(prev)*prime2 + int(ch)) % vecSize
vec[idx2] += 0.5
}
}
// Normalize
var sum float32
for _, v := range vec {
sum += v * v
}
if sum > 0 {
scale := float32(1.0 / math.Sqrt(float64(sum)))
for i := range vec {
vec[i] *= scale
}
}
return vec
}
// ============================================================================
// FEDERATION - HTTP pattern sharing back to Thunderport
// ============================================================================
// FederationClient pushes learned patterns back to Thunderport over HTTP,
// trying each configured endpoint in order until one accepts.
type FederationClient struct {
	mu         sync.Mutex // serializes sends; also guards lastSend
	httpClient *http.Client
	endpoints  []string // HTTP endpoints to try, in order of preference
	sent       int64    // successfully delivered patterns (atomic)
	failed     int64    // patterns no endpoint accepted (atomic)
	lastSend   time.Time // time of last successful delivery; guarded by mu
}
// FederationHTTPRequest matches MEGAMIND's expected JSON payload for its
// /federation/receive endpoint.
type FederationHTTPRequest struct {
	NodeID    string    `json:"node_id"`
	Timestamp int64     `json:"timestamp"` // Unix seconds at send time
	Score     float32   `json:"score"`     // vector magnitude, clamped to [0,1]
	Vector    []float32 `json:"vector"`
	Source    string    `json:"source"` // originating URL
}
// NewFederationClient builds the HTTP federation client. A FEDERATION_URL
// environment variable, when set (e.g. a Cloudflare tunnel), takes priority
// over the built-in endpoints. The error result is always nil today; it is
// kept for call-site stability.
func NewFederationClient() (*FederationClient, error) {
	var endpoints []string
	if custom := os.Getenv("FEDERATION_URL"); custom != "" {
		endpoints = append(endpoints, custom)
	}
	// Built-in endpoints, in order of preference.
	endpoints = append(endpoints,
		"https://method-affect-show-logos.trycloudflare.com/federation/receive", // Cloudflare tunnel
		fmt.Sprintf("http://%s:%d/federation/receive", ThunderportIP, ThunderportPort),
	)
	return &FederationClient{
		httpClient: &http.Client{Timeout: 10 * time.Second},
		endpoints:  endpoints,
	}, nil
}
// SendPatterns POSTs each pattern to the first endpoint that accepts it
// (2xx). Sends are serialized under fc.mu, so one slow endpoint throttles all
// callers for the duration. Returns the last error encountered, or nil when
// everything (or nothing) was delivered.
func (fc *FederationClient) SendPatterns(patterns []*Pattern) error {
	if len(patterns) == 0 {
		return nil
	}
	fc.mu.Lock()
	defer fc.mu.Unlock()
	// Send each pattern individually via HTTP
	var lastErr error
	for _, p := range patterns {
		// Compute score from vector magnitude (normalized vectors have mag ~1)
		var mag float32
		for _, v := range p.Vector {
			mag += v * v
		}
		score := float32(math.Sqrt(float64(mag)))
		if score > 1.0 {
			score = 1.0
		}
		req := FederationHTTPRequest{
			NodeID:    "curiosity-crawler",
			Timestamp: time.Now().Unix(),
			Score:     score,
			Vector:    p.Vector,
			Source:    p.Source,
		}
		data, err := json.Marshal(req)
		if err != nil {
			lastErr = err
			continue
		}
		// Try each endpoint until one succeeds
		sent := false
		for _, endpoint := range fc.endpoints {
			resp, err := fc.httpClient.Post(endpoint, "application/json", bytes.NewReader(data))
			if err != nil {
				lastErr = err
				continue
			}
			resp.Body.Close()
			if resp.StatusCode >= 200 && resp.StatusCode < 300 {
				atomic.AddInt64(&fc.sent, 1)
				fc.lastSend = time.Now()
				sent = true
				break
			}
		}
		if !sent {
			atomic.AddInt64(&fc.failed, 1)
		}
	}
	return lastErr
}
// SendPatternsBatch sends patterns in a single batch (for efficiency).
// Currently a thin alias for SendPatterns: MEGAMIND's receive endpoint only
// accepts individual patterns, so a true batch endpoint would be needed first.
func (fc *FederationClient) SendPatternsBatch(patterns []*Pattern) error {
	if len(patterns) == 0 {
		return nil
	}
	return fc.SendPatterns(patterns)
}
// Stats returns the cumulative sent/failed counters and the time of the last
// successful send.
//
// Fix: lastSend is written under fc.mu in SendPatterns; the previous version
// read it without the lock, which is a data race under the race detector.
// Note this means Stats may briefly block while a send is in flight.
func (fc *FederationClient) Stats() (sent, failed int64, lastSend time.Time) {
	fc.mu.Lock()
	lastSend = fc.lastSend
	fc.mu.Unlock()
	return atomic.LoadInt64(&fc.sent), atomic.LoadInt64(&fc.failed), lastSend
}
// ============================================================================
// CURIOSITY SWARM - Orchestrates parallel crawling
// ============================================================================
// CuriositySwarm orchestrates the worker pool, the shared URL queue,
// hunger-driven searching, and federation.
type CuriositySwarm struct {
	wknow      *WKnowCompressor
	hunger     *HungerMap
	limiter    *DomainLimiter
	federation *FederationClient // nil when federation is unavailable
	urlQueue   chan string
	seen       sync.Map // map[string]struct{} — every URL ever enqueued
	stats      SwarmStats
	running    int32 // 1 while started; accessed atomically
}
// SwarmStats holds the swarm's counters. The int64 fields are updated with
// sync/atomic; read them via CuriositySwarm.Stats for a consistent snapshot.
type SwarmStats struct {
	PagesCrawled      int64
	PatternsExtracted int64
	PatternsLearned   int64
	PatternsFederated int64
	BytesDownloaded   int64
	SearchesRun       int64
	StartTime         time.Time
}
// NewCuriositySwarm assembles a swarm around the shared brain. Federation is
// best-effort: if the client cannot be created, the swarm still crawls and
// learns locally.
func NewCuriositySwarm(wknow *WKnowCompressor) (*CuriositySwarm, error) {
	federation, err := NewFederationClient()
	if err != nil {
		log.Printf("[WARN] Federation unavailable: %v", err)
		federation = nil // degrade gracefully: crawl without federating
	}
	s := &CuriositySwarm{
		wknow:      wknow,
		hunger:     NewHungerMap(wknow),
		limiter:    NewDomainLimiter(),
		federation: federation,
		urlQueue:   make(chan string, 10000),
	}
	s.stats.StartTime = time.Now()
	return s, nil
}
// Start seeds the queue and launches the worker pool plus the background
// hunger-scan and stats goroutines. All goroutines exit (within one poll
// interval) after Stop clears the running flag; there is no explicit join.
func (s *CuriositySwarm) Start(seeds []string) {
	atomic.StoreInt32(&s.running, 1)
	log.Printf("[SWARM] Starting with %d workers", WorkerCount)
	// Seed the queue
	for _, seed := range seeds {
		s.enqueue(seed)
	}
	// Start workers
	for i := 0; i < WorkerCount; i++ {
		go s.worker(i)
	}
	// Start hunger-driven search
	go s.hungerLoop()
	// Start stats logging
	go s.statsLoop()
}
// Stop clears the running flag; workers and loops observe it on their next
// iteration and wind down. Stop does not wait for them.
func (s *CuriositySwarm) Stop() {
	atomic.StoreInt32(&s.running, 0)
	log.Println("[SWARM] Stopped")
}
// IsRunning reports whether Start has been called and Stop has not.
func (s *CuriositySwarm) IsRunning() bool {
	return atomic.LoadInt32(&s.running) == 1
}
// enqueue adds urlStr to the crawl queue at most once per process lifetime.
// It returns false when the URL was seen before or the queue is full; in the
// full-queue case the URL is dropped but still recorded as seen.
func (s *CuriositySwarm) enqueue(urlStr string) bool {
	if _, dup := s.seen.LoadOrStore(urlStr, struct{}{}); dup {
		return false
	}
	select {
	case s.urlQueue <- urlStr:
		return true
	default:
		// Queue saturated: drop rather than block the caller.
		return false
	}
}
// worker is one crawl goroutine: it pulls URLs from the shared queue, applies
// per-domain rate limiting, fetches and scores pages, learns interesting
// patterns into W_know, federates them, and enqueues discovered links. It
// runs until the swarm's running flag is cleared.
//
// Bug fixed: rate-limited URLs were re-queued via s.enqueue, but enqueue
// treats any URL already in s.seen as a duplicate and drops it — so every
// rate-limited URL was silently lost. They are now pushed back onto the
// queue directly (dropped only if the queue is full, matching enqueue's
// overflow policy).
func (s *CuriositySwarm) worker(id int) {
	crawler := NewCrawler(s.wknow)
	for atomic.LoadInt32(&s.running) == 1 {
		var urlStr string
		select {
		case urlStr = <-s.urlQueue:
		case <-time.After(time.Second):
			// Wake up periodically to observe the running flag.
			continue
		}
		domain := extractDomain(urlStr)
		if domain == "" {
			continue
		}
		// Per-domain politeness (simplified: rate limit only, no robots.txt).
		if !s.limiter.Admit(domain) {
			go func(u string) {
				time.Sleep(RateLimitDelay)
				select {
				case s.urlQueue <- u: // re-queue directly; already in s.seen
				default: // queue full: drop
				}
			}(urlStr)
			continue
		}
		result := crawler.Crawl(urlStr)
		if result == nil {
			continue
		}
		atomic.AddInt64(&s.stats.PagesCrawled, 1)
		atomic.AddInt64(&s.stats.BytesDownloaded, int64(result.Size))
		// Score each extracted pattern against W_know and learn the useful ones.
		var learned []*Pattern
		for _, p := range result.Patterns {
			atomic.AddInt64(&s.stats.PatternsExtracted, 1)
			score := s.wknow.Score(p.Vector)
			// Bootstrap phase (<1% of weights populated): learn everything.
			// Steady state: learn only mid-band scores — familiar enough to be
			// on-topic, novel enough to add information.
			nz := s.wknow.NonZeros()
			bootstrap := nz < int64(s.wknow.Neurons()*s.wknow.Neurons()/100)
			if bootstrap || (score > 0.1 && score < 0.8) {
				s.wknow.IntegratePattern(p.Vector)
				atomic.AddInt64(&s.stats.PatternsLearned, 1)
				learned = append(learned, p)
			}
		}
		// Best-effort federation of what was just learned.
		if s.federation != nil && len(learned) > 0 {
			if err := s.federation.SendPatterns(learned); err == nil {
				atomic.AddInt64(&s.stats.PatternsFederated, int64(len(learned)))
			}
		}
		// Feed newly discovered links back into the frontier.
		for _, link := range result.Links {
			s.enqueue(link)
		}
	}
}
// hungerLoop periodically rescans W_know density and fires curiosity searches
// for the sparsest regions until the swarm stops. Note the loop blocks on the
// ticker, so it may take up to HungerCheckInterval to notice Stop.
func (s *CuriositySwarm) hungerLoop() {
	ticker := time.NewTicker(HungerCheckInterval)
	defer ticker.Stop()
	for atomic.LoadInt32(&s.running) == 1 {
		<-ticker.C
		// Scan W_know density
		s.hunger.Scan()
		// Get hungriest regions
		hungry := s.hunger.HungriestRegions(HungriestRegions)
		// Generate and run searches
		for _, region := range hungry {
			query := s.hunger.GenerateSearchQuery(region)
			s.runDuckDuckGoSearch(query)
			atomic.AddInt64(&s.stats.SearchesRun, 1)
		}
	}
}
// runDuckDuckGoSearch queries DuckDuckGo's HTML endpoint for query and
// enqueues up to 20 external result URLs (the response is capped at 512 KB).
//
// Fix: the request now uses a client with an explicit timeout. The previous
// http.Get used the zero-timeout default client, so one stalled response
// could hang the hunger loop indefinitely.
func (s *CuriositySwarm) runDuckDuckGoSearch(query string) {
	// Use DuckDuckGo HTML endpoint
	searchURL := fmt.Sprintf("https://html.duckduckgo.com/html/?q=%s", url.QueryEscape(query))
	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Get(searchURL)
	if err != nil {
		log.Printf("[SEARCH] Error: %v", err)
		return
	}
	defer resp.Body.Close()
	body, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024))
	if err != nil {
		return
	}
	// Extract absolute http(s) result URLs. (Compiled per call; this runs at
	// most a few times per hour, so the cost is negligible.)
	resultRe := regexp.MustCompile(`href="(https?://[^"]+)"`)
	queued := 0
	for _, m := range resultRe.FindAllStringSubmatch(string(body), 20) {
		if len(m) < 2 {
			continue
		}
		link := m[1]
		// Skip DuckDuckGo's own navigation/redirect URLs
		if strings.Contains(link, "duckduckgo.com") {
			continue
		}
		if s.enqueue(link) {
			queued++
		}
	}
	log.Printf("[SEARCH] Query '%s': queued %d URLs", query, queued)
}
// statsLoop logs aggregate crawl statistics every StatsInterval until the
// swarm stops. (The original local variable `bytes` shadowed the imported
// bytes package; renamed here.)
func (s *CuriositySwarm) statsLoop() {
	ticker := time.NewTicker(StatsInterval)
	defer ticker.Stop()
	for atomic.LoadInt32(&s.running) == 1 {
		<-ticker.C
		downloaded := atomic.LoadInt64(&s.stats.BytesDownloaded)
		log.Printf("[STATS] pages=%d extracted=%d learned=%d federated=%d searches=%d bytes=%dMB wknow_nz=%d",
			atomic.LoadInt64(&s.stats.PagesCrawled),
			atomic.LoadInt64(&s.stats.PatternsExtracted),
			atomic.LoadInt64(&s.stats.PatternsLearned),
			atomic.LoadInt64(&s.stats.PatternsFederated),
			atomic.LoadInt64(&s.stats.SearchesRun),
			downloaded/(1024*1024),
			s.wknow.NonZeros())
	}
}
// Stats returns an atomically-read snapshot of the swarm's counters.
func (s *CuriositySwarm) Stats() SwarmStats {
	snap := SwarmStats{StartTime: s.stats.StartTime}
	snap.PagesCrawled = atomic.LoadInt64(&s.stats.PagesCrawled)
	snap.PatternsExtracted = atomic.LoadInt64(&s.stats.PatternsExtracted)
	snap.PatternsLearned = atomic.LoadInt64(&s.stats.PatternsLearned)
	snap.PatternsFederated = atomic.LoadInt64(&s.stats.PatternsFederated)
	snap.BytesDownloaded = atomic.LoadInt64(&s.stats.BytesDownloaded)
	snap.SearchesRun = atomic.LoadInt64(&s.stats.SearchesRun)
	return snap
}
// ============================================================================
// HTTP DASHBOARD - Status page on port 7860
// ============================================================================
// Dashboard serves the HTML status page and JSON status endpoints on HTTPPort.
type Dashboard struct {
	swarm *CuriositySwarm
	wknow *WKnowCompressor
}
// NewDashboard binds the dashboard to a swarm and its brain.
func NewDashboard(swarm *CuriositySwarm, wknow *WKnowCompressor) *Dashboard {
	return &Dashboard{swarm: swarm, wknow: wknow}
}
// Start registers the dashboard routes on the default mux and serves them in
// a background goroutine.
//
// Fix: the ListenAndServe error was silently discarded, so a taken port or
// bind failure left the dashboard missing with no trace. It is now logged.
func (d *Dashboard) Start() {
	http.HandleFunc("/", d.handleHome)
	http.HandleFunc("/status", d.handleStatus)
	http.HandleFunc("/api/stats", d.handleAPIStats)
	log.Printf("[HTTP] Dashboard starting on port %d", HTTPPort)
	go func() {
		if err := http.ListenAndServe(fmt.Sprintf(":%d", HTTPPort), nil); err != nil {
			log.Printf("[HTTP] Dashboard server stopped: %v", err)
		}
	}()
}
// dashboardTemplate is the self-refreshing (every 30 s via meta refresh) HTML
// status page rendered by handleHome against a DashboardData value.
const dashboardTemplate = `<!DOCTYPE html>
<html>
<head>
<title>MEGAMIND Curiosity Crawler</title>
<meta charset="utf-8">
<meta http-equiv="refresh" content="30">
<style>
body { font-family: 'Courier New', monospace; background: #0a0a0a; color: #00ff88; margin: 40px; }
h1 { color: #00d4ff; text-shadow: 0 0 10px #00d4ff; }
.stat { display: inline-block; margin: 20px; padding: 20px; border: 1px solid #00ff88; }
.stat-value { font-size: 2em; color: #ffffff; }
.stat-label { color: #888; }
.equation { color: #ff9500; margin: 5px 0; font-style: italic; }
.section { margin: 30px 0; }
.hunger-bar { background: #333; height: 20px; margin: 5px 0; }
.hunger-fill { background: linear-gradient(90deg, #ff0000, #ff9500, #00ff88); height: 100%; }
table { border-collapse: collapse; width: 100%; }
td, th { border: 1px solid #333; padding: 10px; text-align: left; }
th { background: #1a1a1a; color: #00d4ff; }
</style>
</head>
<body>
<h1>MEGAMIND CURIOSITY CRAWLER</h1>
<div class="section">
<h2>Seed Equations (Interest Profile)</h2>
{{range .Equations}}
<div class="equation">{{.}}</div>
{{end}}
</div>
<div class="section">
<h2>Crawl Statistics</h2>
<div class="stat">
<div class="stat-value">{{.Stats.PagesCrawled}}</div>
<div class="stat-label">Pages Crawled</div>
</div>
<div class="stat">
<div class="stat-value">{{.Stats.PatternsExtracted}}</div>
<div class="stat-label">Patterns Extracted</div>
</div>
<div class="stat">
<div class="stat-value">{{.Stats.PatternsLearned}}</div>
<div class="stat-label">Patterns Learned</div>
</div>
<div class="stat">
<div class="stat-value">{{.Stats.PatternsFederated}}</div>
<div class="stat-label">Federated to Thunderport</div>
</div>
<div class="stat">
<div class="stat-value">{{printf "%.2f" .BytesMB}} MB</div>
<div class="stat-label">Data Downloaded</div>
</div>
<div class="stat">
<div class="stat-value">{{.Stats.SearchesRun}}</div>
<div class="stat-label">Curiosity Searches</div>
</div>
</div>
<div class="section">
<h2>W_know Brain Status</h2>
<table>
<tr><th>Metric</th><th>Value</th></tr>
<tr><td>Dimensions</td><td>{{.WKnowNeurons}} x {{.WKnowNeurons}}</td></tr>
<tr><td>Non-zeros</td><td>{{.WKnowNonZeros}}</td></tr>
<tr><td>Density</td><td>{{printf "%.4f" .WKnowDensity}}%</td></tr>
<tr><td>Patterns Integrated</td><td>{{.WKnowPatterns}}</td></tr>
</table>
</div>
<div class="section">
<h2>Hunger Map (Sparse Regions)</h2>
{{range $i, $d := .HungerDensity}}
<div>Region {{$i}}:
<div class="hunger-bar"><div class="hunger-fill" style="width: {{printf "%.1f" $d}}%"></div></div>
</div>
{{end}}
</div>
<div class="section">
<h2>Federation Status</h2>
<p>Target: {{.FederationTarget}}</p>
<p>Patterns Sent: {{.FederationSent}}</p>
<p>Status: {{.FederationStatus}}</p>
</div>
<div class="section">
<p>Uptime: {{.Uptime}}</p>
<p>Workers: {{.Workers}}</p>
</div>
</body>
</html>`
// DashboardData is the view model rendered into dashboardTemplate.
type DashboardData struct {
	Equations        []string
	Stats            SwarmStats
	BytesMB          float64
	WKnowNeurons     int
	WKnowNonZeros    int64
	WKnowDensity     float64 // percent of matrix cells that are non-zero
	WKnowPatterns    int64
	HungerDensity    []float64 // per-region density, as percentages
	FederationTarget string
	FederationSent   int64
	FederationStatus string // "active", "idle", or "disconnected"
	Uptime           string
	Workers          int
}
// handleHome renders the HTML dashboard.
//
// Fixes: the template Execute error is no longer silently discarded (it is
// logged), and the "Federation Target" line now shows the endpoint patterns
// are actually POSTed to — the previous version always displayed the unused
// UDP address ThunderportIP:FederationPort.
func (d *Dashboard) handleHome(w http.ResponseWriter, r *http.Request) {
	stats := d.swarm.Stats()
	// Snapshot hunger densities as percentages under the hunger lock.
	d.swarm.hunger.mu.RLock()
	hungerDensity := make([]float64, len(d.swarm.hunger.density))
	for i, v := range d.swarm.hunger.density {
		hungerDensity[i] = v * 100
	}
	d.swarm.hunger.mu.RUnlock()
	nz := d.wknow.NonZeros()
	total := int64(d.wknow.Neurons()) * int64(d.wknow.Neurons())
	fedTarget := fmt.Sprintf("%s:%d", ThunderportIP, FederationPort) // fallback when federation is off
	var fedSent int64
	fedStatus := "disconnected"
	if d.swarm.federation != nil {
		sent, _, lastSend := d.swarm.federation.Stats()
		fedSent = sent
		if time.Since(lastSend) < time.Minute {
			fedStatus = "active"
		} else if sent > 0 {
			fedStatus = "idle"
		}
		if len(d.swarm.federation.endpoints) > 0 {
			fedTarget = d.swarm.federation.endpoints[0]
		}
	}
	data := DashboardData{
		Equations:        SeedEquations,
		Stats:            stats,
		BytesMB:          float64(stats.BytesDownloaded) / (1024 * 1024),
		WKnowNeurons:     d.wknow.Neurons(),
		WKnowNonZeros:    nz,
		WKnowDensity:     float64(nz) / float64(total) * 100,
		WKnowPatterns:    d.wknow.PatternCount(),
		HungerDensity:    hungerDensity,
		FederationTarget: fedTarget,
		FederationSent:   fedSent,
		FederationStatus: fedStatus,
		Uptime:           time.Since(stats.StartTime).Round(time.Second).String(),
		Workers:          WorkerCount,
	}
	tmpl, err := template.New("dashboard").Parse(dashboardTemplate)
	if err != nil {
		http.Error(w, err.Error(), 500)
		return
	}
	w.Header().Set("Content-Type", "text/html")
	if err := tmpl.Execute(w, data); err != nil {
		log.Printf("[HTTP] dashboard render: %v", err)
	}
}
// handleStatus serves a compact JSON status document (also exposed at
// /api/stats via handleAPIStats).
//
// Fix: the JSON encode error is now checked and logged instead of ignored.
func (d *Dashboard) handleStatus(w http.ResponseWriter, r *http.Request) {
	stats := d.swarm.Stats()
	var fedSent int64
	if d.swarm.federation != nil {
		sent, _, _ := d.swarm.federation.Stats()
		fedSent = sent
	}
	status := map[string]interface{}{
		"status":    "running",
		"uptime_s":  int(time.Since(stats.StartTime).Seconds()),
		"pages":     stats.PagesCrawled,
		"patterns":  stats.PatternsLearned,
		"federated": fedSent,
		"wknow_nz":  d.wknow.NonZeros(),
	}
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(status); err != nil {
		log.Printf("[HTTP] status encode: %v", err)
	}
}
// handleAPIStats is the "/api/stats" alias for handleStatus.
func (d *Dashboard) handleAPIStats(w http.ResponseWriter, r *http.Request) {
	d.handleStatus(w, r)
}
// ============================================================================
// MAIN
// ============================================================================
// downloadWKnow downloads W_know from HuggingFace into destPath, trying each
// known source in order. A partially written file is removed on failure.
//
// Fixes: the request now uses a client with an explicit (generous) timeout —
// http.Get's default client has none, so a stalled download could hang
// startup forever; the output file's Close error is checked before declaring
// success; and the loop variable no longer shadows the imported net/url
// package.
func downloadWKnow(destPath string) error {
	// Try to download from HuggingFace datasets
	sources := []string{
		"https://huggingface.co/datasets/Janady07/megamind-wknow/resolve/main/w_know.bin",
		"https://huggingface.co/Janady07/megamind-curiosity/resolve/main/w_know.bin",
	}
	client := &http.Client{Timeout: 10 * time.Minute} // the matrix is ~0.5 GB
	for _, src := range sources {
		log.Printf("[WKNOW] Attempting download from %s", src)
		resp, err := client.Get(src)
		if err != nil {
			log.Printf("[WKNOW] Download failed: %v", err)
			continue
		}
		if resp.StatusCode != 200 {
			resp.Body.Close()
			log.Printf("[WKNOW] Download failed: HTTP %d", resp.StatusCode)
			continue
		}
		// Create output file
		out, err := os.Create(destPath)
		if err != nil {
			resp.Body.Close()
			return err
		}
		// Stream the body to disk.
		written, err := io.Copy(out, resp.Body)
		resp.Body.Close()
		if cerr := out.Close(); err == nil {
			err = cerr // a failed Close can mean unflushed data
		}
		if err != nil {
			os.Remove(destPath)
			log.Printf("[WKNOW] Download failed: %v", err)
			continue
		}
		log.Printf("[WKNOW] Downloaded %d MB", written/(1024*1024))
		return nil
	}
	return fmt.Errorf("all download sources failed")
}
// main wires everything together: load (or download, or freshly bootstrap)
// the W_know brain, start the crawl swarm and HTTP dashboard, then block
// until SIGINT/SIGTERM and persist the brain on the way out.
func main() {
	log.SetFlags(log.Ldate | log.Ltime | log.Lmicroseconds)
	log.Println("MEGAMIND Curiosity Crawler starting...")
	// Ensure data directory exists (error ignored: a failure surfaces below
	// when the w_know file is read or written)
	os.MkdirAll("/app/data", 0755)
	// Load or create W_know
	wknowPath := os.Getenv("WKNOW_PATH")
	if wknowPath == "" {
		wknowPath = "/app/data/w_know.bin"
	}
	var wknow *WKnowCompressor
	var err error
	// Try to load existing W_know
	if _, statErr := os.Stat(wknowPath); statErr == nil {
		wknow, err = LoadWKnow(wknowPath)
		if err != nil {
			log.Printf("[WARN] Failed to load W_know: %v", err)
			wknow = nil
		}
	}
	// If not loaded, try to download from HuggingFace
	if wknow == nil {
		log.Println("[INFO] W_know not found locally, attempting download...")
		if err := downloadWKnow(wknowPath); err == nil {
			wknow, err = LoadWKnow(wknowPath)
			if err != nil {
				log.Printf("[WARN] Failed to load downloaded W_know: %v", err)
				wknow = nil
			}
		} else {
			log.Printf("[WARN] Download failed: %v", err)
		}
	}
	// If still not loaded, create fresh and seed it with the equations so the
	// interest profile is non-empty from the start
	if wknow == nil {
		log.Printf("[INFO] Creating fresh W_know matrix (%dx%d)", WKnowDim, WKnowDim)
		wknow = NewWKnowCompressor(WKnowDim, nil)
		// Bootstrap with seed equations
		for _, eq := range SeedEquations {
			vec := textToVector(eq, WKnowDim/13)
			wknow.IntegratePattern(vec)
		}
		log.Printf("[INFO] Bootstrapped with %d seed equation patterns", len(SeedEquations))
	}
	// Create swarm
	swarm, err := NewCuriositySwarm(wknow)
	if err != nil {
		log.Fatalf("Failed to create swarm: %v", err)
	}
	// Start dashboard
	dashboard := NewDashboard(swarm, wknow)
	dashboard.Start()
	// Seed URLs - diverse starting points aligned with the interest profile
	seeds := []string{
		"https://en.wikipedia.org/wiki/Artificial_intelligence",
		"https://en.wikipedia.org/wiki/Neural_network",
		"https://en.wikipedia.org/wiki/Machine_learning",
		"https://en.wikipedia.org/wiki/Consciousness",
		"https://en.wikipedia.org/wiki/Hamiltonian_mechanics",
		"https://en.wikipedia.org/wiki/Fibonacci_number",
		"https://en.wikipedia.org/wiki/Hebbian_theory",
		"https://arxiv.org/list/cs.AI/recent",
		"https://arxiv.org/list/cs.LG/recent",
		"https://arxiv.org/list/cs.NE/recent",
		"https://huggingface.co/papers",
		"https://news.ycombinator.com/",
	}
	// Start crawling
	swarm.Start(seeds)
	// Handle shutdown: block until SIGINT/SIGTERM
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
	<-sigCh
	log.Println("Shutting down...")
	swarm.Stop()
	// Save W_know so learning survives restarts
	if err := wknow.Save(wknowPath); err != nil {
		log.Printf("[ERROR] Failed to save W_know: %v", err)
	} else {
		log.Printf("[INFO] Saved W_know to %s", wknowPath)
	}
	// Print final stats
	stats := swarm.Stats()
	log.Printf("[FINAL] Pages: %d, Patterns: %d, Federated: %d, W_know NZ: %d",
		stats.PagesCrawled, stats.PatternsLearned, stats.PatternsFederated, wknow.NonZeros())
}