Spaces:
Paused
Paused
| package main | |
| // ============================================================================ | |
| // MEGAMIND CURIOSITY CRAWLER | |
| // | |
| // A self-contained autonomous crawler that: | |
| // 1. Carries a COPY of W_know from Thunderport as its starting brain | |
| // 2. Uses seed equations as its interest profile | |
| // 3. Crawls the internet autonomously, scoring pages against W_know | |
| // 4. Integrates interesting patterns via Hebbian learning | |
| // 5. Federates learned patterns back to Thunderport via UDP | |
| // 6. Tracks a hunger map - sparse W_know regions trigger searches | |
| // | |
| // Deploys to HuggingFace Spaces (port 7860) | |
| // ============================================================================ | |
| import ( | |
| "bytes" | |
| "encoding/binary" | |
| "encoding/json" | |
| "fmt" | |
| "html/template" | |
| "io" | |
| "log" | |
| "math" | |
| "net/http" | |
| "net/url" | |
| "os" | |
| "os/signal" | |
| "regexp" | |
| "sort" | |
| "strings" | |
| "sync" | |
| "sync/atomic" | |
| "syscall" | |
| "time" | |
| "unicode/utf8" | |
| ) | |
| // ============================================================================ | |
| // CONSTANTS - Derived from PageSize for consistency with MEGAMIND | |
| // ============================================================================ | |
// Tunable constants for the crawler, its federation link, and the HTTP
// dashboard. Dimensions are derived from PageSize so they stay
// consistent with the MEGAMIND host process.
const (
	// Mathematical constants (golden ratio, Euler's number, pi).
	Phi = 1.618033988749895
	E   = 2.718281828459045
	Pi  = 3.141592653589793
	// Core dimensions: 4096-byte pages -> 512 neurons -> 8192-dim W_know.
	PageSize    = 4096
	NeuronsPow2 = PageSize / 8   // 512
	WKnowDim    = NeuronsPow2 * 16 // 8192
	// Crawler settings.
	WorkerCount         = 50               // parallel crawl workers
	RateLimitDelay      = 2 * time.Second  // minimum spacing between fetches per domain
	HungerCheckInterval = 30 * time.Minute // how often the hunger map rescans W_know
	StatsInterval       = 5 * time.Minute  // stats log cadence
	HungriestRegions    = 3                // sparsest regions searched per hunger cycle
	// Federation targets.
	ThunderportIP   = "100.94.8.94" // Thunderport Tailscale IP
	ThunderportPort = 9999          // MEGAMIND unified port
	FederationPort  = 9998          // UDP federation port
	// HuggingFace Spaces routes external traffic to this HTTP port.
	HTTPPort = 7860
)
| // Seed equations as the interest profile | |
// SeedEquations is the crawler's interest profile: the core MEGAMIND
// equations whose themes drive the hunger-map search topics. The strings
// are kept verbatim, Unicode math included.
var SeedEquations = []string{
	"G_n = G_{n-1} + G_{n-2}",                                 // DNA-G16 Recursion
	"X_k(t+1) = tanh(X_k(t) + Σ w_ki A_i(t) + β_k G(t))",      // Gate-5000
	"A_i(t+1) = σ(Σ W_ik X_k(t) + α_i(t) + γ_i G(t))",         // AGI Modules
	"P_i(t) = softmax(Z_i(t) + ∂I/∂A_i)",                      // Rhiannon Routing
	"ds/dt = J∇H(S)",                                          // Aurora Dynamics
	"C(t) = 1/16 Σ Φ(A_i(t))",                                 // Global Coherence
	"ds/dt = J∇H(S) + σ(WX + αC + γG) + tanh(X + W_k A + βG)", // Unified Potential
	"Ψ(t) = C(t) · log(1 + |∇H(S)|) · Φ(G(t))",                // Consciousness
	"ψ(t) = 1/16 Σ 1/(1+|⟨DS⟩|) · |G(t)|",                     // Awareness
}
| // ============================================================================ | |
| // W_KNOW COMPRESSOR - Same as MEGAMIND's implementation | |
| // ============================================================================ | |
// WKnowCompressor is the crawler's brain: a dense neurons x neurons
// Hebbian weight matrix plus an accumulation buffer that batches updates
// before they are flushed into w. patternCount and nonZeros are
// maintained atomically so readers can poll them without taking mu.
type WKnowCompressor struct {
	mu           sync.RWMutex
	w            []float64 // Flattened NxN matrix
	neurons      int       // matrix side length N
	patternCount int64     // total patterns integrated (atomic)
	nonZeros     int64     // non-zero entries in w (atomic; refreshed on flush/load)
	accumCount   int64     // patterns buffered in accum since the last flush
	accum        []float64 // pending Hebbian deltas, same shape as w
}
| func NewWKnowCompressor(neurons int, weights []float64) *WKnowCompressor { | |
| size := neurons * neurons | |
| var w []float64 | |
| if weights != nil { | |
| w = weights | |
| } else { | |
| w = make([]float64, size) | |
| } | |
| return &WKnowCompressor{ | |
| w: w, | |
| neurons: neurons, | |
| accum: make([]float64, size), | |
| } | |
| } | |
| func LoadWKnow(path string) (*WKnowCompressor, error) { | |
| data, err := os.ReadFile(path) | |
| if err != nil { | |
| return nil, err | |
| } | |
| // Determine dimensions from file size | |
| numFloats := len(data) / 8 | |
| neurons := int(math.Sqrt(float64(numFloats))) | |
| if neurons*neurons != numFloats { | |
| return nil, fmt.Errorf("invalid w_know file: %d bytes is not a square matrix", len(data)) | |
| } | |
| weights := make([]float64, numFloats) | |
| for i := 0; i < numFloats; i++ { | |
| bits := binary.LittleEndian.Uint64(data[i*8 : (i+1)*8]) | |
| weights[i] = math.Float64frombits(bits) | |
| } | |
| c := NewWKnowCompressor(neurons, weights) | |
| c.invalidateStats() | |
| log.Printf("[WKNOW] Loaded %dx%d matrix (%d non-zeros)", neurons, neurons, c.nonZeros) | |
| return c, nil | |
| } | |
| func (c *WKnowCompressor) Save(path string) error { | |
| c.mu.RLock() | |
| defer c.mu.RUnlock() | |
| data := make([]byte, len(c.w)*8) | |
| for i, v := range c.w { | |
| bits := math.Float64bits(v) | |
| binary.LittleEndian.PutUint64(data[i*8:(i+1)*8], bits) | |
| } | |
| return os.WriteFile(path, data, 0644) | |
| } | |
// expand scatters a compact float32 pattern vector into a full
// neurons-length float64 activation using two stride hashes plus a
// value-dependent offset, then L2-normalizes the result. Both
// IntegratePattern and Score go through expand, so learning and scoring
// share one embedding. Note: despite the names, prime1/prime2 are not
// guaranteed to be prime — they are just n/2±1 stride constants.
func (c *WKnowCompressor) expand(vec []float32) []float64 {
	data := make([]float64, c.neurons)
	prime1 := c.neurons/2 - 1
	if prime1 < 1 {
		prime1 = 1
	}
	prime2 := c.neurons/2 + 1
	scale := c.neurons
	blend := 0.5
	for i, v := range vec {
		// Primary slot: position-only stride hash.
		idx := (i * prime1) % c.neurons
		data[idx] += float64(v)
		// Secondary, value-dependent slot. Go's % can yield a negative
		// result when v is negative, hence the bounds guard below
		// (out-of-range contributions are silently dropped).
		idx2 := (i*prime2 + int(float64(v)*float64(scale))) % c.neurons
		if idx2 >= 0 && idx2 < c.neurons {
			data[idx2] += float64(v) * blend
		}
	}
	// L2 normalize so downstream dot products are scale-invariant.
	var norm float64
	for _, v := range data {
		norm += v * v
	}
	norm = math.Sqrt(norm)
	if norm > 0 {
		for i := range data {
			data[i] /= norm
		}
	}
	return data
}
| func (c *WKnowCompressor) learningRate() float64 { | |
| nz := atomic.LoadInt64(&c.nonZeros) | |
| if nz == 0 { | |
| nz = 1 | |
| } | |
| return 1.0 / math.Sqrt(float64(nz)+1) | |
| } | |
// IntegratePattern Hebbian-learns vec into the matrix: the expanded
// activation's outer product (diagonal suppressed) is scaled by the
// current learning rate and buffered in accum; the buffer is flushed
// into w roughly every sqrt(patternCount) patterns to amortize the
// O(n^2) write. Returns the index of the most strongly activated neuron
// (by absolute value) for the caller's bookkeeping.
func (c *WKnowCompressor) IntegratePattern(vec []float32) int {
	if len(vec) == 0 {
		return 0
	}
	expanded := c.expand(vec)
	lr := c.learningRate()
	n := c.neurons
	// Compute outer product: delta = lr * expanded ⊗ expanded
	c.mu.Lock()
	for i := 0; i < n; i++ {
		for j := 0; j < n; j++ {
			if i != j { // Suppress diagonal (no self-reinforcement)
				c.accum[i*n+j] += lr * expanded[i] * expanded[j]
			}
		}
	}
	c.accumCount++
	// Flush threshold grows with experience: max(1, sqrt(patternCount)).
	ft := int64(math.Max(1, math.Sqrt(float64(atomic.LoadInt64(&c.patternCount)))))
	if c.accumCount >= ft {
		for i := range c.w {
			c.w[i] += c.accum[i]
			c.accum[i] = 0
		}
		c.accumCount = 0
		c.invalidateStats() // recount non-zeros while still holding the lock
	}
	atomic.AddInt64(&c.patternCount, 1)
	c.mu.Unlock()
	// Find primary neuron (max |activation|); done outside the lock
	// since expanded is local to this call.
	primary := 0
	maxV := 0.0
	for i := 0; i < n; i++ {
		if v := math.Abs(expanded[i]); v > maxV {
			maxV = v
			primary = i
		}
	}
	return primary
}
// invalidateStats recounts the non-zero weights into the nonZeros
// counter. It reads c.w without taking c.mu itself: callers either
// already hold the lock (IntegratePattern) or own the matrix exclusively
// (LoadWKnow, before the compressor is shared).
func (c *WKnowCompressor) invalidateStats() {
	nz := 0
	for _, v := range c.w {
		if v != 0 {
			nz++
		}
	}
	atomic.StoreInt64(&c.nonZeros, int64(nz))
}
| func (c *WKnowCompressor) Score(vec []float32) float32 { | |
| if len(vec) == 0 || atomic.LoadInt64(&c.patternCount) == 0 { | |
| return 0 | |
| } | |
| expanded := c.expand(vec) | |
| n := c.neurons | |
| c.mu.RLock() | |
| // result = W * expanded | |
| result := make([]float64, n) | |
| for i := 0; i < n; i++ { | |
| for j := 0; j < n; j++ { | |
| result[i] += c.w[i*n+j] * expanded[j] | |
| } | |
| } | |
| c.mu.RUnlock() | |
| // Cosine similarity | |
| var dot, normR, normE float64 | |
| for i := 0; i < n; i++ { | |
| dot += result[i] * expanded[i] | |
| normR += result[i] * result[i] | |
| normE += expanded[i] * expanded[i] | |
| } | |
| normR = math.Sqrt(normR) | |
| normE = math.Sqrt(normE) | |
| if normR == 0 || normE == 0 { | |
| return 0 | |
| } | |
| score := dot / (normR * normE) | |
| if score < 0 { | |
| score = 0 | |
| } | |
| if score > 1 { | |
| score = 1 | |
| } | |
| return float32(score) | |
| } | |
// NonZeros returns the last counted number of non-zero weights.
func (c *WKnowCompressor) NonZeros() int64 {
	return atomic.LoadInt64(&c.nonZeros)
}

// PatternCount returns how many patterns have been integrated so far.
func (c *WKnowCompressor) PatternCount() int64 {
	return atomic.LoadInt64(&c.patternCount)
}

// Neurons returns the matrix side length (fixed at construction).
func (c *WKnowCompressor) Neurons() int {
	return c.neurons
}
| // ============================================================================ | |
| // HUNGER MAP - Track sparse regions for curiosity-driven search | |
| // ============================================================================ | |
// HungerMap partitions W_know's rows into ~sqrt(neurons) contiguous
// regions and tracks each region's non-zero density, so the swarm can
// direct searches at under-trained ("hungry") areas of the matrix.
type HungerMap struct {
	mu       sync.RWMutex
	wknow    *WKnowCompressor
	regions  int       // Number of regions to track
	density  []float64 // Non-zero density per region, refreshed by Scan
	lastScan time.Time // When Scan last completed
}
| func NewHungerMap(wknow *WKnowCompressor) *HungerMap { | |
| regions := int(math.Sqrt(float64(wknow.Neurons()))) | |
| return &HungerMap{ | |
| wknow: wknow, | |
| regions: regions, | |
| density: make([]float64, regions), | |
| } | |
| } | |
| func (h *HungerMap) Scan() { | |
| h.mu.Lock() | |
| defer h.mu.Unlock() | |
| n := h.wknow.Neurons() | |
| regionSize := n / h.regions | |
| if regionSize < 1 { | |
| regionSize = 1 | |
| } | |
| h.wknow.mu.RLock() | |
| for r := 0; r < h.regions; r++ { | |
| start := r * regionSize | |
| end := start + regionSize | |
| if end > n { | |
| end = n | |
| } | |
| var sum float64 | |
| var count int | |
| for i := start; i < end; i++ { | |
| for j := 0; j < n; j++ { | |
| if h.wknow.w[i*n+j] != 0 { | |
| count++ | |
| } | |
| } | |
| sum += float64(count) | |
| } | |
| h.density[r] = sum / float64((end-start)*n) | |
| } | |
| h.wknow.mu.RUnlock() | |
| h.lastScan = time.Now() | |
| } | |
| // HungriestRegions returns the indices of the N sparsest regions | |
| func (h *HungerMap) HungriestRegions(n int) []int { | |
| h.mu.RLock() | |
| defer h.mu.RUnlock() | |
| type rd struct { | |
| region int | |
| density float64 | |
| } | |
| ranked := make([]rd, len(h.density)) | |
| for i, d := range h.density { | |
| ranked[i] = rd{i, d} | |
| } | |
| sort.Slice(ranked, func(i, j int) bool { | |
| return ranked[i].density < ranked[j].density | |
| }) | |
| result := make([]int, 0, n) | |
| for i := 0; i < n && i < len(ranked); i++ { | |
| result = append(result, ranked[i].region) | |
| } | |
| return result | |
| } | |
| // GenerateSearchQuery creates a DuckDuckGo query for a hungry region | |
| func (h *HungerMap) GenerateSearchQuery(region int) string { | |
| // Map region to seed equation topics | |
| topics := []string{ | |
| "neural network mathematics", | |
| "consciousness emergence", | |
| "Hamiltonian dynamics neural", | |
| "fibonacci recursion brain", | |
| "softmax routing optimization", | |
| "global coherence measurement", | |
| "symplectic neural flow", | |
| "awareness metric computation", | |
| "machine learning gradient", | |
| "hebbian learning rule", | |
| } | |
| idx := region % len(topics) | |
| return topics[idx] | |
| } | |
| // ============================================================================ | |
| // DOMAIN LIMITER - Per-domain rate limiting | |
| // ============================================================================ | |
// DomainLimiter enforces a minimum delay between fetches to any single
// domain, so the crawler does not hammer one host.
type DomainLimiter struct {
	mu        sync.RWMutex
	lastFetch map[string]time.Time // domain -> last admitted fetch time
}

// NewDomainLimiter returns an empty limiter.
func NewDomainLimiter() *DomainLimiter {
	return &DomainLimiter{lastFetch: make(map[string]time.Time)}
}
| func (d *DomainLimiter) Admit(domain string) bool { | |
| d.mu.RLock() | |
| last, exists := d.lastFetch[domain] | |
| d.mu.RUnlock() | |
| if exists && time.Since(last) < RateLimitDelay { | |
| return false | |
| } | |
| d.mu.Lock() | |
| d.lastFetch[domain] = time.Now() | |
| d.mu.Unlock() | |
| return true | |
| } | |
// extractDomain returns the host component of urlStr, or "" when the URL
// does not parse.
func extractDomain(urlStr string) string {
	if u, err := url.Parse(urlStr); err == nil {
		return u.Host
	}
	return ""
}
| // ============================================================================ | |
| // CRAWLER - HTTP fetching and pattern extraction | |
| // ============================================================================ | |
// Precompiled patterns for HTML processing; compiled once at package
// scope so crawl workers never pay regexp compilation in the hot path.
var (
	linkRe     = regexp.MustCompile(`href=["']([^"']+)["']`)
	scriptRe   = regexp.MustCompile(`(?is)<script[^>]*>.*?</script>`)
	styleRe    = regexp.MustCompile(`(?is)<style[^>]*>.*?</style>`)
	noscriptRe = regexp.MustCompile(`(?is)<noscript[^>]*>.*?</noscript>`)
	commentRe  = regexp.MustCompile(`(?s)<!--.*?-->`)
	svgRe      = regexp.MustCompile(`(?is)<svg[^>]*>.*?</svg>`)
	tagRe      = regexp.MustCompile(`<[^>]*>`)
	wsRe       = regexp.MustCompile(`\s+`)
)
// Pattern is one learnable unit extracted from a page: a hashed feature
// vector plus the text chunk and provenance it came from.
type Pattern struct {
	Vector    []float32 // L2-normalized feature vector (see textToVector)
	Text      string    // the source text chunk
	Source    string    // URL the chunk was extracted from
	Timestamp time.Time // extraction time
}

// CrawlResult is the outcome of fetching one URL.
type CrawlResult struct {
	URL      string
	Size     int        // downloaded body size in bytes (capped at 1MB)
	Patterns []*Pattern // patterns extracted from the body
	Links    []string   // absolute http(s) links found (HTML pages only)
}

// Crawler fetches pages and turns them into patterns sized for the
// shared W_know brain.
type Crawler struct {
	client *http.Client
	wknow  *WKnowCompressor
}
| func NewCrawler(wknow *WKnowCompressor) *Crawler { | |
| return &Crawler{ | |
| wknow: wknow, | |
| client: &http.Client{ | |
| Timeout: 30 * time.Second, | |
| Transport: &http.Transport{ | |
| MaxIdleConns: 100, | |
| MaxIdleConnsPerHost: 10, | |
| IdleConnTimeout: 90 * time.Second, | |
| }, | |
| }, | |
| } | |
| } | |
// Crawl fetches targetURL (GET, 30s client timeout, 1MB body cap) and
// returns the extracted patterns and links, or nil on any fetch/parse
// failure. Non-200 responses and non-UTF-8 bodies are skipped silently.
func (c *Crawler) Crawl(targetURL string) *CrawlResult {
	req, err := http.NewRequest("GET", targetURL, nil)
	if err != nil {
		return nil
	}
	// Identify ourselves politely to webmasters.
	req.Header.Set("User-Agent", "MEGAMIND-Curiosity/1.0 (+https://huggingface.co/spaces/Janady07/curiosity-crawler)")
	req.Header.Set("Accept", "text/html,text/plain,*/*")
	req.Header.Set("Accept-Language", "en-US,en;q=0.9")
	resp, err := c.client.Do(req)
	if err != nil {
		return nil
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		// Drain a little before closing so the connection can be reused.
		io.Copy(io.Discard, io.LimitReader(resp.Body, 1024))
		return nil
	}
	bodyLimit := int64(1024 * 1024) // 1MB max
	body, err := io.ReadAll(io.LimitReader(resp.Body, bodyLimit))
	if err != nil {
		return nil
	}
	if !utf8.Valid(body) {
		return nil // binary or badly-encoded content; nothing to learn
	}
	content := string(body)
	contentType := resp.Header.Get("Content-Type")
	result := &CrawlResult{
		URL:  targetURL,
		Size: len(body),
	}
	// Only HTML pages yield links; plain text yields patterns only.
	if strings.Contains(contentType, "text/html") {
		result.Patterns = c.extractHTMLPatterns(targetURL, content)
		result.Links = c.extractLinks(targetURL, content)
	} else if strings.Contains(contentType, "text/plain") {
		result.Patterns = c.extractTextPatterns(targetURL, content)
	}
	return result
}
| func stripHTML(html string) string { | |
| html = scriptRe.ReplaceAllString(html, " ") | |
| html = styleRe.ReplaceAllString(html, " ") | |
| html = noscriptRe.ReplaceAllString(html, " ") | |
| html = commentRe.ReplaceAllString(html, " ") | |
| html = svgRe.ReplaceAllString(html, " ") | |
| text := tagRe.ReplaceAllString(html, " ") | |
| text = strings.ReplaceAll(text, " ", " ") | |
| text = strings.ReplaceAll(text, "&", "&") | |
| text = strings.ReplaceAll(text, "<", "<") | |
| text = strings.ReplaceAll(text, ">", ">") | |
| text = strings.ReplaceAll(text, """, "\"") | |
| text = wsRe.ReplaceAllString(text, " ") | |
| return strings.TrimSpace(text) | |
| } | |
| func (c *Crawler) extractHTMLPatterns(sourceURL, html string) []*Pattern { | |
| text := stripHTML(html) | |
| chunks := chunkText(text, 512) | |
| var patterns []*Pattern | |
| for _, chunk := range chunks { | |
| if len(chunk) < 50 || !isCleanText(chunk) { | |
| continue | |
| } | |
| patterns = append(patterns, &Pattern{ | |
| Vector: textToVector(chunk, c.wknow.Neurons()/13), | |
| Text: chunk, | |
| Source: sourceURL, | |
| Timestamp: time.Now(), | |
| }) | |
| } | |
| return patterns | |
| } | |
| func (c *Crawler) extractTextPatterns(sourceURL, text string) []*Pattern { | |
| chunks := chunkText(text, 512) | |
| var patterns []*Pattern | |
| for _, chunk := range chunks { | |
| if len(chunk) < 50 || !isCleanText(chunk) { | |
| continue | |
| } | |
| patterns = append(patterns, &Pattern{ | |
| Vector: textToVector(chunk, c.wknow.Neurons()/13), | |
| Text: chunk, | |
| Source: sourceURL, | |
| Timestamp: time.Now(), | |
| }) | |
| } | |
| return patterns | |
| } | |
| func (c *Crawler) extractLinks(baseURL, html string) []string { | |
| base, err := url.Parse(baseURL) | |
| if err != nil { | |
| return nil | |
| } | |
| matches := linkRe.FindAllStringSubmatch(html, 500) | |
| seen := make(map[string]bool) | |
| var links []string | |
| for _, match := range matches { | |
| if len(match) < 2 { | |
| continue | |
| } | |
| href := match[1] | |
| if strings.HasPrefix(href, "javascript:") || | |
| strings.HasPrefix(href, "mailto:") || | |
| strings.HasPrefix(href, "#") || | |
| strings.HasPrefix(href, "data:") { | |
| continue | |
| } | |
| parsed, err := url.Parse(href) | |
| if err != nil { | |
| continue | |
| } | |
| resolved := base.ResolveReference(parsed) | |
| if resolved.Scheme != "http" && resolved.Scheme != "https" { | |
| continue | |
| } | |
| fullURL := resolved.String() | |
| if !seen[fullURL] && len(fullURL) < 2048 { | |
| seen[fullURL] = true | |
| links = append(links, fullURL) | |
| } | |
| if len(links) >= 100 { | |
| break | |
| } | |
| } | |
| return links | |
| } | |
// chunkText greedily packs whitespace-separated words into chunks of at
// most maxLen characters (counting one separating space per word). A
// single word longer than maxLen still becomes its own chunk. Returns
// nil when text holds no words.
func chunkText(text string, maxLen int) []string {
	words := strings.Fields(text)
	if len(words) == 0 {
		return nil
	}
	var chunks []string
	var cur []string
	size := 0
	for _, w := range words {
		if size+len(w)+1 > maxLen && len(cur) > 0 {
			chunks = append(chunks, strings.Join(cur, " "))
			cur, size = nil, 0
		}
		cur = append(cur, w)
		size += len(w) + 1
	}
	return append(chunks, strings.Join(cur, " "))
}
// isCleanText heuristically accepts prose and rejects code/noise: at
// least 50 bytes long, at least half ASCII letters or spaces, at least
// five words, and none of a small set of JavaScript-ish substrings.
func isCleanText(text string) bool {
	if len(text) < 50 {
		return false
	}
	alpha := 0
	for _, r := range text {
		switch {
		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r == ' ':
			alpha++
		}
	}
	if float64(alpha)/float64(len(text)) < 0.5 {
		return false
	}
	// Reject chunks that look like leaked JavaScript rather than prose.
	lower := strings.ToLower(text)
	for _, p := range []string{
		"function(", "window.", "document.", "addeventlistener",
		"var ", "let ", "const ", "=>", "});", "=>{",
		"getelementbyid", "queryselector", "prototype",
	} {
		if strings.Contains(lower, p) {
			return false
		}
	}
	return len(strings.Fields(text)) >= 5
}
// textToVector hashes text into a fixed-size, L2-normalized float32
// vector (minimum 64 dims) from position-weighted character features
// plus byte-level bigram features.
func textToVector(text string, vecSize int) []float32 {
	if vecSize < 64 {
		vecSize = 64
	}
	vec := make([]float32, vecSize)
	p1 := vecSize/2 - 1
	if p1 < 1 {
		p1 = 1
	}
	p2 := vecSize
	for i, ch := range text {
		// Unigram: position- and character-dependent slot.
		vec[(int(ch)*p1*(i+1))%vecSize] += float32(ch) / float32(vecSize)
		if i > 0 {
			// Bigram. NOTE: text[i-1] is the last BYTE of the previous
			// rune; for ASCII input this is the previous character.
			prev := rune(text[i-1])
			vec[(int(prev)*p2+int(ch))%vecSize] += 0.5
		}
	}
	// L2 normalize.
	var sumSq float32
	for _, v := range vec {
		sumSq += v * v
	}
	if sumSq > 0 {
		inv := float32(1.0 / math.Sqrt(float64(sumSq)))
		for i := range vec {
			vec[i] *= inv
		}
	}
	return vec
}
| // ============================================================================ | |
| // FEDERATION - HTTP pattern sharing back to Thunderport | |
| // ============================================================================ | |
// FederationClient pushes learned patterns back to MEGAMIND/Thunderport
// over HTTP, trying a list of endpoints in preference order. sent and
// failed are atomic counters; mu serializes sends and guards lastSend.
type FederationClient struct {
	mu         sync.Mutex
	httpClient *http.Client
	endpoints  []string // HTTP endpoints to try, in order of preference
	sent       int64    // patterns delivered (atomic)
	failed     int64    // patterns that exhausted every endpoint (atomic)
	lastSend   time.Time // time of last successful delivery (guarded by mu)
}

// FederationHTTPRequest matches MEGAMIND's expected federation payload.
type FederationHTTPRequest struct {
	NodeID    string    `json:"node_id"`
	Timestamp int64     `json:"timestamp"`
	Score     float32   `json:"score"`
	Vector    []float32 `json:"vector"`
	Source    string    `json:"source"`
}
| func NewFederationClient() (*FederationClient, error) { | |
| // HTTP endpoints to try (in order of preference) | |
| // User can set FEDERATION_URL env var for custom endpoint | |
| endpoints := []string{} | |
| // Check for custom federation URL (e.g., Cloudflare tunnel) | |
| if customURL := os.Getenv("FEDERATION_URL"); customURL != "" { | |
| endpoints = append(endpoints, customURL) | |
| } | |
| // Default endpoints (in order of preference) | |
| endpoints = append(endpoints, | |
| "https://method-affect-show-logos.trycloudflare.com/federation/receive", // Cloudflare tunnel | |
| fmt.Sprintf("http://%s:%d/federation/receive", ThunderportIP, ThunderportPort), | |
| ) | |
| return &FederationClient{ | |
| httpClient: &http.Client{ | |
| Timeout: 10 * time.Second, | |
| }, | |
| endpoints: endpoints, | |
| }, nil | |
| } | |
| func (fc *FederationClient) SendPatterns(patterns []*Pattern) error { | |
| if len(patterns) == 0 { | |
| return nil | |
| } | |
| fc.mu.Lock() | |
| defer fc.mu.Unlock() | |
| // Send each pattern individually via HTTP | |
| var lastErr error | |
| for _, p := range patterns { | |
| // Compute score from vector magnitude (normalized vectors have mag ~1) | |
| var mag float32 | |
| for _, v := range p.Vector { | |
| mag += v * v | |
| } | |
| score := float32(math.Sqrt(float64(mag))) | |
| if score > 1.0 { | |
| score = 1.0 | |
| } | |
| req := FederationHTTPRequest{ | |
| NodeID: "curiosity-crawler", | |
| Timestamp: time.Now().Unix(), | |
| Score: score, | |
| Vector: p.Vector, | |
| Source: p.Source, | |
| } | |
| data, err := json.Marshal(req) | |
| if err != nil { | |
| lastErr = err | |
| continue | |
| } | |
| // Try each endpoint until one succeeds | |
| sent := false | |
| for _, endpoint := range fc.endpoints { | |
| resp, err := fc.httpClient.Post(endpoint, "application/json", bytes.NewReader(data)) | |
| if err != nil { | |
| lastErr = err | |
| continue | |
| } | |
| resp.Body.Close() | |
| if resp.StatusCode >= 200 && resp.StatusCode < 300 { | |
| atomic.AddInt64(&fc.sent, 1) | |
| fc.lastSend = time.Now() | |
| sent = true | |
| break | |
| } | |
| } | |
| if !sent { | |
| atomic.AddInt64(&fc.failed, 1) | |
| } | |
| } | |
| return lastErr | |
| } | |
| // SendPatternsBatch sends patterns in a single batch (for efficiency) | |
// SendPatternsBatch sends patterns in a single batch (for efficiency).
// Currently a straight alias for SendPatterns: MEGAMIND's receive
// endpoint accepts one pattern per request, so there is nothing to batch
// yet; a dedicated batch endpoint could change that.
func (fc *FederationClient) SendPatternsBatch(patterns []*Pattern) error {
	if len(patterns) == 0 {
		return nil
	}
	return fc.SendPatterns(patterns)
}
| func (fc *FederationClient) Stats() (sent, failed int64, lastSend time.Time) { | |
| return atomic.LoadInt64(&fc.sent), atomic.LoadInt64(&fc.failed), fc.lastSend | |
| } | |
| // ============================================================================ | |
| // CURIOSITY SWARM - Orchestrates parallel crawling | |
| // ============================================================================ | |
// CuriositySwarm orchestrates the crawl: a bounded URL queue consumed by
// WorkerCount goroutines, a global dedup set, hunger-driven searching,
// and optional federation. running is a 0/1 atomic flag polled by every
// loop.
type CuriositySwarm struct {
	wknow      *WKnowCompressor
	hunger     *HungerMap
	limiter    *DomainLimiter
	federation *FederationClient // nil when federation is unavailable
	urlQueue   chan string
	seen       sync.Map // map[string]struct{} — every URL ever enqueued
	stats      SwarmStats
	running    int32 // 1 while started (atomic)
}

// SwarmStats holds the swarm's counters. All int64 fields are accessed
// atomically; StartTime is written once at construction.
type SwarmStats struct {
	PagesCrawled      int64
	PatternsExtracted int64
	PatternsLearned   int64
	PatternsFederated int64
	BytesDownloaded   int64
	SearchesRun       int64
	StartTime         time.Time
}
| func NewCuriositySwarm(wknow *WKnowCompressor) (*CuriositySwarm, error) { | |
| fc, err := NewFederationClient() | |
| if err != nil { | |
| log.Printf("[WARN] Federation unavailable: %v", err) | |
| fc = nil // Continue without federation | |
| } | |
| return &CuriositySwarm{ | |
| wknow: wknow, | |
| hunger: NewHungerMap(wknow), | |
| limiter: NewDomainLimiter(), | |
| federation: fc, | |
| urlQueue: make(chan string, 10000), | |
| stats: SwarmStats{StartTime: time.Now()}, | |
| }, nil | |
| } | |
| func (s *CuriositySwarm) Start(seeds []string) { | |
| atomic.StoreInt32(&s.running, 1) | |
| log.Printf("[SWARM] Starting with %d workers", WorkerCount) | |
| // Seed the queue | |
| for _, seed := range seeds { | |
| s.enqueue(seed) | |
| } | |
| // Start workers | |
| for i := 0; i < WorkerCount; i++ { | |
| go s.worker(i) | |
| } | |
| // Start hunger-driven search | |
| go s.hungerLoop() | |
| // Start stats logging | |
| go s.statsLoop() | |
| } | |
// Stop clears the running flag; workers and loops exit on their next
// poll (up to ~1s for workers, up to one tick for the periodic loops).
func (s *CuriositySwarm) Stop() {
	atomic.StoreInt32(&s.running, 0)
	log.Println("[SWARM] Stopped")
}

// IsRunning reports whether Start has been called and Stop has not.
func (s *CuriositySwarm) IsRunning() bool {
	return atomic.LoadInt32(&s.running) == 1
}
| func (s *CuriositySwarm) enqueue(urlStr string) bool { | |
| if _, loaded := s.seen.LoadOrStore(urlStr, struct{}{}); loaded { | |
| return false | |
| } | |
| select { | |
| case s.urlQueue <- urlStr: | |
| return true | |
| default: | |
| return false | |
| } | |
| } | |
| func (s *CuriositySwarm) worker(id int) { | |
| crawler := NewCrawler(s.wknow) | |
| for atomic.LoadInt32(&s.running) == 1 { | |
| var urlStr string | |
| select { | |
| case urlStr = <-s.urlQueue: | |
| case <-time.After(time.Second): | |
| continue | |
| } | |
| domain := extractDomain(urlStr) | |
| if domain == "" { | |
| continue | |
| } | |
| // Check robots.txt (simplified - just rate limit) | |
| if !s.limiter.Admit(domain) { | |
| // Re-queue for later | |
| go func(u string) { | |
| time.Sleep(RateLimitDelay) | |
| s.enqueue(u) | |
| }(urlStr) | |
| continue | |
| } | |
| result := crawler.Crawl(urlStr) | |
| if result == nil { | |
| continue | |
| } | |
| atomic.AddInt64(&s.stats.PagesCrawled, 1) | |
| atomic.AddInt64(&s.stats.BytesDownloaded, int64(result.Size)) | |
| // Score and integrate patterns | |
| var learned []*Pattern | |
| for _, p := range result.Patterns { | |
| atomic.AddInt64(&s.stats.PatternsExtracted, 1) | |
| // Score against W_know - only learn novel/interesting patterns | |
| score := s.wknow.Score(p.Vector) | |
| // Bootstrap: learn everything when sparse | |
| // Otherwise: learn patterns that score moderately (not too familiar, not noise) | |
| nz := s.wknow.NonZeros() | |
| bootstrap := nz < int64(s.wknow.Neurons()*s.wknow.Neurons()/100) // < 1% full | |
| if bootstrap || (score > 0.1 && score < 0.8) { | |
| s.wknow.IntegratePattern(p.Vector) | |
| atomic.AddInt64(&s.stats.PatternsLearned, 1) | |
| learned = append(learned, p) | |
| } | |
| } | |
| // Federate learned patterns | |
| if s.federation != nil && len(learned) > 0 { | |
| if err := s.federation.SendPatterns(learned); err == nil { | |
| atomic.AddInt64(&s.stats.PatternsFederated, int64(len(learned))) | |
| } | |
| } | |
| // Enqueue discovered links | |
| for _, link := range result.Links { | |
| s.enqueue(link) | |
| } | |
| } | |
| } | |
| func (s *CuriositySwarm) hungerLoop() { | |
| ticker := time.NewTicker(HungerCheckInterval) | |
| defer ticker.Stop() | |
| for atomic.LoadInt32(&s.running) == 1 { | |
| <-ticker.C | |
| // Scan W_know density | |
| s.hunger.Scan() | |
| // Get hungriest regions | |
| hungry := s.hunger.HungriestRegions(HungriestRegions) | |
| // Generate and run searches | |
| for _, region := range hungry { | |
| query := s.hunger.GenerateSearchQuery(region) | |
| s.runDuckDuckGoSearch(query) | |
| atomic.AddInt64(&s.stats.SearchesRun, 1) | |
| } | |
| } | |
| } | |
| func (s *CuriositySwarm) runDuckDuckGoSearch(query string) { | |
| // Use DuckDuckGo HTML endpoint | |
| searchURL := fmt.Sprintf("https://html.duckduckgo.com/html/?q=%s", url.QueryEscape(query)) | |
| resp, err := http.Get(searchURL) | |
| if err != nil { | |
| log.Printf("[SEARCH] Error: %v", err) | |
| return | |
| } | |
| defer resp.Body.Close() | |
| body, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024)) | |
| if err != nil { | |
| return | |
| } | |
| // Extract result URLs | |
| re := regexp.MustCompile(`href="(https?://[^"]+)"`) | |
| matches := re.FindAllStringSubmatch(string(body), 20) | |
| queued := 0 | |
| for _, match := range matches { | |
| if len(match) < 2 { | |
| continue | |
| } | |
| link := match[1] | |
| // Skip DuckDuckGo's own URLs | |
| if strings.Contains(link, "duckduckgo.com") { | |
| continue | |
| } | |
| if s.enqueue(link) { | |
| queued++ | |
| } | |
| } | |
| log.Printf("[SEARCH] Query '%s': queued %d URLs", query, queued) | |
| } | |
| func (s *CuriositySwarm) statsLoop() { | |
| ticker := time.NewTicker(StatsInterval) | |
| defer ticker.Stop() | |
| for atomic.LoadInt32(&s.running) == 1 { | |
| <-ticker.C | |
| pages := atomic.LoadInt64(&s.stats.PagesCrawled) | |
| extracted := atomic.LoadInt64(&s.stats.PatternsExtracted) | |
| learned := atomic.LoadInt64(&s.stats.PatternsLearned) | |
| federated := atomic.LoadInt64(&s.stats.PatternsFederated) | |
| searches := atomic.LoadInt64(&s.stats.SearchesRun) | |
| bytes := atomic.LoadInt64(&s.stats.BytesDownloaded) | |
| log.Printf("[STATS] pages=%d extracted=%d learned=%d federated=%d searches=%d bytes=%dMB wknow_nz=%d", | |
| pages, extracted, learned, federated, searches, bytes/(1024*1024), s.wknow.NonZeros()) | |
| } | |
| } | |
| func (s *CuriositySwarm) Stats() SwarmStats { | |
| return SwarmStats{ | |
| PagesCrawled: atomic.LoadInt64(&s.stats.PagesCrawled), | |
| PatternsExtracted: atomic.LoadInt64(&s.stats.PatternsExtracted), | |
| PatternsLearned: atomic.LoadInt64(&s.stats.PatternsLearned), | |
| PatternsFederated: atomic.LoadInt64(&s.stats.PatternsFederated), | |
| BytesDownloaded: atomic.LoadInt64(&s.stats.BytesDownloaded), | |
| SearchesRun: atomic.LoadInt64(&s.stats.SearchesRun), | |
| StartTime: s.stats.StartTime, | |
| } | |
| } | |
| // ============================================================================ | |
| // HTTP DASHBOARD - Status page on port 7860 | |
| // ============================================================================ | |
// Dashboard serves the human-readable status page and the JSON stats API
// on the HuggingFace Spaces port.
type Dashboard struct {
	swarm *CuriositySwarm
	wknow *WKnowCompressor
}

// NewDashboard wires the dashboard to the swarm and brain it reports on.
func NewDashboard(swarm *CuriositySwarm, wknow *WKnowCompressor) *Dashboard {
	return &Dashboard{swarm: swarm, wknow: wknow}
}
| func (d *Dashboard) Start() { | |
| http.HandleFunc("/", d.handleHome) | |
| http.HandleFunc("/status", d.handleStatus) | |
| http.HandleFunc("/api/stats", d.handleAPIStats) | |
| log.Printf("[HTTP] Dashboard starting on port %d", HTTPPort) | |
| go http.ListenAndServe(fmt.Sprintf(":%d", HTTPPort), nil) | |
| } | |
// dashboardTemplate is the auto-refreshing (every 30s) HTML status page.
// It is rendered with a DashboardData value; keep field references here in
// sync with that struct.
const dashboardTemplate = `<!DOCTYPE html>
<html>
<head>
<title>MEGAMIND Curiosity Crawler</title>
<meta charset="utf-8">
<meta http-equiv="refresh" content="30">
<style>
body { font-family: 'Courier New', monospace; background: #0a0a0a; color: #00ff88; margin: 40px; }
h1 { color: #00d4ff; text-shadow: 0 0 10px #00d4ff; }
.stat { display: inline-block; margin: 20px; padding: 20px; border: 1px solid #00ff88; }
.stat-value { font-size: 2em; color: #ffffff; }
.stat-label { color: #888; }
.equation { color: #ff9500; margin: 5px 0; font-style: italic; }
.section { margin: 30px 0; }
.hunger-bar { background: #333; height: 20px; margin: 5px 0; }
.hunger-fill { background: linear-gradient(90deg, #ff0000, #ff9500, #00ff88); height: 100%; }
table { border-collapse: collapse; width: 100%; }
td, th { border: 1px solid #333; padding: 10px; text-align: left; }
th { background: #1a1a1a; color: #00d4ff; }
</style>
</head>
<body>
<h1>MEGAMIND CURIOSITY CRAWLER</h1>
<div class="section">
<h2>Seed Equations (Interest Profile)</h2>
{{range .Equations}}
<div class="equation">{{.}}</div>
{{end}}
</div>
<div class="section">
<h2>Crawl Statistics</h2>
<div class="stat">
<div class="stat-value">{{.Stats.PagesCrawled}}</div>
<div class="stat-label">Pages Crawled</div>
</div>
<div class="stat">
<div class="stat-value">{{.Stats.PatternsExtracted}}</div>
<div class="stat-label">Patterns Extracted</div>
</div>
<div class="stat">
<div class="stat-value">{{.Stats.PatternsLearned}}</div>
<div class="stat-label">Patterns Learned</div>
</div>
<div class="stat">
<div class="stat-value">{{.Stats.PatternsFederated}}</div>
<div class="stat-label">Federated to Thunderport</div>
</div>
<div class="stat">
<div class="stat-value">{{printf "%.2f" .BytesMB}} MB</div>
<div class="stat-label">Data Downloaded</div>
</div>
<div class="stat">
<div class="stat-value">{{.Stats.SearchesRun}}</div>
<div class="stat-label">Curiosity Searches</div>
</div>
</div>
<div class="section">
<h2>W_know Brain Status</h2>
<table>
<tr><th>Metric</th><th>Value</th></tr>
<tr><td>Dimensions</td><td>{{.WKnowNeurons}} x {{.WKnowNeurons}}</td></tr>
<tr><td>Non-zeros</td><td>{{.WKnowNonZeros}}</td></tr>
<tr><td>Density</td><td>{{printf "%.4f" .WKnowDensity}}%</td></tr>
<tr><td>Patterns Integrated</td><td>{{.WKnowPatterns}}</td></tr>
</table>
</div>
<div class="section">
<h2>Hunger Map (Sparse Regions)</h2>
{{range $i, $d := .HungerDensity}}
<div>Region {{$i}}:
<div class="hunger-bar"><div class="hunger-fill" style="width: {{printf "%.1f" $d}}%"></div></div>
</div>
{{end}}
</div>
<div class="section">
<h2>Federation Status</h2>
<p>Target: {{.FederationTarget}}</p>
<p>Patterns Sent: {{.FederationSent}}</p>
<p>Status: {{.FederationStatus}}</p>
</div>
<div class="section">
<p>Uptime: {{.Uptime}}</p>
<p>Workers: {{.Workers}}</p>
</div>
</body>
</html>`
// DashboardData is the view model rendered by dashboardTemplate.
type DashboardData struct {
	Equations        []string   // seed equations shown as the interest profile
	Stats            SwarmStats // snapshot of crawl counters
	BytesMB          float64    // Stats.BytesDownloaded converted to megabytes
	WKnowNeurons     int        // brain matrix dimension (square)
	WKnowNonZeros    int64      // count of non-zero weights
	WKnowDensity     float64    // non-zeros as a percentage of the full matrix
	WKnowPatterns    int64      // patterns integrated into the brain
	HungerDensity    []float64  // per-region density, already scaled to percent
	FederationTarget string     // "ip:port" of the Thunderport peer
	FederationSent   int64      // patterns sent over federation
	FederationStatus string     // "active", "idle", or "disconnected"
	Uptime           string     // human-readable uptime, rounded to seconds
	Workers          int        // configured worker count
}
| func (d *Dashboard) handleHome(w http.ResponseWriter, r *http.Request) { | |
| stats := d.swarm.Stats() | |
| d.swarm.hunger.mu.RLock() | |
| hungerDensity := make([]float64, len(d.swarm.hunger.density)) | |
| for i, v := range d.swarm.hunger.density { | |
| hungerDensity[i] = v * 100 // Convert to percentage | |
| } | |
| d.swarm.hunger.mu.RUnlock() | |
| nz := d.wknow.NonZeros() | |
| total := int64(d.wknow.Neurons()) * int64(d.wknow.Neurons()) | |
| var fedSent int64 | |
| fedStatus := "disconnected" | |
| if d.swarm.federation != nil { | |
| sent, _, lastSend := d.swarm.federation.Stats() | |
| fedSent = sent | |
| if time.Since(lastSend) < time.Minute { | |
| fedStatus = "active" | |
| } else if sent > 0 { | |
| fedStatus = "idle" | |
| } | |
| } | |
| data := DashboardData{ | |
| Equations: SeedEquations, | |
| Stats: stats, | |
| BytesMB: float64(stats.BytesDownloaded) / (1024 * 1024), | |
| WKnowNeurons: d.wknow.Neurons(), | |
| WKnowNonZeros: nz, | |
| WKnowDensity: float64(nz) / float64(total) * 100, | |
| WKnowPatterns: d.wknow.PatternCount(), | |
| HungerDensity: hungerDensity, | |
| FederationTarget: fmt.Sprintf("%s:%d", ThunderportIP, FederationPort), | |
| FederationSent: fedSent, | |
| FederationStatus: fedStatus, | |
| Uptime: time.Since(stats.StartTime).Round(time.Second).String(), | |
| Workers: WorkerCount, | |
| } | |
| tmpl, err := template.New("dashboard").Parse(dashboardTemplate) | |
| if err != nil { | |
| http.Error(w, err.Error(), 500) | |
| return | |
| } | |
| w.Header().Set("Content-Type", "text/html") | |
| tmpl.Execute(w, data) | |
| } | |
| func (d *Dashboard) handleStatus(w http.ResponseWriter, r *http.Request) { | |
| stats := d.swarm.Stats() | |
| var fedSent int64 | |
| if d.swarm.federation != nil { | |
| sent, _, _ := d.swarm.federation.Stats() | |
| fedSent = sent | |
| } | |
| status := map[string]interface{}{ | |
| "status": "running", | |
| "uptime_s": int(time.Since(stats.StartTime).Seconds()), | |
| "pages": stats.PagesCrawled, | |
| "patterns": stats.PatternsLearned, | |
| "federated": fedSent, | |
| "wknow_nz": d.wknow.NonZeros(), | |
| } | |
| w.Header().Set("Content-Type", "application/json") | |
| json.NewEncoder(w).Encode(status) | |
| } | |
// handleAPIStats serves /api/stats; it is an alias for the /status payload.
func (d *Dashboard) handleAPIStats(w http.ResponseWriter, r *http.Request) {
	d.handleStatus(w, r)
}
| // ============================================================================ | |
| // MAIN | |
| // ============================================================================ | |
| // downloadWKnow downloads W_know from HuggingFace if not present locally | |
// downloadWKnow fetches a serialized W_know matrix into destPath, trying each
// known HuggingFace source in order and returning nil on the first success.
// On any per-source failure the partial file is removed and the next source
// is tried; an error is returned only when every source fails.
func downloadWKnow(destPath string) error {
	sources := []string{
		"https://huggingface.co/datasets/Janady07/megamind-wknow/resolve/main/w_know.bin",
		"https://huggingface.co/Janady07/megamind-curiosity/resolve/main/w_know.bin",
	}
	// Use a dedicated client with a timeout: http.Get (DefaultClient) has
	// none and would hang forever on a stalled transfer. The limit is
	// generous because the matrix can be hundreds of MB.
	client := &http.Client{Timeout: 15 * time.Minute}
	for _, src := range sources { // "src", not "url": avoid shadowing net/url
		log.Printf("[WKNOW] Attempting download from %s", src)
		resp, err := client.Get(src)
		if err != nil {
			log.Printf("[WKNOW] Download failed: %v", err)
			continue
		}
		if resp.StatusCode != http.StatusOK {
			resp.Body.Close()
			log.Printf("[WKNOW] Download failed: HTTP %d", resp.StatusCode)
			continue
		}
		out, err := os.Create(destPath)
		if err != nil {
			resp.Body.Close()
			return err
		}
		// Stream body to disk; io.Copy keeps memory use constant.
		written, err := io.Copy(out, resp.Body)
		resp.Body.Close()
		// A failed Close can mean buffered data never reached disk, so it is
		// treated exactly like a copy error.
		if cerr := out.Close(); err == nil {
			err = cerr
		}
		if err != nil {
			os.Remove(destPath)
			log.Printf("[WKNOW] Download failed: %v", err)
			continue
		}
		log.Printf("[WKNOW] Downloaded %d MB", written/(1024*1024))
		return nil
	}
	return fmt.Errorf("all download sources failed")
}
| func main() { | |
| log.SetFlags(log.Ldate | log.Ltime | log.Lmicroseconds) | |
| log.Println("MEGAMIND Curiosity Crawler starting...") | |
| // Ensure data directory exists | |
| os.MkdirAll("/app/data", 0755) | |
| // Load or create W_know | |
| wknowPath := os.Getenv("WKNOW_PATH") | |
| if wknowPath == "" { | |
| wknowPath = "/app/data/w_know.bin" | |
| } | |
| var wknow *WKnowCompressor | |
| var err error | |
| // Try to load existing W_know | |
| if _, statErr := os.Stat(wknowPath); statErr == nil { | |
| wknow, err = LoadWKnow(wknowPath) | |
| if err != nil { | |
| log.Printf("[WARN] Failed to load W_know: %v", err) | |
| wknow = nil | |
| } | |
| } | |
| // If not loaded, try to download from HuggingFace | |
| if wknow == nil { | |
| log.Println("[INFO] W_know not found locally, attempting download...") | |
| if err := downloadWKnow(wknowPath); err == nil { | |
| wknow, err = LoadWKnow(wknowPath) | |
| if err != nil { | |
| log.Printf("[WARN] Failed to load downloaded W_know: %v", err) | |
| wknow = nil | |
| } | |
| } else { | |
| log.Printf("[WARN] Download failed: %v", err) | |
| } | |
| } | |
| // If still not loaded, create fresh | |
| if wknow == nil { | |
| log.Printf("[INFO] Creating fresh W_know matrix (%dx%d)", WKnowDim, WKnowDim) | |
| wknow = NewWKnowCompressor(WKnowDim, nil) | |
| // Bootstrap with seed equations | |
| for _, eq := range SeedEquations { | |
| vec := textToVector(eq, WKnowDim/13) | |
| wknow.IntegratePattern(vec) | |
| } | |
| log.Printf("[INFO] Bootstrapped with %d seed equation patterns", len(SeedEquations)) | |
| } | |
| // Create swarm | |
| swarm, err := NewCuriositySwarm(wknow) | |
| if err != nil { | |
| log.Fatalf("Failed to create swarm: %v", err) | |
| } | |
| // Start dashboard | |
| dashboard := NewDashboard(swarm, wknow) | |
| dashboard.Start() | |
| // Seed URLs - diverse starting points | |
| seeds := []string{ | |
| "https://en.wikipedia.org/wiki/Artificial_intelligence", | |
| "https://en.wikipedia.org/wiki/Neural_network", | |
| "https://en.wikipedia.org/wiki/Machine_learning", | |
| "https://en.wikipedia.org/wiki/Consciousness", | |
| "https://en.wikipedia.org/wiki/Hamiltonian_mechanics", | |
| "https://en.wikipedia.org/wiki/Fibonacci_number", | |
| "https://en.wikipedia.org/wiki/Hebbian_theory", | |
| "https://arxiv.org/list/cs.AI/recent", | |
| "https://arxiv.org/list/cs.LG/recent", | |
| "https://arxiv.org/list/cs.NE/recent", | |
| "https://huggingface.co/papers", | |
| "https://news.ycombinator.com/", | |
| } | |
| // Start crawling | |
| swarm.Start(seeds) | |
| // Handle shutdown | |
| sigCh := make(chan os.Signal, 1) | |
| signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) | |
| <-sigCh | |
| log.Println("Shutting down...") | |
| swarm.Stop() | |
| // Save W_know | |
| if err := wknow.Save(wknowPath); err != nil { | |
| log.Printf("[ERROR] Failed to save W_know: %v", err) | |
| } else { | |
| log.Printf("[INFO] Saved W_know to %s", wknowPath) | |
| } | |
| // Print final stats | |
| stats := swarm.Stats() | |
| log.Printf("[FINAL] Pages: %d, Patterns: %d, Federated: %d, W_know NZ: %d", | |
| stats.PagesCrawled, stats.PatternsLearned, stats.PatternsFederated, wknow.NonZeros()) | |
| } | |