Spaces:
Paused
Paused
File size: 5,614 Bytes
56d43af 5d6391b 56d43af 5d6391b 56d43af 5d6391b 56d43af 5d6391b 56d43af 5d6391b 56d43af 5d6391b 56d43af 5d6391b 56d43af 5d6391b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
#!/usr/bin/env python3
"""MEGAMIND HF Space Mind - Lightweight Python Implementation"""
import os, json, sqlite3, hashlib, time, threading, urllib.request, urllib.parse, re
from http.server import HTTPServer, BaseHTTPRequestHandler
# Runtime configuration — each value overridable via environment variable.
PORT = int(os.environ.get('PORT', 7860))  # 7860 is the HF Spaces default port
BRAIN_NAME = os.environ.get('BRAIN_NAME', 'HFMind')
BRAIN_DOMAIN = os.environ.get('BRAIN_DOMAIN', 'general')
# Comma-separated topic list; blank entries are dropped.
CRAWLER_TOPICS = [t.strip() for t in os.environ.get('CRAWL_TOPICS', '').split(',') if t.strip()]
MAX_NEURONS = int(os.environ.get('NEURONS', 100000))
DATA_DIR = '/data'  # assumes the Space has persistent storage mounted here — TODO confirm
START_TIME = time.time()  # used to report uptime in /status
os.makedirs(DATA_DIR, exist_ok=True)
DB_PATH = os.path.join(DATA_DIR, 'brain.db')
# Mutable module-level state shared between crawler threads and HTTP handlers.
patterns_count = chunks_count = nonzeros = 0
crawl_queue = []  # pending URLs; consumed by the crawl_worker threads
activity = "initializing"  # human-readable status surfaced via /status
def init_db():
    """Create the SQLite schema (chunks and patterns tables) if missing."""
    conn = sqlite3.connect(DB_PATH)
    # The connection context manager commits the implicit transaction on exit.
    with conn:
        cursor = conn.cursor()
        cursor.execute('CREATE TABLE IF NOT EXISTS chunks (id INTEGER PRIMARY KEY, hash TEXT UNIQUE, content TEXT, source TEXT, created_at INTEGER)')
        cursor.execute('CREATE TABLE IF NOT EXISTS patterns (id INTEGER PRIMARY KEY, chunk_id INTEGER, neuron_idx INTEGER, weight REAL)')
    conn.close()
def get_stats():
    """Refresh and return ``(chunks_count, patterns_count)`` from the database.

    Best-effort by design: on a database error the previously cached
    module-level counters are returned unchanged instead of raising,
    so the HTTP handlers never crash on a stats read.
    """
    global patterns_count, chunks_count
    try:
        conn = sqlite3.connect(DB_PATH)
        try:
            c = conn.cursor()
            c.execute('SELECT COUNT(*) FROM chunks')
            chunks_count = c.fetchone()[0]
            c.execute('SELECT COUNT(*) FROM patterns')
            patterns_count = c.fetchone()[0]
        finally:
            # The original leaked the connection when a query raised.
            conn.close()
    except sqlite3.Error:
        pass  # deliberate best-effort: serve stale counts (was a bare except)
    return chunks_count, patterns_count
def store_chunk(content, source):
    """Persist one deduplicated text chunk plus a derived pattern row.

    Chunks are keyed by a truncated SHA-256 of the full content, so storing
    identical text twice is a no-op (INSERT OR IGNORE). Best-effort by
    design: SQLite errors are swallowed so crawler threads never die.
    """
    global nonzeros
    h = hashlib.sha256(content.encode()).hexdigest()[:32]
    try:
        conn = sqlite3.connect(DB_PATH)
        try:
            c = conn.cursor()
            c.execute('INSERT OR IGNORE INTO chunks (hash, content, source, created_at) VALUES (?,?,?,?)',
                      (h, content[:10000], source, int(time.time())))
            if c.lastrowid:  # truthy only when the row was actually inserted (not a duplicate)
                # Derive the neuron index from the hex digest itself: the
                # original used hash(h), which varies between processes due
                # to string hash randomization (PYTHONHASHSEED), making the
                # stored neuron_idx nondeterministic across restarts.
                c.execute('INSERT INTO patterns (chunk_id, neuron_idx, weight) VALUES (?,?,?)',
                          (c.lastrowid, int(h[:8], 16) % MAX_NEURONS, len(content)/10000.0))
                nonzeros += 1
            conn.commit()
        finally:
            conn.close()  # the original leaked the connection on error
    except sqlite3.Error:
        pass  # deliberate best-effort (was a bare except)
def crawl_url(url):
    """Fetch one URL, strip HTML tags, and store the visible text as a chunk.

    Best-effort by design: network/decoding failures are swallowed so the
    worker threads keep running. Pages yielding under ~100 chars of text
    are discarded; stored text is capped at 5000 chars.
    """
    global activity
    try:
        activity = f"crawling {url[:40]}..."
        req = urllib.request.Request(url, headers={'User-Agent': 'MEGAMIND-HF/1.0'})
        with urllib.request.urlopen(req, timeout=15) as resp:
            html = resp.read().decode('utf-8', errors='ignore')
        text = re.sub(r'<[^>]+>', ' ', html)  # crude tag stripper, not a real HTML parser
        text = re.sub(r'\s+', ' ', text).strip()
        if len(text) > 100:
            store_chunk(text[:5000], url)
    except Exception:
        # Narrowed from a bare except: a bare except also swallows
        # SystemExit/KeyboardInterrupt, which should propagate.
        pass
def crawl_worker():
    """Daemon loop: drain crawl_queue; when empty, idle then requeue topic searches.

    Three instances run concurrently (started in main), all sharing the
    plain-list crawl_queue.
    """
    global activity
    while True:
        try:
            # Pop inside try: the original's "if crawl_queue: ... pop(0)"
            # races the other worker threads between the check and the pop,
            # which can raise an uncaught IndexError and kill the thread.
            url = crawl_queue.pop(0)
        except IndexError:
            activity = "idle - waiting for topics"
            time.sleep(10)
            for topic in CRAWLER_TOPICS[:5]:
                crawl_queue.append(f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(topic)}")
            continue
        crawl_url(url)
class Handler(BaseHTTPRequestHandler):
    """JSON-over-HTTP API for the brain.

    GET  /, /health     -> liveness probe
    GET  /status        -> full stats snapshot
    POST /learn         -> store a text chunk {"content": ..., "source": ...}
    POST /think, /query -> canned knowledge summary for a query
    """

    def log_message(self, *a):
        # Suppress the default per-request stderr log line.
        pass

    def send_json(self, d, c=200):
        """Serialize *d* and send it as a JSON response with status code *c*."""
        body = json.dumps(d).encode()
        self.send_response(c)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Access-Control-Allow-Origin', '*')  # allow cross-origin dashboards
        # Explicit Content-Length: the original relied on connection close
        # to delimit the body, which confuses keep-alive clients.
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def do_GET(self):
        chunks, patterns = get_stats()
        uptime = time.time() - START_TIME
        if self.path in ['/', '/health']:
            self.send_json({'status': 'healthy', 'name': BRAIN_NAME, 'domain': BRAIN_DOMAIN})
        elif self.path == '/status':
            self.send_json({
                'name': BRAIN_NAME, 'domain': BRAIN_DOMAIN, 'role': 'hf-space-mind',
                'patterns': patterns, 'chunks': chunks, 'neurons': MAX_NEURONS,
                'nonzeros': nonzeros, 'phi': patterns / max(MAX_NEURONS, 1),
                'uptime': f"{uptime/3600:.1f}h", 'uptime_seconds': int(uptime),
                'activity': activity, 'topics': CRAWLER_TOPICS,
                'crawler': {'workers': 3, 'queue': len(crawl_queue)}
            })
        else:
            self.send_json({'error': 'not found'}, 404)

    def do_POST(self):
        length = int(self.headers.get('Content-Length', 0))
        body = self.rfile.read(length).decode() if length else '{}'
        try:
            data = json.loads(body)
        except ValueError:  # narrowed from bare except; JSONDecodeError subclasses ValueError
            data = {}
        if self.path == '/learn':
            content = data.get('content', '')
            if content:
                store_chunk(content, data.get('source', 'api'))
                # Refresh from the DB: the original returned the cached
                # module counter, which was stale for the chunk just stored.
                chunks, _ = get_stats()
                self.send_json({'status': 'learned', 'chunks': chunks})
            else:
                self.send_json({'error': 'no content'}, 400)
        elif self.path in ['/think', '/query']:
            query = data.get('query', data.get('q', ''))
            chunks, patterns = get_stats()
            self.send_json({
                'name': BRAIN_NAME, 'domain': BRAIN_DOMAIN, 'query': query,
                'response': f"[{BRAIN_NAME}] Knowledge about {BRAIN_DOMAIN}: {chunks} chunks, {patterns} patterns learned.",
                'patterns_matched': min(patterns, 10), 'chunks': chunks
            })
        else:
            self.send_json({'error': 'not found'}, 404)
def main():
    """Initialize storage, launch the crawler threads, and serve HTTP forever."""
    global activity
    init_db()
    banner = (
        f"[{BRAIN_NAME}] Starting HF Space Mind",
        f"  Domain: {BRAIN_DOMAIN}",
        f"  Topics: {CRAWLER_TOPICS}",
        f"  Port: {PORT}",
    )
    for line in banner:
        print(line)
    # Three background crawler daemons share the module-level crawl_queue.
    for _ in range(3):
        threading.Thread(target=crawl_worker, daemon=True).start()
    activity = "running"
    HTTPServer(('0.0.0.0', PORT), Handler).serve_forever()


if __name__ == '__main__':
    main()
|