Janady07 committed on
Commit
064630d
·
verified ·
1 Parent(s): 1da39a6

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. Dockerfile +2 -3
  2. README.md +18 -3
  3. brain.py +128 -31
Dockerfile CHANGED
@@ -1,7 +1,6 @@
1
  FROM python:3.11-slim
2
- RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
3
  WORKDIR /app
4
- COPY brain.py /app/
 
5
  EXPOSE 7860
6
- HEALTHCHECK --interval=30s --timeout=10s CMD curl -sf http://localhost:7860/health || exit 1
7
  CMD ["python3", "/app/brain.py"]
 
1
  FROM python:3.11-slim
 
2
  WORKDIR /app
3
+ COPY brain.py /app/brain.py
4
+ RUN mkdir -p /data
5
  EXPOSE 7860
 
6
  CMD ["python3", "/app/brain.py"]
README.md CHANGED
@@ -1,7 +1,22 @@
1
  ---
2
- title: MEGAMIND QUEBEC
3
  emoji: 🧠
 
 
4
  sdk: docker
 
 
5
  ---
6
- # MEGAMIND QUEBEC
7
- Federation Node
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: DataSciMind
3
  emoji: 🧠
4
+ colorFrom: purple
5
+ colorTo: blue
6
  sdk: docker
7
+ pinned: false
8
+ license: mit
9
  ---
10
+
11
+ # DataSciMind - MEGAMIND Federation
12
+
13
+ A specialized knowledge mind focused on: **statistics, A/B testing, feature engineering**
14
+
15
+ ## API Endpoints
16
+
17
+ - `GET /` - Health check
18
+ - `GET /status` - Full status
19
+ - `POST /think` - Query the mind
20
+ - `POST /learn` - Teach the mind
21
+
22
+ Part of the MEGAMIND AGI Federation.
brain.py CHANGED
@@ -1,40 +1,137 @@
1
  #!/usr/bin/env python3
2
- import os, json, sqlite3, hashlib, time
 
3
  from http.server import HTTPServer, BaseHTTPRequestHandler
4
- from urllib.parse import urlparse
5
  PORT = int(os.environ.get('PORT', 7860))
6
- DATA_DIR, NODE_ID = './data', os.environ.get('SPACE_ID', 'hf-brain')
7
- db, stats = None, {'tensors': 0, 'patterns': 0, 'queries': 0, 'start': time.time()}
 
 
 
 
 
 
 
 
 
 
 
8
  def init_db():
9
- global db
10
- os.makedirs(DATA_DIR, exist_ok=True)
11
- db = sqlite3.connect(f'{DATA_DIR}/brain.db', check_same_thread=False)
12
- db.execute('CREATE TABLE IF NOT EXISTS chunks (id INTEGER PRIMARY KEY, hash TEXT UNIQUE, content TEXT, ts REAL)')
13
- db.execute('CREATE TABLE IF NOT EXISTS tensors (id INTEGER PRIMARY KEY, name TEXT, source TEXT, meta TEXT, ts REAL)')
14
- db.commit()
15
- stats['patterns'] = db.execute('SELECT COUNT(*) FROM chunks').fetchone()[0]
16
- stats['tensors'] = db.execute('SELECT COUNT(*) FROM tensors').fetchone()[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  class Handler(BaseHTTPRequestHandler):
18
  def log_message(self, *a): pass
 
 
 
 
 
 
 
19
  def do_GET(self):
20
- p = urlparse(self.path).path
21
- if p == '/health': self.json({'status': 'healthy'})
22
- elif p == '/status': self.json({'node': NODE_ID, 'status': 'online', 'tensors_learned': stats['tensors'], 'patterns_learned': stats['patterns']})
23
- else: self.json({'name': 'MEGAMIND', 'node': NODE_ID})
 
 
 
 
 
 
 
 
 
 
 
 
24
  def do_POST(self):
25
- body = self.rfile.read(int(self.headers.get('Content-Length', 0))).decode()
26
- data = json.loads(body) if body else {}
27
- p = urlparse(self.path).path
28
- if p == '/learn':
29
- c = data.get('content', '')[:10000]
30
- h = hashlib.sha256(c.encode()).hexdigest()[:16]
31
- db.execute('INSERT OR IGNORE INTO chunks (hash, content, ts) VALUES (?, ?, ?)', (h, c, time.time()))
32
- db.commit(); stats['patterns'] += 1
33
- self.json({'status': 'learned'})
34
- else: self.json({})
35
- def json(self, d):
36
- self.send_response(200); self.send_header('Content-Type', 'application/json'); self.end_headers()
37
- self.wfile.write(json.dumps(d).encode())
38
- if __name__ == '__main__':
39
- print(f'MEGAMIND Brain [{NODE_ID}]'); init_db()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  HTTPServer(('0.0.0.0', PORT), Handler).serve_forever()
 
 
 
1
  #!/usr/bin/env python3
2
+ """MEGAMIND HF Space Mind - Lightweight Python Implementation"""
3
+ import os, json, sqlite3, hashlib, time, threading, urllib.request, urllib.parse, re
4
  from http.server import HTTPServer, BaseHTTPRequestHandler
5
+
6
  PORT = int(os.environ.get('PORT', 7860))
7
+ BRAIN_NAME = os.environ.get('BRAIN_NAME', 'HFMind')
8
+ BRAIN_DOMAIN = os.environ.get('BRAIN_DOMAIN', 'general')
9
+ CRAWLER_TOPICS = [t.strip() for t in os.environ.get('CRAWL_TOPICS', '').split(',') if t.strip()]
10
+ MAX_NEURONS = int(os.environ.get('NEURONS', 100000))
11
+ DATA_DIR = '/data'
12
+
13
+ START_TIME = time.time()
14
+ os.makedirs(DATA_DIR, exist_ok=True)
15
+ DB_PATH = os.path.join(DATA_DIR, 'brain.db')
16
+ patterns_count = chunks_count = nonzeros = 0
17
+ crawl_queue = []
18
+ activity = "initializing"
19
+
20
  def init_db():
21
+ conn = sqlite3.connect(DB_PATH)
22
+ c = conn.cursor()
23
+ c.execute('CREATE TABLE IF NOT EXISTS chunks (id INTEGER PRIMARY KEY, hash TEXT UNIQUE, content TEXT, source TEXT, created_at INTEGER)')
24
+ c.execute('CREATE TABLE IF NOT EXISTS patterns (id INTEGER PRIMARY KEY, chunk_id INTEGER, neuron_idx INTEGER, weight REAL)')
25
+ conn.commit()
26
+ conn.close()
27
+
28
def get_stats():
    """Refresh the cached row counts from the database and return them.

    Updates the module-level ``chunks_count`` and ``patterns_count`` with
    the current number of rows in the ``chunks`` and ``patterns`` tables.

    Returns:
        tuple: ``(chunks_count, patterns_count)``. If the database cannot be
        read, the previously cached values are returned unchanged.
    """
    global patterns_count, chunks_count
    try:
        conn = sqlite3.connect(DB_PATH)
        try:
            chunks_count = conn.execute('SELECT COUNT(*) FROM chunks').fetchone()[0]
            patterns_count = conn.execute('SELECT COUNT(*) FROM patterns').fetchone()[0]
        finally:
            # Always release the handle, including when a query fails.
            conn.close()
    except sqlite3.Error:
        # Best effort: status reporting falls back to the last known counts.
        pass
    return chunks_count, patterns_count
40
+
41
def store_chunk(content, source):
    """Persist a text chunk and its pattern row, skipping duplicates.

    The chunk is keyed by the first 32 hex characters of the SHA-256 of the
    full ``content``; the stored text is truncated to 10000 characters.
    When the chunk is new, one ``patterns`` row is written for it and the
    module-level ``nonzeros`` counter is incremented.

    Args:
        content: Text to store (must be a ``str``).
        source: Label recorded alongside the chunk (e.g. a URL or ``'api'``).

    Returns:
        bool: ``True`` if a new chunk was stored, ``False`` if it was a
        duplicate or the database write failed.
    """
    global nonzeros
    digest = hashlib.sha256(content.encode()).hexdigest()[:32]
    # Derive the index from the digest itself: the builtin hash() of a str is
    # salted per process, so it would not be reproducible across restarts.
    # max(..., 1) mirrors the guard used for 'phi' and avoids modulo by zero.
    neuron_idx = int(digest, 16) % max(MAX_NEURONS, 1)
    stored = False
    try:
        conn = sqlite3.connect(DB_PATH)
        try:
            cur = conn.execute(
                'INSERT OR IGNORE INTO chunks (hash, content, source, created_at) VALUES (?,?,?,?)',
                (digest, content[:10000], source, int(time.time())),
            )
            # rowcount is 0 when the UNIQUE hash made the insert a no-op.
            if cur.rowcount == 1:
                conn.execute(
                    'INSERT INTO patterns (chunk_id, neuron_idx, weight) VALUES (?,?,?)',
                    (cur.lastrowid, neuron_idx, len(content) / 10000.0),
                )
                stored = True
            conn.commit()
        finally:
            conn.close()
    except sqlite3.Error as exc:
        # Storage stays best-effort, but the failure is no longer invisible.
        print(f"store_chunk failed: {exc}", flush=True)
        return False
    if stored:
        # Count only chunks whose transaction actually committed.
        nonzeros += 1
    return stored
54
+
55
def crawl_url(url):
    """Fetch ``url``, strip markup, and store the resulting text as a chunk.

    Sets the module-level ``activity`` string while working. Tags are
    removed with a simple regex and whitespace is collapsed; pages that
    yield more than 100 characters of text have their first 5000 characters
    passed to ``store_chunk`` with the URL as the source.

    Args:
        url: Address to download.

    Returns:
        None. Failures are reported on stdout and never raised, so a bad
        page cannot take down the calling worker.
    """
    global activity
    max_bytes = 1_000_000  # only 5000 chars are kept, so never buffer a huge body
    try:
        activity = f"crawling {url[:40]}..."
        req = urllib.request.Request(url, headers={'User-Agent': 'MEGAMIND-HF/1.0'})
        with urllib.request.urlopen(req, timeout=15) as resp:
            html = resp.read(max_bytes).decode('utf-8', errors='ignore')
        text = re.sub(r'<[^>]+>', ' ', html)
        text = re.sub(r'\s+', ' ', text).strip()
        if len(text) > 100:
            store_chunk(text[:5000], url)
    except Exception as exc:
        # Deliberately broad: crawling is best-effort. Unlike a bare
        # ``except`` this lets KeyboardInterrupt/SystemExit through, and the
        # failure is reported instead of being silently dropped.
        print(f"crawl failed for {str(url)[:80]}: {exc}", flush=True)
66
+
67
def crawl_worker():
    """Run forever, draining ``crawl_queue`` and re-seeding it when empty.

    Each iteration takes the oldest queued URL and crawls it. When the
    queue is empty the worker marks itself idle, sleeps 10 seconds, and
    queues a DuckDuckGo HTML search URL for each of the first five
    ``CRAWLER_TOPICS``. Intended to run in a daemon thread; never returns.
    """
    global activity
    while True:
        # Several workers share the queue, so "check then pop" can race:
        # take the item directly and treat an empty queue as the idle case.
        try:
            url = crawl_queue.pop(0)
        except IndexError:
            url = None
        if url is not None:
            crawl_url(url)
            continue
        activity = "idle - waiting for topics"
        time.sleep(10)
        for topic in CRAWLER_TOPICS[:5]:
            seed = f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(topic)}"
            # Every idle worker re-seeds; skip URLs that are already queued.
            if seed not in crawl_queue:
                crawl_queue.append(seed)
76
+
77
  class Handler(BaseHTTPRequestHandler):
78
  def log_message(self, *a): pass
79
+ def send_json(self, d, c=200):
80
+ self.send_response(c)
81
+ self.send_header('Content-Type', 'application/json')
82
+ self.send_header('Access-Control-Allow-Origin', '*')
83
+ self.end_headers()
84
+ self.wfile.write(json.dumps(d).encode())
85
+
86
  def do_GET(self):
87
+ chunks, patterns = get_stats()
88
+ uptime = time.time() - START_TIME
89
+ if self.path in ['/', '/health']:
90
+ self.send_json({'status': 'healthy', 'name': BRAIN_NAME, 'domain': BRAIN_DOMAIN})
91
+ elif self.path == '/status':
92
+ self.send_json({
93
+ 'name': BRAIN_NAME, 'domain': BRAIN_DOMAIN, 'role': 'hf-space-mind',
94
+ 'patterns': patterns, 'chunks': chunks, 'neurons': MAX_NEURONS,
95
+ 'nonzeros': nonzeros, 'phi': patterns / max(MAX_NEURONS, 1),
96
+ 'uptime': f"{uptime/3600:.1f}h", 'uptime_seconds': int(uptime),
97
+ 'activity': activity, 'topics': CRAWLER_TOPICS,
98
+ 'crawler': {'workers': 3, 'queue': len(crawl_queue)}
99
+ })
100
+ else:
101
+ self.send_json({'error': 'not found'}, 404)
102
+
103
  def do_POST(self):
104
+ length = int(self.headers.get('Content-Length', 0))
105
+ body = self.rfile.read(length).decode() if length else '{}'
106
+ try: data = json.loads(body)
107
+ except: data = {}
108
+ if self.path == '/learn':
109
+ content = data.get('content', '')
110
+ if content:
111
+ store_chunk(content, data.get('source', 'api'))
112
+ self.send_json({'status': 'learned', 'chunks': chunks_count})
113
+ else:
114
+ self.send_json({'error': 'no content'}, 400)
115
+ elif self.path in ['/think', '/query']:
116
+ query = data.get('query', data.get('q', ''))
117
+ chunks, patterns = get_stats()
118
+ self.send_json({
119
+ 'name': BRAIN_NAME, 'domain': BRAIN_DOMAIN, 'query': query,
120
+ 'response': f"[{BRAIN_NAME}] Knowledge about {BRAIN_DOMAIN}: {chunks} chunks, {patterns} patterns learned.",
121
+ 'patterns_matched': min(patterns, 10), 'chunks': chunks
122
+ })
123
+ else:
124
+ self.send_json({'error': 'not found'}, 404)
125
+
126
def main():
    """Prepare the database, print the startup banner, launch three crawler
    threads, and serve HTTP on ``PORT`` until the process exits."""
    global activity

    init_db()
    banner = (
        f"[{BRAIN_NAME}] Starting HF Space Mind",
        f" Domain: {BRAIN_DOMAIN}",
        f" Topics: {CRAWLER_TOPICS}",
        f" Port: {PORT}",
    )
    for line in banner:
        print(line)

    # Daemon threads: they must not keep the process alive on shutdown.
    workers = [threading.Thread(target=crawl_worker, daemon=True) for _ in range(3)]
    for worker in workers:
        worker.start()

    activity = "running"
    server = HTTPServer(('0.0.0.0', PORT), Handler)
    server.serve_forever()


if __name__ == '__main__':
    main()