File size: 7,730 Bytes
a5ae1ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
"""
Brain: two-layer performance wrapper over brain_core.

Layer 1 (fast): co-occurrence dict + word list. In-memory. Microseconds.
Layer 2 (background): neurons, successors, sentences → SQLite. Deferred.

The brain KNOWS things at Layer 1 speed. It SAVES them at Layer 2 speed.
Like human memory: learn instantly, consolidate during sleep.

    from brain import Brain
    brain = Brain(db_path="./my_brain")
    brain.teach("paris is the capital of france")  # instant (dict only)
    brain.ask("capital of france")                  # works immediately
    brain.flush()                                   # persist to SQLite
"""

import os
import numpy as np
from collections import deque
from threading import Lock, Thread
from concurrent.futures import ThreadPoolExecutor

from brain_core import BrainCore, FUNCTION_WORDS, STRUCTURAL_WORDS, COOCCURRENCE_PULL

# Public API: the Brain wrapper plus the brain_core constants imported above,
# re-exported so callers can import them from this module.
__all__ = [
    "Brain",
    "FUNCTION_WORDS",
    "STRUCTURAL_WORDS",
    "COOCCURRENCE_PULL",
]


class Brain(BrainCore):
    """Two-layer brain. Same API as BrainCore, much faster teach.

    Layer 1: _learn_word + _learn_cooccurrence (dict ops, ~microseconds)
    Layer 2: neuron insert + successors + sentences (SQLite, deferred)

    ``teach`` applies Layer 1 immediately and only queues the Layer 2 work.
    The queue is drained by ``flush``, ``ask``, ``end_bulk`` and ``close``,
    or automatically once it holds ``_MAX_PERSIST_QUEUE`` entries.
    """

    # Queue length at which teach() blocks and drains the queue (backpressure).
    _MAX_PERSIST_QUEUE = 5000

    # /proc/meminfo line prefixes -> keys written by health().
    # The parsed number is divided by 1024 before being stored under the *_mb key.
    _MEMINFO_FIELDS = (
        ("MemAvailable:", "system_avail_mb"),
        ("SwapTotal:", "swap_total_mb"),
        ("SwapFree:", "swap_free_mb"),
    )

    def __init__(self, db_path=None):
        """Set up the wrapper state, then initialise the underlying BrainCore.

        Args:
            db_path: Forwarded unchanged to ``BrainCore.__init__``.
        """
        # Assigned before super().__init__() so the attributes already exist
        # while the base initialiser runs.
        # NOTE(review): presumably BrainCore.__init__ may reach overridden
        # methods that read them — confirm against brain_core.
        self._bulk_mode = False
        self._search_dirty = False
        self._nid_to_word_cache = None

        # Layer 2: async persist queue + background thread
        self._persist_queue = deque()
        self._persist_count = 0
        self._flush_thread = None
        self._flushing = False

        super().__init__(db_path=db_path)

    # --- Two-layer teach ---

    def teach(self, sentence, confidence=0.5):
        """Learn a sentence at Layer 1 and queue it for Layer 2 persistence.

        Args:
            sentence: Text to learn; tokenised with ``self._tokenize``.
            confidence: Stored with the queued entry and passed to
                ``self.db.insert`` when the entry is flushed.

        Returns:
            ``[]`` when the sentence has no tokens outside FUNCTION_WORDS,
            otherwise ``list(range(len(content)))`` — placeholder IDs, not
            database IDs, because nothing has been inserted yet.
        """
        tokens = self._tokenize(sentence)
        content = [t for t in tokens if t not in FUNCTION_WORDS]
        if not content:
            return []

        with self._lock:
            dim_before = len(self._words)

            # Layer 1: co-occurrence (instant)
            for word in content:
                self._learn_word(word)
            if len(content) >= 2:
                self._learn_cooccurrence(content)

            # A change in the word list invalidates the search matrix and the
            # neuron-id -> word cache; both are refreshed lazily later.
            if len(self._words) != dim_before:
                self._search_dirty = True
                self._nid_to_word_cache = None

        # Queue for Layer 2 persist.
        self._persist_queue.append((sentence, content, tokens, confidence))
        self._persist_count += 1

        # Backpressure: once the queue hits the cap, drain it synchronously so
        # memory use stays bounded.
        if len(self._persist_queue) >= self._MAX_PERSIST_QUEUE:
            self._flush_persist()

        return list(range(len(content)))  # placeholder IDs

    def _async_flush(self):
        """Kick off a background flush if not already running.

        NOTE(review): the check-then-set on ``_flushing`` is not atomic, so
        this assumes a single calling thread — confirm against callers.
        """
        if self._flushing:
            return
        self._flushing = True
        self._flush_thread = Thread(target=self._bg_flush, daemon=True)
        self._flush_thread.start()

    def _bg_flush(self):
        """Background thread body: persist queued teaches to SQLite."""
        try:
            self._flush_persist()
        finally:
            # Always clear the flag so a failed flush does not block future
            # _async_flush() calls.
            self._flushing = False

    def _join_flush_thread(self):
        """Block until a running background flush thread (if any) finishes."""
        thread = self._flush_thread
        if thread is not None and thread.is_alive():
            thread.join()

    def _rebuild_search_if_dirty(self):
        """Rebuild the search matrix if teach() marked it stale, then clear the flag."""
        if self._search_dirty:
            self._rebuild_search_matrix()
            self._search_dirty = False

    def _flush_persist(self):
        """Layer 2: flush queued teaches to SQLite.

        Drains ``_persist_queue`` inside one ``db.begin_batch()`` /
        ``db.end_batch()`` pair. ``end_batch()`` runs even when an entry
        fails, so an error cannot leave the batch open; the exception still
        propagates and the failing entry is not re-queued.

        Returns:
            Number of queue entries processed (0 when the queue was empty).

        NOTE(review): nothing here serialises two threads flushing at once
        (background thread vs. a synchronous caller); the guarded pop only
        prevents the IndexError when the other thread empties the queue.
        """
        if not self._persist_queue:
            return 0

        flushed = 0
        self.db.begin_batch()
        try:
            while True:
                try:
                    _sentence, content, tokens, confidence = (
                        self._persist_queue.popleft()
                    )
                except IndexError:
                    # Queue drained (possibly by a concurrent flush).
                    break

                neurons = []
                for word in content:
                    # Reuse the neuron already mapped to this word when the
                    # database still has it.
                    if word in self._word_neurons:
                        existing = self.db.get(self._word_neurons[word])
                        if existing:
                            neurons.append(existing)
                            continue
                    vec = self._encode_word(word)
                    # An all-zero encoding produces no neuron for the word.
                    if np.any(vec != 0):
                        created = self.db.insert(vec, confidence=confidence)
                        self._word_neurons[word] = created.id
                        self._nid_to_word_cache = None
                        self.db.save_word_mapping(word, created.id)
                        neurons.append(created)

                # Link each neuron to the next one in sentence order.
                for current, following in zip(neurons, neurons[1:]):
                    self.db.update_successors(current.id, following.id, 0.8)
                    self.db.update_predecessors(following.id, current.id)

                if len(neurons) >= 2:
                    self.db.record_sentence([n.id for n in neurons])

                if len(tokens) >= 3:
                    self._extract_template(tokens)

                flushed += 1
        finally:
            self.db.end_batch()

        return flushed

    def flush(self):
        """Force flush all pending teaches to SQLite.

        Waits for a running background flush first so that everything queued
        so far has been processed by the time this returns. It then saves the
        co-occurrence data and rebuilds the search matrix if it is stale.
        """
        self._join_flush_thread()
        self._flush_persist()
        self._save_cooc()
        self._rebuild_search_if_dirty()

    def ask(self, question):
        """Finish any pending persistence, refresh the search matrix, then query.

        Args:
            question: Passed unchanged to ``BrainCore.ask``.

        Returns:
            Whatever ``BrainCore.ask`` returns.
        """
        # Wait for background flush to finish, then flush any remainder.
        self._join_flush_thread()
        self._flush_persist()
        self._rebuild_search_if_dirty()
        return super().ask(question)

    def teach_batch(self, sentences, confidence=0.5):
        """Teach every sentence in ``sentences`` via ``teach``.

        Layer 1 is applied immediately for each sentence. Layer 2 work stays
        queued exactly as with individual ``teach`` calls; no extra flush is
        performed at the end of the batch.

        Returns:
            A list with the ``teach`` return value for each sentence, in order.
        """
        return [self.teach(s, confidence) for s in sentences]

    # --- Bulk mode ---

    def begin_bulk(self):
        """Enter bulk mode by setting ``_bulk_mode``."""
        self._bulk_mode = True

    def end_bulk(self):
        """Exit bulk mode. Flush everything and rebuild the search matrix."""
        self._bulk_mode = False
        self._join_flush_thread()
        self._flush_persist()
        self._save_cooc()
        self._rebuild_search_matrix()
        # The matrix is fresh now; without this the next ask() would rebuild it again.
        self._search_dirty = False

    # --- Close ---

    def close(self):
        """Shut down worker pools, persist pending state and close the database.

        ``db.close()`` runs even if shutting down or the final flush raises,
        so the database handle is not leaked; the exception still propagates.
        """
        try:
            # NOTE(review): _pool and _batch_pool are presumably created by
            # BrainCore.__init__ — they are not set in this class.
            self._pool.shutdown(wait=False)
            self._batch_pool.shutdown(wait=False)
            self._join_flush_thread()
            self._flush_persist()
            self._save_cooc()
        finally:
            self.db.close()

    # --- Health with death risk ---

    def health(self):
        """Return BrainCore's health report extended with queue, memory and risk data.

        Added keys:
            persist_queue: current length of the Layer 2 queue.
            system_avail_mb, swap_total_mb, swap_free_mb: read from
                ``/proc/meminfo`` when it can be read and parsed.
            swap_used_mb: swap_total_mb - swap_free_mb (0 when unknown).
            death_risk: 0-100 score summing these contributions:
                up to 40 when available memory is below 2048 MB,
                up to 25 when more than 100 MB of swap is in use,
                a share scaled to 20 when free disk is below 5 GB,
                10 when the 1-minute load exceeds 0.9 x CPU count,
                up to 5 when RSS exceeds 512 MB.

        Reads ``disk_free_gb`` and ``rss_mb`` from the base report.
        """
        h = super().health()
        h["persist_queue"] = len(self._persist_queue)

        # Best effort: the file may be missing or unreadable, or a line may
        # not parse. In either case the keys found so far are kept.
        try:
            with open('/proc/meminfo') as f:
                for line in f:
                    for prefix, key in self._MEMINFO_FIELDS:
                        if line.startswith(prefix):
                            h[key] = round(int(line.split()[1]) / 1024, 1)
                            break
        except (OSError, ValueError, IndexError):
            pass

        h["swap_used_mb"] = round(h.get("swap_total_mb", 0) - h.get("swap_free_mb", 0), 1)

        risk = 0
        avail = h.get("system_avail_mb", 9999)
        if avail < 2048:
            risk += int((2048 - avail) / 2048 * 40)
        if h.get("swap_used_mb", 0) > 100:
            risk += min(25, int(h["swap_used_mb"] / 500 * 25))
        if h["disk_free_gb"] < 5:
            risk += int((5 - h["disk_free_gb"]) / 5 * 20)
        # os.getloadavg is absent on some platforms (AttributeError) and may
        # raise OSError when the load average is unobtainable.
        try:
            load1 = os.getloadavg()[0]
            ncpu = os.cpu_count() or 1
            if load1 > ncpu * 0.9:
                risk += 10
        except (OSError, AttributeError):
            pass
        if h["rss_mb"] > 512:
            risk += min(5, int((h["rss_mb"] - 512) / 1024 * 5))
        h["death_risk"] = min(100, risk)

        return h