File size: 13,342 Bytes
a5ae1ac | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 | """
Tests for persistence: neurons, templates, word mappings, FAISS index.
Verifies:
- Create engine, teach facts, close → reopen → everything survives
- Templates persist and reload correctly
- Word→neuron mappings persist
- FAISS index saves/loads (fast boot)
- Delete persists across restarts
- In-memory mode still works (no path = no persistence)
"""
import json
import os
import sys
import tempfile
from pathlib import Path
import numpy as np
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from neuron import NeuronDB, VECTOR_DIM
from generator import TemplateStore
from encoder import Encoder
DIM = 300
def random_vector(seed: int, dim=DIM) -> np.ndarray:
rng = np.random.RandomState(seed)
v = rng.randn(dim).astype(np.float32)
return v / np.linalg.norm(v)
class TestNeuronPersistence:
def test_neurons_survive_restart(self):
"""Insert neurons, close DB, reopen → neurons still there."""
with tempfile.TemporaryDirectory() as tmpdir:
# Session 1: insert
db = NeuronDB(path=tmpdir, dim=DIM)
v1 = random_vector(1)
v2 = random_vector(2)
n1 = db.insert(v1, confidence=0.7)
n2 = db.insert(v2, confidence=0.3)
db.close()
# Session 2: reopen
db2 = NeuronDB(path=tmpdir, dim=DIM)
assert db2.count() == 2
reloaded1 = db2.get(n1.id)
assert reloaded1 is not None
assert abs(reloaded1.confidence - 0.7) < 0.001
reloaded2 = db2.get(n2.id)
assert reloaded2 is not None
assert abs(reloaded2.confidence - 0.3) < 0.001
db2.close()
def test_successors_persist(self):
"""Successor relationships survive restart."""
with tempfile.TemporaryDirectory() as tmpdir:
db = NeuronDB(path=tmpdir, dim=DIM)
n1 = db.insert(random_vector(1), confidence=0.5)
n2 = db.insert(random_vector(2), confidence=0.5)
db.update_successors(n1.id, n2.id, 0.8)
db.update_predecessors(n2.id, n1.id)
db.close()
db2 = NeuronDB(path=tmpdir, dim=DIM)
r1 = db2.get(n1.id)
r2 = db2.get(n2.id)
assert len(r1.successors) == 1
assert r1.successors[0][0] == n2.id
assert abs(r1.successors[0][1] - 0.8) < 0.001 # float32 precision
assert n1.id in r2.predecessors
db2.close()
def test_confidence_updates_persist(self):
"""Confidence changes survive restart."""
with tempfile.TemporaryDirectory() as tmpdir:
db = NeuronDB(path=tmpdir, dim=DIM)
n = db.insert(random_vector(1), confidence=0.5)
db.update_confidence(n.id, useful=True)
db.close()
db2 = NeuronDB(path=tmpdir, dim=DIM)
r = db2.get(n.id)
assert r.confidence > 0.5 # was reinforced
db2.close()
def test_delete_persists(self):
"""Deleted neurons stay deleted after restart."""
with tempfile.TemporaryDirectory() as tmpdir:
db = NeuronDB(path=tmpdir, dim=DIM)
n1 = db.insert(random_vector(1), confidence=0.5)
n2 = db.insert(random_vector(2), confidence=0.5)
db.delete(n1.id)
assert db.count() == 1
db.close()
db2 = NeuronDB(path=tmpdir, dim=DIM)
assert db2.count() == 1
assert db2.get(n1.id) is None
assert db2.get(n2.id) is not None
db2.close()
class TestSearchPersistence:
def test_search_survives_restart(self):
"""Search matrix rebuilds from SQLite on restart."""
with tempfile.TemporaryDirectory() as tmpdir:
db = NeuronDB(path=tmpdir, dim=DIM)
v1 = random_vector(1)
v2 = random_vector(2)
db.insert(v1, confidence=0.7)
db.insert(v2, confidence=0.3)
db.close()
db2 = NeuronDB(path=tmpdir, dim=DIM)
assert db2.count() == 2
results = db2.search(v1, k=2)
assert len(results) == 2
db2.close()
def test_search_works_after_reload(self):
"""Spatial search should return correct results after reload."""
with tempfile.TemporaryDirectory() as tmpdir:
db = NeuronDB(path=tmpdir, dim=DIM)
v_target = random_vector(42)
v_far = random_vector(99)
n_target = db.insert(v_target, confidence=0.7)
db.insert(v_far, confidence=0.5)
db.close()
db2 = NeuronDB(path=tmpdir, dim=DIM)
results = db2.search(v_target, k=1)
assert len(results) == 1
assert results[0].id == n_target.id
db2.close()
class TestTemplatePersistence:
def _make_encoder(self, dim=DIM):
"""Create an encoder with test embeddings."""
enc = Encoder(data_dir="/tmp/test-enc", dim=dim)
enc.load_from_dict({
"wrote": random_vector(10, dim),
"discovered": random_vector(11, dim),
"is": random_vector(12, dim),
})
return enc
def test_templates_survive_restart(self):
"""Templates saved via TemplateStore should reload from SQLite."""
with tempfile.TemporaryDirectory() as tmpdir:
enc = self._make_encoder()
# Session 1: add templates
db = NeuronDB(path=tmpdir, dim=DIM)
store = TemplateStore(enc, db=db)
store.add(
"[PERSON] wrote [WORK]",
{"PERSON": "noun", "WORK": "noun"},
confidence=0.8,
)
store.add(
"[PERSON] discovered [CONCEPT]",
{"PERSON": "noun", "CONCEPT": "noun"},
confidence=0.7,
)
assert store.count() == 2
db.close()
# Session 2: reopen
db2 = NeuronDB(path=tmpdir, dim=DIM)
store2 = TemplateStore(enc, db=db2)
assert store2.count() == 2
t0 = store2.templates[0]
assert t0.pattern == "[PERSON] wrote [WORK]"
assert t0.slots == {"PERSON": "noun", "WORK": "noun"}
assert abs(t0.confidence - 0.8) < 0.001
t1 = store2.templates[1]
assert t1.pattern == "[PERSON] discovered [CONCEPT]"
db2.close()
def test_template_delete_persists(self):
"""Deleted templates stay deleted."""
with tempfile.TemporaryDirectory() as tmpdir:
enc = self._make_encoder()
db = NeuronDB(path=tmpdir, dim=DIM)
store = TemplateStore(enc, db=db)
t = store.add("[X] is [Y]", {"X": "noun", "Y": "noun"}, 0.5)
assert store.count() == 1
store.delete(t.id)
assert store.count() == 0
db.close()
db2 = NeuronDB(path=tmpdir, dim=DIM)
store2 = TemplateStore(enc, db=db2)
assert store2.count() == 0
db2.close()
def test_in_memory_templates_no_crash(self):
"""TemplateStore without DB should work (in-memory only)."""
enc = self._make_encoder()
store = TemplateStore(enc, db=None)
store.add("[A] is [B]", {"A": "noun", "B": "noun"}, 0.5)
assert store.count() == 1
def test_template_search_after_reload(self):
"""Template search should work correctly after reload."""
with tempfile.TemporaryDirectory() as tmpdir:
enc = self._make_encoder()
db = NeuronDB(path=tmpdir, dim=DIM)
store = TemplateStore(enc, db=db)
store.add(
"[PERSON] wrote [WORK]",
{"PERSON": "noun", "WORK": "noun"}, 0.8,
)
db.close()
db2 = NeuronDB(path=tmpdir, dim=DIM)
store2 = TemplateStore(enc, db=db2)
# Search with the "wrote" vector
wrote_vec = enc.encode_word("wrote")
results = store2.search(wrote_vec, k=1)
assert len(results) == 1
assert results[0].pattern == "[PERSON] wrote [WORK]"
db2.close()
class TestWordMappingPersistence:
def test_word_mappings_survive_restart(self):
"""Word→neuron mappings persist across restarts."""
with tempfile.TemporaryDirectory() as tmpdir:
db = NeuronDB(path=tmpdir, dim=DIM)
n1 = db.insert(random_vector(1), confidence=0.5)
n2 = db.insert(random_vector(2), confidence=0.5)
db.save_word_mapping("shakespeare", n1.id)
db.save_word_mapping("hamlet", n2.id)
db.close()
db2 = NeuronDB(path=tmpdir, dim=DIM)
mappings = db2.load_word_mappings()
assert mappings["shakespeare"] == n1.id
assert mappings["hamlet"] == n2.id
db2.close()
def test_word_mapping_delete_persists(self):
"""Deleted word mappings stay deleted."""
with tempfile.TemporaryDirectory() as tmpdir:
db = NeuronDB(path=tmpdir, dim=DIM)
n = db.insert(random_vector(1), confidence=0.5)
db.save_word_mapping("test", n.id)
db.delete_word_mapping("test")
db.close()
db2 = NeuronDB(path=tmpdir, dim=DIM)
mappings = db2.load_word_mappings()
assert "test" not in mappings
db2.close()
def test_word_mapping_update(self):
"""Updating a word mapping replaces the old one."""
with tempfile.TemporaryDirectory() as tmpdir:
db = NeuronDB(path=tmpdir, dim=DIM)
n1 = db.insert(random_vector(1), confidence=0.5)
n2 = db.insert(random_vector(2), confidence=0.5)
db.save_word_mapping("word", n1.id)
db.save_word_mapping("word", n2.id) # overwrite
db.close()
db2 = NeuronDB(path=tmpdir, dim=DIM)
mappings = db2.load_word_mappings()
assert mappings["word"] == n2.id
db2.close()
class TestEndToEndPersistence:
def test_full_engine_persistence(self):
"""
Complete test: create engine with data_dir, teach facts and templates,
close, reopen → everything is still there.
"""
with tempfile.TemporaryDirectory() as tmpdir:
enc_vectors = {
"shakespeare": random_vector(1),
"wrote": random_vector(2),
"hamlet": random_vector(3),
"einstein": random_vector(4),
"discovered": random_vector(5),
"relativity": random_vector(6),
}
# Session 1: teach
from engine import Engine
engine = Engine(data_dir=tmpdir, dim=DIM)
engine.load_embeddings_from_dict(enc_vectors)
engine.teach_sentence("shakespeare wrote hamlet")
engine.teach_template(
"[PERSON] wrote [WORK]",
{"PERSON": "noun", "WORK": "noun"}, 0.8,
)
stats1 = engine.stats()
engine.close()
# Session 2: reopen
engine2 = Engine(data_dir=tmpdir, dim=DIM)
engine2.load_embeddings_from_dict(enc_vectors)
stats2 = engine2.stats()
assert stats2["neurons"] == stats1["neurons"], \
f"Neurons: {stats2['neurons']} != {stats1['neurons']}"
assert stats2["templates"] == stats1["templates"], \
f"Templates: {stats2['templates']} != {stats1['templates']}"
# Word mappings should be loaded
assert "shakespeare" in engine2._word_neurons
assert "hamlet" in engine2._word_neurons
# Search should still work
results = engine2.db.search(enc_vectors["shakespeare"], k=1)
assert len(results) >= 1
engine2.close()
def test_incremental_teaching(self):
"""Multiple sessions of teaching should accumulate knowledge."""
with tempfile.TemporaryDirectory() as tmpdir:
enc_vectors = {
"cat": random_vector(10),
"sat": random_vector(11),
"mat": random_vector(12),
"dog": random_vector(13),
"ran": random_vector(14),
}
# Session 1
from engine import Engine
e1 = Engine(data_dir=tmpdir, dim=DIM)
e1.load_embeddings_from_dict(enc_vectors)
e1.teach_sentence("cat sat mat")
count1 = e1.db.count()
e1.close()
# Session 2: teach more
e2 = Engine(data_dir=tmpdir, dim=DIM)
e2.load_embeddings_from_dict(enc_vectors)
assert e2.db.count() == count1 # previous neurons still there
e2.teach_sentence("dog ran")
assert e2.db.count() > count1 # new neurons added
e2.close()
# Session 3: verify all
e3 = Engine(data_dir=tmpdir, dim=DIM)
e3.load_embeddings_from_dict(enc_vectors)
assert "cat" in e3._word_neurons
assert "dog" in e3._word_neurons
e3.close()
if __name__ == "__main__":
import pytest
pytest.main([__file__, "-v"])
|