Spaces:
Sleeping
Sleeping
Update mnemo_core.py
Browse files- mnemo_core.py +70 -12
mnemo_core.py
CHANGED
|
@@ -435,6 +435,8 @@ CREATE TABLE IF NOT EXISTS memories (
|
|
| 435 |
quality_score REAL DEFAULT 0.5,
|
| 436 |
access_count INTEGER DEFAULT 0,
|
| 437 |
priority REAL DEFAULT 1.0,
|
|
|
|
|
|
|
| 438 |
created_at REAL NOT NULL,
|
| 439 |
last_accessed REAL NOT NULL,
|
| 440 |
metadata TEXT DEFAULT '{}',
|
|
@@ -442,6 +444,8 @@ CREATE TABLE IF NOT EXISTS memories (
|
|
| 442 |
);
|
| 443 |
CREATE INDEX IF NOT EXISTS idx_mem_ns ON memories(namespace);
|
| 444 |
CREATE INDEX IF NOT EXISTS idx_mem_tier ON memories(tier);
|
|
|
|
|
|
|
| 445 |
|
| 446 |
-- FTS for blob memories
|
| 447 |
CREATE VIRTUAL TABLE IF NOT EXISTS memory_fts USING fts5(
|
|
@@ -488,10 +492,47 @@ class MnemoDB:
|
|
| 488 |
self._write_conn.executescript(SCHEMA_SQL)
|
| 489 |
self._write_conn.commit()
|
| 490 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 491 |
self._read_pool: queue.Queue = queue.Queue(maxsize=4)
|
| 492 |
for _ in range(4):
|
| 493 |
self._read_pool.put(self._create_connection(readonly=True))
|
| 494 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
def _create_connection(self, readonly: bool = False) -> sqlite3.Connection:
|
| 496 |
conn = sqlite3.connect(self.db_path, check_same_thread=False, timeout=30)
|
| 497 |
conn.execute("PRAGMA journal_mode=WAL")
|
|
@@ -1045,12 +1086,11 @@ class MnemoEngine:
|
|
| 1045 |
""", (session_id, *PROTECTED))
|
| 1046 |
cp_deleted = cur.rowcount
|
| 1047 |
|
| 1048 |
-
# Delete blobs
|
| 1049 |
-
cur2 = conn.execute("""
|
| 1050 |
DELETE FROM memories
|
| 1051 |
-
WHERE
|
| 1052 |
-
|
| 1053 |
-
(session_id, *PROTECTED))
|
| 1054 |
blob_deleted = cur2.rowcount
|
| 1055 |
|
| 1056 |
total = cp_deleted + blob_deleted
|
|
@@ -1515,7 +1555,12 @@ class MnemoEngine:
|
|
| 1515 |
def add(self, content: str, namespace: str = "default",
|
| 1516 |
metadata: dict = None, priority: float = 1.0) -> Optional[str]:
|
| 1517 |
memory_id = self._generate_id(content, namespace)
|
| 1518 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1519 |
|
| 1520 |
with self.db.read() as conn:
|
| 1521 |
existing = conn.execute("SELECT id FROM memories WHERE id = ?", (memory_id,)).fetchone()
|
|
@@ -1539,10 +1584,10 @@ class MnemoEngine:
|
|
| 1539 |
conn.execute("""
|
| 1540 |
INSERT INTO memories
|
| 1541 |
(id, content, tier, namespace, quality_score, access_count, priority,
|
| 1542 |
-
created_at, last_accessed, metadata, embedding)
|
| 1543 |
-
VALUES (?, ?, 'semantic', ?, ?, 0, ?, ?, ?, ?, ?)
|
| 1544 |
""", (memory_id, content, namespace, quality, priority,
|
| 1545 |
-
time.time(), time.time(), meta_json, emb_blob))
|
| 1546 |
|
| 1547 |
self._blob_faiss.add(memory_id, embedding)
|
| 1548 |
self._create_links(memory_id, embedding, namespace, content)
|
|
@@ -1721,6 +1766,7 @@ class MnemoEngine:
|
|
| 1721 |
"tier": mem["tier"], "namespace": mem["namespace"],
|
| 1722 |
"quality_score": round(mem["quality_score"], 3),
|
| 1723 |
"access_count": mem["access_count"], "priority": mem["priority"],
|
|
|
|
| 1724 |
"created_at": mem["created_at"], "last_accessed": mem["last_accessed"],
|
| 1725 |
"metadata": json.loads(mem["metadata"] or "{}"),
|
| 1726 |
}
|
|
@@ -1751,6 +1797,7 @@ class MnemoEngine:
|
|
| 1751 |
"id": r["id"], "content": r["content"], "tier": r["tier"],
|
| 1752 |
"namespace": r["namespace"], "quality_score": round(r["quality_score"], 3),
|
| 1753 |
"access_count": r["access_count"], "priority": r["priority"],
|
|
|
|
| 1754 |
"created_at": r["created_at"], "last_accessed": r["last_accessed"],
|
| 1755 |
"metadata": json.loads(r["metadata"] or "{}"),
|
| 1756 |
} for r in rows]
|
|
@@ -2037,17 +2084,28 @@ class MnemoEngine:
|
|
| 2037 |
if emb is None:
|
| 2038 |
emb = self._get_embedding(mdata.get("content", ""))
|
| 2039 |
emb_blob = emb.astype(np.float32).tobytes()
|
| 2040 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2041 |
|
| 2042 |
with self.db.write() as conn:
|
| 2043 |
conn.execute("""
|
| 2044 |
INSERT OR IGNORE INTO memories
|
| 2045 |
(id, content, tier, namespace, quality_score, access_count, priority,
|
| 2046 |
-
created_at, last_accessed, metadata, embedding)
|
| 2047 |
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
| 2048 |
""", (mid, mdata["content"], mdata.get("tier", "semantic"),
|
| 2049 |
mdata.get("namespace", "default"), mdata.get("quality_score", 0.5),
|
| 2050 |
mdata.get("access_count", 0), mdata.get("priority", 1.0),
|
|
|
|
| 2051 |
mdata.get("created_at", time.time()), mdata.get("last_accessed", time.time()),
|
| 2052 |
meta_json, emb_blob))
|
| 2053 |
imported += 1
|
|
|
|
| 435 |
quality_score REAL DEFAULT 0.5,
|
| 436 |
access_count INTEGER DEFAULT 0,
|
| 437 |
priority REAL DEFAULT 1.0,
|
| 438 |
+
session_id TEXT DEFAULT '',
|
| 439 |
+
source TEXT DEFAULT 'auto_extract',
|
| 440 |
created_at REAL NOT NULL,
|
| 441 |
last_accessed REAL NOT NULL,
|
| 442 |
metadata TEXT DEFAULT '{}',
|
|
|
|
| 444 |
);
|
| 445 |
CREATE INDEX IF NOT EXISTS idx_mem_ns ON memories(namespace);
|
| 446 |
CREATE INDEX IF NOT EXISTS idx_mem_tier ON memories(tier);
|
| 447 |
+
CREATE INDEX IF NOT EXISTS idx_mem_session ON memories(session_id);
|
| 448 |
+
CREATE INDEX IF NOT EXISTS idx_mem_source ON memories(source);
|
| 449 |
|
| 450 |
-- FTS for blob memories
|
| 451 |
CREATE VIRTUAL TABLE IF NOT EXISTS memory_fts USING fts5(
|
|
|
|
| 492 |
self._write_conn.executescript(SCHEMA_SQL)
|
| 493 |
self._write_conn.commit()
|
| 494 |
|
| 495 |
+
# Schema migration: add session_id/source columns to memories table
|
| 496 |
+
# (for existing .db files created before v7.1)
|
| 497 |
+
self._migrate_memories_columns()
|
| 498 |
+
|
| 499 |
self._read_pool: queue.Queue = queue.Queue(maxsize=4)
|
| 500 |
for _ in range(4):
|
| 501 |
self._read_pool.put(self._create_connection(readonly=True))
|
| 502 |
|
| 503 |
+
def _migrate_memories_columns(self):
|
| 504 |
+
"""Add session_id and source columns to memories if missing (v7.0→v7.1).
|
| 505 |
+
|
| 506 |
+
Also backfills existing rows by extracting values from the metadata JSON blob.
|
| 507 |
+
"""
|
| 508 |
+
cursor = self._write_conn.execute("PRAGMA table_info(memories)")
|
| 509 |
+
existing_cols = {row[1] for row in cursor.fetchall()}
|
| 510 |
+
|
| 511 |
+
if "session_id" not in existing_cols:
|
| 512 |
+
log.info("Migrating memories table: adding session_id column")
|
| 513 |
+
print("[MIGRATE] Adding session_id column to memories table")
|
| 514 |
+
self._write_conn.execute("ALTER TABLE memories ADD COLUMN session_id TEXT DEFAULT ''")
|
| 515 |
+
# Backfill from metadata JSON
|
| 516 |
+
self._write_conn.execute("""
|
| 517 |
+
UPDATE memories SET session_id = COALESCE(json_extract(metadata, '$.session_id'), '')
|
| 518 |
+
WHERE metadata LIKE '%session_id%'
|
| 519 |
+
""")
|
| 520 |
+
|
| 521 |
+
if "source" not in existing_cols:
|
| 522 |
+
log.info("Migrating memories table: adding source column")
|
| 523 |
+
print("[MIGRATE] Adding source column to memories table")
|
| 524 |
+
self._write_conn.execute("ALTER TABLE memories ADD COLUMN source TEXT DEFAULT 'auto_extract'")
|
| 525 |
+
# Backfill from metadata JSON
|
| 526 |
+
self._write_conn.execute("""
|
| 527 |
+
UPDATE memories SET source = COALESCE(json_extract(metadata, '$.source'), 'auto_extract')
|
| 528 |
+
WHERE metadata LIKE '%source%'
|
| 529 |
+
""")
|
| 530 |
+
|
| 531 |
+
# Create indexes if they don't exist (idempotent)
|
| 532 |
+
self._write_conn.execute("CREATE INDEX IF NOT EXISTS idx_mem_session ON memories(session_id)")
|
| 533 |
+
self._write_conn.execute("CREATE INDEX IF NOT EXISTS idx_mem_source ON memories(source)")
|
| 534 |
+
self._write_conn.commit()
|
| 535 |
+
|
| 536 |
def _create_connection(self, readonly: bool = False) -> sqlite3.Connection:
|
| 537 |
conn = sqlite3.connect(self.db_path, check_same_thread=False, timeout=30)
|
| 538 |
conn.execute("PRAGMA journal_mode=WAL")
|
|
|
|
| 1086 |
""", (session_id, *PROTECTED))
|
| 1087 |
cp_deleted = cur.rowcount
|
| 1088 |
|
| 1089 |
+
# Delete blobs (using proper columns now, not json_extract)
|
| 1090 |
+
cur2 = conn.execute(f"""
|
| 1091 |
DELETE FROM memories
|
| 1092 |
+
WHERE session_id = ? AND source NOT IN ({placeholders})
|
| 1093 |
+
""", (session_id, *PROTECTED))
|
|
|
|
| 1094 |
blob_deleted = cur2.rowcount
|
| 1095 |
|
| 1096 |
total = cp_deleted + blob_deleted
|
|
|
|
| 1555 |
def add(self, content: str, namespace: str = "default",
|
| 1556 |
metadata: dict = None, priority: float = 1.0) -> Optional[str]:
|
| 1557 |
memory_id = self._generate_id(content, namespace)
|
| 1558 |
+
meta = metadata or {}
|
| 1559 |
+
|
| 1560 |
+
# Extract session_id and source into proper columns (not buried in JSON)
|
| 1561 |
+
session_id = meta.pop("session_id", "")
|
| 1562 |
+
source = meta.pop("source", "auto_extract")
|
| 1563 |
+
meta_json = json.dumps(meta) # Remaining metadata only
|
| 1564 |
|
| 1565 |
with self.db.read() as conn:
|
| 1566 |
existing = conn.execute("SELECT id FROM memories WHERE id = ?", (memory_id,)).fetchone()
|
|
|
|
| 1584 |
conn.execute("""
|
| 1585 |
INSERT INTO memories
|
| 1586 |
(id, content, tier, namespace, quality_score, access_count, priority,
|
| 1587 |
+
session_id, source, created_at, last_accessed, metadata, embedding)
|
| 1588 |
+
VALUES (?, ?, 'semantic', ?, ?, 0, ?, ?, ?, ?, ?, ?, ?)
|
| 1589 |
""", (memory_id, content, namespace, quality, priority,
|
| 1590 |
+
session_id, source, time.time(), time.time(), meta_json, emb_blob))
|
| 1591 |
|
| 1592 |
self._blob_faiss.add(memory_id, embedding)
|
| 1593 |
self._create_links(memory_id, embedding, namespace, content)
|
|
|
|
| 1766 |
"tier": mem["tier"], "namespace": mem["namespace"],
|
| 1767 |
"quality_score": round(mem["quality_score"], 3),
|
| 1768 |
"access_count": mem["access_count"], "priority": mem["priority"],
|
| 1769 |
+
"session_id": mem["session_id"], "source": mem["source"],
|
| 1770 |
"created_at": mem["created_at"], "last_accessed": mem["last_accessed"],
|
| 1771 |
"metadata": json.loads(mem["metadata"] or "{}"),
|
| 1772 |
}
|
|
|
|
| 1797 |
"id": r["id"], "content": r["content"], "tier": r["tier"],
|
| 1798 |
"namespace": r["namespace"], "quality_score": round(r["quality_score"], 3),
|
| 1799 |
"access_count": r["access_count"], "priority": r["priority"],
|
| 1800 |
+
"session_id": r["session_id"], "source": r["source"],
|
| 1801 |
"created_at": r["created_at"], "last_accessed": r["last_accessed"],
|
| 1802 |
"metadata": json.loads(r["metadata"] or "{}"),
|
| 1803 |
} for r in rows]
|
|
|
|
| 2084 |
if emb is None:
|
| 2085 |
emb = self._get_embedding(mdata.get("content", ""))
|
| 2086 |
emb_blob = emb.astype(np.float32).tobytes()
|
| 2087 |
+
|
| 2088 |
+
# Extract session_id/source from old metadata blob into proper columns
|
| 2089 |
+
old_meta = mdata.get("metadata", {})
|
| 2090 |
+
if isinstance(old_meta, str):
|
| 2091 |
+
try:
|
| 2092 |
+
old_meta = json.loads(old_meta)
|
| 2093 |
+
except Exception:
|
| 2094 |
+
old_meta = {}
|
| 2095 |
+
session_id = old_meta.pop("session_id", mdata.get("session_id", ""))
|
| 2096 |
+
source = old_meta.pop("source", mdata.get("source", "auto_extract"))
|
| 2097 |
+
meta_json = json.dumps(old_meta) # Remaining metadata only
|
| 2098 |
|
| 2099 |
with self.db.write() as conn:
|
| 2100 |
conn.execute("""
|
| 2101 |
INSERT OR IGNORE INTO memories
|
| 2102 |
(id, content, tier, namespace, quality_score, access_count, priority,
|
| 2103 |
+
session_id, source, created_at, last_accessed, metadata, embedding)
|
| 2104 |
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
| 2105 |
""", (mid, mdata["content"], mdata.get("tier", "semantic"),
|
| 2106 |
mdata.get("namespace", "default"), mdata.get("quality_score", 0.5),
|
| 2107 |
mdata.get("access_count", 0), mdata.get("priority", 1.0),
|
| 2108 |
+
session_id, source,
|
| 2109 |
mdata.get("created_at", time.time()), mdata.get("last_accessed", time.time()),
|
| 2110 |
meta_json, emb_blob))
|
| 2111 |
imported += 1
|