Update app.py
Browse files
app.py
CHANGED
|
@@ -10,10 +10,6 @@ from typing import List, Dict, Any, Optional, Tuple
|
|
| 10 |
|
| 11 |
import gradio as gr
|
| 12 |
|
| 13 |
-
# =========================
|
| 14 |
-
# RFT Memory Receipt Engine
|
| 15 |
-
# =========================
|
| 16 |
-
|
| 17 |
BASE_DIR = os.environ.get("RFT_MEM_BASE", "var/rftmem")
|
| 18 |
os.makedirs(BASE_DIR, exist_ok=True)
|
| 19 |
|
|
@@ -36,10 +32,7 @@ def atomic_write(path: str, data: bytes) -> None:
|
|
| 36 |
|
| 37 |
|
| 38 |
def safe_fts_match(user_query: str) -> str:
|
| 39 |
-
|
| 40 |
-
FTS5 MATCH can error on punctuation/special syntax.
|
| 41 |
-
Convert input into a conservative OR query: word1 OR word2 OR ...
|
| 42 |
-
"""
|
| 43 |
words = re.findall(r"[A-Za-z0-9_]+", (user_query or "").lower())
|
| 44 |
if not words:
|
| 45 |
return "___NO_HITS___"
|
|
@@ -66,9 +59,7 @@ class RetrievalHit:
|
|
| 66 |
|
| 67 |
class RFTMemoryStore:
|
| 68 |
"""
|
| 69 |
-
|
| 70 |
-
Index: SQLite (events table + FTS5) for lexical retrieval.
|
| 71 |
-
Receipts: JSON files saved per assistant turn, downloadable and verifiable.
|
| 72 |
"""
|
| 73 |
|
| 74 |
def __init__(self, base_dir: str):
|
|
@@ -76,9 +67,12 @@ class RFTMemoryStore:
|
|
| 76 |
self.db_path = os.path.join(base_dir, "index.sqlite")
|
| 77 |
self._init_db()
|
| 78 |
|
|
|
|
|
|
|
|
|
|
| 79 |
def _init_db(self):
|
| 80 |
os.makedirs(self.base_dir, exist_ok=True)
|
| 81 |
-
con =
|
| 82 |
cur = con.cursor()
|
| 83 |
|
| 84 |
cur.execute("""
|
|
@@ -96,15 +90,6 @@ class RFTMemoryStore:
|
|
| 96 |
)
|
| 97 |
""")
|
| 98 |
|
| 99 |
-
cur.execute("""
|
| 100 |
-
CREATE VIRTUAL TABLE IF NOT EXISTS events_fts USING fts5(
|
| 101 |
-
event_id,
|
| 102 |
-
session_id,
|
| 103 |
-
text,
|
| 104 |
-
content=''
|
| 105 |
-
)
|
| 106 |
-
""")
|
| 107 |
-
|
| 108 |
cur.execute("""
|
| 109 |
CREATE TABLE IF NOT EXISTS receipts (
|
| 110 |
receipt_id TEXT PRIMARY KEY,
|
|
@@ -116,9 +101,43 @@ class RFTMemoryStore:
|
|
| 116 |
)
|
| 117 |
""")
|
| 118 |
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
con.close()
|
| 121 |
|
|
|
|
|
|
|
|
|
|
| 122 |
def session_dir(self, session_id: str) -> str:
|
| 123 |
d = os.path.join(self.base_dir, "sessions", session_id)
|
| 124 |
os.makedirs(d, exist_ok=True)
|
|
@@ -132,8 +151,11 @@ class RFTMemoryStore:
|
|
| 132 |
os.makedirs(d, exist_ok=True)
|
| 133 |
return d
|
| 134 |
|
|
|
|
|
|
|
|
|
|
| 135 |
def get_events(self, session_id: str, limit: int = 400) -> List[Dict[str, Any]]:
|
| 136 |
-
con =
|
| 137 |
cur = con.cursor()
|
| 138 |
cur.execute("""
|
| 139 |
SELECT event_id, seq, ts_ms, role, text, digest, prev_hash, chain_hash, collapse
|
|
@@ -178,8 +200,11 @@ class RFTMemoryStore:
|
|
| 178 |
score = role_w * (0.65 * novelty + 0.35 * length_factor)
|
| 179 |
return float(max(0.0, min(1.0, score)))
|
| 180 |
|
|
|
|
|
|
|
|
|
|
| 181 |
def _get_last_seq_and_chain(self, session_id: str) -> Tuple[int, str]:
|
| 182 |
-
con =
|
| 183 |
cur = con.cursor()
|
| 184 |
|
| 185 |
cur.execute("SELECT COALESCE(MAX(seq), 0) FROM events WHERE session_id=?", (session_id,))
|
|
@@ -225,7 +250,7 @@ class RFTMemoryStore:
|
|
| 225 |
"collapse": collapse
|
| 226 |
}
|
| 227 |
|
| 228 |
-
# Append-only JSONL
|
| 229 |
log_path = self.session_log_path(session_id)
|
| 230 |
line = (json.dumps(rec, ensure_ascii=False) + "\n").encode("utf-8")
|
| 231 |
with open(log_path, "ab") as f:
|
|
@@ -233,8 +258,8 @@ class RFTMemoryStore:
|
|
| 233 |
f.flush()
|
| 234 |
os.fsync(f.fileno())
|
| 235 |
|
| 236 |
-
# Index
|
| 237 |
-
con =
|
| 238 |
cur = con.cursor()
|
| 239 |
cur.execute("""
|
| 240 |
INSERT INTO events(session_id,event_id,seq,ts_ms,role,text,digest,prev_hash,chain_hash,collapse)
|
|
@@ -246,11 +271,17 @@ class RFTMemoryStore:
|
|
| 246 |
|
| 247 |
return rec
|
| 248 |
|
|
|
|
|
|
|
|
|
|
| 249 |
def search_lexical(self, session_id: str, query: str, k: int = 8) -> List[RetrievalHit]:
|
| 250 |
match = safe_fts_match(query)
|
|
|
|
|
|
|
| 251 |
|
| 252 |
-
con =
|
| 253 |
cur = con.cursor()
|
|
|
|
| 254 |
cur.execute("""
|
| 255 |
SELECT e.event_id, e.seq, e.role, e.text, e.ts_ms, e.digest, e.chain_hash,
|
| 256 |
bm25(events_fts) as rank
|
|
@@ -263,13 +294,36 @@ class RFTMemoryStore:
|
|
| 263 |
rows = cur.fetchall()
|
| 264 |
con.close()
|
| 265 |
|
| 266 |
-
hits = []
|
| 267 |
for (eid, seq, role, text, ts, digest, chain_hash, rank) in rows:
|
| 268 |
r = float(rank if rank is not None else 0.0)
|
| 269 |
score = 1.0 / (1.0 + max(0.0, r))
|
| 270 |
hits.append(RetrievalHit(eid, int(seq or 0), role, text, ts, digest, chain_hash, score))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
return hits
|
| 272 |
|
|
|
|
|
|
|
|
|
|
| 273 |
def write_receipt(self, session_id: str, user_text: str, retrieved: List[RetrievalHit], prompt: str, response: str) -> str:
|
| 274 |
receipt_id = uuid.uuid4().hex
|
| 275 |
ts = now_ms()
|
|
@@ -292,7 +346,7 @@ class RFTMemoryStore:
|
|
| 292 |
"response_hash": sha256_str(response),
|
| 293 |
"engine": {
|
| 294 |
"name": "RFT Memory Receipt Engine",
|
| 295 |
-
"version": "0.
|
| 296 |
"method": "append-only ledger + FTS retrieval + hash-chain receipts"
|
| 297 |
}
|
| 298 |
}
|
|
@@ -300,7 +354,7 @@ class RFTMemoryStore:
|
|
| 300 |
path = os.path.join(self.receipts_dir(session_id), f"{receipt_id}.json")
|
| 301 |
atomic_write(path, json.dumps(receipt, indent=2, ensure_ascii=False).encode("utf-8"))
|
| 302 |
|
| 303 |
-
con =
|
| 304 |
cur = con.cursor()
|
| 305 |
cur.execute("""
|
| 306 |
INSERT INTO receipts(receipt_id, session_id, ts_ms, prompt_hash, response_hash, receipt_path)
|
|
@@ -316,7 +370,7 @@ class RFTMemoryStore:
|
|
| 316 |
if not session_id:
|
| 317 |
return False, "Missing session_id."
|
| 318 |
|
| 319 |
-
con =
|
| 320 |
cur = con.cursor()
|
| 321 |
|
| 322 |
for item in receipt_json.get("retrieval", []):
|
|
@@ -378,12 +432,6 @@ HOW_TO_MD = """
|
|
| 378 |
- Each turn generates a receipt listing exactly what was retrieved.
|
| 379 |
- Receipt verification proves the referenced events exist and hashes match.
|
| 380 |
|
| 381 |
-
## Who this is useful for
|
| 382 |
-
- Agent builders needing restart continuity
|
| 383 |
-
- Teams needing auditability (“show what influenced the output”)
|
| 384 |
-
- Debugging long-lived workflows without replaying full history
|
| 385 |
-
- Anyone controlling token spend via fixed retrieval budgets
|
| 386 |
-
|
| 387 |
## Why it matters
|
| 388 |
Storing logs is easy. Proving influence is the hard part.
|
| 389 |
Receipts turn memory into something inspectable and defensible.
|
|
@@ -395,12 +443,7 @@ def new_session_id() -> str:
|
|
| 395 |
|
| 396 |
|
| 397 |
def events_to_messages(events: List[Dict[str, Any]]) -> List[Dict[str, str]]:
|
| 398 |
-
|
| 399 |
-
msgs = []
|
| 400 |
-
for e in events:
|
| 401 |
-
if e["role"] in ("user", "assistant"):
|
| 402 |
-
msgs.append({"role": e["role"], "content": e["text"]})
|
| 403 |
-
return msgs
|
| 404 |
|
| 405 |
|
| 406 |
def format_ledger(events: List[Dict[str, Any]]) -> str:
|
|
@@ -427,7 +470,65 @@ def build_prompt(user_msg: str, hits: List[RetrievalHit]) -> str:
|
|
| 427 |
)
|
| 428 |
|
| 429 |
|
| 430 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
if not hits:
|
| 432 |
return "No matching memory slices were retrieved for this query."
|
| 433 |
lines = ["Retrieved memory slices:"]
|
|
@@ -438,13 +539,13 @@ def response_from_retrieval(hits: List[RetrievalHit]) -> str:
|
|
| 438 |
|
| 439 |
def chat_turn(session_id: str, user_msg: str, retrieval_k: int):
|
| 440 |
if not session_id:
|
| 441 |
-
session_id =
|
| 442 |
|
| 443 |
store.append_event(session_id, "user", user_msg)
|
| 444 |
|
| 445 |
hits = store.search_lexical(session_id, user_msg, k=int(retrieval_k))
|
| 446 |
prompt = build_prompt(user_msg, hits)
|
| 447 |
-
response =
|
| 448 |
|
| 449 |
store.append_event(session_id, "assistant", response)
|
| 450 |
|
|
@@ -455,13 +556,13 @@ def chat_turn(session_id: str, user_msg: str, retrieval_k: int):
|
|
| 455 |
retrieved_view = "\n".join([f"{h.score:.4f} | {h.role} | {h.text}" for h in hits]) if hits else "(none)"
|
| 456 |
messages = events_to_messages(events)
|
| 457 |
|
| 458 |
-
#
|
| 459 |
return session_id, messages, retrieved_view, ledger, receipt_path, receipt_path
|
| 460 |
|
| 461 |
|
| 462 |
def run_guided_demo(session_id: str, retrieval_k: int):
|
| 463 |
if not session_id:
|
| 464 |
-
session_id =
|
| 465 |
|
| 466 |
last = (session_id, [], "", "", "", None)
|
| 467 |
for step in GUIDED_DEMO_STEPS:
|
|
@@ -499,7 +600,7 @@ def verify_uploaded_receipt(file_obj) -> str:
|
|
| 499 |
|
| 500 |
|
| 501 |
def reset_session():
|
| 502 |
-
sid =
|
| 503 |
return sid, [], "", "", "", None
|
| 504 |
|
| 505 |
|
|
@@ -508,7 +609,7 @@ def fill_example(selected: str) -> str:
|
|
| 508 |
|
| 509 |
|
| 510 |
with gr.Blocks(title="RFT Memory Receipt Engine") as demo:
|
| 511 |
-
gr.Markdown("# RFT Memory Receipt Engine\nLocal persistence +
|
| 512 |
|
| 513 |
with gr.Row():
|
| 514 |
session_id = gr.Textbox(label="Session ID", value=new_session_id())
|
|
@@ -518,7 +619,6 @@ with gr.Blocks(title="RFT Memory Receipt Engine") as demo:
|
|
| 518 |
|
| 519 |
with gr.Tabs():
|
| 520 |
with gr.Tab("Chat"):
|
| 521 |
-
# DO NOT pass type=... (your Gradio doesn't accept it)
|
| 522 |
chatbot = gr.Chatbot(label="Conversation", height=320)
|
| 523 |
|
| 524 |
with gr.Row():
|
|
@@ -543,7 +643,7 @@ with gr.Blocks(title="RFT Memory Receipt Engine") as demo:
|
|
| 543 |
)
|
| 544 |
|
| 545 |
with gr.Tab("Guided Demo"):
|
| 546 |
-
gr.Markdown("Runs a scripted
|
| 547 |
run_demo_btn = gr.Button("Run Guided Demo", variant="primary")
|
| 548 |
|
| 549 |
demo_chatbot = gr.Chatbot(label="Demo conversation", height=320)
|
|
|
|
| 10 |
|
| 11 |
import gradio as gr
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
BASE_DIR = os.environ.get("RFT_MEM_BASE", "var/rftmem")
|
| 14 |
os.makedirs(BASE_DIR, exist_ok=True)
|
| 15 |
|
|
|
|
| 32 |
|
| 33 |
|
| 34 |
def safe_fts_match(user_query: str) -> str:
|
| 35 |
+
# Conservative FTS query: token1 OR token2 OR ...
|
|
|
|
|
|
|
|
|
|
| 36 |
words = re.findall(r"[A-Za-z0-9_]+", (user_query or "").lower())
|
| 37 |
if not words:
|
| 38 |
return "___NO_HITS___"
|
|
|
|
| 59 |
|
| 60 |
class RFTMemoryStore:
|
| 61 |
"""
|
| 62 |
+
Append-only session ledger + SQLite FTS retrieval + receipt verification.
|
|
|
|
|
|
|
| 63 |
"""
|
| 64 |
|
| 65 |
def __init__(self, base_dir: str):
|
|
|
|
| 67 |
self.db_path = os.path.join(base_dir, "index.sqlite")
|
| 68 |
self._init_db()
|
| 69 |
|
| 70 |
+
def _connect(self) -> sqlite3.Connection:
|
| 71 |
+
return sqlite3.connect(self.db_path)
|
| 72 |
+
|
| 73 |
def _init_db(self):
|
| 74 |
os.makedirs(self.base_dir, exist_ok=True)
|
| 75 |
+
con = self._connect()
|
| 76 |
cur = con.cursor()
|
| 77 |
|
| 78 |
cur.execute("""
|
|
|
|
| 90 |
)
|
| 91 |
""")
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
cur.execute("""
|
| 94 |
CREATE TABLE IF NOT EXISTS receipts (
|
| 95 |
receipt_id TEXT PRIMARY KEY,
|
|
|
|
| 101 |
)
|
| 102 |
""")
|
| 103 |
|
| 104 |
+
# --- Ensure FTS exists in a join-safe form ---
|
| 105 |
+
cur.execute("SELECT sql FROM sqlite_master WHERE type='table' AND name='events_fts'")
|
| 106 |
+
row = cur.fetchone()
|
| 107 |
+
needs_rebuild = False
|
| 108 |
+
|
| 109 |
+
if row is None:
|
| 110 |
+
needs_rebuild = True
|
| 111 |
+
else:
|
| 112 |
+
sql = (row[0] or "").lower()
|
| 113 |
+
# If it contains content='' or content="" then it's contentless and join-on-columns is unreliable
|
| 114 |
+
if "content=''" in sql or 'content=""' in sql:
|
| 115 |
+
needs_rebuild = True
|
| 116 |
+
|
| 117 |
+
if needs_rebuild:
|
| 118 |
+
cur.execute("DROP TABLE IF EXISTS events_fts")
|
| 119 |
+
# Stored-content FTS table (simple and reliable)
|
| 120 |
+
cur.execute("""
|
| 121 |
+
CREATE VIRTUAL TABLE events_fts USING fts5(
|
| 122 |
+
event_id,
|
| 123 |
+
session_id,
|
| 124 |
+
text
|
| 125 |
+
)
|
| 126 |
+
""")
|
| 127 |
+
con.commit()
|
| 128 |
+
# Reindex from events
|
| 129 |
+
cur.execute("DELETE FROM events_fts")
|
| 130 |
+
cur.execute("""
|
| 131 |
+
INSERT INTO events_fts(event_id, session_id, text)
|
| 132 |
+
SELECT event_id, session_id, text FROM events
|
| 133 |
+
""")
|
| 134 |
+
con.commit()
|
| 135 |
+
|
| 136 |
con.close()
|
| 137 |
|
| 138 |
+
# ----------------
|
| 139 |
+
# Filesystem layout
|
| 140 |
+
# ----------------
|
| 141 |
def session_dir(self, session_id: str) -> str:
|
| 142 |
d = os.path.join(self.base_dir, "sessions", session_id)
|
| 143 |
os.makedirs(d, exist_ok=True)
|
|
|
|
| 151 |
os.makedirs(d, exist_ok=True)
|
| 152 |
return d
|
| 153 |
|
| 154 |
+
# -------------------
|
| 155 |
+
# RFT collapse scoring
|
| 156 |
+
# -------------------
|
| 157 |
def get_events(self, session_id: str, limit: int = 400) -> List[Dict[str, Any]]:
|
| 158 |
+
con = self._connect()
|
| 159 |
cur = con.cursor()
|
| 160 |
cur.execute("""
|
| 161 |
SELECT event_id, seq, ts_ms, role, text, digest, prev_hash, chain_hash, collapse
|
|
|
|
| 200 |
score = role_w * (0.65 * novelty + 0.35 * length_factor)
|
| 201 |
return float(max(0.0, min(1.0, score)))
|
| 202 |
|
| 203 |
+
# ----------------
|
| 204 |
+
# Append-only write
|
| 205 |
+
# ----------------
|
| 206 |
def _get_last_seq_and_chain(self, session_id: str) -> Tuple[int, str]:
|
| 207 |
+
con = self._connect()
|
| 208 |
cur = con.cursor()
|
| 209 |
|
| 210 |
cur.execute("SELECT COALESCE(MAX(seq), 0) FROM events WHERE session_id=?", (session_id,))
|
|
|
|
| 250 |
"collapse": collapse
|
| 251 |
}
|
| 252 |
|
| 253 |
+
# Append-only JSONL source of truth
|
| 254 |
log_path = self.session_log_path(session_id)
|
| 255 |
line = (json.dumps(rec, ensure_ascii=False) + "\n").encode("utf-8")
|
| 256 |
with open(log_path, "ab") as f:
|
|
|
|
| 258 |
f.flush()
|
| 259 |
os.fsync(f.fileno())
|
| 260 |
|
| 261 |
+
# Index
|
| 262 |
+
con = self._connect()
|
| 263 |
cur = con.cursor()
|
| 264 |
cur.execute("""
|
| 265 |
INSERT INTO events(session_id,event_id,seq,ts_ms,role,text,digest,prev_hash,chain_hash,collapse)
|
|
|
|
| 271 |
|
| 272 |
return rec
|
| 273 |
|
| 274 |
+
# -------------------------
|
| 275 |
+
# Retrieval (FTS + fallback)
|
| 276 |
+
# -------------------------
|
| 277 |
def search_lexical(self, session_id: str, query: str, k: int = 8) -> List[RetrievalHit]:
|
| 278 |
match = safe_fts_match(query)
|
| 279 |
+
if match == "___NO_HITS___":
|
| 280 |
+
return []
|
| 281 |
|
| 282 |
+
con = self._connect()
|
| 283 |
cur = con.cursor()
|
| 284 |
+
|
| 285 |
cur.execute("""
|
| 286 |
SELECT e.event_id, e.seq, e.role, e.text, e.ts_ms, e.digest, e.chain_hash,
|
| 287 |
bm25(events_fts) as rank
|
|
|
|
| 294 |
rows = cur.fetchall()
|
| 295 |
con.close()
|
| 296 |
|
| 297 |
+
hits: List[RetrievalHit] = []
|
| 298 |
for (eid, seq, role, text, ts, digest, chain_hash, rank) in rows:
|
| 299 |
r = float(rank if rank is not None else 0.0)
|
| 300 |
score = 1.0 / (1.0 + max(0.0, r))
|
| 301 |
hits.append(RetrievalHit(eid, int(seq or 0), role, text, ts, digest, chain_hash, score))
|
| 302 |
+
|
| 303 |
+
# Fallback if FTS returns nothing (protects demo UX)
|
| 304 |
+
if not hits:
|
| 305 |
+
tokens = re.findall(r"[A-Za-z0-9_]+", (query or "").lower())
|
| 306 |
+
if tokens:
|
| 307 |
+
needle = f"%{tokens[-1]}%"
|
| 308 |
+
con = self._connect()
|
| 309 |
+
cur = con.cursor()
|
| 310 |
+
cur.execute("""
|
| 311 |
+
SELECT event_id, seq, role, text, ts_ms, digest, chain_hash
|
| 312 |
+
FROM events
|
| 313 |
+
WHERE session_id=? AND LOWER(text) LIKE ?
|
| 314 |
+
ORDER BY seq DESC
|
| 315 |
+
LIMIT ?
|
| 316 |
+
""", (session_id, needle, int(k)))
|
| 317 |
+
rows2 = cur.fetchall()
|
| 318 |
+
con.close()
|
| 319 |
+
for (eid, seq, role, text, ts, digest, chain_hash) in rows2:
|
| 320 |
+
hits.append(RetrievalHit(eid, int(seq or 0), role, text, ts, digest, chain_hash, 0.001))
|
| 321 |
+
|
| 322 |
return hits
|
| 323 |
|
| 324 |
+
# -------------------------
|
| 325 |
+
# Receipts + verification
|
| 326 |
+
# -------------------------
|
| 327 |
def write_receipt(self, session_id: str, user_text: str, retrieved: List[RetrievalHit], prompt: str, response: str) -> str:
|
| 328 |
receipt_id = uuid.uuid4().hex
|
| 329 |
ts = now_ms()
|
|
|
|
| 346 |
"response_hash": sha256_str(response),
|
| 347 |
"engine": {
|
| 348 |
"name": "RFT Memory Receipt Engine",
|
| 349 |
+
"version": "0.3",
|
| 350 |
"method": "append-only ledger + FTS retrieval + hash-chain receipts"
|
| 351 |
}
|
| 352 |
}
|
|
|
|
| 354 |
path = os.path.join(self.receipts_dir(session_id), f"{receipt_id}.json")
|
| 355 |
atomic_write(path, json.dumps(receipt, indent=2, ensure_ascii=False).encode("utf-8"))
|
| 356 |
|
| 357 |
+
con = self._connect()
|
| 358 |
cur = con.cursor()
|
| 359 |
cur.execute("""
|
| 360 |
INSERT INTO receipts(receipt_id, session_id, ts_ms, prompt_hash, response_hash, receipt_path)
|
|
|
|
| 370 |
if not session_id:
|
| 371 |
return False, "Missing session_id."
|
| 372 |
|
| 373 |
+
con = self._connect()
|
| 374 |
cur = con.cursor()
|
| 375 |
|
| 376 |
for item in receipt_json.get("retrieval", []):
|
|
|
|
| 432 |
- Each turn generates a receipt listing exactly what was retrieved.
|
| 433 |
- Receipt verification proves the referenced events exist and hashes match.
|
| 434 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
## Why it matters
|
| 436 |
Storing logs is easy. Proving influence is the hard part.
|
| 437 |
Receipts turn memory into something inspectable and defensible.
|
|
|
|
| 443 |
|
| 444 |
|
| 445 |
def events_to_messages(events: List[Dict[str, Any]]) -> List[Dict[str, str]]:
|
| 446 |
+
return [{"role": e["role"], "content": e["text"]} for e in events if e["role"] in ("user", "assistant")]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 447 |
|
| 448 |
|
| 449 |
def format_ledger(events: List[Dict[str, Any]]) -> str:
|
|
|
|
| 470 |
)
|
| 471 |
|
| 472 |
|
| 473 |
+
def extract_fact_from_hits(hits: List[RetrievalHit], key: str) -> Optional[str]:
|
| 474 |
+
"""
|
| 475 |
+
Extract simple facts from retrieved memory only.
|
| 476 |
+
Supports:
|
| 477 |
+
- "Key = Value"
|
| 478 |
+
- "Key: Value"
|
| 479 |
+
- "My name is Value"
|
| 480 |
+
"""
|
| 481 |
+
key_l = key.lower()
|
| 482 |
+
patterns = [
|
| 483 |
+
rf"\b{re.escape(key_l)}\b\s*=\s*([A-Za-z0-9 _\-']+)",
|
| 484 |
+
rf"\b{re.escape(key_l)}\b\s*:\s*([A-Za-z0-9 _\-']+)",
|
| 485 |
+
]
|
| 486 |
+
for h in hits:
|
| 487 |
+
t = (h.text or "").strip()
|
| 488 |
+
tl = t.lower()
|
| 489 |
+
|
| 490 |
+
if key_l == "name":
|
| 491 |
+
m = re.search(r"\bmy name is\b\s+([A-Za-z0-9 _\-']+)", tl)
|
| 492 |
+
if m:
|
| 493 |
+
return m.group(1).strip().title()
|
| 494 |
+
|
| 495 |
+
for p in patterns:
|
| 496 |
+
m = re.search(p, tl)
|
| 497 |
+
if m:
|
| 498 |
+
return m.group(1).strip().strip(",.")
|
| 499 |
+
return None
|
| 500 |
+
|
| 501 |
+
|
| 502 |
+
def answer_from_memory(user_msg: str, hits: List[RetrievalHit]) -> str:
|
| 503 |
+
q = (user_msg or "").lower()
|
| 504 |
+
|
| 505 |
+
# Search-style queries
|
| 506 |
+
if q.startswith("search for") or q.startswith("search:"):
|
| 507 |
+
if not hits:
|
| 508 |
+
return "No matching memory slices were retrieved for this search."
|
| 509 |
+
lines = ["Search hits:"]
|
| 510 |
+
for h in hits:
|
| 511 |
+
lines.append(f"- {h.score:.4f} | {h.role} | {h.text}")
|
| 512 |
+
return "\n".join(lines)
|
| 513 |
+
|
| 514 |
+
# Simple Q&A based on retrieved memory only
|
| 515 |
+
if "dog" in q and "name" in q:
|
| 516 |
+
v = extract_fact_from_hits(hits, "dog")
|
| 517 |
+
return f"Your dog’s name (from stored memory) is: {v}" if v else "I didn’t retrieve a stored dog name for this query."
|
| 518 |
+
|
| 519 |
+
if "city" in q:
|
| 520 |
+
v = extract_fact_from_hits(hits, "city")
|
| 521 |
+
return f"Your city (from stored memory) is: {v}" if v else "I didn’t retrieve a stored city for this query."
|
| 522 |
+
|
| 523 |
+
if "drink" in q or "favourite drink" in q or "favorite drink" in q:
|
| 524 |
+
v = extract_fact_from_hits(hits, "drink") or extract_fact_from_hits(hits, "favourite drink") or extract_fact_from_hits(hits, "favorite drink")
|
| 525 |
+
return f"Your drink (from stored memory) is: {v}" if v else "I didn’t retrieve a stored drink for this query."
|
| 526 |
+
|
| 527 |
+
if "my name" in q:
|
| 528 |
+
v = extract_fact_from_hits(hits, "name")
|
| 529 |
+
return f"Your name (from stored memory) is: {v}" if v else "I didn’t retrieve a stored name for this query."
|
| 530 |
+
|
| 531 |
+
# Default: show what was retrieved (transparent)
|
| 532 |
if not hits:
|
| 533 |
return "No matching memory slices were retrieved for this query."
|
| 534 |
lines = ["Retrieved memory slices:"]
|
|
|
|
| 539 |
|
| 540 |
def chat_turn(session_id: str, user_msg: str, retrieval_k: int):
|
| 541 |
if not session_id:
|
| 542 |
+
session_id = uuid.uuid4().hex
|
| 543 |
|
| 544 |
store.append_event(session_id, "user", user_msg)
|
| 545 |
|
| 546 |
hits = store.search_lexical(session_id, user_msg, k=int(retrieval_k))
|
| 547 |
prompt = build_prompt(user_msg, hits)
|
| 548 |
+
response = answer_from_memory(user_msg, hits)
|
| 549 |
|
| 550 |
store.append_event(session_id, "assistant", response)
|
| 551 |
|
|
|
|
| 556 |
retrieved_view = "\n".join([f"{h.score:.4f} | {h.role} | {h.text}" for h in hits]) if hits else "(none)"
|
| 557 |
messages = events_to_messages(events)
|
| 558 |
|
| 559 |
+
# Return receipt path twice: textbox + downloadable file
|
| 560 |
return session_id, messages, retrieved_view, ledger, receipt_path, receipt_path
|
| 561 |
|
| 562 |
|
| 563 |
def run_guided_demo(session_id: str, retrieval_k: int):
|
| 564 |
if not session_id:
|
| 565 |
+
session_id = uuid.uuid4().hex
|
| 566 |
|
| 567 |
last = (session_id, [], "", "", "", None)
|
| 568 |
for step in GUIDED_DEMO_STEPS:
|
|
|
|
| 600 |
|
| 601 |
|
| 602 |
def reset_session():
|
| 603 |
+
sid = uuid.uuid4().hex
|
| 604 |
return sid, [], "", "", "", None
|
| 605 |
|
| 606 |
|
|
|
|
| 609 |
|
| 610 |
|
| 611 |
with gr.Blocks(title="RFT Memory Receipt Engine") as demo:
|
| 612 |
+
gr.Markdown("# RFT Memory Receipt Engine\nLocal persistence + retrieval + verifiable receipts.")
|
| 613 |
|
| 614 |
with gr.Row():
|
| 615 |
session_id = gr.Textbox(label="Session ID", value=new_session_id())
|
|
|
|
| 619 |
|
| 620 |
with gr.Tabs():
|
| 621 |
with gr.Tab("Chat"):
|
|
|
|
| 622 |
chatbot = gr.Chatbot(label="Conversation", height=320)
|
| 623 |
|
| 624 |
with gr.Row():
|
|
|
|
| 643 |
)
|
| 644 |
|
| 645 |
with gr.Tab("Guided Demo"):
|
| 646 |
+
gr.Markdown("Runs a scripted demo to show storage, recall, overrides, search, and receipts.")
|
| 647 |
run_demo_btn = gr.Button("Run Guided Demo", variant="primary")
|
| 648 |
|
| 649 |
demo_chatbot = gr.Chatbot(label="Demo conversation", height=320)
|