RFTSystems commited on
Commit
fb52d44
·
verified ·
1 Parent(s): 7978307

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +334 -135
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import json
3
  import time
4
  import uuid
@@ -13,19 +14,18 @@ import gradio as gr
13
  # RFT Memory Receipt Engine
14
  # =========================
15
 
16
- # For HF Spaces w/ persistent storage, set BASE_DIR to /data/rftmem in Space settings (env var)
17
  BASE_DIR = os.environ.get("RFT_MEM_BASE", "var/rftmem")
18
  os.makedirs(BASE_DIR, exist_ok=True)
19
 
20
- def sha256_bytes(b: bytes) -> str:
21
- return hashlib.sha256(b).hexdigest()
22
 
23
  def sha256_str(s: str) -> str:
24
- return sha256_bytes(s.encode("utf-8"))
 
25
 
26
  def now_ms() -> int:
27
  return int(time.time() * 1000)
28
 
 
29
  def atomic_write(path: str, data: bytes) -> None:
30
  tmp = path + ".tmp"
31
  with open(tmp, "wb") as f:
@@ -34,9 +34,30 @@ def atomic_write(path: str, data: bytes) -> None:
34
  os.fsync(f.fileno())
35
  os.replace(tmp, path)
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  @dataclass
38
  class RetrievalHit:
39
  event_id: str
 
40
  role: str
41
  text: str
42
  ts_ms: int
@@ -44,11 +65,12 @@ class RetrievalHit:
44
  chain_hash: str
45
  score: float
46
 
 
47
  class RFTMemoryStore:
48
  """
49
  Source of truth: append-only JSONL per session + hash-chained ledger.
50
- Index: SQLite (events table + FTS5) for lexical search.
51
- Receipts: JSON files saved per assistant turn.
52
  """
53
 
54
  def __init__(self, base_dir: str):
@@ -61,11 +83,11 @@ class RFTMemoryStore:
61
  con = sqlite3.connect(self.db_path)
62
  cur = con.cursor()
63
 
64
- # Main event table
65
  cur.execute("""
66
  CREATE TABLE IF NOT EXISTS events (
67
  session_id TEXT,
68
  event_id TEXT PRIMARY KEY,
 
69
  ts_ms INTEGER,
70
  role TEXT,
71
  text TEXT,
@@ -75,7 +97,16 @@ class RFTMemoryStore:
75
  collapse REAL
76
  )
77
  """)
78
- # FTS (lexical retrieval)
 
 
 
 
 
 
 
 
 
79
  cur.execute("""
80
  CREATE VIRTUAL TABLE IF NOT EXISTS events_fts USING fts5(
81
  event_id,
@@ -84,7 +115,8 @@ class RFTMemoryStore:
84
  content=''
85
  )
86
  """)
87
- # Receipts table (optional convenience)
 
88
  cur.execute("""
89
  CREATE TABLE IF NOT EXISTS receipts (
90
  receipt_id TEXT PRIMARY KEY,
@@ -95,6 +127,7 @@ class RFTMemoryStore:
95
  receipt_path TEXT
96
  )
97
  """)
 
98
  con.commit()
99
  con.close()
100
 
@@ -119,58 +152,79 @@ class RFTMemoryStore:
119
  # -------------------
120
  def collapse_score(self, session_id: str, role: str, text: str) -> float:
121
  """
122
- Simple, effective heuristic:
123
- - novelty vs last ~20 events (token Jaccard)
124
- - role weight (user/tool > assistant)
125
  """
126
  role_w = {"user": 1.0, "tool": 0.9, "assistant": 0.6}.get(role, 0.7)
127
- tokens = set(t.lower() for t in text.split() if t.strip())
 
128
  if not tokens:
129
  return 0.0
130
 
131
  recent = self.get_events(session_id, limit=20)
132
  recent_tokens = set()
133
  for e in recent:
134
- recent_tokens |= set(t.lower() for t in e["text"].split() if t.strip())
135
 
136
- # novelty = fraction of tokens not seen recently
137
  unseen = len(tokens - recent_tokens)
138
  novelty = unseen / max(1, len(tokens))
139
 
140
- # length sanity cap (avoid 1-word spikes)
141
  length_factor = min(1.0, len(tokens) / 30.0)
142
-
143
  score = role_w * (0.65 * novelty + 0.35 * length_factor)
144
  return float(max(0.0, min(1.0, score)))
145
 
146
  # ----------------
147
  # Append-only write
148
  # ----------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  def append_event(self, session_id: str, role: str, text: str) -> Dict[str, Any]:
150
  event_id = uuid.uuid4().hex
151
  ts = now_ms()
 
 
 
 
152
  payload = {
153
  "session_id": session_id,
154
  "event_id": event_id,
 
155
  "ts_ms": ts,
156
  "role": role,
157
  "text": text
158
  }
159
- digest = sha256_str(json.dumps(payload, sort_keys=True))
160
- prev_hash = self.get_last_chain_hash(session_id) or "0"*64
161
- chain_hash = sha256_str(prev_hash + digest)
162
 
 
 
163
  collapse = self.collapse_score(session_id, role, text)
164
 
165
  rec = {
166
  **payload,
167
  "digest": digest,
168
- "prev_hash": prev_hash,
169
  "chain_hash": chain_hash,
170
  "collapse": collapse
171
  }
172
 
173
- # 1) Write JSONL source-of-truth
174
  log_path = self.session_log_path(session_id)
175
  line = (json.dumps(rec, ensure_ascii=False) + "\n").encode("utf-8")
176
  with open(log_path, "ab") as f:
@@ -178,83 +232,75 @@ class RFTMemoryStore:
178
  f.flush()
179
  os.fsync(f.fileno())
180
 
181
- # 2) Update SQLite index (synchronous for demo simplicity)
182
  con = sqlite3.connect(self.db_path)
183
  cur = con.cursor()
184
  cur.execute("""
185
- INSERT INTO events(session_id,event_id,ts_ms,role,text,digest,prev_hash,chain_hash,collapse)
186
- VALUES(?,?,?,?,?,?,?,?,?)
187
- """, (session_id, event_id, ts, role, text, digest, prev_hash, chain_hash, collapse))
188
  cur.execute("INSERT INTO events_fts(event_id, session_id, text) VALUES(?,?,?)", (event_id, session_id, text))
189
  con.commit()
190
  con.close()
191
 
192
  return rec
193
 
194
- def get_last_chain_hash(self, session_id: str) -> Optional[str]:
195
  con = sqlite3.connect(self.db_path)
196
  cur = con.cursor()
197
  cur.execute("""
198
- SELECT chain_hash FROM events
199
- WHERE session_id=?
200
- ORDER BY ts_ms DESC
201
- LIMIT 1
202
- """, (session_id,))
203
- row = cur.fetchone()
204
- con.close()
205
- return row[0] if row else None
206
-
207
- def get_events(self, session_id: str, limit: int = 200) -> List[Dict[str, Any]]:
208
- con = sqlite3.connect(self.db_path)
209
- cur = con.cursor()
210
- cur.execute("""
211
- SELECT event_id, ts_ms, role, text, digest, prev_hash, chain_hash, collapse
212
  FROM events
213
  WHERE session_id=?
214
- ORDER BY ts_ms ASC
215
  LIMIT ?
216
  """, (session_id, limit))
217
  rows = cur.fetchall()
218
  con.close()
 
219
  out = []
220
  for r in rows:
221
  out.append({
222
  "event_id": r[0],
223
- "ts_ms": r[1],
224
- "role": r[2],
225
- "text": r[3],
226
- "digest": r[4],
227
- "prev_hash": r[5],
228
- "chain_hash": r[6],
229
- "collapse": r[7],
 
230
  })
231
  return out
232
 
233
  # -------------------------
234
- # Hybrid retrieval (lexical)
235
  # -------------------------
236
  def search_lexical(self, session_id: str, query: str, k: int = 8) -> List[RetrievalHit]:
 
 
237
  con = sqlite3.connect(self.db_path)
238
  cur = con.cursor()
239
 
240
- # FTS5 bm25 ranking (lower is better). We'll invert into a positive score.
241
  cur.execute("""
242
- SELECT e.event_id, e.role, e.text, e.ts_ms, e.digest, e.chain_hash,
243
  bm25(events_fts) as rank
244
  FROM events_fts
245
  JOIN events e ON e.event_id = events_fts.event_id
246
  WHERE events_fts.text MATCH ? AND e.session_id=?
247
  ORDER BY rank ASC
248
  LIMIT ?
249
- """, (query, session_id, k))
250
 
251
  rows = cur.fetchall()
252
  con.close()
253
 
254
  hits = []
255
- for (eid, role, text, ts, digest, chain_hash, rank) in rows:
256
- score = 1.0 / (1.0 + float(rank if rank is not None else 0.0))
257
- hits.append(RetrievalHit(eid, role, text, ts, digest, chain_hash, score))
 
258
  return hits
259
 
260
  # -------------------------
@@ -271,18 +317,19 @@ class RFTMemoryStore:
271
  "query": user_text,
272
  "retrieval": [{
273
  "event_id": h.event_id,
274
- "digest": h.digest,
275
- "chain_hash": h.chain_hash,
276
- "score": h.score,
277
  "role": h.role,
278
- "text": h.text
 
 
 
279
  } for h in retrieved],
280
  "prompt_hash": sha256_str(prompt),
281
  "response_hash": sha256_str(response),
282
  "engine": {
283
  "name": "RFT Memory Receipt Engine",
284
- "version": "0.1-demo",
285
- "notes": "append-only JSONL + SQLite FTS + hash-chained ledger"
286
  }
287
  }
288
 
@@ -304,7 +351,6 @@ class RFTMemoryStore:
304
  if not session_id:
305
  return False, "Missing session_id."
306
 
307
- # 1) Verify the referenced events exist and digests match
308
  con = sqlite3.connect(self.db_path)
309
  cur = con.cursor()
310
 
@@ -326,81 +372,145 @@ class RFTMemoryStore:
326
  return False, f"Chain hash mismatch for {eid}"
327
 
328
  con.close()
329
- return True, "Receipt verified: referenced events exist and hashes match."
 
330
 
331
  store = RFTMemoryStore(BASE_DIR)
332
 
333
  # --------------------
334
- # Gradio demonstration
335
  # --------------------
336
- def new_session():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
  return uuid.uuid4().hex
338
 
339
- def format_events(events: List[Dict[str, Any]]) -> str:
 
340
  lines = []
341
- for e in events[-200:]:
 
342
  lines.append(
343
- f"{time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(e['ts_ms']/1000))} | {e['role']}\n"
344
  f"{e['text']}\n"
345
  f"event_id={e['event_id']} collapse={e['collapse']:.2f}\n"
346
  f"digest={e['digest']}\n"
347
  f"chain={e['chain_hash']}\n"
348
- f"{'-'*60}"
349
  )
350
  return "\n".join(lines)
351
 
352
- def chat_turn(session_id: str, user_msg: str, retrieval_k: int, use_generation: bool):
353
- if not session_id:
354
- session_id = new_session()
355
-
356
- store.append_event(session_id, "user", user_msg)
357
-
358
- # Lexical retrieval (demo: per-session scope)
359
- hits = store.search_lexical(session_id, user_msg, k=retrieval_k)
360
 
 
361
  memories = "\n".join([f"- ({h.role}) {h.text}" for h in hits]) if hits else "(none)"
362
- prompt = (
363
- "SYSTEM: You are a transparent assistant. Use retrieved memories if relevant.\n"
364
  f"RETRIEVED MEMORIES:\n{memories}\n\n"
365
  f"USER:\n{user_msg}\n"
366
  )
367
 
368
- # Demo response (deterministic by default). Optional "generation" placeholder.
369
- if use_generation:
370
- # Keep this lightweight: no external APIs in demo.
371
- response = (
372
- "I’m running in demo mode (no external model calls). "
373
- "Here are the most relevant retrieved memories I would use:\n\n"
374
- f"{memories}\n\n"
375
- "If you want real generation, plug your local model call here."
376
- )
377
- else:
378
- response = (
379
- "RFT Memory Engine (demo): I retrieved the following memory slices:\n\n"
380
- f"{memories}\n\n"
381
- "I can verify exactly which memories were used via the receipt."
382
- )
383
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
  store.append_event(session_id, "assistant", response)
385
 
 
386
  receipt_path = store.write_receipt(session_id, user_msg, hits, prompt, response)
387
 
388
- events = store.get_events(session_id, limit=500)
389
- ledger = format_events(events)
390
-
391
- # Show retrieved block + receipt for transparency
392
  retrieved_view = "\n".join([f"{h.score:.4f} | {h.role} | {h.text}" for h in hits]) if hits else "(none)"
393
- return session_id, response, retrieved_view, ledger, receipt_path
394
 
395
- def search_memory(session_id: str, query: str, k: int):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
  if not session_id:
397
  return "(no session)"
398
- hits = store.search_lexical(session_id, query, k=k)
399
  if not hits:
400
  return "(no hits)"
401
- return "\n".join([f"{h.score:.4f} | {h.role} | {h.text}\n event_id={h.event_id}\n digest={h.digest}\n" for h in hits])
 
 
 
 
 
 
 
 
 
 
 
402
 
403
- def verify_uploaded_receipt(file_obj):
404
  if file_obj is None:
405
  return "Upload a receipt JSON file."
406
  with open(file_obj.name, "r", encoding="utf-8") as f:
@@ -408,42 +518,131 @@ def verify_uploaded_receipt(file_obj):
408
  ok, msg = store.verify_receipt(data)
409
  return f"{'✅' if ok else '❌'} {msg}"
410
 
411
- with gr.Blocks(title="RFT Memory Receipt Engine (Local Persistence Demo)") as demo:
412
- gr.Markdown(
413
- "# RFT Memory Receipt Engine\n"
414
- "Append-only session ledger + SQLite FTS retrieval + cryptographic receipts.\n\n"
415
- "**Goal:** demonstrate durable memory + verifiable retrieval lineage."
416
- )
417
 
418
- session_id = gr.Textbox(label="Session ID", value=new_session())
419
- with gr.Row():
420
- retrieval_k = gr.Slider(1, 20, value=8, step=1, label="Retrieval K")
421
- use_generation = gr.Checkbox(value=False, label="Show 'generation' placeholder")
 
 
 
 
422
 
423
- user_msg = gr.Textbox(label="User message", placeholder="Type a message…")
424
- send = gr.Button("Send")
425
 
426
- assistant_out = gr.Textbox(label="Assistant output", lines=8)
427
- retrieved_out = gr.Textbox(label="Retrieved memory slices (what influenced this turn)", lines=8)
428
- ledger_out = gr.Textbox(label="Session Ledger (hash-chained)", lines=16)
429
- receipt_path = gr.Textbox(label="Receipt saved at (server path)", lines=1)
 
430
 
431
- send.click(
432
- chat_turn,
433
- inputs=[session_id, user_msg, retrieval_k, use_generation],
434
- outputs=[session_id, assistant_out, retrieved_out, ledger_out, receipt_path],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
  )
436
 
437
- gr.Markdown("## Manual Search")
438
- search_q = gr.Textbox(label="Search query")
439
- search_btn = gr.Button("Search")
440
- search_results = gr.Textbox(label="Search results", lines=10)
441
- search_btn.click(search_memory, inputs=[session_id, search_q, retrieval_k], outputs=[search_results])
442
-
443
- gr.Markdown("## Verify a Receipt")
444
- receipt_file = gr.File(label="Upload receipt JSON")
445
- verify_btn = gr.Button("Verify Receipt")
446
- verify_out = gr.Textbox(label="Verification result")
447
- verify_btn.click(verify_uploaded_receipt, inputs=[receipt_file], outputs=[verify_out])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
 
449
  demo.launch()
 
1
  import os
2
+ import re
3
  import json
4
  import time
5
  import uuid
 
14
  # RFT Memory Receipt Engine
15
  # =========================
16
 
 
17
  BASE_DIR = os.environ.get("RFT_MEM_BASE", "var/rftmem")
18
  os.makedirs(BASE_DIR, exist_ok=True)
19
 
 
 
20
 
21
  def sha256_str(s: str) -> str:
22
+ return hashlib.sha256(s.encode("utf-8")).hexdigest()
23
+
24
 
25
  def now_ms() -> int:
26
  return int(time.time() * 1000)
27
 
28
+
29
  def atomic_write(path: str, data: bytes) -> None:
30
  tmp = path + ".tmp"
31
  with open(tmp, "wb") as f:
 
34
  os.fsync(f.fileno())
35
  os.replace(tmp, path)
36
 
37
+
38
+ def safe_fts_match(user_query: str) -> str:
39
+ """
40
+ FTS5 MATCH can error on punctuation/special syntax.
41
+ This converts input into a conservative OR query: word1 OR word2 OR ...
42
+ """
43
+ words = re.findall(r"[A-Za-z0-9_]+", (user_query or "").lower())
44
+ # If empty, return something that yields zero hits
45
+ if not words:
46
+ return "___NO_HITS___"
47
+ # De-dupe while preserving order
48
+ seen = set()
49
+ uniq = []
50
+ for w in words:
51
+ if w not in seen:
52
+ seen.add(w)
53
+ uniq.append(w)
54
+ return " OR ".join(uniq)
55
+
56
+
57
  @dataclass
58
  class RetrievalHit:
59
  event_id: str
60
+ seq: int
61
  role: str
62
  text: str
63
  ts_ms: int
 
65
  chain_hash: str
66
  score: float
67
 
68
+
69
  class RFTMemoryStore:
70
  """
71
  Source of truth: append-only JSONL per session + hash-chained ledger.
72
+ Index: SQLite (events table + FTS5) for lexical retrieval.
73
+ Receipts: JSON files saved per assistant turn, downloadable and verifiable.
74
  """
75
 
76
  def __init__(self, base_dir: str):
 
83
  con = sqlite3.connect(self.db_path)
84
  cur = con.cursor()
85
 
 
86
  cur.execute("""
87
  CREATE TABLE IF NOT EXISTS events (
88
  session_id TEXT,
89
  event_id TEXT PRIMARY KEY,
90
+ seq INTEGER,
91
  ts_ms INTEGER,
92
  role TEXT,
93
  text TEXT,
 
97
  collapse REAL
98
  )
99
  """)
100
+
101
+ # Ensure seq exists for older DBs
102
+ cur.execute("PRAGMA table_info(events)")
103
+ cols = {row[1] for row in cur.fetchall()}
104
+ if "seq" not in cols:
105
+ cur.execute("ALTER TABLE events ADD COLUMN seq INTEGER")
106
+ if "collapse" not in cols:
107
+ cur.execute("ALTER TABLE events ADD COLUMN collapse REAL")
108
+
109
+ # FTS index (lexical)
110
  cur.execute("""
111
  CREATE VIRTUAL TABLE IF NOT EXISTS events_fts USING fts5(
112
  event_id,
 
115
  content=''
116
  )
117
  """)
118
+
119
+ # Receipts registry
120
  cur.execute("""
121
  CREATE TABLE IF NOT EXISTS receipts (
122
  receipt_id TEXT PRIMARY KEY,
 
127
  receipt_path TEXT
128
  )
129
  """)
130
+
131
  con.commit()
132
  con.close()
133
 
 
152
  # -------------------
153
  def collapse_score(self, session_id: str, role: str, text: str) -> float:
154
  """
155
+ Compact salience signal:
156
+ - novelty vs last 20 events (token novelty)
157
+ - role weighting (user/tool carry more signal)
158
  """
159
  role_w = {"user": 1.0, "tool": 0.9, "assistant": 0.6}.get(role, 0.7)
160
+
161
+ tokens = set(t.lower() for t in re.findall(r"[A-Za-z0-9_]+", text or ""))
162
  if not tokens:
163
  return 0.0
164
 
165
  recent = self.get_events(session_id, limit=20)
166
  recent_tokens = set()
167
  for e in recent:
168
+ recent_tokens |= set(t.lower() for t in re.findall(r"[A-Za-z0-9_]+", e["text"] or ""))
169
 
 
170
  unseen = len(tokens - recent_tokens)
171
  novelty = unseen / max(1, len(tokens))
172
 
 
173
  length_factor = min(1.0, len(tokens) / 30.0)
 
174
  score = role_w * (0.65 * novelty + 0.35 * length_factor)
175
  return float(max(0.0, min(1.0, score)))
176
 
177
  # ----------------
178
  # Append-only write
179
  # ----------------
180
+ def _get_last_seq_and_chain(self, session_id: str) -> Tuple[int, str]:
181
+ con = sqlite3.connect(self.db_path)
182
+ cur = con.cursor()
183
+ cur.execute("""
184
+ SELECT COALESCE(MAX(seq), 0) FROM events WHERE session_id=?
185
+ """, (session_id,))
186
+ last_seq = int(cur.fetchone()[0] or 0)
187
+
188
+ cur.execute("""
189
+ SELECT chain_hash FROM events
190
+ WHERE session_id=?
191
+ ORDER BY seq DESC
192
+ LIMIT 1
193
+ """, (session_id,))
194
+ row = cur.fetchone()
195
+ con.close()
196
+ last_chain = row[0] if row and row[0] else ("0" * 64)
197
+ return last_seq, last_chain
198
+
199
  def append_event(self, session_id: str, role: str, text: str) -> Dict[str, Any]:
200
  event_id = uuid.uuid4().hex
201
  ts = now_ms()
202
+
203
+ last_seq, prev_chain = self._get_last_seq_and_chain(session_id)
204
+ seq = last_seq + 1
205
+
206
  payload = {
207
  "session_id": session_id,
208
  "event_id": event_id,
209
+ "seq": seq,
210
  "ts_ms": ts,
211
  "role": role,
212
  "text": text
213
  }
 
 
 
214
 
215
+ digest = sha256_str(json.dumps(payload, sort_keys=True, ensure_ascii=False))
216
+ chain_hash = sha256_str(prev_chain + digest)
217
  collapse = self.collapse_score(session_id, role, text)
218
 
219
  rec = {
220
  **payload,
221
  "digest": digest,
222
+ "prev_hash": prev_chain,
223
  "chain_hash": chain_hash,
224
  "collapse": collapse
225
  }
226
 
227
+ # JSONL source of truth (append-only)
228
  log_path = self.session_log_path(session_id)
229
  line = (json.dumps(rec, ensure_ascii=False) + "\n").encode("utf-8")
230
  with open(log_path, "ab") as f:
 
232
  f.flush()
233
  os.fsync(f.fileno())
234
 
235
+ # Index for retrieval
236
  con = sqlite3.connect(self.db_path)
237
  cur = con.cursor()
238
  cur.execute("""
239
+ INSERT INTO events(session_id,event_id,seq,ts_ms,role,text,digest,prev_hash,chain_hash,collapse)
240
+ VALUES(?,?,?,?,?,?,?,?,?,?)
241
+ """, (session_id, event_id, seq, ts, role, text, digest, prev_chain, chain_hash, collapse))
242
  cur.execute("INSERT INTO events_fts(event_id, session_id, text) VALUES(?,?,?)", (event_id, session_id, text))
243
  con.commit()
244
  con.close()
245
 
246
  return rec
247
 
248
+ def get_events(self, session_id: str, limit: int = 300) -> List[Dict[str, Any]]:
249
  con = sqlite3.connect(self.db_path)
250
  cur = con.cursor()
251
  cur.execute("""
252
+ SELECT event_id, seq, ts_ms, role, text, digest, prev_hash, chain_hash, collapse
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  FROM events
254
  WHERE session_id=?
255
+ ORDER BY seq ASC
256
  LIMIT ?
257
  """, (session_id, limit))
258
  rows = cur.fetchall()
259
  con.close()
260
+
261
  out = []
262
  for r in rows:
263
  out.append({
264
  "event_id": r[0],
265
+ "seq": int(r[1] or 0),
266
+ "ts_ms": r[2],
267
+ "role": r[3],
268
+ "text": r[4],
269
+ "digest": r[5],
270
+ "prev_hash": r[6],
271
+ "chain_hash": r[7],
272
+ "collapse": float(r[8] or 0.0),
273
  })
274
  return out
275
 
276
  # -------------------------
277
+ # Lexical retrieval (FTS5)
278
  # -------------------------
279
  def search_lexical(self, session_id: str, query: str, k: int = 8) -> List[RetrievalHit]:
280
+ match = safe_fts_match(query)
281
+
282
  con = sqlite3.connect(self.db_path)
283
  cur = con.cursor()
284
 
285
+ # bm25 lower is better; invert into a score
286
  cur.execute("""
287
+ SELECT e.event_id, e.seq, e.role, e.text, e.ts_ms, e.digest, e.chain_hash,
288
  bm25(events_fts) as rank
289
  FROM events_fts
290
  JOIN events e ON e.event_id = events_fts.event_id
291
  WHERE events_fts.text MATCH ? AND e.session_id=?
292
  ORDER BY rank ASC
293
  LIMIT ?
294
+ """, (match, session_id, int(k)))
295
 
296
  rows = cur.fetchall()
297
  con.close()
298
 
299
  hits = []
300
+ for (eid, seq, role, text, ts, digest, chain_hash, rank) in rows:
301
+ r = float(rank if rank is not None else 0.0)
302
+ score = 1.0 / (1.0 + max(0.0, r))
303
+ hits.append(RetrievalHit(eid, int(seq or 0), role, text, ts, digest, chain_hash, score))
304
  return hits
305
 
306
  # -------------------------
 
317
  "query": user_text,
318
  "retrieval": [{
319
  "event_id": h.event_id,
320
+ "seq": h.seq,
 
 
321
  "role": h.role,
322
+ "text": h.text,
323
+ "score": h.score,
324
+ "digest": h.digest,
325
+ "chain_hash": h.chain_hash
326
  } for h in retrieved],
327
  "prompt_hash": sha256_str(prompt),
328
  "response_hash": sha256_str(response),
329
  "engine": {
330
  "name": "RFT Memory Receipt Engine",
331
+ "version": "0.2",
332
+ "method": "append-only ledger + FTS retrieval + hash-chain receipts"
333
  }
334
  }
335
 
 
351
  if not session_id:
352
  return False, "Missing session_id."
353
 
 
354
  con = sqlite3.connect(self.db_path)
355
  cur = con.cursor()
356
 
 
372
  return False, f"Chain hash mismatch for {eid}"
373
 
374
  con.close()
375
+ return True, "Receipt verified: all referenced events exist and hashes match."
376
+
377
 
378
  store = RFTMemoryStore(BASE_DIR)
379
 
380
  # --------------------
381
+ # Demo + UI utilities
382
  # --------------------
383
+
384
+ EXAMPLE_PROMPTS = [
385
+ "Store this: Dog=Nova, City=Manchester, Drink=Pepsi Max.",
386
+ "What is my dog's name?",
387
+ "What city did I say?",
388
+ "My drink is Coke Zero now. This overrides earlier.",
389
+ "What is my favourite drink?",
390
+ "Search for: Nova",
391
+ "Search for: Manchester",
392
+ "Explain which memory slices influenced your last answer.",
393
+ ]
394
+
395
+ GUIDED_DEMO_STEPS = [
396
+ "My name is Liam. Remember that.",
397
+ "Store these exactly: Dog = Nova. City = Manchester. Favourite drink = Pepsi Max.",
398
+ "What’s my dog’s name?",
399
+ "What city did I say?",
400
+ "My favourite drink is Coke Zero now. This overrides earlier.",
401
+ "What’s my favourite drink?",
402
+ "Search for 'Nova' and show the matching memory line.",
403
+ "Search for 'Coke' and show the matching memory line.",
404
+ ]
405
+
406
+
407
+ def new_session_id() -> str:
408
  return uuid.uuid4().hex
409
 
410
+
411
+ def format_ledger(events: List[Dict[str, Any]]) -> str:
412
  lines = []
413
+ for e in events[-250:]:
414
+ t = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(e["ts_ms"] / 1000))
415
  lines.append(
416
+ f"{t} | seq={e['seq']} | {e['role']}\n"
417
  f"{e['text']}\n"
418
  f"event_id={e['event_id']} collapse={e['collapse']:.2f}\n"
419
  f"digest={e['digest']}\n"
420
  f"chain={e['chain_hash']}\n"
421
+ f"{'-'*72}"
422
  )
423
  return "\n".join(lines)
424
 
 
 
 
 
 
 
 
 
425
 
426
+ def build_prompt(user_msg: str, hits: List[RetrievalHit]) -> str:
427
  memories = "\n".join([f"- ({h.role}) {h.text}" for h in hits]) if hits else "(none)"
428
+ return (
429
+ "SYSTEM: Use retrieved memory slices if relevant. Prefer exact stored facts.\n"
430
  f"RETRIEVED MEMORIES:\n{memories}\n\n"
431
  f"USER:\n{user_msg}\n"
432
  )
433
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434
 
435
+ def response_from_retrieval(user_msg: str, hits: List[RetrievalHit]) -> str:
436
+ # Transparent output: shows what would be fed to a model + what was retrieved
437
+ if not hits:
438
+ return "No matching memory slices were retrieved for this query."
439
+ lines = ["Retrieved memory slices:"]
440
+ for h in hits:
441
+ lines.append(f"- {h.score:.4f} | {h.role} | {h.text}")
442
+ return "\n".join(lines)
443
+
444
+
445
+ def chat_turn(session_id: str, user_msg: str, retrieval_k: int) -> Tuple[str, List[List[str]], str, str, str]:
446
+ if not session_id:
447
+ session_id = new_session_id()
448
+
449
+ # Write user event
450
+ store.append_event(session_id, "user", user_msg)
451
+
452
+ # Retrieve
453
+ hits = store.search_lexical(session_id, user_msg, k=int(retrieval_k))
454
+ prompt = build_prompt(user_msg, hits)
455
+ response = response_from_retrieval(user_msg, hits)
456
+
457
+ # Write assistant event
458
  store.append_event(session_id, "assistant", response)
459
 
460
+ # Receipt
461
  receipt_path = store.write_receipt(session_id, user_msg, hits, prompt, response)
462
 
463
+ # UI outputs
464
+ events = store.get_events(session_id, limit=800)
465
+ ledger = format_ledger(events)
 
466
  retrieved_view = "\n".join([f"{h.score:.4f} | {h.role} | {h.text}" for h in hits]) if hits else "(none)"
 
467
 
468
+ # Build chatbot view from stored events (last ~60)
469
+ convo = []
470
+ for e in events[-120:]:
471
+ if e["role"] == "user":
472
+ convo.append([e["text"], None])
473
+ elif e["role"] == "assistant":
474
+ # attach assistant response to the last user turn if possible
475
+ if convo and convo[-1][1] is None:
476
+ convo[-1][1] = e["text"]
477
+ else:
478
+ convo.append([None, e["text"]])
479
+
480
+ return session_id, convo, retrieved_view, ledger, receipt_path
481
+
482
+
483
+ def run_guided_demo(session_id: str, retrieval_k: int) -> Tuple[str, List[List[str]], str, str, str]:
484
+ if not session_id:
485
+ session_id = new_session_id()
486
+
487
+ last_receipt = ""
488
+ for step in GUIDED_DEMO_STEPS:
489
+ session_id, convo, retrieved_view, ledger, receipt = chat_turn(session_id, step, retrieval_k)
490
+ last_receipt = receipt
491
+ return session_id, convo, retrieved_view, ledger, last_receipt
492
+
493
+
494
+ def manual_search(session_id: str, query: str, k: int) -> str:
495
  if not session_id:
496
  return "(no session)"
497
+ hits = store.search_lexical(session_id, query, k=int(k))
498
  if not hits:
499
  return "(no hits)"
500
+ out = []
501
+ for h in hits:
502
+ out.append(
503
+ f"{h.score:.4f} | seq={h.seq} | {h.role}\n"
504
+ f"{h.text}\n"
505
+ f"event_id={h.event_id}\n"
506
+ f"digest={h.digest}\n"
507
+ f"chain={h.chain_hash}\n"
508
+ f"{'-'*60}"
509
+ )
510
+ return "\n".join(out)
511
+
512
 
513
+ def verify_uploaded_receipt(file_obj) -> str:
514
  if file_obj is None:
515
  return "Upload a receipt JSON file."
516
  with open(file_obj.name, "r", encoding="utf-8") as f:
 
518
  ok, msg = store.verify_receipt(data)
519
  return f"{'✅' if ok else '❌'} {msg}"
520
 
 
 
 
 
 
 
521
 
522
+ def reset_session() -> Tuple[str, List[List[str]], str, str, Optional[str]]:
523
+ sid = new_session_id()
524
+ return sid, [], "", "", None
525
+
526
+
527
+ def fill_example(selected: str) -> str:
528
+ return selected or ""
529
+
530
 
531
+ HOW_TO_MD = """
532
+ # How to use this Space
533
 
534
+ ## What to write
535
+ ### Store facts
536
+ Write plain statements you want durable memory for, for example:
537
+ - “My name is Liam.”
538
+ - “Store these: Dog=Nova, City=Manchester, Drink=Pepsi Max.”
539
 
540
+ ### Ask for recall
541
+ Ask direct questions that should be answered from memory:
542
+ - “What’s my dog’s name?”
543
+ - “What city did I say?”
544
+
545
+ ### Override old facts
546
+ State the new truth in one clear sentence:
547
+ - “My favourite drink is Coke Zero now. This overrides earlier.”
548
+
549
+ ### Search memory on purpose
550
+ Use keywords you expect to exist in the log:
551
+ - “Nova”
552
+ - “Manchester”
553
+ - “Coke Zero”
554
+
555
+ ## What to expect
556
+ - Each turn writes to an append-only ledger.
557
+ - Each turn retrieves a fixed number of relevant memory slices (K).
558
+ - Each turn generates a receipt that lists exactly what was retrieved.
559
+ - Receipt verification checks that referenced events exist and hashes match.
560
+
561
+ ## Who this is useful for
562
+ - Agent builders who need restart continuity and memory outside context
563
+ - Teams who need auditability (“show me what influenced the output”)
564
+ - Anyone debugging long-lived workflows where invisible prompt history becomes a liability
565
+ - Systems that must control token spend by keeping prompts on a fixed budget
566
+
567
+ ## Why it matters
568
+ Most “memory” layers save chat logs. That is not the hard problem.
569
+ The hard problem is proving which past information was used and being able to verify it later.
570
+ Receipts turn agent memory into something inspectable and defensible.
571
+ """
572
+
573
+
574
+ with gr.Blocks(title="RFT Memory Receipt Engine") as demo:
575
+ gr.Markdown(
576
+ "# RFT Memory Receipt Engine\n"
577
+ "Local persistence + lexical retrieval + verifiable receipts.\n"
578
  )
579
 
580
+ with gr.Row():
581
+ session_id = gr.Textbox(label="Session ID", value=new_session_id())
582
+ new_sess_btn = gr.Button("New Session", variant="secondary")
583
+
584
+ retrieval_k = gr.Slider(1, 20, value=8, step=1, label="Retrieval K")
585
+
586
+ with gr.Tabs():
587
+ with gr.Tab("Chat"):
588
+ chatbot = gr.Chatbot(label="Conversation", height=320)
589
+ with gr.Row():
590
+ example_pick = gr.Dropdown(label="Example prompts", choices=EXAMPLE_PROMPTS, value=EXAMPLE_PROMPTS[0])
591
+ use_example = gr.Button("Use Example", variant="secondary")
592
+
593
+ user_msg = gr.Textbox(label="Message", placeholder="Type a message…")
594
+ send = gr.Button("Send", variant="primary")
595
+
596
+ retrieved_out = gr.Textbox(label="Retrieved memory slices", lines=8)
597
+ ledger_out = gr.Textbox(label="Session ledger (hash-chained)", lines=14)
598
+
599
+ receipt_path = gr.Textbox(label="Last receipt path (server)", lines=1)
600
+ receipt_file = gr.File(label="Download last receipt JSON")
601
+
602
+ use_example.click(fill_example, inputs=[example_pick], outputs=[user_msg])
603
+ send.click(
604
+ chat_turn,
605
+ inputs=[session_id, user_msg, retrieval_k],
606
+ outputs=[session_id, chatbot, retrieved_out, ledger_out, receipt_path],
607
+ ).then(lambda p: p, inputs=[receipt_path], outputs=[receipt_file])
608
+
609
+ with gr.Tab("Guided Demo"):
610
+ gr.Markdown(
611
+ "This runs a short script that stores facts, asks for recall, performs an override, and searches memory.\n"
612
+ "It’s designed to show what to write and what receipts capture."
613
+ )
614
+ run_demo_btn = gr.Button("Run Guided Demo", variant="primary")
615
+ demo_chatbot = gr.Chatbot(label="Demo conversation", height=320)
616
+ demo_retrieved = gr.Textbox(label="Last retrieved memory slices", lines=8)
617
+ demo_ledger = gr.Textbox(label="Ledger after demo", lines=14)
618
+ demo_receipt_path = gr.Textbox(label="Last demo receipt path (server)", lines=1)
619
+ demo_receipt_file = gr.File(label="Download last demo receipt JSON")
620
+
621
+ run_demo_btn.click(
622
+ run_guided_demo,
623
+ inputs=[session_id, retrieval_k],
624
+ outputs=[session_id, demo_chatbot, demo_retrieved, demo_ledger, demo_receipt_path],
625
+ ).then(lambda p: p, inputs=[demo_receipt_path], outputs=[demo_receipt_file])
626
+
627
+ with gr.Tab("Manual Search"):
628
+ q = gr.Textbox(label="Search query", placeholder="Type keywords…")
629
+ do_search = gr.Button("Search", variant="primary")
630
+ results = gr.Textbox(label="Results", lines=14)
631
+ do_search.click(manual_search, inputs=[session_id, q, retrieval_k], outputs=[results])
632
+
633
+ with gr.Tab("Verify Receipt"):
634
+ receipt_upload = gr.File(label="Upload a receipt JSON")
635
+ verify_btn = gr.Button("Verify", variant="primary")
636
+ verify_out = gr.Textbox(label="Verification result")
637
+ verify_btn.click(verify_uploaded_receipt, inputs=[receipt_upload], outputs=[verify_out])
638
+
639
+ with gr.Tab("How to Use"):
640
+ gr.Markdown(HOW_TO_MD)
641
+
642
+ new_sess_btn.click(
643
+ reset_session,
644
+ inputs=[],
645
+ outputs=[session_id, chatbot, retrieved_out, ledger_out, receipt_file],
646
+ )
647
 
648
  demo.launch()