RFTSystems commited on
Commit
f602dc7
·
verified ·
1 Parent(s): 24de4fe

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +776 -0
app.py ADDED
@@ -0,0 +1,776 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import io
4
+ import json
5
+ import time
6
+ import uuid
7
+ import zipfile
8
+ import sqlite3
9
+ import hashlib
10
+ from dataclasses import dataclass
11
+ from typing import List, Dict, Any, Optional, Tuple
12
+
13
+ import gradio as gr
14
+
15
# Root directory for all ledger / receipt / export artifacts.
# Overridable via the RFT_MEM_BASE environment variable; created eagerly so
# every later path helper can assume it exists.
BASE_DIR = os.environ.get("RFT_MEM_BASE", "var/rftmem")
os.makedirs(BASE_DIR, exist_ok=True)
17
+
18
+
19
def sha256_str(s: str) -> str:
    """Return the hex SHA-256 digest of *s* encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(s.encode("utf-8"))
    return hasher.hexdigest()
21
+
22
+
23
def now_ms() -> int:
    """Current wall-clock time in whole milliseconds (truncated, not rounded)."""
    seconds = time.time()
    return int(seconds * 1000)
25
+
26
+
27
def atomic_write(path: str, data: bytes) -> None:
    """Durably write *data* to *path* via a fsynced temp file + atomic rename."""
    staging = path + ".tmp"
    with open(staging, "wb") as handle:
        handle.write(data)
        # Flush Python buffers, then force the OS to hit the disk before rename.
        handle.flush()
        os.fsync(handle.fileno())
    os.replace(staging, path)
34
+
35
+
36
def safe_fts_match(user_query: str) -> str:
    """
    Build a conservative FTS5 MATCH expression: the OR of the query's unique
    alphanumeric tokens (lowercased, first-seen order). Returns the sentinel
    "___NO_HITS___" when the query contains no tokens at all.
    """
    tokens = re.findall(r"[A-Za-z0-9_]+", (user_query or "").lower())
    if not tokens:
        return "___NO_HITS___"
    # dict.fromkeys dedupes while preserving insertion order.
    return " OR ".join(dict.fromkeys(tokens))
47
+
48
+
49
@dataclass
class RetrievalHit:
    """One retrieved memory slice plus the integrity data needed for receipts."""
    event_id: str    # ledger event this hit refers to
    seq: int         # position in the session's hash chain
    role: str        # "user" / "assistant" / "tool"
    text: str        # stored event text
    ts_ms: int       # event timestamp (epoch milliseconds)
    digest: str      # SHA-256 of the canonical event payload
    chain_hash: str  # running hash-chain value at this event
    score: float     # retrieval relevance (higher is better)
59
+
60
+
61
class RFTMemoryStore:
    """
    Append-only ledger + SQLite FTS retrieval + hash-chained integrity.
    Produces per-turn receipts that can be verified against stored events.

    Layout under ``base_dir``:
      index.sqlite                     -- events/receipts tables + events_fts index
      sessions/<sid>/events.jsonl      -- append-only source-of-truth ledger
      sessions/<sid>/receipts/*.json   -- per-turn memory receipts
      sessions/<sid>/exports/*         -- trace JSON and audit-pack ZIP exports
    """

    def __init__(self, base_dir: str):
        self.base_dir = base_dir
        self.db_path = os.path.join(base_dir, "index.sqlite")
        self._init_db()

    def _connect(self) -> sqlite3.Connection:
        # One short-lived connection per operation; no pooling needed at demo scale.
        return sqlite3.connect(self.db_path)

    def _init_db(self):
        """Create tables on first use; (re)build the FTS index when it is missing
        or was created contentless (a contentless FTS table cannot serve the
        stored-column joins used below)."""
        os.makedirs(self.base_dir, exist_ok=True)
        con = self._connect()
        cur = con.cursor()

        # Relational mirror of the JSONL ledger (source of truth stays on disk).
        cur.execute("""
            CREATE TABLE IF NOT EXISTS events (
                session_id TEXT,
                event_id TEXT PRIMARY KEY,
                seq INTEGER,
                ts_ms INTEGER,
                role TEXT,
                text TEXT,
                digest TEXT,
                prev_hash TEXT,
                chain_hash TEXT,
                collapse REAL
            )
        """)

        # Receipt registry; the receipt JSON itself lives at receipt_path.
        cur.execute("""
            CREATE TABLE IF NOT EXISTS receipts (
                receipt_id TEXT PRIMARY KEY,
                session_id TEXT,
                ts_ms INTEGER,
                prompt_hash TEXT,
                response_hash TEXT,
                receipt_path TEXT
            )
        """)

        # Ensure join-safe FTS5 (stored content)
        cur.execute("SELECT sql FROM sqlite_master WHERE type='table' AND name='events_fts'")
        row = cur.fetchone()
        needs_rebuild = False

        if row is None:
            needs_rebuild = True
        else:
            sql = (row[0] or "").lower()
            # A table declared with content='' stores no column data.
            if "content=''" in sql or 'content=""' in sql:
                needs_rebuild = True

        if needs_rebuild:
            cur.execute("DROP TABLE IF EXISTS events_fts")
            cur.execute("""
                CREATE VIRTUAL TABLE events_fts USING fts5(
                    event_id,
                    session_id,
                    text
                )
            """)
            con.commit()
            cur.execute("DELETE FROM events_fts")
            # Re-index everything already present in the ledger table.
            cur.execute("""
                INSERT INTO events_fts(event_id, session_id, text)
                SELECT event_id, session_id, text FROM events
            """)
            con.commit()

        con.close()

    # Filesystem
    def session_dir(self, session_id: str) -> str:
        # Created on demand so callers never pre-provision sessions.
        d = os.path.join(self.base_dir, "sessions", session_id)
        os.makedirs(d, exist_ok=True)
        return d

    def session_log_path(self, session_id: str) -> str:
        """Path of this session's append-only JSONL ledger."""
        return os.path.join(self.session_dir(session_id), "events.jsonl")

    def receipts_dir(self, session_id: str) -> str:
        """Directory holding per-turn receipt JSON files (created on demand)."""
        d = os.path.join(self.session_dir(session_id), "receipts")
        os.makedirs(d, exist_ok=True)
        return d

    def exports_dir(self, session_id: str) -> str:
        """Directory holding trace/audit exports (created on demand)."""
        d = os.path.join(self.session_dir(session_id), "exports")
        os.makedirs(d, exist_ok=True)
        return d

    # Ledger ops
    def get_events(self, session_id: str, limit: int = 600) -> List[Dict[str, Any]]:
        """Return up to ``limit`` events for the session, ordered by seq ASC
        (i.e. the EARLIEST events when the session exceeds ``limit``)."""
        con = self._connect()
        cur = con.cursor()
        cur.execute("""
            SELECT event_id, seq, ts_ms, role, text, digest, prev_hash, chain_hash, collapse
            FROM events
            WHERE session_id=?
            ORDER BY seq ASC
            LIMIT ?
        """, (session_id, int(limit)))
        rows = cur.fetchall()
        con.close()

        out = []
        for r in rows:
            out.append({
                "event_id": r[0],
                "seq": int(r[1] or 0),
                "ts_ms": r[2],
                "role": r[3],
                "text": r[4],
                "digest": r[5],
                "prev_hash": r[6],
                "chain_hash": r[7],
                "collapse": float(r[8] or 0.0),
            })
        return out

    def _get_last_seq_and_chain(self, session_id: str) -> Tuple[int, str]:
        """Return (max seq, latest chain hash); (0, 64 zeros) for a new session."""
        con = self._connect()
        cur = con.cursor()
        cur.execute("SELECT COALESCE(MAX(seq), 0) FROM events WHERE session_id=?", (session_id,))
        last_seq = int(cur.fetchone()[0] or 0)
        cur.execute("""
            SELECT chain_hash FROM events
            WHERE session_id=?
            ORDER BY seq DESC
            LIMIT 1
        """, (session_id,))
        row = cur.fetchone()
        con.close()
        # Genesis value: 64 zero hex chars anchors the hash chain.
        last_chain = row[0] if row and row[0] else ("0" * 64)
        return last_seq, last_chain

    def collapse_score(self, session_id: str, role: str, text: str) -> float:
        """
        Heuristic 0..1 "memorability" score stored with each event: weights the
        role, token novelty against prior events, and message length.

        NOTE(review): get_events() returns the EARLIEST 20 events (seq ASC), so
        novelty is measured against the start of the session, not the most
        recent turns — confirm whether that is the intended baseline.
        """
        role_w = {"user": 1.0, "tool": 0.9, "assistant": 0.6}.get(role, 0.7)
        tokens = set(t.lower() for t in re.findall(r"[A-Za-z0-9_]+", text or ""))
        if not tokens:
            return 0.0
        recent = self.get_events(session_id, limit=20)
        recent_tokens = set()
        for e in recent:
            recent_tokens |= set(t.lower() for t in re.findall(r"[A-Za-z0-9_]+", e["text"] or ""))
        unseen = len(tokens - recent_tokens)
        novelty = unseen / max(1, len(tokens))
        length_factor = min(1.0, len(tokens) / 30.0)
        score = role_w * (0.65 * novelty + 0.35 * length_factor)
        return float(max(0.0, min(1.0, score)))

    def append_event(self, session_id: str, role: str, text: str) -> Dict[str, Any]:
        """
        Append one event: fsynced JSONL ledger first, then the SQLite index.
        The chain hash extends the previous event's hash, so any later edit to
        a stored event is detectable. Returns the full stored record.

        NOTE(review): the read(last seq/chain) -> write sequence is not guarded
        by a lock, so concurrent appends to one session could race — acceptable
        for a single-user demo, confirm before multi-worker use.
        """
        event_id = uuid.uuid4().hex
        ts = now_ms()
        last_seq, prev_chain = self._get_last_seq_and_chain(session_id)
        seq = last_seq + 1

        payload = {
            "session_id": session_id,
            "event_id": event_id,
            "seq": seq,
            "ts_ms": ts,
            "role": role,
            "text": text
        }
        # Digest covers the canonical (sorted-key) JSON form of the payload.
        digest = sha256_str(json.dumps(payload, sort_keys=True, ensure_ascii=False))
        chain_hash = sha256_str(prev_chain + digest)
        collapse = self.collapse_score(session_id, role, text)

        rec = {**payload, "digest": digest, "prev_hash": prev_chain, "chain_hash": chain_hash, "collapse": collapse}

        # JSONL source of truth
        log_path = self.session_log_path(session_id)
        line = (json.dumps(rec, ensure_ascii=False) + "\n").encode("utf-8")
        with open(log_path, "ab") as f:
            f.write(line)
            f.flush()
            os.fsync(f.fileno())

        # Index
        con = self._connect()
        cur = con.cursor()
        cur.execute("""
            INSERT INTO events(session_id,event_id,seq,ts_ms,role,text,digest,prev_hash,chain_hash,collapse)
            VALUES(?,?,?,?,?,?,?,?,?,?)
        """, (session_id, event_id, seq, ts, role, text, digest, prev_chain, chain_hash, collapse))
        cur.execute("INSERT INTO events_fts(event_id, session_id, text) VALUES(?,?,?)", (event_id, session_id, text))
        con.commit()
        con.close()

        return rec

    # Retrieval
    def search_lexical(self, session_id: str, query: str, k: int = 8) -> List[RetrievalHit]:
        """
        Lexical retrieval: FTS5 MATCH ranked by bm25 (lower rank = better, so
        score = 1/(1+rank)); falls back to a LIKE scan on the query's last
        token (score 0.001) when FTS finds nothing.
        """
        match = safe_fts_match(query)
        if match == "___NO_HITS___":
            return []

        con = self._connect()
        cur = con.cursor()
        cur.execute("""
            SELECT e.event_id, e.seq, e.role, e.text, e.ts_ms, e.digest, e.chain_hash,
                   bm25(events_fts) as rank
            FROM events_fts
            JOIN events e ON e.event_id = events_fts.event_id
            WHERE events_fts.text MATCH ? AND e.session_id=?
            ORDER BY rank ASC
            LIMIT ?
        """, (match, session_id, int(k)))
        rows = cur.fetchall()
        con.close()

        hits: List[RetrievalHit] = []
        for (eid, seq, role, text, ts, digest, chain_hash, rank) in rows:
            r = float(rank if rank is not None else 0.0)
            score = 1.0 / (1.0 + max(0.0, r))
            hits.append(RetrievalHit(eid, int(seq or 0), role, text, ts, digest, chain_hash, score))

        # UX fallback (if someone searches a single token that FTS doesn't match as expected)
        if not hits:
            tokens = re.findall(r"[A-Za-z0-9_]+", (query or "").lower())
            if tokens:
                needle = f"%{tokens[-1]}%"
                con = self._connect()
                cur = con.cursor()
                cur.execute("""
                    SELECT event_id, seq, role, text, ts_ms, digest, chain_hash
                    FROM events
                    WHERE session_id=? AND LOWER(text) LIKE ?
                    ORDER BY seq DESC
                    LIMIT ?
                """, (session_id, needle, int(k)))
                rows2 = cur.fetchall()
                con.close()
                for (eid, seq, role, text, ts, digest, chain_hash) in rows2:
                    hits.append(RetrievalHit(eid, int(seq or 0), role, text, ts, digest, chain_hash, 0.001))

        return hits

    # Receipts
    def write_receipt(self, session_id: str, user_text: str, retrieved: List[RetrievalHit], prompt: str, response: str) -> str:
        """
        Persist a per-turn receipt (query, retrieved slices with their hashes,
        prompt/response hashes) atomically to disk and register it in SQLite.
        Returns the receipt's filesystem path.
        """
        receipt_id = uuid.uuid4().hex
        ts = now_ms()
        receipt = {
            "receipt_id": receipt_id,
            "session_id": session_id,
            "ts_ms": ts,
            "query": user_text,
            "retrieval": [{
                "event_id": h.event_id,
                "seq": h.seq,
                "role": h.role,
                "content": h.text,
                "score": h.score,
                "digest": h.digest,
                "chain_hash": h.chain_hash
            } for h in retrieved],
            "prompt_hash": sha256_str(prompt),
            "response_hash": sha256_str(response),
            "engine": {"name": "RFTSystems TrustStack", "version": "1.0", "method": "ledger + fts + receipts + guardrails"}
        }
        path = os.path.join(self.receipts_dir(session_id), f"{receipt_id}.json")
        atomic_write(path, json.dumps(receipt, indent=2, ensure_ascii=False).encode("utf-8"))

        con = self._connect()
        cur = con.cursor()
        cur.execute("""
            INSERT INTO receipts(receipt_id, session_id, ts_ms, prompt_hash, response_hash, receipt_path)
            VALUES(?,?,?,?,?,?)
        """, (receipt_id, session_id, ts, receipt["prompt_hash"], receipt["response_hash"], path))
        con.commit()
        con.close()

        return path

    def verify_receipt(self, receipt_json: Dict[str, Any]) -> Tuple[bool, str]:
        """
        Check every retrieval item in the receipt against the stored events:
        the event must exist and its digest and chain hash must match exactly.
        Returns (ok, human-readable message); fails fast on first mismatch.
        """
        session_id = receipt_json.get("session_id")
        if not session_id:
            return False, "Missing session_id."

        con = self._connect()
        cur = con.cursor()
        for item in receipt_json.get("retrieval", []):
            eid = item.get("event_id")
            expected_digest = item.get("digest")
            expected_chain = item.get("chain_hash")
            cur.execute("SELECT digest, chain_hash FROM events WHERE event_id=? AND session_id=?", (eid, session_id))
            row = cur.fetchone()
            if not row:
                con.close()
                return False, f"Event not found: {eid}"
            if row[0] != expected_digest:
                con.close()
                return False, f"Digest mismatch: {eid}"
            if row[1] != expected_chain:
                con.close()
                return False, f"Chain hash mismatch: {eid}"

        con.close()
        return True, "Receipt verified: all referenced events exist and hashes match."

    # Trace export (OTel-style JSON, but self-contained)
    def export_trace(self, session_id: str, receipt_path: str) -> str:
        """Write a self-contained trace JSON (span list) derived from a stored
        receipt; returns the export path."""
        export_id = uuid.uuid4().hex
        ts = now_ms()

        with open(receipt_path, "r", encoding="utf-8") as f:
            receipt = json.load(f)

        trace = {
            "trace_id": sha256_str(session_id + receipt.get("receipt_id", "") + str(ts)),
            "session_id": session_id,
            "ts_ms": ts,
            "spans": [
                {"span": "user.turn", "attrs": {"query": receipt.get("query", ""), "receipt_id": receipt.get("receipt_id", "")}},
                {"span": "memory.retrieve", "attrs": {"k": len(receipt.get("retrieval", [])), "retrieval": receipt.get("retrieval", [])}},
                {"span": "assistant.respond", "attrs": {"prompt_hash": receipt.get("prompt_hash", ""), "response_hash": receipt.get("response_hash", "")}},
                {"span": "receipt.write", "attrs": {"receipt_id": receipt.get("receipt_id", ""), "receipt_path": receipt_path}},
            ]
        }

        out_path = os.path.join(self.exports_dir(session_id), f"trace_{export_id}.json")
        atomic_write(out_path, json.dumps(trace, indent=2, ensure_ascii=False).encode("utf-8"))
        return out_path

    # Audit pack export
    def export_audit_pack(self, session_id: str) -> str:
        """Bundle the session's JSONL ledger, all receipts, and an integrity
        summary into a single ZIP; returns the ZIP path."""
        export_id = uuid.uuid4().hex
        out_zip = os.path.join(self.exports_dir(session_id), f"audit_pack_{export_id}.zip")

        events = self.get_events(session_id, limit=5000)
        receipts_dir = self.receipts_dir(session_id)
        ledger_path = self.session_log_path(session_id)

        summary = {
            "session_id": session_id,
            "export_id": export_id,
            "ts_ms": now_ms(),
            "events_count": len(events),
            "last_chain_hash": (events[-1]["chain_hash"] if events else "0"*64),
            "integrity": "hash-chained ledger + receipt verification",
        }

        with zipfile.ZipFile(out_zip, "w", compression=zipfile.ZIP_DEFLATED) as z:
            # include JSONL ledger if present
            if os.path.exists(ledger_path):
                z.write(ledger_path, arcname="ledger/events.jsonl")
            # include receipts
            for fn in os.listdir(receipts_dir):
                if fn.endswith(".json"):
                    z.write(os.path.join(receipts_dir, fn), arcname=f"receipts/{fn}")
            # include summary
            z.writestr("summary.json", json.dumps(summary, indent=2, ensure_ascii=False))

        return out_zip
420
+
421
+
422
# Module-level singleton store shared by every Gradio callback.
store = RFTMemoryStore(BASE_DIR)

# Scripted conversation replayed verbatim by the one-click "Investor Demo" tab.
INVESTOR_SCRIPT = [
    "Store these exactly: Dog=Nova, City=Manchester, Drink=Pepsi Max.",
    "What is my dog's name?",
    "What city did I say?",
    "My drink is Coke Zero now. This overrides earlier.",
    "What is my favourite drink?",
    "Search for: Nova",
]

# Dropdown suggestions shown in the interactive Chat tab.
EXAMPLE_PROMPTS = [
    "Store this: Dog=Nova, City=Manchester, Drink=Pepsi Max.",
    "What is my dog's name?",
    "What city did I say?",
    "My drink is Coke Zero now. This overrides earlier.",
    "What is my favourite drink?",
    "Search for: Nova",
    "Search for: Manchester",
]
442
+
443
+
444
def new_session_id() -> str:
    """Return a fresh 32-character hex session identifier."""
    return str(uuid.uuid4()).replace("-", "")
446
+
447
+
448
def events_to_messages(events: List[Dict[str, Any]]) -> List[Dict[str, str]]:
    """Project ledger events onto chat messages, keeping only user/assistant turns."""
    messages = []
    for ev in events:
        if ev["role"] in ("user", "assistant"):
            messages.append({"role": ev["role"], "content": ev["text"]})
    return messages
450
+
451
+
452
def format_ledger(events: List[Dict[str, Any]]) -> str:
    """Render the last 250 ledger events as a human-readable text dump,
    one multi-line entry per event with a dashed separator."""
    separator = "-" * 72
    entries = []
    for ev in events[-250:]:
        stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(ev["ts_ms"] / 1000))
        entries.append("\n".join((
            f"{stamp} | seq={ev['seq']} | {ev['role']}",
            f"{ev['text']}",
            f"event_id={ev['event_id']} collapse={ev['collapse']:.2f}",
            f"digest={ev['digest']}",
            f"chain={ev['chain_hash']}",
            separator,
        )))
    return "\n".join(entries)
465
+
466
+
467
def build_prompt(user_msg: str, hits: List[RetrievalHit]) -> str:
    """Assemble the illustrative LLM prompt: retrieved memories first, then the user turn."""
    if hits:
        memories = "\n".join(f"- ({h.role}) {h.text}" for h in hits)
    else:
        memories = "(none)"
    sections = [
        "SYSTEM: Use retrieved memory slices if relevant. Prefer exact stored facts.",
        f"RETRIEVED MEMORIES:\n{memories}",
        "",
        f"USER:\n{user_msg}",
        "",
    ]
    return "\n".join(sections)
474
+
475
+
476
def extract_fact_from_hits(hits: List[RetrievalHit], key: str) -> Optional[str]:
    """
    Scan retrieved slices for a ``key=value`` / ``key: value`` fact, or a
    "my name is X" phrase when *key* is "name". Returns the value or None.

    Fix: matching is now case-insensitive against the ORIGINAL text instead of
    a lowercased copy, so stored casing is preserved — "Dog=Nova" yields
    "Nova" rather than "nova".
    """
    key_l = key.lower()
    # Accept both "key=value" and "key: value"; value may contain spaces,
    # hyphens, underscores and apostrophes, and stops at , or .
    patterns = [
        rf"\b{re.escape(key_l)}\b\s*=\s*([A-Za-z0-9 _\-']+)",
        rf"\b{re.escape(key_l)}\b\s*:\s*([A-Za-z0-9 _\-']+)",
    ]
    for h in hits:
        text = (h.text or "").strip()
        if key_l == "name":
            m = re.search(r"\bmy name is\b\s+([A-Za-z0-9 _\-']+)", text, re.IGNORECASE)
            if m:
                # Title-case keeps the historical presentation for names.
                return m.group(1).strip().title()
        for p in patterns:
            m = re.search(p, text, re.IGNORECASE)
            if m:
                return m.group(1).strip().strip(",.")
    return None
493
+
494
+
495
def answer_from_memory(user_msg: str, hits: List[RetrievalHit]) -> str:
    """Deterministic, retrieval-grounded reply used in place of an LLM call."""
    query = (user_msg or "").lower()

    def hit_lines() -> str:
        return "\n".join(f"- {h.score:.4f} | {h.role} | {h.text}" for h in hits)

    # Explicit search command: dump the raw hits.
    if query.startswith(("search for", "search:")):
        if not hits:
            return "No matching memory slices were retrieved for this search."
        return "Search hits:\n" + hit_lines()

    # Fact lookups, checked in the original priority order.
    lookups = (
        ("dog" in query and "name" in query, "dog",
         "Your dog’s name (from stored memory) is: {}",
         "I didn’t retrieve a stored dog name for this query."),
        ("city" in query, "city",
         "Your city (from stored memory) is: {}",
         "I didn’t retrieve a stored city for this query."),
        ("drink" in query, "drink",
         "Your drink (from stored memory) is: {}",
         "I didn’t retrieve a stored drink for this query."),
    )
    for wanted, fact_key, found_tpl, missing_msg in lookups:
        if wanted:
            value = extract_fact_from_hits(hits, fact_key)
            return found_tpl.format(value) if value else missing_msg

    if not hits:
        return "No matching memory slices were retrieved for this query."
    return "Retrieved memory slices:\n" + hit_lines()
518
+
519
+
520
def chat_turn(session_id: str, user_msg: str, retrieval_k: int):
    """
    Execute one full turn: log the user event, retrieve memory, answer,
    log the reply, write a receipt, export a trace. Returns the 8-tuple of
    Gradio outputs (receipt and trace paths appear twice: textbox + file).
    """
    sid = session_id or new_session_id()

    # The user event must be in the ledger before retrieval/receipting.
    store.append_event(sid, "user", user_msg)
    slices = store.search_lexical(sid, user_msg, k=int(retrieval_k))

    prompt_text = build_prompt(user_msg, slices)
    reply = answer_from_memory(user_msg, slices)

    store.append_event(sid, "assistant", reply)
    receipt_file = store.write_receipt(sid, user_msg, slices, prompt_text, reply)

    history = store.get_events(sid, limit=1200)
    ledger_view = format_ledger(history)
    if slices:
        slices_view = "\n".join(f"{h.score:.4f} | {h.role} | {h.text}" for h in slices)
    else:
        slices_view = "(none)"
    chat_messages = events_to_messages(history)

    # Export trace for the latest receipt
    trace_file = store.export_trace(sid, receipt_file)

    return sid, chat_messages, slices_view, ledger_view, receipt_file, receipt_file, trace_file, trace_file
542
+
543
+
544
def run_investor_demo(session_id: str, retrieval_k: int):
    """Replay the scripted investor conversation; return the final turn's outputs."""
    sid = session_id or new_session_id()

    # Placeholder shape used only if the script were empty (kept as-is).
    outputs = (sid, [], "", "", "", None, "", None, "", None)
    for scripted_line in INVESTOR_SCRIPT:
        outputs = chat_turn(sid, scripted_line, retrieval_k)
        sid = outputs[0]
    return outputs
553
+
554
+
555
def verify_receipt_upload(file_obj) -> str:
    """Load an uploaded receipt JSON and report pass/fail verification."""
    if file_obj is None:
        return "Upload a receipt JSON file."
    with open(file_obj.name, "r", encoding="utf-8") as fh:
        receipt = json.load(fh)
    ok, msg = store.verify_receipt(receipt)
    mark = "✅" if ok else "❌"
    return f"{mark} {msg}"
562
+
563
+
564
def guardrail_tool_call(receipt_file, requested_action: str) -> str:
    """Receipt-gated tool execution: the tool only 'runs' after verification."""
    if receipt_file is None:
        return "❌ Blocked: no receipt provided."

    with open(receipt_file.name, "r", encoding="utf-8") as fh:
        receipt = json.load(fh)

    ok, msg = store.verify_receipt(receipt)
    if not ok:
        return f"❌ Blocked: receipt failed verification. Reason: {msg}"

    # Allowed actions are deliberately boring in the demo.
    # The point is the gate, not the tool.
    return f"✅ Allowed: receipt verified. Executed action: {requested_action}"
578
+
579
+
580
def export_audit_pack(session_id: str):
    """Build the audit-pack ZIP for this session; returns (path, path) for the
    textbox and file components."""
    sid = session_id or new_session_id()
    zip_path = store.export_audit_pack(sid)
    return zip_path, zip_path
585
+
586
+
587
def api_playground(session_id: str, action: str, payload_json: str, retrieval_k: int):
    """
    Dispatch one API-style demo call; JSON in, JSON out.

    Args:
        session_id: current session (a fresh one is minted when empty).
        action: "memory.write" | "memory.search" | "receipt.verify" | "audit.export".
        payload_json: JSON object string with action-specific arguments.
        retrieval_k: default result count for memory.search.

    Returns:
        (session_id, response_json_str).

    Fix: malformed or non-object payloads previously raised out of the Gradio
    callback (json.loads / payload.get were unguarded); they now come back as
    an in-band {"ok": false, "error": ...} response.
    """
    if not session_id:
        session_id = new_session_id()

    def _reply(obj):
        # All responses share the same pretty-printed JSON shape.
        return session_id, json.dumps(obj, indent=2, ensure_ascii=False)

    payload = {}
    if payload_json and payload_json.strip():
        try:
            payload = json.loads(payload_json)
        except json.JSONDecodeError as exc:
            return _reply({"ok": False, "error": f"Invalid JSON payload: {exc}"})
        if not isinstance(payload, dict):
            return _reply({"ok": False, "error": "Payload must be a JSON object"})

    if action == "memory.write":
        role = payload.get("role", "user")
        text = payload.get("text", "")
        ev = store.append_event(session_id, role, text)
        return _reply({"ok": True, "event": ev})

    if action == "memory.search":
        q = payload.get("query", "")
        k = int(payload.get("k", retrieval_k))
        hits = store.search_lexical(session_id, q, k=k)
        return _reply({
            "ok": True,
            "hits": [{
                "event_id": h.event_id, "seq": h.seq, "role": h.role, "text": h.text,
                "score": h.score, "digest": h.digest, "chain_hash": h.chain_hash
            } for h in hits]
        })

    if action == "receipt.verify":
        ok, msg = store.verify_receipt(payload.get("receipt", {}))
        return _reply({"ok": ok, "message": msg})

    if action == "audit.export":
        path = store.export_audit_pack(session_id)
        return _reply({"ok": True, "audit_pack_path": path})

    return _reply({"ok": False, "error": "Unknown action"})
624
+
625
+
626
def token_savings(n: int, B: int, M: int, W: int) -> List[List[Any]]:
    """
    Compare total token cost of resending the full history every call vs a
    fixed retrieval budget.

    baseline: n calls of B framing tokens plus a linearly growing history
              (M + 2M + ... + nM = M*n*(n+1)/2).
    budgeted: n calls of B framing tokens plus a constant W-token window.

    Returns rows for the Gradio Dataframe: [metric, value] pairs.
    """
    turns, base, per_turn, window = int(n), int(B), int(M), int(W)
    baseline_total = turns * base + per_turn * turns * (turns + 1) // 2
    budgeted_total = turns * (base + window)
    reduction = 0.0 if baseline_total == 0 else 1.0 - budgeted_total / baseline_total
    return [
        ["turns", turns],
        ["baseline_total_tokens", baseline_total],
        ["budgeted_total_tokens", budgeted_total],
        ["reduction_percent", round(100.0 * reduction, 2)],
    ]
638
+
639
+
640
def fill_example(selected: str) -> str:
    """Copy the chosen example prompt into the message box ('' when nothing picked)."""
    if selected:
        return selected
    return ""
642
+
643
+
644
+ PITCH_MD = """
645
+ # RFTSystems TrustStack Console
646
+
647
+ I don’t do “trust me”. I do receipts.
648
+
649
+ This Space demonstrates an agent memory layer that is **durable, searchable, and provable**:
650
+ - Append-only session ledger (JSONL)
651
+ - SQLite FTS retrieval (fast lexical recall)
652
+ - Hash-chain integrity (tamper-evident)
653
+ - Per-turn “Memory Receipt” (what was retrieved + hashes)
654
+ - Receipt verification (pass/fail)
655
+ - Receipt-gated tool execution (guardrails)
656
+ - Trace export (what influenced what)
657
+ - Audit pack export (ZIP)
658
+
659
+ If you build agents for real users, receipts are what turns “memory” into infrastructure.
660
+ """
661
+
662
+ HOW_TO_MD = """
663
+ ## Quick demo
664
+ 1) Click **Run Investor Demo**
665
+ 2) Download the last receipt JSON
666
+ 3) Upload it into **Verify Receipt**
667
+ 4) Try the **Guardrails** tab: tool execution only passes with a verified receipt
668
+ 5) Export an **Audit Pack** ZIP
669
+
670
+ ## What this proves
671
+ - Memory persistence is not the hard part
672
+ - Retrieval is not the hard part
673
+ - **Integrity + evidence** is the hard part
674
+ """
675
+
676
+
677
# ---------------------------------------------------------------------------
# Gradio UI wiring. All callbacks share the module-level `store`; the session
# id is threaded through each handler's outputs so every tab stays on the
# same session.
# NOTE(review): events_to_messages() emits {"role", "content"} dicts — this
# requires a gr.Chatbot in "messages" format; confirm against the pinned
# Gradio version.
# ---------------------------------------------------------------------------
with gr.Blocks(title="RFTSystems TrustStack Console") as demo:
    gr.Markdown(PITCH_MD)

    # Shared controls: session identity + retrieval fan-out.
    with gr.Row():
        session_id = gr.Textbox(label="Session ID", value=new_session_id())
        retrieval_k = gr.Slider(1, 20, value=8, step=1, label="Retrieval K")

    with gr.Tabs():
        # One-click scripted walkthrough of the whole pipeline.
        with gr.Tab("Investor Demo"):
            gr.Markdown("One click. Full story: storage → retrieval → override → receipt → verification → trace → audit.")
            run_demo_btn = gr.Button("Run Investor Demo", variant="primary")
            demo_chat = gr.Chatbot(label="Demo Conversation", height=320)
            demo_retrieved = gr.Textbox(label="Retrieved memory slices", lines=8)
            demo_ledger = gr.Textbox(label="Ledger (hash-chained)", lines=12)
            demo_receipt_path = gr.Textbox(label="Last receipt path (server)", lines=1)
            demo_receipt_file = gr.File(label="Download last receipt JSON")
            demo_trace_path = gr.Textbox(label="Last trace path (server)", lines=1)
            demo_trace_file = gr.File(label="Download last trace JSON")

            run_demo_btn.click(
                run_investor_demo,
                inputs=[session_id, retrieval_k],
                outputs=[session_id, demo_chat, demo_retrieved, demo_ledger,
                         demo_receipt_path, demo_receipt_file, demo_trace_path, demo_trace_file],
            )

        # Free-form chat against the same memory store.
        with gr.Tab("Chat"):
            chatbot = gr.Chatbot(label="Conversation", height=320)
            with gr.Row():
                example_pick = gr.Dropdown(label="Example prompts", choices=EXAMPLE_PROMPTS, value=EXAMPLE_PROMPTS[0])
                use_example = gr.Button("Use Example", variant="secondary")

            user_msg = gr.Textbox(label="Message")
            send = gr.Button("Send", variant="primary")

            retrieved_out = gr.Textbox(label="Retrieved memory slices", lines=8)
            ledger_out = gr.Textbox(label="Ledger (hash-chained)", lines=12)

            receipt_path = gr.Textbox(label="Last receipt path (server)", lines=1)
            receipt_file = gr.File(label="Download last receipt JSON")

            trace_path = gr.Textbox(label="Last trace path (server)", lines=1)
            trace_file = gr.File(label="Download last trace JSON")

            use_example.click(fill_example, inputs=[example_pick], outputs=[user_msg])

            send.click(
                chat_turn,
                inputs=[session_id, user_msg, retrieval_k],
                outputs=[session_id, chatbot, retrieved_out, ledger_out, receipt_path, receipt_file, trace_path, trace_file],
            )

        # Upload a receipt and check it against the stored ledger.
        with gr.Tab("Verify Receipt"):
            receipt_upload = gr.File(label="Upload receipt JSON")
            verify_btn = gr.Button("Verify", variant="primary")
            verify_out = gr.Textbox(label="Verification result")
            verify_btn.click(verify_receipt_upload, inputs=[receipt_upload], outputs=[verify_out])

        # Simulated tools that only run behind a verified receipt.
        with gr.Tab("Guardrails"):
            gr.Markdown("Tool execution is blocked unless the receipt verifies.")
            receipt_for_tool = gr.File(label="Provide a receipt JSON")
            action = gr.Dropdown(label="Requested action", choices=[
                "tool.send_email_simulated",
                "tool.export_customer_data_simulated",
                "tool.trigger_purchase_simulated",
                "tool.write_file_simulated",
            ], value="tool.write_file_simulated")
            run_tool_btn = gr.Button("Attempt tool call", variant="primary")
            tool_out = gr.Textbox(label="Result", lines=4)
            run_tool_btn.click(guardrail_tool_call, inputs=[receipt_for_tool, action], outputs=[tool_out])

        # One-click ZIP of ledger + receipts + integrity summary.
        with gr.Tab("Audit Pack"):
            gr.Markdown("One-click export: ledger + receipts + integrity summary.")
            export_btn = gr.Button("Export Audit Pack ZIP", variant="primary")
            audit_path = gr.Textbox(label="Audit pack path (server)", lines=1)
            audit_file = gr.File(label="Download audit pack ZIP")
            export_btn.click(export_audit_pack, inputs=[session_id], outputs=[audit_path, audit_file])

        # JSON-in/JSON-out access to the same store operations.
        with gr.Tab("API Playground"):
            gr.Markdown("API-style calls for the demo. JSON in, JSON out.")
            action2 = gr.Dropdown(label="Action", choices=["memory.write", "memory.search", "receipt.verify", "audit.export"], value="memory.search")
            payload = gr.Textbox(label="JSON payload", lines=10, value=json.dumps({"query": "Nova", "k": 8}, indent=2))
            call_btn = gr.Button("Call", variant="primary")
            api_out = gr.Textbox(label="Response", lines=14)
            call_btn.click(api_playground, inputs=[session_id, action2, payload, retrieval_k], outputs=[session_id, api_out])

        # Interactive cost model backing the pitch's token-budget claim.
        with gr.Tab("Token Budget"):
            gr.Markdown("Why fixed retrieval budgets win as sessions grow.")
            n = gr.Slider(1, 500, value=50, step=1, label="Turns (n)")
            B = gr.Slider(0, 2000, value=500, step=50, label="Base framing tokens per call (B)")
            M = gr.Slider(0, 2000, value=650, step=50, label="Avg tokens added per turn to history (M)")
            W = gr.Slider(0, 8000, value=2000, step=100, label="Fixed retrieval budget tokens per call (W)")
            calc = gr.Button("Compute", variant="primary")
            tbl = gr.Dataframe(headers=["metric", "value"], datatype=["str", "str"], row_count=4, col_count=2)
            calc.click(lambda nn, bb, mm, ww: token_savings(nn, bb, mm, ww), inputs=[n, B, M, W], outputs=[tbl])

        with gr.Tab("How to Use"):
            gr.Markdown(HOW_TO_MD)

# NOTE(review): launched unconditionally at import time; consider an
# `if __name__ == "__main__":` guard if this module is ever imported.
demo.launch()