RFTSystems commited on
Commit
b7aa15c
·
verified ·
1 Parent(s): fd392f8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +449 -0
app.py ADDED
@@ -0,0 +1,449 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import time
4
+ import uuid
5
+ import sqlite3
6
+ import hashlib
7
+ from dataclasses import dataclass
8
+ from typing import List, Dict, Any, Optional, Tuple
9
+
10
+ import gradio as gr
11
+
12
+ # =========================
13
+ # RFT Memory Receipt Engine
14
+ # =========================
15
+
16
+ # For HF Spaces w/ persistent storage, set BASE_DIR to /data/rftmem in Space settings (env var)
17
+ BASE_DIR = os.environ.get("RFT_MEM_BASE", "var/rftmem")
18
+ os.makedirs(BASE_DIR, exist_ok=True)
19
+
20
+ def sha256_bytes(b: bytes) -> str:
21
+ return hashlib.sha256(b).hexdigest()
22
+
23
+ def sha256_str(s: str) -> str:
24
+ return sha256_bytes(s.encode("utf-8"))
25
+
26
+ def now_ms() -> int:
27
+ return int(time.time() * 1000)
28
+
29
+ def atomic_write(path: str, data: bytes) -> None:
30
+ tmp = path + ".tmp"
31
+ with open(tmp, "wb") as f:
32
+ f.write(data)
33
+ f.flush()
34
+ os.fsync(f.fileno())
35
+ os.replace(tmp, path)
36
+
37
+ @dataclass
38
+ class RetrievalHit:
39
+ event_id: str
40
+ role: str
41
+ text: str
42
+ ts_ms: int
43
+ digest: str
44
+ chain_hash: str
45
+ score: float
46
+
47
+ class RFTMemoryStore:
48
+ """
49
+ Source of truth: append-only JSONL per session + hash-chained ledger.
50
+ Index: SQLite (events table + FTS5) for lexical search.
51
+ Receipts: JSON files saved per assistant turn.
52
+ """
53
+
54
+ def __init__(self, base_dir: str):
55
+ self.base_dir = base_dir
56
+ self.db_path = os.path.join(base_dir, "index.sqlite")
57
+ self._init_db()
58
+
59
+ def _init_db(self):
60
+ os.makedirs(self.base_dir, exist_ok=True)
61
+ con = sqlite3.connect(self.db_path)
62
+ cur = con.cursor()
63
+
64
+ # Main event table
65
+ cur.execute("""
66
+ CREATE TABLE IF NOT EXISTS events (
67
+ session_id TEXT,
68
+ event_id TEXT PRIMARY KEY,
69
+ ts_ms INTEGER,
70
+ role TEXT,
71
+ text TEXT,
72
+ digest TEXT,
73
+ prev_hash TEXT,
74
+ chain_hash TEXT,
75
+ collapse REAL
76
+ )
77
+ """)
78
+ # FTS (lexical retrieval)
79
+ cur.execute("""
80
+ CREATE VIRTUAL TABLE IF NOT EXISTS events_fts USING fts5(
81
+ event_id,
82
+ session_id,
83
+ text,
84
+ content=''
85
+ )
86
+ """)
87
+ # Receipts table (optional convenience)
88
+ cur.execute("""
89
+ CREATE TABLE IF NOT EXISTS receipts (
90
+ receipt_id TEXT PRIMARY KEY,
91
+ session_id TEXT,
92
+ ts_ms INTEGER,
93
+ prompt_hash TEXT,
94
+ response_hash TEXT,
95
+ receipt_path TEXT
96
+ )
97
+ """)
98
+ con.commit()
99
+ con.close()
100
+
101
+ # -------------
102
+ # Session layout
103
+ # -------------
104
+ def session_dir(self, session_id: str) -> str:
105
+ d = os.path.join(self.base_dir, "sessions", session_id)
106
+ os.makedirs(d, exist_ok=True)
107
+ return d
108
+
109
+ def session_log_path(self, session_id: str) -> str:
110
+ return os.path.join(self.session_dir(session_id), "events.jsonl")
111
+
112
+ def receipts_dir(self, session_id: str) -> str:
113
+ d = os.path.join(self.session_dir(session_id), "receipts")
114
+ os.makedirs(d, exist_ok=True)
115
+ return d
116
+
117
+ # -------------------
118
+ # RFT collapse scoring
119
+ # -------------------
120
+ def collapse_score(self, session_id: str, role: str, text: str) -> float:
121
+ """
122
+ Simple, effective heuristic:
123
+ - novelty vs last ~20 events (token Jaccard)
124
+ - role weight (user/tool > assistant)
125
+ """
126
+ role_w = {"user": 1.0, "tool": 0.9, "assistant": 0.6}.get(role, 0.7)
127
+ tokens = set(t.lower() for t in text.split() if t.strip())
128
+ if not tokens:
129
+ return 0.0
130
+
131
+ recent = self.get_events(session_id, limit=20)
132
+ recent_tokens = set()
133
+ for e in recent:
134
+ recent_tokens |= set(t.lower() for t in e["text"].split() if t.strip())
135
+
136
+ # novelty = fraction of tokens not seen recently
137
+ unseen = len(tokens - recent_tokens)
138
+ novelty = unseen / max(1, len(tokens))
139
+
140
+ # length sanity cap (avoid 1-word spikes)
141
+ length_factor = min(1.0, len(tokens) / 30.0)
142
+
143
+ score = role_w * (0.65 * novelty + 0.35 * length_factor)
144
+ return float(max(0.0, min(1.0, score)))
145
+
146
+ # ----------------
147
+ # Append-only write
148
+ # ----------------
149
+ def append_event(self, session_id: str, role: str, text: str) -> Dict[str, Any]:
150
+ event_id = uuid.uuid4().hex
151
+ ts = now_ms()
152
+ payload = {
153
+ "session_id": session_id,
154
+ "event_id": event_id,
155
+ "ts_ms": ts,
156
+ "role": role,
157
+ "text": text
158
+ }
159
+ digest = sha256_str(json.dumps(payload, sort_keys=True))
160
+ prev_hash = self.get_last_chain_hash(session_id) or "0"*64
161
+ chain_hash = sha256_str(prev_hash + digest)
162
+
163
+ collapse = self.collapse_score(session_id, role, text)
164
+
165
+ rec = {
166
+ **payload,
167
+ "digest": digest,
168
+ "prev_hash": prev_hash,
169
+ "chain_hash": chain_hash,
170
+ "collapse": collapse
171
+ }
172
+
173
+ # 1) Write JSONL source-of-truth
174
+ log_path = self.session_log_path(session_id)
175
+ line = (json.dumps(rec, ensure_ascii=False) + "\n").encode("utf-8")
176
+ with open(log_path, "ab") as f:
177
+ f.write(line)
178
+ f.flush()
179
+ os.fsync(f.fileno())
180
+
181
+ # 2) Update SQLite index (synchronous for demo simplicity)
182
+ con = sqlite3.connect(self.db_path)
183
+ cur = con.cursor()
184
+ cur.execute("""
185
+ INSERT INTO events(session_id,event_id,ts_ms,role,text,digest,prev_hash,chain_hash,collapse)
186
+ VALUES(?,?,?,?,?,?,?,?,?)
187
+ """, (session_id, event_id, ts, role, text, digest, prev_hash, chain_hash, collapse))
188
+ cur.execute("INSERT INTO events_fts(event_id, session_id, text) VALUES(?,?,?)", (event_id, session_id, text))
189
+ con.commit()
190
+ con.close()
191
+
192
+ return rec
193
+
194
+ def get_last_chain_hash(self, session_id: str) -> Optional[str]:
195
+ con = sqlite3.connect(self.db_path)
196
+ cur = con.cursor()
197
+ cur.execute("""
198
+ SELECT chain_hash FROM events
199
+ WHERE session_id=?
200
+ ORDER BY ts_ms DESC
201
+ LIMIT 1
202
+ """, (session_id,))
203
+ row = cur.fetchone()
204
+ con.close()
205
+ return row[0] if row else None
206
+
207
+ def get_events(self, session_id: str, limit: int = 200) -> List[Dict[str, Any]]:
208
+ con = sqlite3.connect(self.db_path)
209
+ cur = con.cursor()
210
+ cur.execute("""
211
+ SELECT event_id, ts_ms, role, text, digest, prev_hash, chain_hash, collapse
212
+ FROM events
213
+ WHERE session_id=?
214
+ ORDER BY ts_ms ASC
215
+ LIMIT ?
216
+ """, (session_id, limit))
217
+ rows = cur.fetchall()
218
+ con.close()
219
+ out = []
220
+ for r in rows:
221
+ out.append({
222
+ "event_id": r[0],
223
+ "ts_ms": r[1],
224
+ "role": r[2],
225
+ "text": r[3],
226
+ "digest": r[4],
227
+ "prev_hash": r[5],
228
+ "chain_hash": r[6],
229
+ "collapse": r[7],
230
+ })
231
+ return out
232
+
233
+ # -------------------------
234
+ # Hybrid retrieval (lexical)
235
+ # -------------------------
236
+ def search_lexical(self, session_id: str, query: str, k: int = 8) -> List[RetrievalHit]:
237
+ con = sqlite3.connect(self.db_path)
238
+ cur = con.cursor()
239
+
240
+ # FTS5 bm25 ranking (lower is better). We'll invert into a positive score.
241
+ cur.execute("""
242
+ SELECT e.event_id, e.role, e.text, e.ts_ms, e.digest, e.chain_hash,
243
+ bm25(events_fts) as rank
244
+ FROM events_fts
245
+ JOIN events e ON e.event_id = events_fts.event_id
246
+ WHERE events_fts.text MATCH ? AND e.session_id=?
247
+ ORDER BY rank ASC
248
+ LIMIT ?
249
+ """, (query, session_id, k))
250
+
251
+ rows = cur.fetchall()
252
+ con.close()
253
+
254
+ hits = []
255
+ for (eid, role, text, ts, digest, chain_hash, rank) in rows:
256
+ score = 1.0 / (1.0 + float(rank if rank is not None else 0.0))
257
+ hits.append(RetrievalHit(eid, role, text, ts, digest, chain_hash, score))
258
+ return hits
259
+
260
+ # -------------------------
261
+ # Receipts + verification
262
+ # -------------------------
263
+ def write_receipt(self, session_id: str, user_text: str, retrieved: List[RetrievalHit], prompt: str, response: str) -> str:
264
+ receipt_id = uuid.uuid4().hex
265
+ ts = now_ms()
266
+
267
+ receipt = {
268
+ "receipt_id": receipt_id,
269
+ "session_id": session_id,
270
+ "ts_ms": ts,
271
+ "query": user_text,
272
+ "retrieval": [{
273
+ "event_id": h.event_id,
274
+ "digest": h.digest,
275
+ "chain_hash": h.chain_hash,
276
+ "score": h.score,
277
+ "role": h.role,
278
+ "text": h.text
279
+ } for h in retrieved],
280
+ "prompt_hash": sha256_str(prompt),
281
+ "response_hash": sha256_str(response),
282
+ "engine": {
283
+ "name": "RFT Memory Receipt Engine",
284
+ "version": "0.1-demo",
285
+ "notes": "append-only JSONL + SQLite FTS + hash-chained ledger"
286
+ }
287
+ }
288
+
289
+ path = os.path.join(self.receipts_dir(session_id), f"{receipt_id}.json")
290
+ atomic_write(path, json.dumps(receipt, indent=2, ensure_ascii=False).encode("utf-8"))
291
+
292
+ con = sqlite3.connect(self.db_path)
293
+ cur = con.cursor()
294
+ cur.execute("""
295
+ INSERT INTO receipts(receipt_id, session_id, ts_ms, prompt_hash, response_hash, receipt_path)
296
+ VALUES(?,?,?,?,?,?)
297
+ """, (receipt_id, session_id, ts, receipt["prompt_hash"], receipt["response_hash"], path))
298
+ con.commit()
299
+ con.close()
300
+ return path
301
+
302
+ def verify_receipt(self, receipt_json: Dict[str, Any]) -> Tuple[bool, str]:
303
+ session_id = receipt_json.get("session_id")
304
+ if not session_id:
305
+ return False, "Missing session_id."
306
+
307
+ # 1) Verify the referenced events exist and digests match
308
+ con = sqlite3.connect(self.db_path)
309
+ cur = con.cursor()
310
+
311
+ for item in receipt_json.get("retrieval", []):
312
+ eid = item.get("event_id")
313
+ expected_digest = item.get("digest")
314
+ expected_chain = item.get("chain_hash")
315
+
316
+ cur.execute("SELECT digest, chain_hash FROM events WHERE event_id=? AND session_id=?", (eid, session_id))
317
+ row = cur.fetchone()
318
+ if not row:
319
+ con.close()
320
+ return False, f"Event not found: {eid}"
321
+ if row[0] != expected_digest:
322
+ con.close()
323
+ return False, f"Digest mismatch for {eid}"
324
+ if row[1] != expected_chain:
325
+ con.close()
326
+ return False, f"Chain hash mismatch for {eid}"
327
+
328
+ con.close()
329
+ return True, "Receipt verified: referenced events exist and hashes match."
330
+
331
+ store = RFTMemoryStore(BASE_DIR)
332
+
333
+ # --------------------
334
+ # Gradio demonstration
335
+ # --------------------
336
+ def new_session():
337
+ return uuid.uuid4().hex
338
+
339
+ def format_events(events: List[Dict[str, Any]]) -> str:
340
+ lines = []
341
+ for e in events[-200:]:
342
+ lines.append(
343
+ f"{time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(e['ts_ms']/1000))} | {e['role']}\n"
344
+ f"{e['text']}\n"
345
+ f"event_id={e['event_id']} collapse={e['collapse']:.2f}\n"
346
+ f"digest={e['digest']}\n"
347
+ f"chain={e['chain_hash']}\n"
348
+ f"{'-'*60}"
349
+ )
350
+ return "\n".join(lines)
351
+
352
+ def chat_turn(session_id: str, user_msg: str, retrieval_k: int, use_generation: bool):
353
+ if not session_id:
354
+ session_id = new_session()
355
+
356
+ store.append_event(session_id, "user", user_msg)
357
+
358
+ # Lexical retrieval (demo: per-session scope)
359
+ hits = store.search_lexical(session_id, user_msg, k=retrieval_k)
360
+
361
+ memories = "\n".join([f"- ({h.role}) {h.text}" for h in hits]) if hits else "(none)"
362
+ prompt = (
363
+ "SYSTEM: You are a transparent assistant. Use retrieved memories if relevant.\n"
364
+ f"RETRIEVED MEMORIES:\n{memories}\n\n"
365
+ f"USER:\n{user_msg}\n"
366
+ )
367
+
368
+ # Demo response (deterministic by default). Optional "generation" placeholder.
369
+ if use_generation:
370
+ # Keep this lightweight: no external APIs in demo.
371
+ response = (
372
+ "I’m running in demo mode (no external model calls). "
373
+ "Here are the most relevant retrieved memories I would use:\n\n"
374
+ f"{memories}\n\n"
375
+ "If you want real generation, plug your local model call here."
376
+ )
377
+ else:
378
+ response = (
379
+ "RFT Memory Engine (demo): I retrieved the following memory slices:\n\n"
380
+ f"{memories}\n\n"
381
+ "I can verify exactly which memories were used via the receipt."
382
+ )
383
+
384
+ store.append_event(session_id, "assistant", response)
385
+
386
+ receipt_path = store.write_receipt(session_id, user_msg, hits, prompt, response)
387
+
388
+ events = store.get_events(session_id, limit=500)
389
+ ledger = format_events(events)
390
+
391
+ # Show retrieved block + receipt for transparency
392
+ retrieved_view = "\n".join([f"{h.score:.4f} | {h.role} | {h.text}" for h in hits]) if hits else "(none)"
393
+ return session_id, response, retrieved_view, ledger, receipt_path
394
+
395
+ def search_memory(session_id: str, query: str, k: int):
396
+ if not session_id:
397
+ return "(no session)"
398
+ hits = store.search_lexical(session_id, query, k=k)
399
+ if not hits:
400
+ return "(no hits)"
401
+ return "\n".join([f"{h.score:.4f} | {h.role} | {h.text}\n event_id={h.event_id}\n digest={h.digest}\n" for h in hits])
402
+
403
+ def verify_uploaded_receipt(file_obj):
404
+ if file_obj is None:
405
+ return "Upload a receipt JSON file."
406
+ with open(file_obj.name, "r", encoding="utf-8") as f:
407
+ data = json.load(f)
408
+ ok, msg = store.verify_receipt(data)
409
+ return f"{'✅' if ok else '❌'} {msg}"
410
+
411
+ with gr.Blocks(title="RFT Memory Receipt Engine (Local Persistence Demo)") as demo:
412
+ gr.Markdown(
413
+ "# RFT Memory Receipt Engine\n"
414
+ "Append-only session ledger + SQLite FTS retrieval + cryptographic receipts.\n\n"
415
+ "**Goal:** demonstrate durable memory + verifiable retrieval lineage."
416
+ )
417
+
418
+ session_id = gr.Textbox(label="Session ID", value=new_session())
419
+ with gr.Row():
420
+ retrieval_k = gr.Slider(1, 20, value=8, step=1, label="Retrieval K")
421
+ use_generation = gr.Checkbox(value=False, label="Show 'generation' placeholder")
422
+
423
+ user_msg = gr.Textbox(label="User message", placeholder="Type a message…")
424
+ send = gr.Button("Send")
425
+
426
+ assistant_out = gr.Textbox(label="Assistant output", lines=8)
427
+ retrieved_out = gr.Textbox(label="Retrieved memory slices (what influenced this turn)", lines=8)
428
+ ledger_out = gr.Textbox(label="Session Ledger (hash-chained)", lines=16)
429
+ receipt_path = gr.Textbox(label="Receipt saved at (server path)", lines=1)
430
+
431
+ send.click(
432
+ chat_turn,
433
+ inputs=[session_id, user_msg, retrieval_k, use_generation],
434
+ outputs=[session_id, assistant_out, retrieved_out, ledger_out, receipt_path],
435
+ )
436
+
437
+ gr.Markdown("## Manual Search")
438
+ search_q = gr.Textbox(label="Search query")
439
+ search_btn = gr.Button("Search")
440
+ search_results = gr.Textbox(label="Search results", lines=10)
441
+ search_btn.click(search_memory, inputs=[session_id, search_q, retrieval_k], outputs=[search_results])
442
+
443
+ gr.Markdown("## Verify a Receipt")
444
+ receipt_file = gr.File(label="Upload receipt JSON")
445
+ verify_btn = gr.Button("Verify Receipt")
446
+ verify_out = gr.Textbox(label="Verification result")
447
+ verify_btn.click(verify_uploaded_receipt, inputs=[receipt_file], outputs=[verify_out])
448
+
449
+ demo.launch()