RFTSystems committed on
Commit
d081da0
·
verified ·
1 Parent(s): c5dc8bb

Update rft_flightrecorder.py

Browse files
Files changed (1) hide show
  1. rft_flightrecorder.py +112 -805
rft_flightrecorder.py CHANGED
@@ -1,837 +1,144 @@
1
- import json
2
  import os
3
- import uuid
4
- import hashlib
 
5
  import zipfile
6
- from datetime import datetime, timezone
7
- from typing import Any, Dict, List, Optional, Tuple
8
 
9
- from filelock import FileLock, Timeout
10
 
11
- from cryptography.hazmat.primitives.asymmetric.ed25519 import (
12
- Ed25519PrivateKey,
13
- Ed25519PublicKey,
14
- )
15
- from cryptography.hazmat.primitives import serialization
16
 
 
 
 
 
17
 
18
- EVENT_SPEC = "rft-flight-event-v0"
19
- ROOT_SPEC = "rft-flight-session-root-v0"
20
- DEFAULT_LOG_PATH = "flightlog.jsonl"
21
 
22
- # ---------- Locking ----------
23
- LOCK_TIMEOUT_S = 10 # short on purpose; UI should not hang
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- # ---------- Public abuse limits (Import Bundle) ----------
26
- MAX_BUNDLE_BYTES = 15 * 1024 * 1024 # 15 MB zip upload cap
27
- MAX_ZIP_MEMBER_BYTES = 25 * 1024 * 1024 # 25 MB decompressed member cap
28
- MAX_EVENTS_PER_BUNDLE = 50_000
29
- MAX_EVENT_LINE_BYTES = 200_000 # 200 KB per JSONL line (prevents huge single lines)
30
- MAX_ZIP_RATIO = 200.0 # zip-bomb heuristic (file_size / compress_size)
31
 
 
32
 
33
- # ============================================================
34
- # Canonical JSON + hashing
35
- # ============================================================
36
 
37
def canon(obj) -> bytes:
    """Encode *obj* as canonical JSON bytes.

    Sorted keys and compact separators make the encoding stable across
    machines, so hashes derived from it are reproducible.
    """
    text = json.dumps(obj, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
    return text.encode("utf-8")
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
def sha256_hex(b: bytes) -> str:
    """Return the lowercase hex SHA-256 digest of *b*."""
    digest = hashlib.sha256(b)
    return digest.hexdigest()
49
 
 
50
 
51
def now_utc_iso() -> str:
    """Current UTC time as a timezone-aware ISO-8601 string."""
    stamp = datetime.now(timezone.utc)
    return stamp.isoformat()
53
 
 
 
 
 
54
 
55
def short(h: str, n: int = 12) -> str:
    """Trim hash string *h* to at most *n* characters (whitespace stripped)."""
    cleaned = (h or "").strip()
    if len(cleaned) > n:
        return cleaned[:n]
    return cleaned
 
 
 
 
 
 
 
 
 
58
 
 
59
 
60
- # ============================================================
61
- # File locking helpers
62
- # ============================================================
63
 
64
def _lock_for(path: str) -> FileLock:
    """Build the sibling ``<path>.lock`` FileLock with the standard short timeout."""
    return FileLock(path + ".lock", timeout=LOCK_TIMEOUT_S)
 
66
 
 
 
 
 
 
 
67
 
68
def _read_jsonl_locked(path: str) -> Tuple[List[Dict[str, Any]], int]:
    """Read the JSONL log at *path* while holding its file lock."""
    lock = _lock_for(path)
    with lock:
        return read_jsonl(path)
71
 
 
 
 
 
 
 
72
 
73
- # ============================================================
74
- # Key management (Ed25519)
75
- # ============================================================
76
 
77
def gen_keys() -> Tuple[str, str]:
    """Generate a fresh Ed25519 keypair.

    Returns (private_key_hex, public_key_hex) as raw-encoded hex strings.
    """
    private_key = Ed25519PrivateKey.generate()

    raw_sk = private_key.private_bytes(
        encoding=serialization.Encoding.Raw,
        format=serialization.PrivateFormat.Raw,
        encryption_algorithm=serialization.NoEncryption(),
    )
    raw_pk = private_key.public_key().public_bytes(
        encoding=serialization.Encoding.Raw,
        format=serialization.PublicFormat.Raw,
    )
    return raw_sk.hex(), raw_pk.hex()
91
-
92
-
93
def load_sk(sk_hex: str) -> Ed25519PrivateKey:
    """Reconstruct an Ed25519 private key from its raw hex encoding."""
    raw = bytes.fromhex(sk_hex.strip())
    return Ed25519PrivateKey.from_private_bytes(raw)
95
-
96
-
97
def load_pk(pk_hex: str) -> Ed25519PublicKey:
    """Reconstruct an Ed25519 public key from its raw hex encoding."""
    raw = bytes.fromhex(pk_hex.strip())
    return Ed25519PublicKey.from_public_bytes(raw)
99
-
100
-
101
- # ============================================================
102
- # Payload parsing
103
- # ============================================================
104
-
105
def parse_payload_text(payload_text: str) -> Dict[str, Any]:
    """Coerce free-form payload text into a structured dict.

    JSON objects pass through unchanged; other JSON values are wrapped under
    "_value"; non-JSON text is stored under "_text"; empty input yields {}.
    """
    txt = (payload_text or "").strip()
    if not txt:
        return {}
    try:
        parsed = json.loads(txt)
    except Exception:
        return {"_text": txt}
    return parsed if isinstance(parsed, dict) else {"_value": parsed}
117
-
118
-
119
- # ============================================================
120
- # Log IO
121
- # ============================================================
122
-
123
def read_jsonl(path: str) -> Tuple[List[Dict[str, Any]], int]:
    """Read a JSONL file; return (dict records, count of unusable lines).

    A missing file yields ([], 0). Blank lines are ignored; lines that fail
    to parse or parse to a non-dict value are counted as corrupt.
    """
    if not os.path.exists(path):
        return [], 0

    records: List[Dict[str, Any]] = []
    bad = 0
    with open(path, "r", encoding="utf-8") as fh:
        for raw in fh:
            raw = raw.strip()
            if not raw:
                continue
            try:
                rec = json.loads(raw)
            except Exception:
                bad += 1
                continue
            if isinstance(rec, dict):
                records.append(rec)
            else:
                bad += 1
    return records, bad
144
-
145
-
146
def append_jsonl(path: str, obj: Dict[str, Any]) -> None:
    """Append *obj* as one JSON line to *path* (UTF-8, no ASCII escaping)."""
    line = json.dumps(obj, ensure_ascii=False)
    with open(path, "a", encoding="utf-8") as fh:
        fh.write(line + "\n")
149
-
150
-
151
- # ============================================================
152
- # Core event model
153
- # ============================================================
154
-
155
def make_event_core(
    session_id: str,
    seq: int,
    event_type: str,
    payload: Dict[str, Any],
    parent_event_hash: str,
    prev_event_hash: str,
    meta: Dict[str, Any],
) -> Dict[str, Any]:
    """Build the hashable core of a flight event (everything except hash/signature).

    A falsy prev hash is replaced with the 64-zero genesis marker; non-dict
    payload/meta values are defensively wrapped/replaced.
    """
    if prev_event_hash:
        prev = prev_event_hash.strip()
    else:
        prev = "0" * 64

    return {
        "spec": EVENT_SPEC,
        "ts_utc": now_utc_iso(),
        "session_id": session_id,
        "seq": int(seq),
        "event_type": (event_type or "note").strip(),
        "parent_event_hash_sha256": (parent_event_hash or "").strip(),
        "prev_event_hash_sha256": prev,
        "payload": payload if isinstance(payload, dict) else {"_payload": payload},
        "meta": meta if isinstance(meta, dict) else {},
    }
175
-
176
-
177
def events_for_session(all_events: List[Dict[str, Any]], session_id: str) -> List[Dict[str, Any]]:
    """Select the hashed flight events belonging to *session_id* (order preserved)."""
    sid = (session_id or "").strip()
    if not sid:
        return []

    matched: List[Dict[str, Any]] = []
    for ev in all_events:
        if not isinstance(ev, dict):
            continue
        if ev.get("session_id") != sid or ev.get("spec") != EVENT_SPEC:
            continue
        if "event_hash_sha256" in ev:
            matched.append(ev)
    return matched
188
-
189
-
190
def last_event_for_session(all_events: List[Dict[str, Any]], session_id: str) -> Optional[Dict[str, Any]]:
    """Return the session's highest-seq event, or None when it has none.

    Stable sort means that among duplicate seq values the latest-appearing
    event wins, matching append order.
    """
    session_events = events_for_session(all_events, session_id)
    if not session_events:
        return None
    ordered = sorted(session_events, key=lambda ev: int(ev.get("seq", 0)))
    return ordered[-1]
196
-
197
-
198
def next_seq(all_events: List[Dict[str, Any]], session_id: str) -> int:
    """Sequence number (1-based) the session's next event should use."""
    latest = last_event_for_session(all_events, session_id)
    if latest is None:
        return 1
    try:
        return int(latest.get("seq", 0)) + 1
    except Exception:
        # Unparsable seq in the log: restart numbering rather than crash.
        return 1
206
-
207
-
208
- # ============================================================
209
- # Append events (hash-chained; optional signature)
210
- # ============================================================
211
-
212
def append_event(
    log_path: str,
    session_id: str,
    event_type: str,
    payload_text: str,
    parent_event_hash: str,
    sign_event: bool,
    sk_hex: str,
    model_id: str,
    run_mode: str,
) -> Tuple[Optional[Dict[str, Any]], str]:
    """Append one hash-chained (optionally signed) event; return (event, message).

    The whole read -> seq/prev computation -> build -> append sequence runs
    under the log's file lock so concurrent writers cannot fork the chain.
    On any failure (lock timeout, bad key, I/O error) returns (None, reason).
    """
    sid = (session_id or "").strip()
    if not sid:
        return None, "Missing session_id."

    try:
        with _lock_for(log_path):
            all_events, _corrupt = read_jsonl(log_path)

            last = last_event_for_session(all_events, sid)
            prev_hash = last.get("event_hash_sha256") if last else ("0" * 64)
            seq = next_seq(all_events, sid)

            # Default parent to the previous hash: simple linear causality.
            parent = (parent_event_hash or "").strip()
            if not parent and seq > 1:
                parent = prev_hash

            core = make_event_core(
                session_id=sid,
                seq=seq,
                event_type=event_type,
                payload=parse_payload_text(payload_text),
                parent_event_hash=parent,
                prev_event_hash=prev_hash,
                meta={
                    "model_id": (model_id or "unknown").strip(),
                    "run_mode": (run_mode or "unknown").strip(),
                },
            )

            event_hash = sha256_hex(canon(core))
            event = dict(core)
            event["event_hash_sha256"] = event_hash

            if sign_event:
                if not sk_hex or not sk_hex.strip():
                    return None, "Signing enabled but private key is missing."
                try:
                    signature = load_sk(sk_hex).sign(bytes.fromhex(event_hash))
                except Exception:
                    return None, "Failed to sign event (invalid private key?)."
                event["signature_ed25519"] = signature.hex()

            append_jsonl(log_path, event)

        return event, f"OK. Appended event #{seq} ({event_type}). Hash: {event_hash}"

    except Timeout:
        return None, "Busy: log is locked (try again)."
    except Exception:
        return None, "Failed to append event (unexpected error)."
280
-
281
-
282
def start_session(
    log_path: str,
    model_id: str,
    run_mode: str,
    notes: str,
    sign_start: bool,
    sk_hex: str,
) -> Tuple[str, str]:
    """Create a new session and append its session_start event.

    Returns (session_id, message); session_id is "" on failure.
    """
    sid = uuid.uuid4().hex
    start_payload = json.dumps({"notes": (notes or "").strip()}, ensure_ascii=False)

    event, message = append_event(
        log_path=log_path,
        session_id=sid,
        event_type="session_start",
        payload_text=start_payload,
        parent_event_hash="",
        sign_event=sign_start,
        sk_hex=sk_hex,
        model_id=model_id,
        run_mode=run_mode,
    )
    if not event:
        return "", message
    return sid, f"OK. Session started: {sid} (first hash: {event['event_hash_sha256']})"
307
-
308
-
309
def finalise_session(
    log_path: str,
    session_id: str,
    sign_anchor: bool,
    sk_hex: str,
    model_id: str,
    run_mode: str,
) -> Tuple[Optional[Dict[str, Any]], str]:
    """Compute the session anchor and append a session_end event carrying it.

    The whole operation holds the log lock so no event can slip in between
    anchor computation and the session_end append. Returns (anchor, message);
    anchor is None with a reason string on failure.
    """
    sid = (session_id or "").strip()
    if not sid:
        return None, "Missing session_id."

    try:
        with _lock_for(log_path):
            all_events, _corrupt = read_jsonl(log_path)
            session_events = events_for_session(all_events, sid)
            if not session_events:
                return None, "No events found for this session."

            session_events.sort(key=lambda ev: int(ev.get("seq", 0)))

            # The anchor covers the chain as it stands NOW; it must NOT
            # include the session_end event itself (avoids circularity).
            root_core = {
                "spec": ROOT_SPEC,
                "session_id": sid,
                "first_event_hash_sha256": session_events[0]["event_hash_sha256"],
                "last_event_hash_sha256": session_events[-1]["event_hash_sha256"],
                "event_count": len(session_events),
            }
            root_hash = sha256_hex(canon(root_core))

            anchor = dict(root_core)
            anchor["root_hash_sha256"] = root_hash
            anchor["created_utc"] = now_utc_iso()
            anchor["model_id"] = (model_id or "unknown").strip()
            anchor["run_mode"] = (run_mode or "unknown").strip()

            if sign_anchor:
                if not sk_hex or not sk_hex.strip():
                    return None, "Anchor signing enabled but private key is missing."
                try:
                    sig = load_sk(sk_hex).sign(bytes.fromhex(root_hash))
                    anchor["signature_ed25519"] = sig.hex()
                except Exception:
                    return None, "Failed to sign anchor (invalid private key?)."

            # Append the session_end event manually, still under the same lock.
            meta = {
                "model_id": (model_id or "unknown").strip(),
                "run_mode": (run_mode or "unknown").strip(),
            }
            prev_hash = session_events[-1]["event_hash_sha256"]
            end_payload_text = json.dumps({"anchor": anchor}, ensure_ascii=False)

            core = make_event_core(
                session_id=sid,
                seq=int(session_events[-1].get("seq", 0)) + 1,
                event_type="session_end",
                payload=parse_payload_text(end_payload_text),
                parent_event_hash=prev_hash,
                prev_event_hash=prev_hash,
                meta=meta,
            )
            event_hash = sha256_hex(canon(core))
            event = dict(core)
            event["event_hash_sha256"] = event_hash

            if sign_anchor:
                try:
                    sig = load_sk(sk_hex).sign(bytes.fromhex(event_hash))
                    event["signature_ed25519"] = sig.hex()
                except Exception:
                    return None, "Failed to sign session_end event."

            append_jsonl(log_path, event)

        return anchor, f"OK. Session finalised. Root hash: {root_hash} (session_end hash: {event_hash})"

    except Timeout:
        return None, "Busy: log is locked (try again)."
    except Exception:
        return None, "Failed to finalise session (unexpected error)."
402
-
403
-
404
- # ============================================================
405
- # Verification
406
- # ============================================================
407
-
408
def _anchor_expected_from_events(evs_sorted: List[Dict[str, Any]], session_end_index: int) -> str:
    """Recompute the anchor root hash over the chain BEFORE the session_end event.

    The session_end payload embeds the anchor, so the anchor can only cover
    the events preceding it (no circularity).
    """
    chain = evs_sorted[:session_end_index]
    root_core = {
        "spec": ROOT_SPEC,
        "session_id": chain[0]["session_id"],
        "first_event_hash_sha256": chain[0]["event_hash_sha256"],
        "last_event_hash_sha256": chain[-1]["event_hash_sha256"],
        "event_count": len(chain),
    }
    return sha256_hex(canon(root_core))
426
-
427
-
428
def verify_session_from_events(
    evs: List[Dict[str, Any]],
    session_id: str,
    pk_hex: str = "",
    require_signatures: bool = False,
) -> Tuple[str, bool, str]:
    """Verify a session's event chain: hashes, seq order, optional signatures, anchor.

    Returns (status, ok, report): status is "PASS"/"FAIL" (or a short error
    message for unusable inputs), ok is the boolean verdict, report is a
    line-per-check human-readable log.
    """
    sid = (session_id or "").strip()
    if not sid:
        return "Missing session_id.", False, ""
    if not evs:
        return "No events found for this session.", False, ""

    report: List[str] = []
    ok = True

    pk = None
    if require_signatures:
        if not pk_hex or not pk_hex.strip():
            return "Public key required to verify signatures.", False, ""
        try:
            pk = load_pk(pk_hex)
        except Exception:
            return "Invalid public key.", False, ""

    # Ensure order by seq
    evs = [e for e in evs if isinstance(e, dict)]
    evs.sort(key=lambda x: int(x.get("seq", 0)))

    expected_prev = "0" * 64
    expected_seq = 1

    required_fields = (
        "spec", "ts_utc", "session_id", "seq", "event_type",
        "prev_event_hash_sha256", "payload", "meta", "event_hash_sha256",
    )

    for i, e in enumerate(evs):
        # FIX: the old code reported missing fields but then dereferenced them
        # anyway (e["ts_utc"], etc.), raising an unhandled KeyError on
        # malformed events. Report and skip the event instead; ok is already
        # False, so the session still fails verification.
        missing = [k for k in required_fields if k not in e]
        if missing:
            ok = False
            for k in missing:
                report.append(f"[FAIL] Event {i+1}: missing field '{k}'.")
            continue

        if e.get("spec") != EVENT_SPEC:
            ok = False
            report.append(f"[FAIL] Bad spec at seq {e.get('seq')}.")
            continue

        if e.get("session_id") != sid:
            ok = False
            report.append(f"[FAIL] session_id mismatch at seq {e.get('seq')}.")
            continue

        if int(e.get("seq", -1)) != expected_seq:
            ok = False
            report.append(f"[FAIL] Seq mismatch: got {e.get('seq')} expected {expected_seq}.")
        expected_seq += 1

        if e.get("prev_event_hash_sha256") != expected_prev:
            ok = False
            report.append(
                f"[FAIL] Chain broken at seq {e.get('seq')}: prev {short(e.get('prev_event_hash_sha256'))} expected {short(expected_prev)}."
            )

        # Recompute the event hash over the canonical core.
        core = {
            "spec": e["spec"],
            "ts_utc": e["ts_utc"],
            "session_id": e["session_id"],
            "seq": int(e["seq"]),
            "event_type": e["event_type"],
            "parent_event_hash_sha256": e.get("parent_event_hash_sha256", ""),
            "prev_event_hash_sha256": e["prev_event_hash_sha256"],
            "payload": e["payload"],
            "meta": e["meta"],
        }
        h = sha256_hex(canon(core))
        if h != e["event_hash_sha256"]:
            ok = False
            report.append(f"[FAIL] Hash mismatch at seq {e.get('seq')}: stored {short(e['event_hash_sha256'])} recomputed {short(h)}.")

        if require_signatures:
            sig_hex = (e.get("signature_ed25519") or "").strip()
            if not sig_hex:
                ok = False
                report.append(f"[FAIL] Missing signature at seq {e.get('seq')}.")
            else:
                try:
                    pk.verify(bytes.fromhex(sig_hex), bytes.fromhex(e["event_hash_sha256"]))
                except Exception:
                    ok = False
                    report.append(f"[FAIL] Bad signature at seq {e.get('seq')}.")

        expected_prev = e["event_hash_sha256"]

    # Anchor check: validate the LAST session_end event (if present)
    end_idxs = [i for i, e in enumerate(evs) if e.get("event_type") == "session_end"]
    if end_idxs:
        se_idx = end_idxs[-1]
        se = evs[se_idx]
        anchor = (se.get("payload") or {}).get("anchor")

        if isinstance(anchor, dict) and se_idx > 0:
            expected_root = _anchor_expected_from_events(evs, se_idx)
            if anchor.get("root_hash_sha256") != expected_root:
                ok = False
                report.append("[FAIL] Session anchor root hash does not match the pre-session_end event chain.")
            else:
                report.append("[OK] Session anchor matches the pre-session_end event chain.")
        else:
            report.append("[WARN] session_end found but anchor payload is missing/invalid.")

    if ok:
        report.insert(0, f"[PASS] Session verified: {len(evs)} events, chain intact.")
    else:
        report.insert(0, f"[FAIL] Session verification failed: {len(evs)} events checked.")

    return ("PASS" if ok else "FAIL"), ok, "\n".join(report)
541
-
542
-
543
def verify_session(log_path: str, session_id: str, pk_hex: str, require_signatures: bool) -> Tuple[str, bool, str]:
    """Verify *session_id* directly from the log file at *log_path*."""
    try:
        all_events, _corrupt = _read_jsonl_locked(log_path)
    except Timeout:
        return "FAIL", False, "Busy: log is locked (try again)."

    session_events = events_for_session(all_events, session_id)
    return verify_session_from_events(
        session_events, session_id, pk_hex=pk_hex, require_signatures=require_signatures
    )
551
-
552
-
553
- # ============================================================
554
- # Timeline + listing
555
- # ============================================================
556
-
557
def session_timeline_rows(log_path: str, session_id: str) -> Tuple[List[List[Any]], str]:
    """Flatten a session's events into UI table rows, ordered by seq."""
    sid = (session_id or "").strip()
    if not sid:
        return [], "Missing session_id."

    try:
        all_events, _corrupt = _read_jsonl_locked(log_path)
    except Timeout:
        return [], "Busy: log is locked (try again)."

    session_events = events_for_session(all_events, sid)
    if not session_events:
        return [], "No events found."

    session_events.sort(key=lambda ev: int(ev.get("seq", 0)))

    rows: List[List[Any]] = [
        [
            int(ev.get("seq", 0)),
            ev.get("ts_utc", ""),
            ev.get("event_type", ""),
            ev.get("meta", {}).get("model_id", ""),
            ev.get("meta", {}).get("run_mode", ""),
            ev.get("parent_event_hash_sha256", ""),
            ev.get("prev_event_hash_sha256", ""),
            ev.get("event_hash_sha256", ""),
            "yes" if ev.get("signature_ed25519") else "no",
        ]
        for ev in session_events
    ]
    return rows, f"Loaded {len(rows)} events."
587
-
588
-
589
def get_event_by_hash(log_path: str, session_id: str, event_hash: str) -> Tuple[Optional[Dict[str, Any]], str]:
    """Look up one event by its stored hash within a session."""
    sid = (session_id or "").strip()
    wanted = (event_hash or "").strip()
    if not sid or not wanted:
        return None, "Missing session_id or event hash."

    try:
        all_events, _corrupt = _read_jsonl_locked(log_path)
    except Timeout:
        return None, "Busy: log is locked (try again)."

    for ev in events_for_session(all_events, sid):
        if ev.get("event_hash_sha256") == wanted:
            return ev, "OK."
    return None, "Not found."
605
-
606
-
607
def list_sessions(log_path: str) -> Tuple[List[str], str]:
    """List the distinct session ids present in the log, sorted."""
    try:
        all_events, corrupt = _read_jsonl_locked(log_path)
    except Timeout:
        return [], "Busy: log is locked (try again)."

    counts: Dict[str, int] = {}
    for ev in all_events:
        if not (isinstance(ev, dict) and ev.get("spec") == EVENT_SPEC):
            continue
        sid = ev.get("session_id")
        if sid:
            counts[sid] = counts.get(sid, 0) + 1

    sessions = sorted(counts)
    return sessions, f"Found {len(sessions)} sessions. Corrupt lines ignored: {corrupt}."
622
-
623
-
624
def diagnostics(log_path: str) -> Dict[str, Any]:
    """Summarise the log file: size, event/session/signature counts."""
    try:
        all_events, corrupt = _read_jsonl_locked(log_path)
    except Timeout:
        return {
            "log_path": log_path,
            "exists": os.path.exists(log_path),
            "bytes": os.path.getsize(log_path) if os.path.exists(log_path) else 0,
            "total_events": 0,
            "sessions": 0,
            "signed_events": 0,
            "corrupt_lines_ignored": 0,
            "error": "Busy: log is locked (try again).",
        }

    size = os.path.getsize(log_path) if os.path.exists(log_path) else 0
    session_ids = set()
    signed = 0
    total = 0
    for ev in all_events:
        if not (isinstance(ev, dict) and ev.get("spec") == EVENT_SPEC):
            continue
        total += 1
        if ev.get("session_id"):
            session_ids.add(ev["session_id"])
        if ev.get("signature_ed25519"):
            signed += 1

    return {
        "log_path": log_path,
        "exists": os.path.exists(log_path),
        "bytes": size,
        "total_events": total,
        "sessions": len(session_ids),
        "signed_events": signed,
        "corrupt_lines_ignored": corrupt,
    }
659
-
660
-
661
- # ============================================================
662
- # Export bundle
663
- # ============================================================
664
-
665
def export_session_bundle(log_path: str, session_id: str) -> Tuple[Optional[str], str]:
    """Export a session as a zip bundle (events JSONL + verification report).

    Returns (zip_filename, message); the zip is written to the working
    directory so the caller/UI can offer it for download.
    """
    import tempfile  # local import: only this function needs scratch space

    sid = (session_id or "").strip()
    if not sid:
        return None, "Missing session_id."

    try:
        all_events, _corrupt = _read_jsonl_locked(log_path)
    except Timeout:
        return None, "Busy: log is locked (try again)."

    evs = events_for_session(all_events, sid)
    if not evs:
        return None, "No events found."

    evs.sort(key=lambda x: int(x.get("seq", 0)))
    status, ok, report = verify_session_from_events(evs, sid, pk_hex="", require_signatures=False)

    zip_name = f"rft_flight_bundle_{sid}.zip"

    # FIX: scratch files used to accumulate forever under ./tmp_export (never
    # deleted). Use a temporary directory that is removed once the zip exists.
    with tempfile.TemporaryDirectory() as tmp_dir:
        events_path = os.path.join(tmp_dir, f"{sid}_events.jsonl")
        report_path = os.path.join(tmp_dir, f"{sid}_verify_report.txt")

        with open(events_path, "w", encoding="utf-8") as f:
            for e in evs:
                f.write(json.dumps(e, ensure_ascii=False) + "\n")

        with open(report_path, "w", encoding="utf-8") as f:
            f.write(f"session_id: {sid}\n")
            f.write(f"status: {status}\n")
            f.write(f"ok: {ok}\n\n")
            f.write(report + "\n")

        with zipfile.ZipFile(zip_name, "w", compression=zipfile.ZIP_DEFLATED) as z:
            z.write(events_path, arcname=os.path.basename(events_path))
            z.write(report_path, arcname=os.path.basename(report_path))

    return zip_name, f"OK. Exported {zip_name} ({len(evs)} events)."
704
-
705
-
706
- # ============================================================
707
- # Import bundle (third-party verification)
708
- # ============================================================
709
-
710
def _zip_member_safe(z: zipfile.ZipFile, member: str) -> Tuple[bool, str]:
    """Reject zip members that are oversized or look like a zip bomb.

    Returns (is_safe, reason).
    """
    try:
        info = z.getinfo(member)
    except Exception:
        return False, "Missing events member in zip."

    # Cap on the decompressed size.
    if info.file_size > MAX_ZIP_MEMBER_BYTES:
        return False, "Events file too large (decompressed)."

    # Heuristic: an extreme decompression ratio suggests a zip bomb.
    if info.compress_size:
        ratio = float(info.file_size) / float(info.compress_size)
        if ratio > MAX_ZIP_RATIO:
            return False, "Suspicious zip compression ratio (possible zip bomb)."

    return True, "OK"
727
-
728
-
729
def _read_jsonl_from_zip(z: zipfile.ZipFile, member: str) -> List[Dict[str, Any]]:
    """Parse dict records from a JSONL zip member, enforcing abuse caps.

    Skips blank lines, oversized single lines, unparsable lines, and non-dict
    values; stops once the bundle event cap is exceeded.
    """
    out: List[Dict[str, Any]] = []
    raw_text = z.read(member).decode("utf-8", errors="replace")

    for line in raw_text.splitlines():
        line = line.strip()
        if not line:
            continue

        # Single-line cap (applies to the UTF-8 byte length).
        if len(line.encode("utf-8", errors="ignore")) > MAX_EVENT_LINE_BYTES:
            continue

        try:
            record = json.loads(line)
        except Exception:
            continue
        if isinstance(record, dict):
            out.append(record)

        if len(out) > MAX_EVENTS_PER_BUNDLE:
            break

    return out
753
-
754
-
755
def import_bundle_verify(
    bundle_path: str,
    pk_hex: str = "",
    require_signatures: bool = False,
    store_into_log: bool = False,
    log_path: str = DEFAULT_LOG_PATH,
) -> Tuple[str, bool, str, Optional[str]]:
    """Verify a third-party exported bundle; optionally merge it into the log.

    Returns (status, ok, report, stored_msg).

    FIX: every failure path now returns "FAIL" in the status slot with the
    explanation in the report slot; previously several paths put the
    explanation in the status slot and left the report empty, so callers
    unpacking (status, ok, report, _) saw an inconsistent tuple shape.
    """
    if not bundle_path or not os.path.exists(bundle_path):
        return "FAIL", False, "Missing bundle file.", None

    # Cap the raw upload size before touching the zip at all.
    try:
        if os.path.getsize(bundle_path) > MAX_BUNDLE_BYTES:
            return "FAIL", False, "Bundle too large.", None
    except Exception:
        return "FAIL", False, "Unable to read bundle size.", None

    try:
        with zipfile.ZipFile(bundle_path, "r") as z:
            members = z.namelist()

            # Prefer the exporter's naming; fall back to any .jsonl member.
            events_member = next((m for m in members if m.endswith("_events.jsonl")), None)
            if not events_member:
                events_member = next((m for m in members if m.endswith(".jsonl")), None)
            if not events_member:
                return "FAIL", False, "No .jsonl events file found in bundle.", None

            safe, why = _zip_member_safe(z, events_member)
            if not safe:
                return "FAIL", False, why, None

            evs = _read_jsonl_from_zip(z, events_member)
    except Exception:
        return "FAIL", False, "Failed to read bundle (invalid zip?).", None

    if not evs:
        return "FAIL", False, "Bundle contains no usable events.", None
    if len(evs) > MAX_EVENTS_PER_BUNDLE:
        return "FAIL", False, "Too many events in bundle.", None

    # Session id = the first well-formed flight event's session.
    sid = ""
    for e in evs:
        if e.get("spec") == EVENT_SPEC and e.get("session_id"):
            sid = e["session_id"]
            break
    if not sid:
        return "FAIL", False, "Bundle contains no valid flight events.", None

    evs = [e for e in evs if e.get("spec") == EVENT_SPEC and e.get("session_id") == sid]
    evs.sort(key=lambda x: int(x.get("seq", 0)))

    status, ok, report = verify_session_from_events(
        evs, sid, pk_hex=pk_hex, require_signatures=require_signatures
    )

    if store_into_log and ok:
        # Merge under the log lock, deduplicating by event hash to avoid
        # polluting the local log with repeats.
        try:
            with _lock_for(log_path):
                existing, _ = read_jsonl(log_path)
                seen = {e.get("event_hash_sha256") for e in existing if isinstance(e, dict)}
                for e in evs:
                    h = e.get("event_hash_sha256")
                    if h and h not in seen:
                        append_jsonl(log_path, e)
                        seen.add(h)
        except Timeout:
            return "FAIL", False, "Busy: log is locked (try again).", None

    stored_msg = "Stored into local flightlog.jsonl." if (store_into_log and ok) else None
    return status, ok, report, stored_msg
 
 
1
  import os
2
+ import json
3
+ import threading
4
+ import tempfile
5
  import zipfile
 
 
6
 
7
+ import rft_flightrecorder as fr
8
 
 
 
 
 
 
9
 
10
def two_tab_spam_test(log_path: str, threads: int = 6, events_per_thread: int = 80):
    """Hammer one session from many concurrent threads, then finalise it.

    Asserts: no append errors, the chain verifies before and after finalise,
    and appends are refused once the session has ended. Returns the session id.
    """
    sk_hex, pk_hex = fr.gen_keys()
    sid, msg = fr.start_session(log_path, "brutal-test", "deterministic", "spam test", False, sk_hex)
    assert sid, f"Failed to start session: {msg}"

    errors = []

    def worker(tid: int):
        # Each thread appends its own numbered notes into the shared session.
        for i in range(events_per_thread):
            body = json.dumps({"thread": tid, "i": i}, ensure_ascii=False)
            ev, m = fr.append_event(
                log_path=log_path,
                session_id=sid,
                event_type="note",
                payload_text=body,
                parent_event_hash="",
                sign_event=False,
                sk_hex=sk_hex,
                model_id="brutal-test",
                run_mode="deterministic",
            )
            if ev is None:
                errors.append(m)

    pool = [threading.Thread(target=worker, args=(tid,)) for tid in range(threads)]
    for t in pool:
        t.start()
    for t in pool:
        t.join()

    assert not errors, f"Append errors occurred (first 5): {errors[:5]}"

    status, ok, report = fr.verify_session(log_path, sid, pk_hex="", require_signatures=False)
    assert ok, f"Session failed verification after spam.\n{status}\n{report}"

    anchor, m = fr.finalise_session(log_path, sid, False, sk_hex, "brutal-test", "deterministic")
    assert anchor, f"Finalise failed: {m}"

    # Must refuse writes after end
    ev, m = fr.append_event(
        log_path=log_path,
        session_id=sid,
        event_type="note",
        payload_text='{"post_end": true}',
        parent_event_hash="",
        sign_event=False,
        sk_hex=sk_hex,
        model_id="brutal-test",
        run_mode="deterministic",
    )
    assert ev is None and "ended" in m.lower(), f"Expected append to be refused after session_end, got: {m}"

    status, ok, report = fr.verify_session(log_path, sid, pk_hex="", require_signatures=False)
    assert ok, f"Session failed verification after finalise.\n{status}\n{report}"

    return sid
66
 
 
 
67
 
68
def tamper_zip_test(log_path: str):
    """Export a bundle, corrupt one event's payload, and confirm verification fails.

    The stored hash/signature of the tampered event is left untouched, so
    import verification MUST reject the bundle. Returns True on success.
    """
    sk_hex, _pk_hex = fr.gen_keys()
    sid, msg = fr.start_session(log_path, "brutal-test", "deterministic", "tamper test", False, sk_hex)
    assert sid, f"Failed to start session: {msg}"

    for i in range(25):
        fr.append_event(
            log_path=log_path,
            session_id=sid,
            event_type="note",
            payload_text=json.dumps({"i": i}),
            parent_event_hash="",
            sign_event=False,
            sk_hex=sk_hex,
            model_id="brutal-test",
            run_mode="deterministic",
        )

    fr.finalise_session(log_path, sid, False, sk_hex, "brutal-test", "deterministic")

    zip_name, msg = fr.export_session_bundle(log_path, sid)
    assert zip_name and os.path.exists(zip_name), f"Export failed: {msg}"

    with tempfile.TemporaryDirectory() as td:
        with zipfile.ZipFile(zip_name, "r") as z:
            z.extractall(td)

        events_file = None
        for fn in os.listdir(td):
            if fn.endswith("_events.jsonl"):
                events_file = os.path.join(td, fn)
                break
        assert events_file, "No events jsonl found inside exported zip"

        # FIX: read/write via context managers; the previous bare open() calls
        # leaked file handles and relied on CPython refcounting to flush the
        # rewritten file before it was re-zipped.
        with open(events_file, "r", encoding="utf-8") as f:
            lines = f.read().splitlines()
        assert len(lines) > 5, "Not enough events in bundle to tamper"

        # Tamper: modify payload of an early event but keep stored hash/signature -> MUST FAIL verification
        obj = json.loads(lines[4])
        obj.setdefault("payload", {})
        if isinstance(obj["payload"], dict):
            obj["payload"]["tampered"] = True
        lines[4] = json.dumps(obj, ensure_ascii=False)

        with open(events_file, "w", encoding="utf-8") as f:
            f.write("\n".join(lines) + "\n")

        tampered_zip = os.path.join(td, "tampered.zip")
        with zipfile.ZipFile(tampered_zip, "w", compression=zipfile.ZIP_DEFLATED) as z:
            z.write(events_file, arcname=os.path.basename(events_file))

        status, ok, report, _ = fr.import_bundle_verify(
            bundle_path=tampered_zip,
            pk_hex="",
            require_signatures=False,
            store_into_log=False,
            log_path=log_path,
        )
        assert not ok, f"Tampered bundle incorrectly verified PASS.\n{status}\n{report}"

    return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
def main():
    """Run the brutal-test suite against a throwaway flight log."""
    with tempfile.TemporaryDirectory() as td:
        log_path = os.path.join(td, "flightlog.jsonl")

        sid1 = two_tab_spam_test(log_path, threads=8, events_per_thread=60)
        print(f"[PASS] two_tab_spam_test: session_id={sid1}")

        ok = tamper_zip_test(log_path)
        print(f"[PASS] tamper_zip_test: {ok}")

        print("[ALL PASS]")


if __name__ == "__main__":
    main()