import json
import os
import uuid
import hashlib
import zipfile
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple

from filelock import FileLock, Timeout
from cryptography.hazmat.primitives.asymmetric.ed25519 import (
    Ed25519PrivateKey,
    Ed25519PublicKey,
)
from cryptography.hazmat.primitives import serialization

EVENT_SPEC = "rft-flight-event-v0"
ROOT_SPEC = "rft-flight-session-root-v0"
DEFAULT_LOG_PATH = "flightlog.jsonl"

# Lock timeouts (seconds)
READ_LOCK_TIMEOUT = 5.0
WRITE_LOCK_TIMEOUT = 5.0


# ============================================================
# Canonical JSON + hashing
# ============================================================

def canon(obj) -> bytes:
    """Serialise *obj* to canonical JSON bytes.

    Keys are sorted, separators carry no whitespace and non-ASCII characters
    are emitted as-is, so equal objects always yield the same UTF-8 bytes
    (which is what makes the result suitable for hashing).
    """
    text = json.dumps(
        obj,
        ensure_ascii=False,
        sort_keys=True,
        separators=(",", ":"),
    )
    return text.encode("utf-8")


def sha256_hex(b: bytes) -> str:
    """Return the SHA-256 digest of *b* as a lowercase hex string."""
    digest = hashlib.sha256()
    digest.update(b)
    return digest.hexdigest()


def now_utc_iso() -> str:
    """Return the current time as a timezone-aware UTC ISO-8601 string."""
    stamp = datetime.now(timezone.utc)
    return stamp.isoformat()


def short(h: str, n: int = 12) -> str:
    """Return the first *n* characters of the stripped hash *h*.

    A falsy *h* (``None`` or ``""``) is treated as the empty string; values
    that are already *n* characters or shorter are returned unchanged.
    """
    trimmed = (h or "").strip()
    if len(trimmed) <= n:
        return trimmed
    return trimmed[:n]


def _lock_path(log_path: str) -> str:
    """Return the path of the lock file that guards *log_path*."""
    return "{}.lock".format(log_path)


# ============================================================
# Key management (Ed25519)
# ============================================================

def gen_keys() -> Tuple[str, str]:
    """Generate a fresh Ed25519 key pair.

    Returns:
        ``(private_key_hex, public_key_hex)`` - the raw, unencrypted key
        bytes encoded as hex strings.
    """
    private_key = Ed25519PrivateKey.generate()
    private_raw = private_key.private_bytes(
        encoding=serialization.Encoding.Raw,
        format=serialization.PrivateFormat.Raw,
        encryption_algorithm=serialization.NoEncryption(),
    )
    public_raw = private_key.public_key().public_bytes(
        encoding=serialization.Encoding.Raw,
        format=serialization.PublicFormat.Raw,
    )
    return private_raw.hex(), public_raw.hex()


def load_sk(sk_hex: str) -> Ed25519PrivateKey:
    """Build an Ed25519 private key from its raw hex encoding.

    Surrounding whitespace is ignored; malformed hex or key bytes raise
    whatever ``bytes.fromhex`` / ``from_private_bytes`` raise.
    """
    raw = bytes.fromhex(sk_hex.strip())
    return Ed25519PrivateKey.from_private_bytes(raw)


def load_pk(pk_hex: str) -> Ed25519PublicKey:
    """Build an Ed25519 public key from its raw hex encoding.

    Surrounding whitespace is ignored; malformed hex or key bytes raise
    whatever ``bytes.fromhex`` / ``from_public_bytes`` raise.
    """
    raw = bytes.fromhex(pk_hex.strip())
    return Ed25519PublicKey.from_public_bytes(raw)
# ============================================================
# Payload parsing
# ============================================================

def parse_payload_text(payload_text: str) -> Dict[str, Any]:
    """Turn user-supplied payload text into a dict payload.

    Accepts JSON or plain text:

    * empty / ``None``        -> ``{}``
    * a JSON object           -> that object
    * any other JSON value    -> ``{"_value": value}``
    * text that is not JSON   -> ``{"_text": stripped_text}``
    """
    txt = (payload_text or "").strip()
    if not txt:
        return {}
    try:
        value = json.loads(txt)
    except (ValueError, RecursionError):
        # Not JSON (or too deeply nested to parse): keep it as plain text.
        return {"_text": txt}
    if isinstance(value, dict):
        return value
    return {"_value": value}


# ============================================================
# Log IO (locked)
# ============================================================

def read_jsonl(path: str) -> Tuple[List[Dict[str, Any]], int]:
    """Read a JSONL log under the file lock to avoid torn reads.

    Returns:
        ``(records, corrupt_lines_count)``. A missing file yields ``([], 0)``.
        If the lock cannot be acquired within ``READ_LOCK_TIMEOUT`` seconds
        the function also returns ``([], 0)`` rather than reading without the
        lock, so callers cannot distinguish "busy" from "empty".
    """
    if not os.path.exists(path):
        return [], 0

    lock = FileLock(_lock_path(path))
    try:
        with lock.acquire(timeout=READ_LOCK_TIMEOUT):
            return _read_jsonl_unlocked(path)
    except Timeout:
        # If we cannot lock, refuse to guess.
        return [], 0


def _read_jsonl_unlocked(path: str) -> Tuple[List[Dict[str, Any]], int]:
    """Read a JSONL file without taking the lock (caller must hold it).

    Blank lines are skipped. Lines that are not valid JSON, or that decode to
    something other than an object, are counted as corrupt and dropped.

    Returns:
        ``(records, corrupt_lines_count)``.
    """
    out: List[Dict[str, Any]] = []
    corrupt = 0
    with open(path, "r", encoding="utf-8") as f:
        for ln in f:
            ln = ln.strip()
            if not ln:
                continue
            try:
                obj = json.loads(ln)
            except (ValueError, RecursionError):
                corrupt += 1
                continue
            if isinstance(obj, dict):
                out.append(obj)
            else:
                corrupt += 1
    return out, corrupt


def append_jsonl(path: str, obj: Dict[str, Any]) -> None:
    """Append one JSON object as a single line. Locked by caller for atomic sequences."""
    with open(path, "a", encoding="utf-8") as f:
        f.write(json.dumps(obj, ensure_ascii=False) + "\n")


# ============================================================
# Core event model
# ============================================================

def _event_seq(event: Dict[str, Any], default: int = 0) -> int:
    """Return ``int(event["seq"])``, or *default* when it is missing or not numeric.

    Used on read/verify paths so that a malformed record is reported by the
    verifier instead of crashing a sort or comparison.
    """
    try:
        return int(event.get("seq", default))
    except (TypeError, ValueError, OverflowError):
        return default


def make_event_core(
    session_id: str,
    seq: int,
    event_type: str,
    payload: Dict[str, Any],
    parent_event_hash: str,
    prev_event_hash: str,
    meta: Dict[str, Any],
) -> Dict[str, Any]:
    """Build the hashable core of an event (everything except hash and signature).

    Normalisation applied:

    * ``event_type`` defaults to ``"note"`` and is stripped.
    * an empty ``prev_event_hash`` becomes 64 zeros (the genesis marker).
    * a non-dict ``payload`` is wrapped as ``{"_payload": payload}``.
    * a non-dict ``meta`` is replaced by ``{}``.

    The timestamp is taken from ``now_utc_iso()`` at call time.
    """
    return {
        "spec": EVENT_SPEC,
        "ts_utc": now_utc_iso(),
        "session_id": session_id,
        "seq": int(seq),
        "event_type": (event_type or "note").strip(),
        "parent_event_hash_sha256": (parent_event_hash or "").strip(),
        "prev_event_hash_sha256": (prev_event_hash or "").strip() if prev_event_hash else ("0" * 64),
        "payload": payload if isinstance(payload, dict) else {"_payload": payload},
        "meta": meta if isinstance(meta, dict) else {},
    }


def events_for_session(all_events: List[Dict[str, Any]], session_id: str) -> List[Dict[str, Any]]:
    """Return the events of *all_events* that belong to *session_id*.

    Only dict records with the event spec, a matching ``session_id`` and an
    ``event_hash_sha256`` field are kept. Input order is preserved. An empty
    session id yields an empty list.
    """
    sid = (session_id or "").strip()
    if not sid:
        return []
    return [
        e for e in all_events
        if isinstance(e, dict)
        and e.get("spec") == EVENT_SPEC
        and e.get("session_id") == sid
        and "event_hash_sha256" in e
    ]


def _session_has_ended(evs: List[Dict[str, Any]]) -> bool:
    """Return True when any event in *evs* is a ``session_end`` event."""
    return any((e.get("event_type") == "session_end") for e in evs)


# ============================================================
# Append events (hash-chained; optional signature)
# ============================================================

def append_event(
    log_path: str,
    session_id: str,
    event_type: str,
    payload_text: str,
    parent_event_hash: str,
    sign_event: bool,
    sk_hex: str,
    model_id: str,
    run_mode: str,
) -> Tuple[Optional[Dict[str, Any]], str]:
    """Append one hash-chained event to a session, under the write lock.

    The new event's ``seq`` is the last sequence number of the session plus
    one (or 1 for the first event) and its ``prev_event_hash_sha256`` is the
    hash of that last event (64 zeros for the first). When no explicit parent
    hash is given, the parent defaults to the previous event's hash for every
    event after the first.

    Returns:
        ``(event, message)`` on success, or ``(None, reason)`` when the
        session id is missing, the session already has a ``session_end``
        event, signing was requested but failed, or the lock timed out.
    """
    sid = (session_id or "").strip()
    if not sid:
        return None, "Missing session_id."

    lock = FileLock(_lock_path(log_path))
    try:
        with lock.acquire(timeout=WRITE_LOCK_TIMEOUT):
            # Read and append under one lock so seq/prev_hash cannot race.
            all_events, _corrupt = _read_jsonl_unlocked(log_path) if os.path.exists(log_path) else ([], 0)
            evs = events_for_session(all_events, sid)
            evs.sort(key=lambda x: int(x.get("seq", 0)))

            if _session_has_ended(evs):
                return None, "Refused: session has ended (session_end already recorded)."

            payload = parse_payload_text(payload_text)
            meta = {
                "model_id": (model_id or "unknown").strip(),
                "run_mode": (run_mode or "unknown").strip(),
            }

            last = evs[-1] if evs else None
            prev_hash = last.get("event_hash_sha256") if last else ("0" * 64)
            seq = (int(last.get("seq", 0)) + 1) if last else 1

            parent = (parent_event_hash or "").strip()
            if not parent and seq > 1:
                parent = prev_hash

            core = make_event_core(
                session_id=sid,
                seq=seq,
                event_type=event_type,
                payload=payload,
                parent_event_hash=parent,
                prev_event_hash=prev_hash,
                meta=meta,
            )

            # The hash covers the core only; hash and signature are added after.
            event_hash = sha256_hex(canon(core))
            event = dict(core)
            event["event_hash_sha256"] = event_hash

            if sign_event:
                if not sk_hex or not sk_hex.strip():
                    return None, "Signing enabled but private key is missing."
                try:
                    sk = load_sk(sk_hex)
                    sig = sk.sign(bytes.fromhex(event_hash))
                    event["signature_ed25519"] = sig.hex()
                except Exception:
                    return None, "Failed to sign event (invalid private key?)."

            append_jsonl(log_path, event)
            return event, f"OK. Appended event #{seq} ({event_type}). Hash: {event_hash}"

    except Timeout:
        return None, "Busy: log file is locked. Retry."


def start_session(
    log_path: str,
    model_id: str,
    run_mode: str,
    notes: str,
    sign_start: bool,
    sk_hex: str,
) -> Tuple[str, str]:
    """Create a new session by appending its ``session_start`` event.

    The session id is a fresh ``uuid4`` hex string and the stripped *notes*
    are stored as the event payload.

    Returns:
        ``(session_id, message)`` on success, or ``("", reason)`` when the
        first event could not be appended.
    """
    sid = uuid.uuid4().hex
    payload = {"notes": (notes or "").strip()}
    ev, msg = append_event(
        log_path=log_path,
        session_id=sid,
        event_type="session_start",
        payload_text=json.dumps(payload, ensure_ascii=False),
        parent_event_hash="",
        sign_event=sign_start,
        sk_hex=sk_hex,
        model_id=model_id,
        run_mode=run_mode,
    )
    if not ev:
        return "", msg
    return sid, f"OK. Session started: {sid} (first hash: {ev['event_hash_sha256']})"


def finalise_session(
    log_path: str,
    session_id: str,
    sign_anchor: bool,
    sk_hex: str,
    model_id: str,
    run_mode: str,
) -> Tuple[Optional[Dict[str, Any]], str]:
    """Close a session by appending a ``session_end`` event that carries an anchor.

    The anchor summarises the chain as it stands *before* the ``session_end``
    event (first hash, last hash, event count) and its root hash is the
    SHA-256 of that canonical summary. With *sign_anchor* set, both the root
    hash and the ``session_end`` event hash are signed with *sk_hex*.

    Returns:
        ``(anchor, message)`` on success, or ``(None, reason)`` when the
        session id is missing, the session has no events, it is already
        finalised, signing failed, or the lock timed out.
    """
    sid = (session_id or "").strip()
    if not sid:
        return None, "Missing session_id."

    lock = FileLock(_lock_path(log_path))
    try:
        with lock.acquire(timeout=WRITE_LOCK_TIMEOUT):
            all_events, _corrupt = _read_jsonl_unlocked(log_path) if os.path.exists(log_path) else ([], 0)
            evs = events_for_session(all_events, sid)
            if not evs:
                return None, "No events found for this session."

            evs.sort(key=lambda x: int(x.get("seq", 0)))
            if _session_has_ended(evs):
                return None, "Refused: session already finalised (session_end exists)."

            # Anchor must describe the chain BEFORE session_end (avoids circular dependency)
            first_hash = evs[0]["event_hash_sha256"]
            last_hash = evs[-1]["event_hash_sha256"]
            count = len(evs)

            root_core = {
                "spec": ROOT_SPEC,
                "session_id": sid,
                "first_event_hash_sha256": first_hash,
                "last_event_hash_sha256": last_hash,
                "event_count": count,
            }
            root_hash = sha256_hex(canon(root_core))

            # Fields added below are informational and not covered by root_hash.
            anchor = dict(root_core)
            anchor["root_hash_sha256"] = root_hash
            anchor["created_utc"] = now_utc_iso()
            anchor["model_id"] = (model_id or "unknown").strip()
            anchor["run_mode"] = (run_mode or "unknown").strip()

            if sign_anchor:
                if not sk_hex or not sk_hex.strip():
                    return None, "Anchor signing enabled but private key is missing."
                try:
                    sk = load_sk(sk_hex)
                    sig = sk.sign(bytes.fromhex(root_hash))
                    anchor["signature_ed25519"] = sig.hex()
                except Exception:
                    return None, "Failed to sign anchor (invalid private key?)."

            # Append session_end event containing the anchor (under same lock)
            payload_text = json.dumps({"anchor": anchor}, ensure_ascii=False)
            seq = int(evs[-1].get("seq", 0)) + 1

            core = make_event_core(
                session_id=sid,
                seq=seq,
                event_type="session_end",
                payload=parse_payload_text(payload_text),
                parent_event_hash=last_hash,  # must point to last pre-end event
                prev_event_hash=last_hash,
                meta={
                    "model_id": (model_id or "unknown").strip(),
                    "run_mode": (run_mode or "unknown").strip(),
                },
            )
            event_hash = sha256_hex(canon(core))
            event = dict(core)
            event["event_hash_sha256"] = event_hash

            if sign_anchor:
                try:
                    sk = load_sk(sk_hex)
                    sig = sk.sign(bytes.fromhex(event_hash))
                    event["signature_ed25519"] = sig.hex()
                except Exception:
                    return None, "Failed to sign session_end event (invalid private key?)."

            append_jsonl(log_path, event)

            return anchor, f"OK. Session finalised. Root hash: {root_hash} (last event hash: {event_hash})"

    except Timeout:
        return None, "Busy: log file is locked. Retry."


# ============================================================
# Verification
# ============================================================

def verify_session_from_events(
    evs: List[Dict[str, Any]],
    session_id: str,
    pk_hex: str = "",
    require_signatures: bool = False,
) -> Tuple[str, bool, str]:
    """Verify the hash chain (and optionally signatures) of one session.

    *evs* must already be filtered to the session and sorted by ``seq``.
    For every event the function checks required fields, spec, sequence
    number, ``prev`` link and the recomputed hash; with *require_signatures*
    it also checks the Ed25519 signature over the event hash. If a
    ``session_end`` event is present, its anchor is checked against the
    events that precede it and, with *require_signatures*, a signature on
    the anchor is verified as well.

    Malformed events (not a dict, missing a required field, non-numeric
    ``seq``) are reported as failures and skipped rather than raising.

    Returns:
        ``(status, ok, report)``. ``status`` is ``"PASS"``/``"FAIL"`` after a
        full run, or a short error message (with ``ok`` False and an empty
        report) when verification could not start.
    """
    sid = (session_id or "").strip()
    if not sid:
        return "Missing session_id.", False, ""

    if not evs:
        return "No events found for this session.", False, ""

    report: List[str] = []
    ok = True

    pk = None
    if require_signatures:
        if not pk_hex or not pk_hex.strip():
            return "Public key required to verify signatures.", False, ""
        try:
            pk = load_pk(pk_hex)
        except Exception:
            return "Invalid public key.", False, ""

    required_fields = (
        "spec", "ts_utc", "session_id", "seq", "event_type",
        "prev_event_hash_sha256", "payload", "meta", "event_hash_sha256",
    )

    expected_prev = "0" * 64
    expected_seq = 1

    for i, e in enumerate(evs):
        if not isinstance(e, dict):
            ok = False
            report.append(f"[FAIL] Event {i+1}: not a JSON object.")
            expected_seq += 1
            continue

        missing = [k for k in required_fields if k not in e]
        if missing:
            # The core cannot be rebuilt without every field, so report and
            # move on instead of indexing into the event below.
            ok = False
            report.extend(f"[FAIL] Event {i+1}: missing field '{k}'." for k in missing)
            expected_seq += 1
            stored = e.get("event_hash_sha256")
            if isinstance(stored, str):
                expected_prev = stored
            continue

        if e.get("spec") != EVENT_SPEC:
            ok = False
            report.append(f"[FAIL] Bad spec at seq {e.get('seq')}.")
            continue

        try:
            seq = int(e["seq"])
        except (TypeError, ValueError, OverflowError):
            ok = False
            report.append(f"[FAIL] Event {i+1}: seq {e['seq']!r} is not an integer.")
            expected_seq += 1
            stored = e.get("event_hash_sha256")
            if isinstance(stored, str):
                expected_prev = stored
            continue

        if seq != expected_seq:
            ok = False
            report.append(f"[FAIL] Seq mismatch: got {e.get('seq')} expected {expected_seq}.")
        expected_seq += 1

        if e.get("prev_event_hash_sha256") != expected_prev:
            ok = False
            report.append(
                f"[FAIL] Chain broken at seq {e.get('seq')}: prev {short(e.get('prev_event_hash_sha256'))} expected {short(expected_prev)}."
            )

        # Rebuild exactly the fields that were hashed when the event was made.
        core = {
            "spec": e["spec"],
            "ts_utc": e["ts_utc"],
            "session_id": e["session_id"],
            "seq": seq,
            "event_type": e["event_type"],
            "parent_event_hash_sha256": e.get("parent_event_hash_sha256", ""),
            "prev_event_hash_sha256": e["prev_event_hash_sha256"],
            "payload": e["payload"],
            "meta": e["meta"],
        }
        h = sha256_hex(canon(core))
        if h != e["event_hash_sha256"]:
            ok = False
            report.append(f"[FAIL] Hash mismatch at seq {e.get('seq')}: stored {short(e['event_hash_sha256'])} recomputed {short(h)}.")

        if require_signatures:
            sig_hex = (e.get("signature_ed25519") or "").strip()
            if not sig_hex:
                ok = False
                report.append(f"[FAIL] Missing signature at seq {e.get('seq')}.")
            else:
                try:
                    pk.verify(bytes.fromhex(sig_hex), bytes.fromhex(e["event_hash_sha256"]))
                except Exception:
                    # Covers both malformed hex and a signature that does not verify.
                    ok = False
                    report.append(f"[FAIL] Bad signature at seq {e.get('seq')}.")

        expected_prev = e["event_hash_sha256"]

    # Anchor check: anchor describes chain BEFORE session_end (avoids circular dependency)
    end_events = [e for e in evs if isinstance(e, dict) and e.get("event_type") == "session_end"]
    if end_events:
        end_events.sort(key=_event_seq)
        se = end_events[-1]
        se_seq = _event_seq(se)

        pre = [e for e in evs if isinstance(e, dict) and _event_seq(e) < se_seq]
        pre.sort(key=_event_seq)

        se_payload = se.get("payload")
        anchor = se_payload.get("anchor") if isinstance(se_payload, dict) else None
        if isinstance(anchor, dict) and pre:
            first_hash = pre[0].get("event_hash_sha256", "")
            last_hash = pre[-1].get("event_hash_sha256", "")
            count = len(pre)

            if (se.get("parent_event_hash_sha256") or "") != last_hash:
                ok = False
                report.append("[FAIL] session_end parent hash does not match last pre-end event.")

            root_core = {
                "spec": ROOT_SPEC,
                "session_id": sid,
                "first_event_hash_sha256": first_hash,
                "last_event_hash_sha256": last_hash,
                "event_count": count,
            }
            root_hash = sha256_hex(canon(root_core))
            if anchor.get("root_hash_sha256") != root_hash:
                ok = False
                report.append("[FAIL] Session anchor root hash does not match pre-end event chain.")
            else:
                report.append("[OK] Session anchor matches pre-end event chain.")
                if pk is not None:
                    # finalise_session signs the root hash; check that
                    # signature too when signatures are required.
                    anchor_sig = anchor.get("signature_ed25519")
                    if isinstance(anchor_sig, str) and anchor_sig.strip():
                        try:
                            pk.verify(bytes.fromhex(anchor_sig.strip()), bytes.fromhex(root_hash))
                            report.append("[OK] Session anchor signature verified.")
                        except Exception:
                            ok = False
                            report.append("[FAIL] Session anchor signature is invalid.")
                    else:
                        report.append("[WARN] Session anchor is not signed.")
        else:
            report.append("[WARN] session_end found but anchor payload is missing/invalid, or chain is empty.")

    if ok:
        report.insert(0, f"[PASS] Session verified: {len(evs)} events, chain intact.")
    else:
        report.insert(0, f"[FAIL] Session verification failed: {len(evs)} events checked.")

    return ("PASS" if ok else "FAIL"), ok, "\n".join(report)


def verify_session(log_path: str, session_id: str, pk_hex: str, require_signatures: bool) -> Tuple[str, bool, str]:
    """Load a session from *log_path* and verify it.

    Reads the log, keeps the events of *session_id*, sorts them by ``seq``
    and delegates to ``verify_session_from_events``; the return value is that
    function's ``(status, ok, report)`` tuple.
    """
    all_events, _corrupt = read_jsonl(log_path)
    evs = events_for_session(all_events, session_id)
    # Tolerant key: a malformed seq is reported by the verifier, not raised here.
    evs.sort(key=_event_seq)
    return verify_session_from_events(evs, session_id, pk_hex=pk_hex, require_signatures=require_signatures)


# ============================================================
# Timeline + listing
# ============================================================

def session_timeline_rows(log_path: str, session_id: str) -> Tuple[List[List[Any]], str]:
    """Return one display row per event of a session, ordered by ``seq``.

    Each row is ``[seq, ts_utc, event_type, model_id, run_mode, parent_hash,
    prev_hash, event_hash, "yes"/"no" (signed)]``.

    Returns:
        ``(rows, message)``; ``rows`` is empty when the session id is missing
        or no events were found.
    """
    sid = (session_id or "").strip()
    if not sid:
        return [], "Missing session_id."

    all_events, _corrupt = read_jsonl(log_path)
    evs = events_for_session(all_events, sid)
    if not evs:
        return [], "No events found."

    evs.sort(key=_event_seq)

    rows: List[List[Any]] = []
    for e in evs:
        meta = e.get("meta", {})
        if not isinstance(meta, dict):
            # A damaged record should still show up in the timeline.
            meta = {}
        rows.append([
            _event_seq(e),
            e.get("ts_utc", ""),
            e.get("event_type", ""),
            meta.get("model_id", ""),
            meta.get("run_mode", ""),
            e.get("parent_event_hash_sha256", ""),
            e.get("prev_event_hash_sha256", ""),
            e.get("event_hash_sha256", ""),
            "yes" if e.get("signature_ed25519") else "no",
        ])
    return rows, f"Loaded {len(rows)} events."


def get_event_by_hash(log_path: str, session_id: str, event_hash: str) -> Tuple[Optional[Dict[str, Any]], str]:
    """Look up one event of a session by its exact ``event_hash_sha256``.

    Returns:
        ``(event, "OK.")`` for the first match, otherwise ``(None, reason)``.
    """
    sid = (session_id or "").strip()
    h = (event_hash or "").strip()
    if not sid or not h:
        return None, "Missing session_id or event hash."

    all_events, _corrupt = read_jsonl(log_path)
    evs = events_for_session(all_events, sid)
    for e in evs:
        if e.get("event_hash_sha256") == h:
            return e, "OK."
    return None, "Not found."


def list_sessions(log_path: str) -> Tuple[List[str], str]:
    """List the distinct session ids found in the log, sorted.

    Returns:
        ``(session_ids, message)`` where the message reports the number of
        sessions and the number of corrupt lines that were ignored.
    """
    all_events, corrupt = read_jsonl(log_path)
    counts: Dict[str, int] = {}
    for e in all_events:
        if isinstance(e, dict) and e.get("spec") == EVENT_SPEC:
            sid = e.get("session_id")
            if sid:
                counts[sid] = counts.get(sid, 0) + 1
    sessions = sorted(counts)
    msg = f"Found {len(sessions)} sessions. Corrupt lines ignored: {corrupt}."
    return sessions, msg


def diagnostics(log_path: str) -> Dict[str, Any]:
    """Return summary statistics about the log file.

    The result holds the path, whether the file exists, its size in bytes,
    and counts of events, distinct sessions, signed events and corrupt lines.
    """
    all_events, corrupt = read_jsonl(log_path)
    size = os.path.getsize(log_path) if os.path.exists(log_path) else 0

    sessions = set()
    signed = 0
    total = 0
    for e in all_events:
        if isinstance(e, dict) and e.get("spec") == EVENT_SPEC:
            total += 1
            if e.get("session_id"):
                sessions.add(e["session_id"])
            if e.get("signature_ed25519"):
                signed += 1

    return {
        "log_path": log_path,
        "exists": os.path.exists(log_path),
        "bytes": size,
        "total_events": total,
        "sessions": len(sessions),
        "signed_events": signed,
        "corrupt_lines_ignored": corrupt,
    }


# ============================================================
# Export bundle
# ============================================================

def export_session_bundle(log_path: str, session_id: str) -> Tuple[Optional[str], str]:
    """Export one session as a zip bundle in the current working directory.

    The bundle ``rft_flight_bundle_<sid>.zip`` contains the session's events
    as JSONL plus a text report from an unsigned verification run. The two
    intermediate files are written to ``tmp_export/`` first.

    Returns:
        ``(zip_name, message)`` on success, or ``(None, reason)`` when the
        session id is missing or the session has no events.
    """
    sid = (session_id or "").strip()
    if not sid:
        return None, "Missing session_id."

    all_events, _corrupt = read_jsonl(log_path)
    evs = events_for_session(all_events, sid)
    if not evs:
        return None, "No events found."

    evs.sort(key=_event_seq)
    status, ok, report = verify_session_from_events(evs, sid, pk_hex="", require_signatures=False)

    # NOTE(review): sid is used unsanitised in the file names below; ids made
    # by start_session are uuid hex, but append_event accepts any string.
    zip_name = f"rft_flight_bundle_{sid}.zip"
    tmp_dir = "tmp_export"
    os.makedirs(tmp_dir, exist_ok=True)

    events_path = os.path.join(tmp_dir, f"{sid}_events.jsonl")
    report_path = os.path.join(tmp_dir, f"{sid}_verify_report.txt")

    with open(events_path, "w", encoding="utf-8") as f:
        for e in evs:
            f.write(json.dumps(e, ensure_ascii=False) + "\n")

    with open(report_path, "w", encoding="utf-8") as f:
        f.write(f"session_id: {sid}\n")
        f.write(f"status: {status}\n")
        f.write(f"ok: {ok}\n\n")
        f.write(report + "\n")

    # NOTE(review): the intermediate files in tmp_export/ are not removed.
    with zipfile.ZipFile(zip_name, "w", compression=zipfile.ZIP_DEFLATED) as z:
        z.write(events_path, arcname=os.path.basename(events_path))
        z.write(report_path, arcname=os.path.basename(report_path))

    return zip_name, f"OK. Exported {zip_name} ({len(evs)} events)."
# ============================================================
# Import bundle (third-party verification)
# ============================================================

def _read_jsonl_from_zip(z: zipfile.ZipFile, member: str) -> List[Dict[str, Any]]:
    """Read JSONL records from *member* of an open zip archive.

    Bytes are decoded as UTF-8 with replacement characters for invalid
    sequences. Blank lines, lines that are not valid JSON and lines that do
    not decode to an object are skipped silently.
    """
    out: List[Dict[str, Any]] = []
    raw_text = z.read(member).decode("utf-8", errors="replace")
    for raw in raw_text.splitlines():
        raw = raw.strip()
        if not raw:
            continue
        try:
            obj = json.loads(raw)
        except (ValueError, RecursionError):
            continue
        if isinstance(obj, dict):
            out.append(obj)
    return out


def _bundle_seq_key(event: Dict[str, Any]) -> int:
    """Sort key for bundle events: ``int(seq)``, or 0 when seq is missing or malformed."""
    try:
        return int(event.get("seq", 0))
    except (TypeError, ValueError, OverflowError):
        return 0


def import_bundle_verify(
    bundle_path: str,
    pk_hex: str = "",
    require_signatures: bool = False,
    store_into_log: bool = False,
    log_path: str = DEFAULT_LOG_PATH,
) -> Tuple[str, bool, str, Optional[str]]:
    """Verify a session bundle and optionally store it in the local log.

    The events file is the first archive member ending in ``_events.jsonl``,
    falling back to the first member ending in ``.jsonl``. The session id is
    taken from the first event that carries the event spec; only events of
    that session are verified.

    When *store_into_log* is set and verification passed, the events are
    appended to *log_path* under the write lock. A session that is already
    present in that log is not appended again, because duplicated events
    would break the chain on a later verification of the local log.

    Returns:
        ``(status, ok, report, stored_msg)``. ``stored_msg`` is ``None``
        unless storing was requested and verification passed. On early
        failure ``status`` is a short error message and ``report`` is empty.
    """
    if not bundle_path or not os.path.exists(bundle_path):
        return "Missing bundle file.", False, "", None

    try:
        with zipfile.ZipFile(bundle_path, "r") as z:
            members = z.namelist()
            events_member = next((m for m in members if m.endswith("_events.jsonl")), None)
            if not events_member:
                events_member = next((m for m in members if m.endswith(".jsonl")), None)

            if not events_member:
                return "No .jsonl events file found in bundle.", False, "", None

            evs = _read_jsonl_from_zip(z, events_member)

    except Exception:
        # Deliberately broad: the archive is untrusted input and zipfile can
        # fail in several unrelated ways.
        return "Failed to read bundle (invalid zip?).", False, "", None

    sid = ""
    for e in evs:
        if e.get("spec") == EVENT_SPEC and e.get("session_id"):
            sid = e["session_id"]
            break

    if not sid:
        return "Bundle contains no valid flight events.", False, "", None

    evs = [e for e in evs if e.get("spec") == EVENT_SPEC and e.get("session_id") == sid]
    # Tolerant key: a malformed seq in an untrusted bundle must not raise here.
    evs.sort(key=_bundle_seq_key)

    try:
        status, ok, report = verify_session_from_events(
            evs, sid, pk_hex=pk_hex, require_signatures=require_signatures
        )
    except (AttributeError, KeyError, TypeError, ValueError):
        # Structurally broken events in an untrusted bundle are a
        # verification failure, not a crash.
        return "Bundle events are malformed.", False, "", None

    stored_msg = None
    if store_into_log and ok:
        lock = FileLock(_lock_path(log_path))
        try:
            with lock.acquire(timeout=WRITE_LOCK_TIMEOUT):
                existing, _corrupt = _read_jsonl_unlocked(log_path) if os.path.exists(log_path) else ([], 0)
                if events_for_session(existing, sid):
                    stored_msg = "Not stored: session already exists in local log."
                else:
                    for e in evs:
                        append_jsonl(log_path, e)
                    stored_msg = f"Stored into local {log_path}."
        except Timeout:
            stored_msg = "Could not store: log file is locked."

    return status, ok, report, stored_msg