# Agent_Flight_Recorder / rft_flightrecorder.py
import json
import os
import uuid
import hashlib
import zipfile
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple
from filelock import FileLock, Timeout
from cryptography.hazmat.primitives.asymmetric.ed25519 import (
Ed25519PrivateKey,
Ed25519PublicKey,
)
from cryptography.hazmat.primitives import serialization
EVENT_SPEC = "rft-flight-event-v0"
ROOT_SPEC = "rft-flight-session-root-v0"
DEFAULT_LOG_PATH = "flightlog.jsonl"
# Lock timeouts (seconds)
READ_LOCK_TIMEOUT = 5.0
WRITE_LOCK_TIMEOUT = 5.0
# ============================================================
# Canonical JSON + hashing
# ============================================================
def canon(obj) -> bytes:
"""Canonical JSON encoding (stable for hashing/verifying across machines)."""
return json.dumps(
obj,
ensure_ascii=False,
sort_keys=True,
separators=(",", ":"),
).encode("utf-8")
def sha256_hex(b: bytes) -> str:
return hashlib.sha256(b).hexdigest()
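# Minimal sketch (hypothetical helper, not part of the API): canonical encoding
# sorts keys, so semantically equal dicts hash identically regardless of
# insertion order. This is what makes hashes reproducible across machines.
def _canon_demo() -> bool:
    return sha256_hex(canon({"b": 1, "a": 2})) == sha256_hex(canon({"a": 2, "b": 1}))  # True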
def now_utc_iso() -> str:
return datetime.now(timezone.utc).isoformat()
def short(h: str, n: int = 12) -> str:
h = (h or "").strip()
return h[:n] if len(h) > n else h
def _lock_path(log_path: str) -> str:
return f"{log_path}.lock"
# ============================================================
# Key management (Ed25519)
# ============================================================
def gen_keys() -> Tuple[str, str]:
sk = Ed25519PrivateKey.generate()
pk = sk.public_key()
sk_b = sk.private_bytes(
encoding=serialization.Encoding.Raw,
format=serialization.PrivateFormat.Raw,
encryption_algorithm=serialization.NoEncryption(),
)
pk_b = pk.public_bytes(
encoding=serialization.Encoding.Raw,
format=serialization.PublicFormat.Raw,
)
return sk_b.hex(), pk_b.hex()
def load_sk(sk_hex: str) -> Ed25519PrivateKey:
return Ed25519PrivateKey.from_private_bytes(bytes.fromhex(sk_hex.strip()))
def load_pk(pk_hex: str) -> Ed25519PublicKey:
return Ed25519PublicKey.from_public_bytes(bytes.fromhex(pk_hex.strip()))
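# Minimal sketch (hypothetical helper, not part of the API): a fresh keypair
# signs a digest and the matching public key verifies it. verify() raises on
# a bad signature, hence the try/except.
def _sign_verify_demo() -> bool:
    sk_hex, pk_hex = gen_keys()
    digest = sha256_hex(canon({"demo": True}))
    sig = load_sk(sk_hex).sign(bytes.fromhex(digest))
    try:
        load_pk(pk_hex).verify(sig, bytes.fromhex(digest))
        return True
    except Exception:
        return False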
# ============================================================
# Payload parsing
# ============================================================
def parse_payload_text(payload_text: str) -> Dict[str, Any]:
"""Accept JSON or plain text; store as structured payload either way."""
txt = (payload_text or "").strip()
if not txt:
return {}
try:
v = json.loads(txt)
if isinstance(v, dict):
return v
return {"_value": v}
except Exception:
return {"_text": txt}
# ============================================================
# Log IO (locked)
# ============================================================
def read_jsonl(path: str) -> Tuple[List[Dict[str, Any]], int]:
"""Read JSONL. Returns (records, corrupt_lines_count). Locked to avoid torn reads."""
if not os.path.exists(path):
return [], 0
lock = FileLock(_lock_path(path))
try:
with lock.acquire(timeout=READ_LOCK_TIMEOUT):
return _read_jsonl_unlocked(path)
    except Timeout:
        # Could not acquire the lock: return empty rather than risk a torn read.
        return [], 0
def _read_jsonl_unlocked(path: str) -> Tuple[List[Dict[str, Any]], int]:
out: List[Dict[str, Any]] = []
corrupt = 0
with open(path, "r", encoding="utf-8") as f:
for ln in f:
ln = ln.strip()
if not ln:
continue
try:
obj = json.loads(ln)
if isinstance(obj, dict):
out.append(obj)
else:
corrupt += 1
except Exception:
corrupt += 1
return out, corrupt
def append_jsonl(path: str, obj: Dict[str, Any]) -> None:
"""Append one JSON object as a single line. Locked by caller for atomic sequences."""
with open(path, "a", encoding="utf-8") as f:
f.write(json.dumps(obj, ensure_ascii=False) + "\n")
# ============================================================
# Core event model
# ============================================================
def make_event_core(
session_id: str,
seq: int,
event_type: str,
payload: Dict[str, Any],
parent_event_hash: str,
prev_event_hash: str,
meta: Dict[str, Any],
) -> Dict[str, Any]:
return {
"spec": EVENT_SPEC,
"ts_utc": now_utc_iso(),
"session_id": session_id,
"seq": int(seq),
"event_type": (event_type or "note").strip(),
"parent_event_hash_sha256": (parent_event_hash or "").strip(),
"prev_event_hash_sha256": (prev_event_hash or "").strip() if prev_event_hash else ("0" * 64),
"payload": payload if isinstance(payload, dict) else {"_payload": payload},
"meta": meta if isinstance(meta, dict) else {},
}
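# Hashing sketch (hypothetical helper, illustrative values): the event hash
# covers the entire core, so changing any field (including meta) changes the
# digest. Note ts_utc is stamped at creation time, so repeated calls yield
# different hashes by design.
def _event_hash_demo() -> str:
    core = make_event_core(
        session_id="demo-session",
        seq=1,
        event_type="session_start",
        payload={"notes": "demo"},
        parent_event_hash="",
        prev_event_hash="",
        meta={"model_id": "demo", "run_mode": "test"},
    )
    return sha256_hex(canon(core))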
def events_for_session(all_events: List[Dict[str, Any]], session_id: str) -> List[Dict[str, Any]]:
sid = (session_id or "").strip()
if not sid:
return []
return [
e for e in all_events
if isinstance(e, dict)
and e.get("spec") == EVENT_SPEC
and e.get("session_id") == sid
and "event_hash_sha256" in e
]
def _session_has_ended(evs: List[Dict[str, Any]]) -> bool:
return any((e.get("event_type") == "session_end") for e in evs)
# ============================================================
# Append events (hash-chained; optional signature)
# ============================================================
def append_event(
log_path: str,
session_id: str,
event_type: str,
payload_text: str,
parent_event_hash: str,
sign_event: bool,
sk_hex: str,
model_id: str,
run_mode: str,
) -> Tuple[Optional[Dict[str, Any]], str]:
sid = (session_id or "").strip()
if not sid:
return None, "Missing session_id."
lock = FileLock(_lock_path(log_path))
try:
with lock.acquire(timeout=WRITE_LOCK_TIMEOUT):
all_events, _corrupt = _read_jsonl_unlocked(log_path) if os.path.exists(log_path) else ([], 0)
evs = events_for_session(all_events, sid)
evs.sort(key=lambda x: int(x.get("seq", 0)))
if _session_has_ended(evs):
return None, "Refused: session has ended (session_end already recorded)."
payload = parse_payload_text(payload_text)
meta = {
"model_id": (model_id or "unknown").strip(),
"run_mode": (run_mode or "unknown").strip(),
}
last = evs[-1] if evs else None
prev_hash = last.get("event_hash_sha256") if last else ("0" * 64)
seq = (int(last.get("seq", 0)) + 1) if last else 1
parent = (parent_event_hash or "").strip()
if not parent and seq > 1:
parent = prev_hash
core = make_event_core(
session_id=sid,
seq=seq,
event_type=event_type,
payload=payload,
parent_event_hash=parent,
prev_event_hash=prev_hash,
meta=meta,
)
event_hash = sha256_hex(canon(core))
event = dict(core)
event["event_hash_sha256"] = event_hash
if sign_event:
if not sk_hex or not sk_hex.strip():
return None, "Signing enabled but private key is missing."
try:
sk = load_sk(sk_hex)
sig = sk.sign(bytes.fromhex(event_hash))
event["signature_ed25519"] = sig.hex()
except Exception:
return None, "Failed to sign event (invalid private key?)."
append_jsonl(log_path, event)
return event, f"OK. Appended event #{seq} ({event_type}). Hash: {event_hash}"
except Timeout:
return None, "Busy: log file is locked. Retry."
def start_session(
log_path: str,
model_id: str,
run_mode: str,
notes: str,
sign_start: bool,
sk_hex: str,
) -> Tuple[str, str]:
sid = uuid.uuid4().hex
payload = {"notes": (notes or "").strip()}
ev, msg = append_event(
log_path=log_path,
session_id=sid,
event_type="session_start",
payload_text=json.dumps(payload, ensure_ascii=False),
parent_event_hash="",
sign_event=sign_start,
sk_hex=sk_hex,
model_id=model_id,
run_mode=run_mode,
)
if not ev:
return "", msg
return sid, f"OK. Session started: {sid} (first hash: {ev['event_hash_sha256']})"
def finalise_session(
log_path: str,
session_id: str,
sign_anchor: bool,
sk_hex: str,
model_id: str,
run_mode: str,
) -> Tuple[Optional[Dict[str, Any]], str]:
sid = (session_id or "").strip()
if not sid:
return None, "Missing session_id."
lock = FileLock(_lock_path(log_path))
try:
with lock.acquire(timeout=WRITE_LOCK_TIMEOUT):
all_events, _corrupt = _read_jsonl_unlocked(log_path) if os.path.exists(log_path) else ([], 0)
evs = events_for_session(all_events, sid)
if not evs:
return None, "No events found for this session."
evs.sort(key=lambda x: int(x.get("seq", 0)))
if _session_has_ended(evs):
return None, "Refused: session already finalised (session_end exists)."
# Anchor must describe the chain BEFORE session_end (avoids circular dependency)
first_hash = evs[0]["event_hash_sha256"]
last_hash = evs[-1]["event_hash_sha256"]
count = len(evs)
root_core = {
"spec": ROOT_SPEC,
"session_id": sid,
"first_event_hash_sha256": first_hash,
"last_event_hash_sha256": last_hash,
"event_count": count,
}
root_hash = sha256_hex(canon(root_core))
anchor = dict(root_core)
anchor["root_hash_sha256"] = root_hash
anchor["created_utc"] = now_utc_iso()
anchor["model_id"] = (model_id or "unknown").strip()
anchor["run_mode"] = (run_mode or "unknown").strip()
if sign_anchor:
if not sk_hex or not sk_hex.strip():
return None, "Anchor signing enabled but private key is missing."
try:
sk = load_sk(sk_hex)
sig = sk.sign(bytes.fromhex(root_hash))
anchor["signature_ed25519"] = sig.hex()
except Exception:
return None, "Failed to sign anchor (invalid private key?)."
# Append session_end event containing the anchor (under same lock)
payload_text = json.dumps({"anchor": anchor}, ensure_ascii=False)
seq = int(evs[-1].get("seq", 0)) + 1
core = make_event_core(
session_id=sid,
seq=seq,
event_type="session_end",
payload=parse_payload_text(payload_text),
parent_event_hash=last_hash, # must point to last pre-end event
prev_event_hash=last_hash,
meta={
"model_id": (model_id or "unknown").strip(),
"run_mode": (run_mode or "unknown").strip(),
},
)
event_hash = sha256_hex(canon(core))
event = dict(core)
event["event_hash_sha256"] = event_hash
if sign_anchor:
try:
sk = load_sk(sk_hex)
sig = sk.sign(bytes.fromhex(event_hash))
event["signature_ed25519"] = sig.hex()
except Exception:
return None, "Failed to sign session_end event (invalid private key?)."
append_jsonl(log_path, event)
return anchor, f"OK. Session finalised. Root hash: {root_hash} (last event hash: {event_hash})"
except Timeout:
return None, "Busy: log file is locked. Retry."
# ============================================================
# Verification
# ============================================================
def verify_session_from_events(
evs: List[Dict[str, Any]],
session_id: str,
pk_hex: str = "",
require_signatures: bool = False,
) -> Tuple[str, bool, str]:
sid = (session_id or "").strip()
if not sid:
return "Missing session_id.", False, ""
if not evs:
return "No events found for this session.", False, ""
report: List[str] = []
ok = True
pk = None
if require_signatures:
if not pk_hex or not pk_hex.strip():
return "Public key required to verify signatures.", False, ""
try:
pk = load_pk(pk_hex)
except Exception:
return "Invalid public key.", False, ""
expected_prev = "0" * 64
expected_seq = 1
for i, e in enumerate(evs):
for k in ("spec", "ts_utc", "session_id", "seq", "event_type", "prev_event_hash_sha256", "payload", "meta", "event_hash_sha256"):
if k not in e:
ok = False
report.append(f"[FAIL] Event {i+1}: missing field '{k}'.")
continue
if e.get("spec") != EVENT_SPEC:
ok = False
report.append(f"[FAIL] Bad spec at seq {e.get('seq')}.")
continue
if int(e.get("seq", -1)) != expected_seq:
ok = False
report.append(f"[FAIL] Seq mismatch: got {e.get('seq')} expected {expected_seq}.")
expected_seq += 1
if e.get("prev_event_hash_sha256") != expected_prev:
ok = False
report.append(
f"[FAIL] Chain broken at seq {e.get('seq')}: prev {short(e.get('prev_event_hash_sha256'))} expected {short(expected_prev)}."
)
core = {
"spec": e["spec"],
"ts_utc": e["ts_utc"],
"session_id": e["session_id"],
"seq": int(e["seq"]),
"event_type": e["event_type"],
"parent_event_hash_sha256": e.get("parent_event_hash_sha256", ""),
"prev_event_hash_sha256": e["prev_event_hash_sha256"],
"payload": e["payload"],
"meta": e["meta"],
}
h = sha256_hex(canon(core))
if h != e["event_hash_sha256"]:
ok = False
report.append(f"[FAIL] Hash mismatch at seq {e.get('seq')}: stored {short(e['event_hash_sha256'])} recomputed {short(h)}.")
if require_signatures:
sig_hex = (e.get("signature_ed25519") or "").strip()
if not sig_hex:
ok = False
report.append(f"[FAIL] Missing signature at seq {e.get('seq')}.")
else:
try:
pk.verify(bytes.fromhex(sig_hex), bytes.fromhex(e["event_hash_sha256"]))
except Exception:
ok = False
report.append(f"[FAIL] Bad signature at seq {e.get('seq')}.")
expected_prev = e["event_hash_sha256"]
# Anchor check: anchor describes chain BEFORE session_end (avoids circular dependency)
end_events = [e for e in evs if e.get("event_type") == "session_end"]
if end_events:
end_events.sort(key=lambda x: int(x.get("seq", 0)))
se = end_events[-1]
se_seq = int(se.get("seq", 0))
pre = [e for e in evs if int(e.get("seq", 0)) < se_seq]
pre.sort(key=lambda x: int(x.get("seq", 0)))
anchor = (se.get("payload") or {}).get("anchor")
if isinstance(anchor, dict) and pre:
first_hash = pre[0]["event_hash_sha256"]
last_hash = pre[-1]["event_hash_sha256"]
count = len(pre)
if (se.get("parent_event_hash_sha256") or "") != last_hash:
ok = False
report.append("[FAIL] session_end parent hash does not match last pre-end event.")
root_core = {
"spec": ROOT_SPEC,
"session_id": sid,
"first_event_hash_sha256": first_hash,
"last_event_hash_sha256": last_hash,
"event_count": count,
}
root_hash = sha256_hex(canon(root_core))
if anchor.get("root_hash_sha256") != root_hash:
ok = False
report.append("[FAIL] Session anchor root hash does not match pre-end event chain.")
else:
report.append("[OK] Session anchor matches pre-end event chain.")
else:
report.append("[WARN] session_end found but anchor payload is missing/invalid, or chain is empty.")
if ok:
report.insert(0, f"[PASS] Session verified: {len(evs)} events, chain intact.")
else:
report.insert(0, f"[FAIL] Session verification failed: {len(evs)} events checked.")
return ("PASS" if ok else "FAIL"), ok, "\n".join(report)
def verify_session(log_path: str, session_id: str, pk_hex: str, require_signatures: bool) -> Tuple[str, bool, str]:
all_events, _corrupt = read_jsonl(log_path)
evs = events_for_session(all_events, session_id)
evs.sort(key=lambda x: int(x.get("seq", 0)))
return verify_session_from_events(evs, session_id, pk_hex=pk_hex, require_signatures=require_signatures)
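# Signature sketch (hypothetical helper, throwaway log "demo_signed.jsonl"):
# sign every event in a session with one keypair, then verify with
# require_signatures=True and the matching public key.
def _signed_verify_demo() -> bool:
    sk_hex, pk_hex = gen_keys()
    path = "demo_signed.jsonl"
    sid, _ = start_session(path, "demo", "test", "signed run",
                           sign_start=True, sk_hex=sk_hex)
    finalise_session(path, sid, sign_anchor=True, sk_hex=sk_hex,
                     model_id="demo", run_mode="test")
    _status, ok, _report = verify_session(path, sid, pk_hex, require_signatures=True)
    return ok  # expected: True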
# ============================================================
# Timeline + listing
# ============================================================
def session_timeline_rows(log_path: str, session_id: str) -> Tuple[List[List[Any]], str]:
sid = (session_id or "").strip()
if not sid:
return [], "Missing session_id."
all_events, _corrupt = read_jsonl(log_path)
evs = events_for_session(all_events, sid)
if not evs:
return [], "No events found."
evs.sort(key=lambda x: int(x.get("seq", 0)))
rows: List[List[Any]] = []
for e in evs:
rows.append([
int(e.get("seq", 0)),
e.get("ts_utc", ""),
e.get("event_type", ""),
e.get("meta", {}).get("model_id", ""),
e.get("meta", {}).get("run_mode", ""),
e.get("parent_event_hash_sha256", ""),
e.get("prev_event_hash_sha256", ""),
e.get("event_hash_sha256", ""),
"yes" if e.get("signature_ed25519") else "no",
])
return rows, f"Loaded {len(rows)} events."
def get_event_by_hash(log_path: str, session_id: str, event_hash: str) -> Tuple[Optional[Dict[str, Any]], str]:
sid = (session_id or "").strip()
h = (event_hash or "").strip()
if not sid or not h:
return None, "Missing session_id or event hash."
all_events, _corrupt = read_jsonl(log_path)
evs = events_for_session(all_events, sid)
for e in evs:
if e.get("event_hash_sha256") == h:
return e, "OK."
return None, "Not found."
def list_sessions(log_path: str) -> Tuple[List[str], str]:
all_events, corrupt = read_jsonl(log_path)
counts: Dict[str, int] = {}
for e in all_events:
if isinstance(e, dict) and e.get("spec") == EVENT_SPEC:
sid = e.get("session_id")
if sid:
counts[sid] = counts.get(sid, 0) + 1
sessions = sorted(counts.keys())
msg = f"Found {len(sessions)} sessions. Corrupt lines ignored: {corrupt}."
return sessions, msg
def diagnostics(log_path: str) -> Dict[str, Any]:
all_events, corrupt = read_jsonl(log_path)
size = os.path.getsize(log_path) if os.path.exists(log_path) else 0
sessions = set()
signed = 0
total = 0
for e in all_events:
if isinstance(e, dict) and e.get("spec") == EVENT_SPEC:
total += 1
if e.get("session_id"):
sessions.add(e["session_id"])
if e.get("signature_ed25519"):
signed += 1
return {
"log_path": log_path,
"exists": os.path.exists(log_path),
"bytes": size,
"total_events": total,
"sessions": len(sessions),
"signed_events": signed,
"corrupt_lines_ignored": corrupt,
}
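# Shape of the diagnostics() result (illustrative values only):
#   {"log_path": "flightlog.jsonl", "exists": True, "bytes": 2048,
#    "total_events": 7, "sessions": 1, "signed_events": 7,
#    "corrupt_lines_ignored": 0}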
# ============================================================
# Export bundle
# ============================================================
def export_session_bundle(log_path: str, session_id: str) -> Tuple[Optional[str], str]:
sid = (session_id or "").strip()
if not sid:
return None, "Missing session_id."
all_events, _corrupt = read_jsonl(log_path)
evs = events_for_session(all_events, sid)
if not evs:
return None, "No events found."
evs.sort(key=lambda x: int(x.get("seq", 0)))
status, ok, report = verify_session_from_events(evs, sid, pk_hex="", require_signatures=False)
zip_name = f"rft_flight_bundle_{sid}.zip"
tmp_dir = "tmp_export"
os.makedirs(tmp_dir, exist_ok=True)
events_path = os.path.join(tmp_dir, f"{sid}_events.jsonl")
report_path = os.path.join(tmp_dir, f"{sid}_verify_report.txt")
with open(events_path, "w", encoding="utf-8") as f:
for e in evs:
f.write(json.dumps(e, ensure_ascii=False) + "\n")
with open(report_path, "w", encoding="utf-8") as f:
f.write(f"session_id: {sid}\n")
f.write(f"status: {status}\n")
f.write(f"ok: {ok}\n\n")
f.write(report + "\n")
with zipfile.ZipFile(zip_name, "w", compression=zipfile.ZIP_DEFLATED) as z:
z.write(events_path, arcname=os.path.basename(events_path))
z.write(report_path, arcname=os.path.basename(report_path))
return zip_name, f"OK. Exported {zip_name} ({len(evs)} events)."
# ============================================================
# Import bundle (third-party verification)
# ============================================================
def _read_jsonl_from_zip(z: zipfile.ZipFile, member: str) -> List[Dict[str, Any]]:
out: List[Dict[str, Any]] = []
raw_text = z.read(member).decode("utf-8", errors="replace")
for raw in raw_text.splitlines():
raw = raw.strip()
if not raw:
continue
try:
obj = json.loads(raw)
if isinstance(obj, dict):
out.append(obj)
except Exception:
continue
return out
def import_bundle_verify(
bundle_path: str,
pk_hex: str = "",
require_signatures: bool = False,
store_into_log: bool = False,
log_path: str = DEFAULT_LOG_PATH,
) -> Tuple[str, bool, str, Optional[str]]:
if not bundle_path or not os.path.exists(bundle_path):
return "Missing bundle file.", False, "", None
try:
with zipfile.ZipFile(bundle_path, "r") as z:
members = z.namelist()
events_member = None
for m in members:
if m.endswith("_events.jsonl"):
events_member = m
break
if not events_member:
for m in members:
if m.endswith(".jsonl"):
events_member = m
break
if not events_member:
return "No .jsonl events file found in bundle.", False, "", None
evs = _read_jsonl_from_zip(z, events_member)
except Exception:
return "Failed to read bundle (invalid zip?).", False, "", None
sid = ""
for e in evs:
if e.get("spec") == EVENT_SPEC and e.get("session_id"):
sid = e["session_id"]
break
if not sid:
return "Bundle contains no valid flight events.", False, "", None
evs = [e for e in evs if e.get("spec") == EVENT_SPEC and e.get("session_id") == sid]
evs.sort(key=lambda x: int(x.get("seq", 0)))
status, ok, report = verify_session_from_events(
evs, sid, pk_hex=pk_hex, require_signatures=require_signatures
)
stored_msg = None
if store_into_log and ok:
lock = FileLock(_lock_path(log_path))
try:
with lock.acquire(timeout=WRITE_LOCK_TIMEOUT):
for e in evs:
append_jsonl(log_path, e)
stored_msg = "Stored into local flightlog.jsonl."
except Timeout:
stored_msg = "Could not store: log file is locked."
return status, ok, report, stored_msg
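if __name__ == "__main__":
    # Bundle round-trip sketch (illustrative; writes throwaway files in the
    # working directory): record a tiny session, export it as a zip bundle,
    # then verify that bundle the way a third party would.
    demo_log = "demo_flightlog.jsonl"
    sid, msg = start_session(demo_log, "demo-model", "test", "bundle demo",
                             sign_start=False, sk_hex="")
    print(msg)
    finalise_session(demo_log, sid, sign_anchor=False, sk_hex="",
                     model_id="demo-model", run_mode="test")
    bundle, msg = export_session_bundle(demo_log, sid)
    print(msg)
    if bundle:
        status, _ok, report, _stored = import_bundle_verify(bundle)
        print(status)
        print(report)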