#!/usr/bin/env python3
from __future__ import annotations
import json
import re
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
# Directory one level above the directory that contains this file.
ROOT = Path(__file__).resolve().parents[1]

# NOTE(review): absolute, machine-specific location; the script reads two
# inputs from here unconditionally -- confirm it exists wherever this runs.
DATA_PREP = Path("/Users/jobs/Desktop/data_prep_stage")

# Build artifacts (the report written by this script plus optional inputs).
BUILD_DIR = ROOT.joinpath("build", "system")
OUTPUT_PATH = BUILD_DIR.joinpath("memory_readiness.json")
TYPED_MEMORY_RUNTIME_PATH = BUILD_DIR.joinpath("typed_memory_runtime.json")
USER_GOVERNANCE_PATH = BUILD_DIR.joinpath("user_governance.json")

# Inputs read from the data-prep staging directory.
HOT_GRAPH_METRICS_PATH = DATA_PREP.joinpath("hot_graph_metrics_matrix.json")
AGENTMEMORY_SIDE_BY_SIDE_PATH = DATA_PREP.joinpath("agentmemory_side_by_side_20260413.md")
def utc_now() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    current = datetime.now(tz=timezone.utc)
    return current.strftime("%Y-%m-%dT%H:%M:%SZ")
def load_json(path: Path) -> dict[str, Any]:
    """Read the UTF-8 file at *path* and return its decoded JSON content.

    Raises whatever the underlying file open or JSON decoding raises
    (e.g. ``FileNotFoundError``, ``json.JSONDecodeError``).
    """
    with path.open("r", encoding="utf-8") as handle:
        return json.load(handle)
def parse_markdown_metric(text: str, label: str) -> str | None:
    """Return the backtick-quoted value that follows ``label:`` in *text*.

    The label is matched literally (it is regex-escaped). Only the first
    occurrence is considered. Returns ``None`` when no ``label: `value```
    pair is found.
    """
    pattern = rf"{re.escape(label)}:\s*`([^`]+)`"
    found = re.search(pattern, text)
    if found is None:
        return None
    return found.group(1)
def parse_rank_metrics(text: str, label: str) -> dict[str, str | None]:
    """Extract R@1, R@5 and MRR values from the ``label: ...`` line of *text*.

    The first line-remainder following ``label:`` is located (the label is
    matched literally), then each metric is looked up inside that remainder
    as ``<NAME><whitespace><digits-and-dots>``.

    Returns a dict with keys ``"r1"``, ``"r5"`` and ``"mrr"``; a value is the
    matched numeric text, or ``None`` when the label line or that particular
    metric is absent.
    """
    # One pattern per output key; each is searched exactly once below
    # instead of once to test and a second time to extract.
    metric_patterns = {
        "r1": r"R@1\s+([0-9.]+)",
        "r5": r"R@5\s+([0-9.]+)",
        "mrr": r"MRR\s+([0-9.]+)",
    }

    line_match = re.search(rf"{re.escape(label)}:\s*(.+)", text)
    # An empty remainder makes every metric search miss, which yields the
    # same all-None result as a missing label line.
    line = line_match.group(1) if line_match else ""

    metrics: dict[str, str | None] = {}
    for key, pattern in metric_patterns.items():
        found = re.search(pattern, line)
        metrics[key] = found.group(1) if found else None
    return metrics
def latest_benchmark_summary() -> Path | None:
    """Return the greatest ``runs/benchmark/*/summary.json`` path under ROOT.

    "Greatest" is by path ordering, i.e. the entry that would sort last.
    Returns ``None`` when no summary file exists.
    """
    # NOTE(review): presumably run directories are named so that path order
    # matches chronological order -- confirm against the benchmark runner.
    candidates = (ROOT / "runs" / "benchmark").glob("*/summary.json")
    return max(candidates, default=None)
def build() -> dict[str, Any]:
    """Assemble the memory-readiness report as a JSON-serializable dict.

    Inputs:
      * the hot-graph metrics JSON and the side-by-side markdown are read
        unconditionally, so a missing file raises from the underlying read;
      * the most recent benchmark summary, the user-governance JSON and the
        typed-memory runtime JSON are optional and default to ``{}``.

    Returns a dict with the keys ``compiled_at``, ``one_liner``,
    ``surfaces``, ``gaps``, ``next_moves`` and ``governance_top_candidate``.
    """
    hot_graph = load_json(HOT_GRAPH_METRICS_PATH)
    side_by_side = AGENTMEMORY_SIDE_BY_SIDE_PATH.read_text(encoding="utf-8")
    benchmark_path = latest_benchmark_summary()
    benchmark = load_json(benchmark_path) if benchmark_path else {}
    governance = load_json(USER_GOVERNANCE_PATH) if USER_GOVERNANCE_PATH.exists() else {}
    typed_runtime = load_json(TYPED_MEMORY_RUNTIME_PATH) if TYPED_MEMORY_RUNTIME_PATH.exists() else {}

    exact_lookup = hot_graph.get("hot_graph", {}).get("exact_lookup", {})
    hologram_latency = parse_markdown_metric(side_by_side, "hologram mean latency")
    hologram_metrics = parse_rank_metrics(side_by_side, "hologram metrics")
    raw_latency = parse_markdown_metric(side_by_side, "raw mean latency")

    # Looked up once; both audit-floor fields read from the same mapping.
    aggregate = benchmark.get("aggregate", {})
    # Fall back to a single empty entry so indexing [0] is always safe when
    # "next_moves" is missing or empty.
    governance_moves = governance.get("next_moves") or [{}]

    return {
        "compiled_at": utc_now(),
        # The dict literal previously listed "one_liner" twice; only the last
        # value ever took effect, so the stale first entry has been dropped.
        "one_liner": "Current memory readiness shows strong exact lookup, strong structural discovery, and a live typed ledger runtime, but conversational and benchmark integration are still incomplete.",
        "surfaces": {
            "exact_lookup_floor": {
                "status": "present",
                "source": str(HOT_GRAPH_METRICS_PATH),
                "mean_us": exact_lookup.get("mean_us"),
                "p95_us": exact_lookup.get("p95_us"),
            },
            "structural_discovery_floor": {
                "status": "present",
                "source": str(AGENTMEMORY_SIDE_BY_SIDE_PATH),
                "hologram_mean_latency_ms": hologram_latency,
                "hologram_r1": hologram_metrics["r1"],
                "hologram_r5": hologram_metrics["r5"],
                "hologram_mrr": hologram_metrics["mrr"],
                "raw_mean_latency_ms": raw_latency,
            },
            "audit_floor": {
                "status": "present",
                "source": str(benchmark_path) if benchmark_path else None,
                "freshness_override_accuracy": aggregate.get("freshness_override_accuracy"),
                "unsupported_query_abstention_rate": aggregate.get("unsupported_query_abstention_rate"),
            },
            "typed_memory_ledger": {
                # An absent or empty runtime file is reported as missing.
                "status": "present_runtime" if typed_runtime else "missing_runtime",
                "law_path": str(ROOT / "policy" / "typed_memory_ledger_v0.json"),
                "schema_path": str(ROOT / "schemas" / "typed_memory_unit_v0.json"),
                "runtime_path": str(TYPED_MEMORY_RUNTIME_PATH) if typed_runtime else None,
                "unit_count": typed_runtime.get("unit_count"),
                "query_commands": typed_runtime.get("runtime_commands"),
            },
        },
        "gaps": [
            "typed memory runtime is not yet integrated into the conversational front door",
            "no dedicated benchmark yet for exact recall, temporal recall, and relational expand through the runtime",
            "structural discovery and exact recall remain separate subsystems",
        ],
        "next_moves": [
            {
                "id": "frontdoor_memory_queries",
                "why": "route exact and temporal memory questions through the shell before widening to semantic reasoning",
            },
            {
                "id": "memory_runtime_benchmark",
                "why": "measure exact lookup, as-of-time recall, and relational expand as first-class control behavior",
            },
            {
                "id": "reuse_recall_benchmark",
                "why": "measure prior-tool reuse and prior-artifact recall against the PM operator",
            },
        ],
        "governance_top_candidate": governance_moves[0].get("primitive"),
    }
def main() -> int:
    """Build the readiness report, write it to OUTPUT_PATH, and return 0.

    The build directory is created first if needed. The report is written
    as indented, key-sorted JSON with a trailing newline, and the output
    path is printed to stdout.
    """
    BUILD_DIR.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(build(), indent=2, sort_keys=True)
    OUTPUT_PATH.write_text(payload + "\n", encoding="utf-8")
    print(OUTPUT_PATH)
    return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|