Spaces:

J94
/

bit-vector-tensor-control-policy

Runtime error

File size: 5,938 Bytes

3436bdd

#!/usr/bin/env python3
from __future__ import annotations

import json
import re
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

# Repository root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]

# NOTE(review): absolute, machine-specific staging directory; build() reads
# two inputs from it unconditionally — confirm it exists wherever this runs.
DATA_PREP = Path("/Users/jobs/Desktop/data_prep_stage")

# Artifacts read from / written to the repository's build tree.
BUILD_DIR = ROOT.joinpath("build", "system")
OUTPUT_PATH = BUILD_DIR.joinpath("memory_readiness.json")
TYPED_MEMORY_RUNTIME_PATH = BUILD_DIR.joinpath("typed_memory_runtime.json")
USER_GOVERNANCE_PATH = BUILD_DIR.joinpath("user_governance.json")

# Inputs that live in the external staging directory.
HOT_GRAPH_METRICS_PATH = DATA_PREP.joinpath("hot_graph_metrics_matrix.json")
AGENTMEMORY_SIDE_BY_SIDE_PATH = DATA_PREP.joinpath("agentmemory_side_by_side_20260413.md")


def utc_now() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    now = datetime.now(tz=timezone.utc)
    return now.strftime("%Y-%m-%dT%H:%M:%SZ")


def load_json(path: Path) -> dict[str, Any]:
    """Decode the UTF-8 text file at ``path`` as JSON and return the result.

    The decoded value is returned as-is; no check is made that the top-level
    JSON value is actually an object.
    """
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)


def parse_markdown_metric(text: str, label: str) -> str | None:
    """Extract the backtick-quoted value that follows ``label:`` in ``text``.

    ``label`` is matched literally. Returns the text between the first pair
    of backticks after the colon (optional whitespace allowed in between),
    or ``None`` when no such pattern occurs.
    """
    pattern = rf"{re.escape(label)}:\s*`([^`]+)`"
    found = re.search(pattern, text)
    if found is None:
        return None
    return found.group(1)


def parse_rank_metrics(text: str, label: str) -> dict[str, str | None]:
    """Extract R@1, R@5 and MRR values from the line that follows ``label:``.

    ``label`` is matched literally; everything after ``label:`` (and any
    whitespace) up to the end of that line is scanned for ``R@1 <num>``,
    ``R@5 <num>`` and ``MRR <num>``.

    Returns a dict with keys ``"r1"``, ``"r5"`` and ``"mrr"``. Each value is
    the matched number as a string (not converted), or ``None`` when that
    metric, or the label itself, is not found.
    """
    patterns = {
        "r1": r"R@1\s+([0-9.]+)",
        "r5": r"R@5\s+([0-9.]+)",
        "mrr": r"MRR\s+([0-9.]+)",
    }

    line_match = re.search(rf"{re.escape(label)}:\s*(.+)", text)
    if line_match is None:
        return {key: None for key in patterns}
    line = line_match.group(1)

    def _first_group(pattern: str) -> str | None:
        # Search once per metric and reuse the match object.
        found = re.search(pattern, line)
        return found.group(1) if found else None

    return {key: _first_group(pattern) for key, pattern in patterns.items()}


def latest_benchmark_summary() -> Path | None:
    """Return the ``summary.json`` under ``ROOT/runs/benchmark/*/`` that sorts last.

    Candidates are compared as paths, so "latest" means greatest in path
    ordering (presumably run directories are named so that this is the most
    recent run — verify against the naming scheme). Returns ``None`` when no
    summary file exists.
    """
    benchmark_root = ROOT / "runs" / "benchmark"
    return max(benchmark_root.glob("*/summary.json"), default=None)


def build() -> dict[str, Any]:
    """Assemble the memory-readiness report as a JSON-serialisable dict.

    Inputs:
      * ``HOT_GRAPH_METRICS_PATH`` (JSON) and ``AGENTMEMORY_SIDE_BY_SIDE_PATH``
        (markdown) are read unconditionally; a missing or unreadable file
        propagates the underlying exception.
      * The latest benchmark summary, ``USER_GOVERNANCE_PATH`` and
        ``TYPED_MEMORY_RUNTIME_PATH`` are optional; when absent an empty dict
        is used and the derived fields come out as ``None``.

    Returns:
        A dict with keys ``compiled_at``, ``one_liner``, ``surfaces``,
        ``gaps``, ``next_moves`` and ``governance_top_candidate``.
    """
    hot_graph = load_json(HOT_GRAPH_METRICS_PATH)
    side_by_side = AGENTMEMORY_SIDE_BY_SIDE_PATH.read_text(encoding="utf-8")
    benchmark_path = latest_benchmark_summary()
    benchmark = load_json(benchmark_path) if benchmark_path else {}
    governance = load_json(USER_GOVERNANCE_PATH) if USER_GOVERNANCE_PATH.exists() else {}
    typed_runtime = load_json(TYPED_MEMORY_RUNTIME_PATH) if TYPED_MEMORY_RUNTIME_PATH.exists() else {}

    exact_lookup = hot_graph.get("hot_graph", {}).get("exact_lookup", {})
    aggregate = benchmark.get("aggregate", {})
    hologram_latency = parse_markdown_metric(side_by_side, "hologram mean latency")
    hologram_metrics = parse_rank_metrics(side_by_side, "hologram metrics")
    raw_latency = parse_markdown_metric(side_by_side, "raw mean latency")

    return {
        "compiled_at": utc_now(),
        # A stale duplicate "one_liner" entry used to precede this one; Python
        # keeps only the last duplicate key, so this is the value that was
        # always emitted.
        "one_liner": "Current memory readiness shows strong exact lookup, strong structural discovery, and a live typed ledger runtime, but conversational and benchmark integration are still incomplete.",
        "surfaces": {
            "exact_lookup_floor": {
                "status": "present",
                "source": str(HOT_GRAPH_METRICS_PATH),
                "mean_us": exact_lookup.get("mean_us"),
                "p95_us": exact_lookup.get("p95_us"),
            },
            "structural_discovery_floor": {
                "status": "present",
                "source": str(AGENTMEMORY_SIDE_BY_SIDE_PATH),
                "hologram_mean_latency_ms": hologram_latency,
                "hologram_r1": hologram_metrics["r1"],
                "hologram_r5": hologram_metrics["r5"],
                "hologram_mrr": hologram_metrics["mrr"],
                "raw_mean_latency_ms": raw_latency,
            },
            "audit_floor": {
                # NOTE(review): status is reported as "present" even when no
                # benchmark summary was found (source is then None) — confirm
                # this is intended.
                "status": "present",
                "source": str(benchmark_path) if benchmark_path else None,
                "freshness_override_accuracy": aggregate.get("freshness_override_accuracy"),
                "unsupported_query_abstention_rate": aggregate.get("unsupported_query_abstention_rate"),
            },
            "typed_memory_ledger": {
                # An empty or missing runtime file both count as "missing_runtime".
                "status": "present_runtime" if typed_runtime else "missing_runtime",
                "law_path": str(ROOT / "policy" / "typed_memory_ledger_v0.json"),
                "schema_path": str(ROOT / "schemas" / "typed_memory_unit_v0.json"),
                "runtime_path": str(TYPED_MEMORY_RUNTIME_PATH) if typed_runtime else None,
                "unit_count": typed_runtime.get("unit_count"),
                "query_commands": typed_runtime.get("runtime_commands"),
            },
        },
        "gaps": [
            "typed memory runtime is not yet integrated into the conversational front door",
            "no dedicated benchmark yet for exact recall, temporal recall, and relational expand through the runtime",
            "structural discovery and exact recall remain separate subsystems",
        ],
        "next_moves": [
            {
                "id": "frontdoor_memory_queries",
                "why": "route exact and temporal memory questions through the shell before widening to semantic reasoning",
            },
            {
                "id": "memory_runtime_benchmark",
                "why": "measure exact lookup, as-of-time recall, and relational expand as first-class control behavior",
            },
            {
                "id": "reuse_recall_benchmark",
                "why": "measure prior-tool reuse and prior-artifact recall against the PM operator",
            },
        ],
        # First governance next-move's "primitive"; None when governance data
        # is absent or has no next moves.
        "governance_top_candidate": (governance.get("next_moves") or [{}])[0].get("primitive"),
    }


def main() -> int:
    """Build the readiness report, write it to ``OUTPUT_PATH``, and return 0.

    ``BUILD_DIR`` is created first if needed. The report is serialised as
    indented JSON with sorted keys and a trailing newline, and the output
    path is printed to stdout.
    """
    BUILD_DIR.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(build(), indent=2, sort_keys=True)
    OUTPUT_PATH.write_text(payload + "\n", encoding="utf-8")
    print(OUTPUT_PATH)
    return 0


# When executed as a script, exit with main()'s return value as the status.
if __name__ == "__main__":
    raise SystemExit(main())