Spaces:

minhtudragon
/

headroom

Running

App Files Files Community

JerrettDavis committed on Apr 16

Commit

7ecbffe

2 Parent(s): 545dffd 7c97511

Merge branch 'chopratejas:main' into ci/release-automation

Browse files

Files changed (39) hide show

.github/dependabot.yml +27 -0
.github/workflows/docker.yml +17 -0
.github/workflows/publish.yml +15 -2
.gitignore +3 -0
Dockerfile +7 -3
README.md +32 -35
headroom/cli/learn.py +14 -1
headroom/cli/mcp.py +23 -3
headroom/cli/proxy.py +35 -4
headroom/cli/wrap.py +260 -12
headroom/install/state.py +16 -7
headroom/learn/base.py +9 -3
headroom/learn/plugins/claude.py +18 -10
headroom/learn/plugins/codex.py +17 -6
headroom/learn/plugins/gemini.py +14 -6
headroom/memory/factory.py +10 -0
headroom/memory/mcp_server.py +375 -0
headroom/memory/sync.py +395 -0
headroom/memory/sync_adapters/__init__.py +1 -0
headroom/memory/sync_adapters/claude_code.py +233 -0
headroom/memory/sync_adapters/codex_agent.py +106 -0
headroom/memory/writers/claude_writer.py +2 -1
headroom/proxy/handlers/openai.py +370 -20
headroom/proxy/handlers/streaming.py +7 -4
headroom/proxy/memory_handler.py +33 -13
headroom/proxy/models.py +3 -0
headroom/proxy/request_logger.py +23 -7
headroom/telemetry/toin.py +4 -2
headroom/transforms/kompress_compressor.py +98 -79
headroom/transforms/smart_crusher.py +60 -48
plugins/openclaw/package.json +54 -54
tests/test_cli/test_wrap_copilot.py +17 -4
tests/test_learn/test_scanner.py +64 -0
tests/test_memory_sync.py +647 -0
tests/test_package_init_lazy.py +2 -1
tests/test_transforms/test_kompress_compressor.py +2 -3
tests/test_transforms/test_smart_crusher_bugs.py +212 -0
tests/test_transforms/test_universal_json_crush.py +20 -18
tests/test_ws_memory_relay.py +523 -0

.github/dependabot.yml ADDED Viewed

	@@ -0,0 +1,27 @@

+version: 2
+updates:
+  # Docker base image digest updates
+  - package-ecosystem: docker
+    directory: /
+    schedule:
+      interval: weekly
+    commit-message:
+      prefix: "docker"
+  # GitHub Actions version updates
+  - package-ecosystem: github-actions
+    directory: /
+    schedule:
+      interval: weekly
+    commit-message:
+      prefix: "ci"
+  # Python dependency updates (pip)
+  - package-ecosystem: pip
+    directory: /
+    schedule:
+      interval: weekly
+    commit-message:
+      prefix: "deps"
+    # Cap the number of simultaneously open version-update PRs to avoid noise
+    open-pull-requests-limit: 5

.github/workflows/docker.yml CHANGED Viewed

@@ -12,6 +12,7 @@ env:
 permissions:
   contents: read
   packages: write
 jobs:
   docker-variant-tags:
@@ -81,3 +82,19 @@ jobs:
           set: |
             *.cache-from=type=gha
             *.cache-to=type=gha,mode=max

 permissions:
   contents: read
   packages: write
+  id-token: write # For cosign keyless signing via Sigstore OIDC
 jobs:
   docker-variant-tags:
           set: |
             *.cache-from=type=gha
             *.cache-to=type=gha,mode=max
+      - name: Install cosign
+        uses: sigstore/cosign-installer@v3
+      - name: Sign images with cosign (keyless via Sigstore OIDC)
+        env:
+          BAKE_META: ${{ steps.bake.outputs.metadata }}
+        run: |
+          # Extract all pushed image digests from bake metadata and sign each
+          echo "$BAKE_META" | jq -r '
+            to_entries[].value."containerimage.digest" // empty
+          ' | while read -r digest; do
+            image="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@${digest}"
+            echo "Signing ${image}"
+            cosign sign --yes "${image}"
+          done

.github/workflows/publish.yml CHANGED Viewed

@@ -11,7 +11,8 @@ jobs:
     runs-on: ubuntu-latest
     environment: pypi
     permissions:
-      id-token: write  # For trusted publishing
     steps:
       - uses: actions/checkout@v4
@@ -23,11 +24,23 @@ jobs:
       - name: Install build tools
         run: |
-          python -m pip install --upgrade pip build
       - name: Build package
         run: |
           python -m build
       - name: Publish to PyPI
         uses: pypa/gh-action-pypi-publish@release/v1

     runs-on: ubuntu-latest
     environment: pypi
     permissions:
+      id-token: write # For trusted publishing
+      contents: write # For uploading release assets
     steps:
       - uses: actions/checkout@v4
       - name: Install build tools
         run: |
+          python -m pip install --upgrade pip build cyclonedx-bom
       - name: Build package
         run: |
           python -m build
+      - name: Generate SBOM (CycloneDX)
+        run: |
+          pip install -e ".[proxy]"
+          cyclonedx-py environment \
+            --output-format json \
+            --outfile dist/headroom-sbom.cdx.json
+      - name: Upload SBOM to release
+        uses: softprops/action-gh-release@v2
+        with:
+          files: dist/headroom-sbom.cdx.json
       - name: Publish to PyPI
         uses: pypa/gh-action-pypi-publish@release/v1

.gitignore CHANGED Viewed

@@ -12,6 +12,9 @@ scripts/*
 # Swift SDK (separate repo)
 swift/
 # Audit/scan outputs (contain security findings — never commit)
 bandit_result.txt
 pip_audit_result.txt

 # Swift SDK (separate repo)
 swift/
+# Local planning docs (never commit)
+ENTERPRISE_HARDENING.md
 # Audit/scan outputs (contain security findings — never commit)
 bandit_result.txt
 pip_audit_result.txt

Dockerfile CHANGED Viewed

@@ -1,10 +1,14 @@
 ARG PYTHON_VERSION=3.11
 ARG UV_VERSION=0.6.17
 ARG DISTROLESS_IMAGE=gcr.io/distroless/python3-debian13
 ARG PYTHON_SITE_PACKAGES=/usr/local/lib/python${PYTHON_VERSION}/site-packages
 # ---- Build stage: compile native extensions, build wheel ----
-FROM python:${PYTHON_VERSION}-slim AS builder
 ARG UV_VERSION
@@ -32,7 +36,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system --no-deps --reinstall-package headroom-ai .
 # ---- Runtime stage (python-slim): supports root/nonroot via build arg ----
-FROM python:${PYTHON_VERSION}-slim AS runtime-slim-base
 ARG RUNTIME_USER=nonroot
 ARG PYTHON_SITE_PACKAGES
@@ -69,7 +73,7 @@ HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \
 ENTRYPOINT ["headroom", "proxy"]
 CMD ["--host", "0.0.0.0", "--port", "8787"]
-FROM ${DISTROLESS_IMAGE} AS runtime-slim
 ARG RUNTIME_USER=nonroot
 ARG PYTHON_SITE_PACKAGES

 ARG PYTHON_VERSION=3.11
 ARG UV_VERSION=0.6.17
+# Pinned 2026-04-15. Update via Dependabot or: docker pull python:3.11-slim
+ARG PYTHON_DIGEST=sha256:233de06753d30d120b1a3ce359d8d3be8bda78524cd8f520c99883bfe33964cf
+# Pinned 2026-04-15. Update via Dependabot or: docker pull gcr.io/distroless/python3-debian13
+ARG DISTROLESS_DIGEST=sha256:ed3a4beb46f8f8baac068743ba1b1f95ea3f793422129cf6dd23967f779b6018
 ARG DISTROLESS_IMAGE=gcr.io/distroless/python3-debian13
 ARG PYTHON_SITE_PACKAGES=/usr/local/lib/python${PYTHON_VERSION}/site-packages
 # ---- Build stage: compile native extensions, build wheel ----
+FROM python:${PYTHON_VERSION}-slim@${PYTHON_DIGEST} AS builder
 ARG UV_VERSION
     uv pip install --system --no-deps --reinstall-package headroom-ai .
 # ---- Runtime stage (python-slim): supports root/nonroot via build arg ----
+FROM python:${PYTHON_VERSION}-slim@${PYTHON_DIGEST} AS runtime-slim-base
 ARG RUNTIME_USER=nonroot
 ARG PYTHON_SITE_PACKAGES
 ENTRYPOINT ["headroom", "proxy"]
 CMD ["--host", "0.0.0.0", "--port", "8787"]
+FROM ${DISTROLESS_IMAGE}@${DISTROLESS_DIGEST} AS runtime-slim
 ARG RUNTIME_USER=nonroot
 ARG PYTHON_SITE_PACKAGES

README.md CHANGED Viewed

@@ -155,9 +155,9 @@ OPENAI_BASE_URL=http://localhost:8787/v1 your-app
 Use `token` mode for short/medium sessions where raw compression savings matter most.
 Use `cache` mode for long-running chats where preserving prior-turn bytes improves provider cache reuse.
-Works with any language, any tool, any framework. **[Proxy docs](docs/proxy.md)**
-Prefer Docker as the runtime provider? See **[Docker-native install](docs/docker-install.md)**. Want Headroom to stay up in the background? See **[Persistent installs](docs/persistent-installs.md)**.
 ### Coding agents — one command
@@ -189,7 +189,7 @@ summary = ctx.get("research")               # Agent B reads (~80% smaller)
 full = ctx.get("research", full=True)       # Agent B gets original if needed
 ```
-Compress what moves between agents — any framework. **[SharedContext Guide](docs/shared-context.md)**
 ### MCP Tools (Claude Code, Cursor)
@@ -197,7 +197,7 @@ Compress what moves between agents — any framework. **[SharedContext Guide](do
 headroom mcp install && claude
 ```
-Gives your AI tool three MCP tools: `headroom_compress`, `headroom_retrieve`, `headroom_stats`. **[MCP Guide](docs/mcp.md)**
 ### Drop into your existing stack
@@ -219,7 +219,7 @@ Gives your AI tool three MCP tools: `headroom_compress`, `headroom_retrieve`, `h
 | **Codex / Aider** | Wrap | `headroom wrap codex` or `headroom wrap aider` |
 | **Always-on local proxy** | Persistent install | `headroom install apply --preset persistent-service --providers auto` |
-**[Full Integration Guide](docs/integration-guide.md)** | **[TypeScript SDK](docs/typescript-sdk.md)**
 ---
@@ -292,7 +292,7 @@ python -m headroom.evals suite --tier 1 -o eval_results/
 python -m headroom.evals suite --tier 1 --ci
 ```
-Full methodology: [Benchmarks](docs/benchmarks.md) | [Evals Framework](headroom/evals/README.md)
 ---
@@ -317,7 +317,7 @@ headroom wrap claude --memory    # Claude with persistent memory
 headroom wrap codex --memory     # Codex shares the SAME memory store
 ```
-Claude saves a fact, Codex reads it back. All agents sharing one proxy share one memory — project-scoped, user-isolated, with agent provenance tracking and automatic deduplication. No SDK changes needed. **[Memory docs](docs/memory.md)**
 ### Failure Learning
@@ -327,7 +327,7 @@ headroom learn --apply                # Write learnings to agent-native files
 headroom learn --agent codex --all    # Analyze all Codex sessions
 ```
-Plugin-based: reads conversation history from Claude Code, Codex, or Gemini CLI. Finds failure patterns, correlates with successes, writes corrections to CLAUDE.md / AGENTS.md / GEMINI.md. External plugins via entry points. **[Learn docs](docs/learn.md)**
 <p align="center">
   <img src="headroom_learn.gif" alt="headroom learn demo" width="800">
@@ -405,7 +405,7 @@ Context compression is a new space. Here's how the approaches differ:
   Originals are in the Compressed Store — nothing is thrown away.
 ```
-**Overhead**: 15-200ms compression latency (net positive for Sonnet/Opus). Full data: [Latency Benchmarks](docs/LATENCY_BENCHMARKS.md)
 ---
@@ -413,16 +413,16 @@ Context compression is a new space. Here's how the approaches differ:
 | Integration | Status | Docs |
 |-------------|--------|------|
-| `headroom wrap claude/copilot/codex/aider/cursor` | **Stable** | [Proxy Docs](docs/proxy.md) |
-| `compress()` — one function | **Stable** | [Integration Guide](docs/integration-guide.md) |
-| `SharedContext` — multi-agent | **Stable** | [SharedContext Guide](docs/shared-context.md) |
-| LiteLLM callback | **Stable** | [Integration Guide](docs/integration-guide.md#litellm) |
-| ASGI middleware | **Stable** | [Integration Guide](docs/integration-guide.md#asgi-middleware) |
-| Proxy server | **Stable** | [Proxy Docs](docs/proxy.md) |
-| Agno | **Stable** | [Agno Guide](docs/agno.md) |
-| MCP (Claude Code, Cursor, etc.) | **Stable** | [MCP Guide](docs/mcp.md) |
-| Strands | **Stable** | [Strands Guide](docs/strands.md) |
-| LangChain | **Stable** | [LangChain Guide](docs/langchain.md) |
 | **OpenClaw** | **Stable** | [OpenClaw plugin](#openclaw-plugin) |
 ---
@@ -521,23 +521,20 @@ Python 3.10+
 | | |
 |---|---|
-| [Integration Guide](docs/integration-guide.md) | LiteLLM, ASGI, compress(), proxy |
-| [Proxy Docs](docs/proxy.md) | Proxy server configuration |
-| [Architecture](docs/ARCHITECTURE.md) | How the pipeline works |
-| [CCR Guide](docs/ccr.md) | Reversible compression |
-| [Benchmarks](docs/benchmarks.md) | Accuracy validation |
-| [Latency Benchmarks](docs/LATENCY_BENCHMARKS.md) | Compression overhead & cost-benefit analysis |
-| [Limitations](docs/LIMITATIONS.md) | When compression helps, when it doesn't |
 | [Evals Framework](headroom/evals/README.md) | Prove compression preserves accuracy |
-| [Memory](docs/memory.md) | Cross-agent persistent memory with provenance + dedup |
-| [Agno](docs/agno.md) | Agno agent framework |
-| [MCP](docs/mcp.md) | Context engineering toolkit (compress, retrieve, stats) |
-| [SharedContext](docs/shared-context.md) | Compressed inter-agent context sharing |
-| [Learn](docs/learn.md) | Plugin-based failure learning (Claude, Codex, Gemini, extensible) |
-| [CLI Reference](docs/cli.md) | Complete command surface, help output, and Docker parity matrix |
-| [Docker-Native Install](docs/docker-install.md) | Host wrapper install, compose support, and Docker runtime behavior |
-| [Persistent Installs](docs/persistent-installs.md) | Service/task/docker deployment models and provider scopes |
-| [Configuration](docs/configuration.md) | All options |
 ---

 Use `token` mode for short/medium sessions where raw compression savings matter most.
 Use `cache` mode for long-running chats where preserving prior-turn bytes improves provider cache reuse.
+Works with any language, any tool, any framework. **[Proxy docs](docs/content/docs/proxy.mdx)**
+Prefer Docker as the runtime provider? See **[Installation — Docker](docs/content/docs/installation.mdx)**.
 ### Coding agents — one command
 full = ctx.get("research", full=True)       # Agent B gets original if needed
 ```
+Compress what moves between agents — any framework. **[SharedContext Guide](docs/content/docs/shared-context.mdx)**
 ### MCP Tools (Claude Code, Cursor)
 headroom mcp install && claude
 ```
+Gives your AI tool three MCP tools: `headroom_compress`, `headroom_retrieve`, `headroom_stats`. **[MCP Guide](docs/content/docs/mcp.mdx)**
 ### Drop into your existing stack
 | **Codex / Aider** | Wrap | `headroom wrap codex` or `headroom wrap aider` |
 | **Always-on local proxy** | Persistent install | `headroom install apply --preset persistent-service --providers auto` |
+**[Full Integration Guide](docs/content/docs/index.mdx)**
 ---
 python -m headroom.evals suite --tier 1 --ci
 ```
+Full methodology: [Benchmarks](docs/content/docs/benchmarks.mdx) | [Evals Framework](headroom/evals/README.md)
 ---
 headroom wrap codex --memory     # Codex shares the SAME memory store
 ```
+Claude saves a fact, Codex reads it back. All agents sharing one proxy share one memory — project-scoped, user-isolated, with agent provenance tracking and automatic deduplication. No SDK changes needed. **[Memory docs](docs/content/docs/memory.mdx)**
 ### Failure Learning
 headroom learn --agent codex --all    # Analyze all Codex sessions
 ```
+Plugin-based: reads conversation history from Claude Code, Codex, or Gemini CLI. Finds failure patterns, correlates with successes, writes corrections to CLAUDE.md / AGENTS.md / GEMINI.md. External plugins via entry points. **[Learn docs](docs/content/docs/failure-learning.mdx)**
 <p align="center">
   <img src="headroom_learn.gif" alt="headroom learn demo" width="800">
   Originals are in the Compressed Store — nothing is thrown away.
 ```
+**Overhead**: 15-200ms compression latency (net positive for Sonnet/Opus). Full data: [Benchmarks](docs/content/docs/benchmarks.mdx)
 ---
 | Integration | Status | Docs |
 |-------------|--------|------|
+| `headroom wrap claude/copilot/codex/aider/cursor` | **Stable** | [Proxy Docs](docs/content/docs/proxy.mdx) |
+| `compress()` — one function | **Stable** | [Integration Guide](docs/content/docs/index.mdx) |
+| `SharedContext` — multi-agent | **Stable** | [SharedContext Guide](docs/content/docs/shared-context.mdx) |
+| LiteLLM callback | **Stable** | [LiteLLM Guide](docs/content/docs/litellm.mdx) |
+| ASGI middleware | **Stable** | [Integration Guide](docs/content/docs/index.mdx) |
+| Proxy server | **Stable** | [Proxy Docs](docs/content/docs/proxy.mdx) |
+| Agno | **Stable** | [Agno Guide](docs/content/docs/agno.mdx) |
+| MCP (Claude Code, Cursor, etc.) | **Stable** | [MCP Guide](docs/content/docs/mcp.mdx) |
+| Strands | **Stable** | [Strands Guide](docs/content/docs/strands.mdx) |
+| LangChain | **Stable** | [LangChain Guide](docs/content/docs/langchain.mdx) |
 | **OpenClaw** | **Stable** | [OpenClaw plugin](#openclaw-plugin) |
 ---
 | | |
 |---|---|
+| [Integration Guide](docs/content/docs/index.mdx) | LiteLLM, ASGI, compress(), proxy |
+| [Proxy Docs](docs/content/docs/proxy.mdx) | Proxy server configuration |
+| [Architecture](docs/content/docs/architecture.mdx) | How the pipeline works |
+| [CCR Guide](docs/content/docs/ccr.mdx) | Reversible compression |
+| [Benchmarks](docs/content/docs/benchmarks.mdx) | Accuracy validation |
+| [Limitations](docs/content/docs/limitations.mdx) | When compression helps, when it doesn't |
 | [Evals Framework](headroom/evals/README.md) | Prove compression preserves accuracy |
+| [Memory](docs/content/docs/memory.mdx) | Cross-agent persistent memory with provenance + dedup |
+| [Agno](docs/content/docs/agno.mdx) | Agno agent framework |
+| [MCP](docs/content/docs/mcp.mdx) | Context engineering toolkit (compress, retrieve, stats) |
+| [SharedContext](docs/content/docs/shared-context.mdx) | Compressed inter-agent context sharing |
+| [Learn](docs/content/docs/failure-learning.mdx) | Plugin-based failure learning (Claude, Codex, Gemini, extensible) |
+| [Installation](docs/content/docs/installation.mdx) | pip, npm, Docker install methods |
+| [Configuration](docs/content/docs/configuration.mdx) | All options |
 ---

headroom/cli/learn.py CHANGED Viewed

@@ -90,12 +90,21 @@ Use 'auto' (default) to scan all detected agents."""
     help="LLM model for analysis (e.g., claude-sonnet-4-6, gpt-4o, gemini/gemini-2.0-flash). "
     "Auto-detected from API keys if not specified.",
 )
 def learn(
     project: Path | None,
     analyze_all: bool,
     apply: bool,
     agent: str,
     model: str | None,
 ) -> None:
     """Learn from past tool call failures to prevent future ones.
@@ -115,9 +124,13 @@ def learn(
         headroom learn --all                  # Analyze all projects
         headroom learn --agent codex --all    # Analyze all Codex sessions
     """
     from ..learn.analyzer import SessionAnalyzer, _detect_default_model
     from ..learn.registry import auto_detect_plugins, get_plugin
     # Resolve model early to fail fast with a clear message
     try:
         resolved_model = model or _detect_default_model()
@@ -185,7 +198,7 @@ def learn(
             click.echo(f"Path: {proj.project_path}")
             click.echo(f"{'=' * 60}")
-            sessions = plugin.scan_project(proj)
             if not sessions:
                 click.echo("  No conversation data found.")
                 continue

     help="LLM model for analysis (e.g., claude-sonnet-4-6, gpt-4o, gemini/gemini-2.0-flash). "
     "Auto-detected from API keys if not specified.",
 )
+@click.option(
+    "--workers",
+    "-j",
+    type=int,
+    default=None,
+    help="Parallel workers for session scanning. "
+    "Default: auto (min of CPU count, 8). Use 1 for serial.",
+)
 def learn(
     project: Path | None,
     analyze_all: bool,
     apply: bool,
     agent: str,
     model: str | None,
+    workers: int | None,
 ) -> None:
     """Learn from past tool call failures to prevent future ones.
         headroom learn --all                  # Analyze all projects
         headroom learn --agent codex --all    # Analyze all Codex sessions
     """
+    import os
     from ..learn.analyzer import SessionAnalyzer, _detect_default_model
     from ..learn.registry import auto_detect_plugins, get_plugin
+    max_workers = workers if workers is not None else min(os.cpu_count() or 4, 8)
     # Resolve model early to fail fast with a clear message
     try:
         resolved_model = model or _detect_default_model()
             click.echo(f"Path: {proj.project_path}")
             click.echo(f"{'=' * 60}")
+            sessions = plugin.scan_project(proj, max_workers=max_workers)
             if not sessions:
                 click.echo("  No conversation data found.")
                 continue

headroom/cli/mcp.py CHANGED Viewed

@@ -244,13 +244,33 @@ def mcp_uninstall() -> None:
                     err=True,
                 )
     # Also remove from mcp.json fallback config if present
     if MCP_CONFIG_PATH.exists():
         config = load_mcp_config()
-        if "headroom" in config.get("mcpServers", {}):
-            del config["mcpServers"]["headroom"]
             save_mcp_config(config)
-            click.echo(f"✓ Headroom MCP server removed from {MCP_CONFIG_PATH}")
             removed = True
     if not removed:

                     err=True,
                 )
+    # Also remove codebase-memory-mcp if registered (installed by --code-graph)
+    if claude_cli:
+        cbm_check = subprocess.run(
+            [claude_cli, "mcp", "get", "codebase-memory-mcp"],
+            capture_output=True,
+        )
+        if cbm_check.returncode == 0:
+            cbm_rm = subprocess.run(
+                [claude_cli, "mcp", "remove", "codebase-memory-mcp", "-s", "user"],
+                capture_output=True,
+                text=True,
+            )
+            if cbm_rm.returncode == 0:
+                click.echo("✓ codebase-memory-mcp MCP server removed")
+                removed = True
     # Also remove from mcp.json fallback config if present
     if MCP_CONFIG_PATH.exists():
         config = load_mcp_config()
+        changed = False
+        for server_name in ("headroom", "codebase-memory-mcp"):
+            if server_name in config.get("mcpServers", {}):
+                del config["mcpServers"][server_name]
+                changed = True
+        if changed:
             save_mcp_config(config)
+            click.echo(f"✓ MCP servers removed from {MCP_CONFIG_PATH}")
             removed = True
     if not removed:

headroom/cli/proxy.py CHANGED Viewed

@@ -179,6 +179,13 @@ from .main import main
     is_flag=True,
     help="Disable anonymous usage telemetry (env: HEADROOM_TELEMETRY=off)",
 )
 @click.pass_context
 def proxy(
     ctx: click.Context,
@@ -213,6 +220,7 @@ def proxy(
     bedrock_region: str | None,
     bedrock_profile: str | None,
     no_telemetry: bool,
 ) -> None:
     """Start the optimization proxy server.
@@ -251,10 +259,22 @@ def proxy(
         mode or os.environ.get("HEADROOM_MODE") or PROXY_MODE_TOKEN
     )
     # Telemetry opt-out: --no-telemetry flag sets the env var
     if no_telemetry:
         os.environ["HEADROOM_TELEMETRY"] = "off"
     # License key for managed/enterprise deployments (optional)
     license_key = os.environ.get("HEADROOM_LICENSE_KEY")
@@ -272,7 +292,7 @@ def proxy(
         connect_timeout_seconds=connect_timeout_seconds
         if connect_timeout_seconds is not None
         else 10,
-        log_file=log_file,
         budget_limit_usd=budget,
         # Code graph: live file watcher for incremental reindexing
         code_graph_watcher=code_graph,
@@ -284,13 +304,15 @@ def proxy(
         intelligent_context_compress_first=not no_compress_first,
         # Memory System (Multi-Provider with auto-detection)
         # --learn implies --memory (need backend for storing patterns)
-        memory_enabled=memory or (learn and not no_learn),
         memory_db_path=memory_db_path,
         memory_inject_tools=not no_memory_tools,
         memory_inject_context=not no_memory_context,
         memory_top_k=memory_top_k,
         # Traffic Learning: only with --learn, never with --no-learn
-        traffic_learning_enabled=learn and not no_learn,
         traffic_learning_agent_type=os.environ.get("HEADROOM_AGENT_TYPE", "unknown"),
         # Backend (Anthropic direct, Bedrock, LiteLLM, or any-llm)
         backend=backend,
@@ -299,6 +321,8 @@ def proxy(
         anyllm_provider=effective_anyllm_provider,
         # License / Usage Reporting (managed/enterprise)
         license_key=license_key,
     )
     memory_status = "DISABLED"
@@ -355,6 +379,13 @@ Memory (Multi-Provider):
   - Database: {config.memory_db_path}
 """
     from headroom.telemetry.beacon import is_telemetry_enabled
     # Build telemetry section for the startup banner
@@ -381,7 +412,7 @@ Starting proxy server...
   Rate Limit:   {"ENABLED" if config.rate_limit_enabled else "DISABLED"}
   Memory:       {memory_status}
   License:      {license_status}
-{telemetry_line}
 {backend_section}
 Routing:
   /v1/messages         → {anthropic_url}

     is_flag=True,
     help="Disable anonymous usage telemetry (env: HEADROOM_TELEMETRY=off)",
 )
+@click.option(
+    "--stateless",
+    is_flag=True,
+    help="Disable all filesystem writes — run purely in-memory. "
+    "For containerized / read-only / load-balanced deployments. "
+    "(env: HEADROOM_STATELESS=true)",
+)
 @click.pass_context
 def proxy(
     ctx: click.Context,
     bedrock_region: str | None,
     bedrock_profile: str | None,
     no_telemetry: bool,
+    stateless: bool,
 ) -> None:
     """Start the optimization proxy server.
         mode or os.environ.get("HEADROOM_MODE") or PROXY_MODE_TOKEN
     )
+    # Stateless mode: CLI flag or env var
+    is_stateless = stateless or os.environ.get("HEADROOM_STATELESS", "").lower() in (
+        "true",
+        "1",
+        "yes",
+        "on",
+    )
     # Telemetry opt-out: --no-telemetry flag sets the env var
     if no_telemetry:
         os.environ["HEADROOM_TELEMETRY"] = "off"
+    # Stateless mode: suppress TOIN filesystem persistence
+    if is_stateless:
+        os.environ["HEADROOM_TOIN_BACKEND"] = "none"
     # License key for managed/enterprise deployments (optional)
     license_key = os.environ.get("HEADROOM_LICENSE_KEY")
         connect_timeout_seconds=connect_timeout_seconds
         if connect_timeout_seconds is not None
         else 10,
+        log_file=None if is_stateless else log_file,
         budget_limit_usd=budget,
         # Code graph: live file watcher for incremental reindexing
         code_graph_watcher=code_graph,
         intelligent_context_compress_first=not no_compress_first,
         # Memory System (Multi-Provider with auto-detection)
         # --learn implies --memory (need backend for storing patterns)
+        # Stateless mode disables memory (requires SQLite on disk)
+        memory_enabled=False if is_stateless else (memory or (learn and not no_learn)),
         memory_db_path=memory_db_path,
         memory_inject_tools=not no_memory_tools,
         memory_inject_context=not no_memory_context,
         memory_top_k=memory_top_k,
         # Traffic Learning: only with --learn, never with --no-learn
+        # Stateless mode disables learning (requires filesystem)
+        traffic_learning_enabled=False if is_stateless else (learn and not no_learn),
         traffic_learning_agent_type=os.environ.get("HEADROOM_AGENT_TYPE", "unknown"),
         # Backend (Anthropic direct, Bedrock, LiteLLM, or any-llm)
         backend=backend,
         anyllm_provider=effective_anyllm_provider,
         # License / Usage Reporting (managed/enterprise)
         license_key=license_key,
+        # Stateless mode: disable all filesystem writes
+        stateless=is_stateless,
     )
     memory_status = "DISABLED"
   - Database: {config.memory_db_path}
 """
+    # Stateless mode warning
+    stateless_line = ""
+    if is_stateless:
+        stateless_line = (
+            "  Stateless:    YES (no filesystem writes — memory, logs, TOIN disabled)\n"
+        )
     from headroom.telemetry.beacon import is_telemetry_enabled
     # Build telemetry section for the startup banner
   Rate Limit:   {"ENABLED" if config.rate_limit_enabled else "DISABLED"}
   Memory:       {memory_status}
   License:      {license_status}
+{stateless_line}{telemetry_line}
 {backend_section}
 Routing:
   /v1/messages         → {anthropic_url}

headroom/cli/wrap.py CHANGED Viewed

@@ -192,13 +192,51 @@ def _setup_rtk(verbose: bool = False) -> Path | None:
     return rtk_path
 def _setup_code_graph(verbose: bool = False) -> bool:
-    """Ensure codebase-memory-mcp is installed and project is indexed.
     codebase-memory-mcp builds a knowledge graph of the codebase using
     tree-sitter, enabling the LLM to query code structure (call chains,
     function definitions, impact analysis) instead of reading entire files.
     With Claude Code's MCP Tool Search, the 14 graph tools add ~200 tokens
     overhead per request (not the full ~1,915) — they're lazy-loaded.
@@ -218,6 +256,9 @@ def _setup_code_graph(verbose: bool = False) -> bool:
     cbm_bin = str(cbm_path)
     # Index current project (fast — ~1s for most repos, idempotent)
     project_dir = str(Path.cwd())
     try:
@@ -320,6 +361,11 @@ rtk pip list            rtk pnpm install        rtk npm run <script>
 # Marker used to detect if instructions are already injected
 _RTK_MARKER = "<!-- headroom:rtk-instructions -->"
 def _ensure_rtk_binary(verbose: bool = False) -> Path | None:
     """Ensure rtk binary is installed (download if needed). No hook registration."""
@@ -364,11 +410,12 @@ def _inject_codex_provider_config(port: int) -> None:
     config_dir = Path.home() / ".codex"
     config_file = config_dir / "config.toml"
-    headroom_section = (
-        f"\n# --- Headroom proxy (auto-injected by headroom wrap codex) ---\n"
-        f'model_provider = "headroom"\n'
-        f"\n"
-        f"[model_providers.headroom]\n"
         f'name = "OpenAI via Headroom proxy"\n'
         f'base_url = "http://127.0.0.1:{port}/v1"\n'
         f'env_key = "OPENAI_API_KEY"\n'
@@ -377,7 +424,7 @@ def _inject_codex_provider_config(port: int) -> None:
         f"# --- end Headroom ---\n"
     )
-    marker = "# --- Headroom proxy (auto-injected by headroom wrap codex) ---"
     end_marker = "# --- end Headroom ---"
     try:
@@ -386,14 +433,20 @@ def _inject_codex_provider_config(port: int) -> None:
         if config_file.exists():
             content = config_file.read_text()
             if marker in content:
-                # Replace existing section
                 start = content.index(marker)
                 end = content.index(end_marker) + len(end_marker)
-                content = content[:start].rstrip() + headroom_section + content[end:].lstrip("\n")
-            else:
-                content = content.rstrip() + "\n" + headroom_section
         else:
-            content = headroom_section
         config_file.write_text(content)
         click.echo(f"  Codex config: injected Headroom provider (WS + HTTP) into {config_file}")
@@ -424,6 +477,81 @@ def _inject_rtk_instructions(file_path: Path, verbose: bool = False) -> bool:
     return True
 def _resolve_copilot_provider_type(backend: str | None, provider_type: str) -> str:
     """Resolve Copilot BYOK provider type for the current proxy backend."""
     if provider_type != "auto":
@@ -1088,6 +1216,48 @@ def claude(
     signal.signal(signal.SIGINT, cleanup)
     signal.signal(signal.SIGTERM, cleanup)
     try:
         click.echo()
         click.echo("  ╔═══════════════════════════════════════════════╗")
@@ -1241,6 +1411,20 @@ def copilot(
     env["COPILOT_PROVIDER_TYPE"] = effective_provider_type
     env.pop("COPILOT_PROVIDER_WIRE_API", None)
     env_vars_display: list[str]
     if effective_provider_type == "anthropic":
         env["COPILOT_PROVIDER_BASE_URL"] = f"http://127.0.0.1:{port}"
@@ -1258,6 +1442,19 @@ def copilot(
             f"COPILOT_PROVIDER_WIRE_API={effective_wire_api}",
         ]
     if not _copilot_model_configured(copilot_args, env):
         click.echo(
             "  Note: Copilot BYOK requires a model. Pass `--model <name>` "
@@ -1357,6 +1554,49 @@ def codex(
             global_agents = Path.home() / ".codex" / "AGENTS.md"
             _inject_rtk_instructions(global_agents, verbose=verbose)
     if prepare_only:
         _inject_codex_provider_config(port)
         return
@@ -1373,7 +1613,15 @@ def codex(
     # Inject Headroom provider into Codex config so WebSocket traffic also
     # routes through the proxy.  Codex ignores OPENAI_BASE_URL for its WS
     # transport unless a custom provider declares supports_websockets = true.
     _inject_codex_provider_config(port)
     _launch_tool(
         binary=codex_bin,

     return rtk_path
+_CBM_MCP_SERVER_NAME = "codebase-memory-mcp"
+def _register_cbm_mcp_server(cbm_bin: str) -> None:
+    """Register codebase-memory-mcp as a user-scoped MCP server in Claude Code.
+    Uses ``claude mcp add`` so the tools appear in ``/mcp`` automatically.
+    Idempotent — skips registration when ``claude mcp get`` already succeeds.
+    Best-effort: a missing ``claude`` CLI, a CLI that cannot be executed, a
+    timeout, or a failed ``add`` are all ignored silently, because the code
+    graph still works without the MCP registration.
+    Args:
+        cbm_bin: Command passed to ``claude mcp add`` as the server executable.
+    """
+    claude_cli = shutil.which("claude")
+    if not claude_cli:
+        return
+    try:
+        # Exit code 0 from ``claude mcp get`` is treated as "already registered".
+        check = subprocess.run(
+            [claude_cli, "mcp", "get", _CBM_MCP_SERVER_NAME],
+            capture_output=True,
+            text=True,
+            timeout=30,
+        )
+        if check.returncode == 0:
+            return  # Already registered
+        result = subprocess.run(
+            [claude_cli, "mcp", "add", _CBM_MCP_SERVER_NAME, "-s", "user", "--", cbm_bin],
+            capture_output=True,
+            text=True,
+            timeout=30,
+        )
+    except (OSError, subprocess.TimeoutExpired):
+        # Non-critical — never let a hung or unrunnable CLI block or crash setup.
+        return
+    if result.returncode == 0:
+        click.echo(f"  Code graph: registered {_CBM_MCP_SERVER_NAME} MCP server")
+    # On a non-zero exit the tools won't appear in /mcp, but the graph still works.
 def _setup_code_graph(verbose: bool = False) -> bool:
+    """Ensure codebase-memory-mcp is installed, registered as MCP server, and project is indexed.
     codebase-memory-mcp builds a knowledge graph of the codebase using
     tree-sitter, enabling the LLM to query code structure (call chains,
     function definitions, impact analysis) instead of reading entire files.
+    Steps:
+    1. Download the binary if not already present.
+    2. Register as an MCP server in Claude Code (``claude mcp add``).
+    3. Index the current project (fast, idempotent).
     With Claude Code's MCP Tool Search, the 14 graph tools add ~200 tokens
     overhead per request (not the full ~1,915) — they're lazy-loaded.
     cbm_bin = str(cbm_path)
+    # Register as MCP server so tools appear in /mcp
+    _register_cbm_mcp_server(cbm_bin)
     # Index current project (fast — ~1s for most repos, idempotent)
     project_dir = str(Path.cwd())
     try:
 # Marker used to detect if instructions are already injected
 _RTK_MARKER = "<!-- headroom:rtk-instructions -->"
+# Memory MCP markers
+_MEMORY_MCP_MARKER = "# --- Headroom memory MCP (auto-injected) ---"
+_MEMORY_MCP_END = "# --- end Headroom memory ---"
+_MEMORY_AGENTS_MARKER = "<!-- headroom:memory-instructions -->"
 def _ensure_rtk_binary(verbose: bool = False) -> Path | None:
     """Ensure rtk binary is installed (download if needed). No hook registration."""
     config_dir = Path.home() / ".codex"
     config_file = config_dir / "config.toml"
+    # model_provider must be a top-level TOML key (before any [section]).
+    # The [model_providers.headroom] table can go at the end.
+    top_level_marker = "# --- Headroom proxy (auto-injected by headroom wrap codex) ---"
+    top_level_block = f'{top_level_marker}\nmodel_provider = "headroom"\n'
+    provider_section = (
+        f"\n[model_providers.headroom]\n"
         f'name = "OpenAI via Headroom proxy"\n'
         f'base_url = "http://127.0.0.1:{port}/v1"\n'
         f'env_key = "OPENAI_API_KEY"\n'
         f"# --- end Headroom ---\n"
     )
+    marker = top_level_marker
     end_marker = "# --- end Headroom ---"
     try:
         if config_file.exists():
             content = config_file.read_text()
             if marker in content:
+                # Remove existing Headroom blocks entirely
                 start = content.index(marker)
                 end = content.index(end_marker) + len(end_marker)
+                content = content[:start].rstrip("\n") + content[end:].lstrip("\n")
+            # Strip any stale top-level model_provider left behind
+            import re
+            content = re.sub(r'\nmodel_provider\s*=\s*"headroom"\n', "\n", content)
+            # Place top-level key at the very beginning, provider table at the end
+            content = top_level_block + "\n" + content.strip() + "\n" + provider_section
         else:
+            content = top_level_block + "\n" + provider_section
         config_file.write_text(content)
         click.echo(f"  Codex config: injected Headroom provider (WS + HTTP) into {config_file}")
     return True
+def _inject_memory_mcp_config(db_path: str, user_id: str) -> None:
+    """Register headroom memory as an MCP server in Codex's config.toml.
+    Idempotent — replaces the existing marker-delimited section if present,
+    otherwise appends a new one. Any failure is reported with a warning and
+    never raised to the caller.
+    Args:
+        db_path: Path of the memory database passed to the server via ``--db``.
+        user_id: User identifier passed to the server via ``--user``.
+    """
+    import json
+    import sys
+    config_dir = Path.home() / ".codex"
+    config_file = config_dir / "config.toml"
+    # Use forward slashes in TOML paths (works on all platforms, avoids
+    # backslash escaping issues on Windows)
+    python_bin = sys.executable.replace("\\", "/")
+    db_path_toml = db_path.replace("\\", "/")
+    # JSON string escapes (\" \\ \n \t \uXXXX ...) are also valid inside TOML
+    # basic strings, so json.dumps keeps the file parseable even when a path or
+    # user name contains quotes or backslashes.
+    command_toml = json.dumps(python_bin, ensure_ascii=False)
+    args_toml = json.dumps(
+        ["-m", "headroom.memory.mcp_server", "--db", db_path_toml, "--user", user_id],
+        ensure_ascii=False,
+    )
+    mcp_section = (
+        f"\n{_MEMORY_MCP_MARKER}\n"
+        f"[mcp_servers.headroom_memory]\n"
+        f"command = {command_toml}\n"
+        f"args = {args_toml}\n"
+        f"startup_timeout_sec = 30\n"
+        f"tool_timeout_sec = 30\n"
+        f"{_MEMORY_MCP_END}\n"
+    )
+    try:
+        config_dir.mkdir(parents=True, exist_ok=True)
+        if config_file.exists():
+            content = config_file.read_text()
+            start = content.find(_MEMORY_MCP_MARKER)
+            if start != -1:
+                # Look for the end marker only after the start marker, so a
+                # stray earlier end marker cannot produce a bogus splice.
+                end_at = content.find(_MEMORY_MCP_END, start)
+                if end_at == -1:
+                    raise ValueError(f"unterminated Headroom memory block in {config_file}")
+                end = end_at + len(_MEMORY_MCP_END)
+                content = content[:start].rstrip("\n") + mcp_section + content[end:].lstrip("\n")
+            else:
+                content = content.rstrip() + "\n" + mcp_section
+        else:
+            content = mcp_section
+        config_file.write_text(content)
+        click.echo(f"  Memory MCP: registered in {config_file}")
+    except Exception as e:
+        click.echo(f"  Warning: could not register memory MCP: {e}")
+def _inject_memory_agents_md(file_path: Path) -> bool:
+    """Add Headroom memory guidance to an AGENTS.md file.
+    Creates the file (and its parent directories) when it does not exist,
+    appends to it otherwise. A file that already contains the memory marker
+    is left untouched, which makes repeated calls idempotent.
+    Returns:
+        Always ``True``.
+    """
+    guidance = (
+        f"{_MEMORY_AGENTS_MARKER}\n"
+        "## Memory\n\n"
+        "Use the `headroom_memory` MCP server for persistent cross-session knowledge.\n\n"
+        "**Before** answering questions about prior decisions, conventions, project context,\n"
+        "architecture, user preferences, org info, codenames, debugging history, or anything\n"
+        "from past sessions — call `memory_search` first.\n\n"
+        "**After** making durable decisions, discovering conventions, or learning important\n"
+        "facts — call `memory_save` to persist them for future sessions.\n\n"
+        "Memory is your first source of truth for anything not visible in the current conversation.\n"
+    )
+    if not file_path.exists():
+        # Fresh file: the guidance block becomes the whole content.
+        file_path.parent.mkdir(parents=True, exist_ok=True)
+        file_path.write_text(guidance)
+    elif _MEMORY_AGENTS_MARKER in file_path.read_text():
+        return True  # Marker found, nothing to add
+    else:
+        # Existing file without the marker: append after a blank-line gap.
+        with open(file_path, "a") as handle:
+            handle.write("\n\n" + guidance)
+    click.echo(f"  Memory guidance injected into {file_path.name}")
+    return True
 def _resolve_copilot_provider_type(backend: str | None, provider_type: str) -> str:
     """Resolve Copilot BYOK provider type for the current proxy backend."""
     if provider_type != "auto":
     signal.signal(signal.SIGINT, cleanup)
     signal.signal(signal.SIGTERM, cleanup)
+    # Memory sync BEFORE proxy startup — sync headroom DB ↔ Claude's files
+    if memory:
+        try:
+            import subprocess as _sp
+            mem_dir = Path.cwd() / ".headroom"
+            mem_dir.mkdir(parents=True, exist_ok=True)
+            _sync_db = str(mem_dir / "memory.db")
+            _sync_user = os.environ.get("USER", os.environ.get("USERNAME", "default"))
+            click.echo(f"  Syncing memory (user={_sync_user})...")
+            sync_result = _sp.run(
+                [
+                    sys.executable,
+                    "-m",
+                    "headroom.memory.sync",
+                    "--db",
+                    _sync_db,
+                    "--user",
+                    _sync_user,
+                    "--agent",
+                    "claude",
+                    "--force",
+                ],
+                capture_output=True,
+                text=True,
+                timeout=30,
+            )
+            if sync_result.returncode == 0 and sync_result.stdout.strip():
+                import json as _json
+                stats = _json.loads(sync_result.stdout.strip().split("\n")[-1])
+                imp, exp, ms = stats["imported"], stats["exported"], stats["ms"]
+                if imp or exp:
+                    click.echo(f"  Memory synced: {imp} imported, {exp} exported ({ms}ms)")
+                else:
+                    click.echo(f"  Memory: up to date ({ms}ms)")
+            elif sync_result.returncode != 0:
+                click.echo(f"  Warning: memory sync error: {sync_result.stderr[-200:]}")
+        except Exception as e:
+            click.echo(f"  Warning: memory sync failed: {e}")
     try:
         click.echo()
         click.echo("  ╔═══════════════════════════════════════════════╗")
     env["COPILOT_PROVIDER_TYPE"] = effective_provider_type
     env.pop("COPILOT_PROVIDER_WIRE_API", None)
+    # Copilot BYOK requires COPILOT_PROVIDER_API_KEY — propagate from the
+    # user's existing provider key so they don't have to set it twice.
+    # Note: `headroom wrap copilot` uses Copilot's BYOK mode, which bypasses
+    # GitHub's Copilot API and talks directly to the model provider through
+    # the Headroom proxy. This requires the provider's own API key — a GitHub
+    # Copilot subscription alone is not sufficient for BYOK mode.
+    if not env.get("COPILOT_PROVIDER_API_KEY"):
+        if effective_provider_type == "anthropic":
+            _key = env.get("ANTHROPIC_API_KEY", "")
+        else:
+            _key = env.get("OPENAI_API_KEY", "")
+        if _key:
+            env["COPILOT_PROVIDER_API_KEY"] = _key
     env_vars_display: list[str]
     if effective_provider_type == "anthropic":
         env["COPILOT_PROVIDER_BASE_URL"] = f"http://127.0.0.1:{port}"
             f"COPILOT_PROVIDER_WIRE_API={effective_wire_api}",
         ]
+    if not env.get("COPILOT_PROVIDER_API_KEY"):
+        src = "ANTHROPIC_API_KEY" if effective_provider_type == "anthropic" else "OPENAI_API_KEY"
+        click.echo(
+            f"\n  Error: Copilot BYOK mode requires a provider API key.\n"
+            f"  `headroom wrap copilot` uses Copilot's BYOK mode, which bypasses GitHub's\n"
+            f"  Copilot API and routes requests directly to the model provider through the\n"
+            f"  Headroom proxy. A GitHub Copilot subscription alone is not sufficient.\n\n"
+            f"  Set one of:\n"
+            f"    export {src}=sk-...          # recommended\n"
+            f"    export COPILOT_PROVIDER_API_KEY=sk-...  # also works\n"
+        )
+        raise SystemExit(1)
     if not _copilot_model_configured(copilot_args, env):
         click.echo(
             "  Note: Copilot BYOK requires a model. Pass `--model <name>` "
             global_agents = Path.home() / ".codex" / "AGENTS.md"
             _inject_rtk_instructions(global_agents, verbose=verbose)
+    # Setup memory MCP server for Codex (native tool integration)
+    if memory:
+        click.echo("  Setting up memory for Codex...")
+        mem_dir = Path.cwd() / ".headroom"
+        mem_dir.mkdir(parents=True, exist_ok=True)
+        db_path = str(mem_dir / "memory.db")
+        mem_user = os.environ.get("USER", os.environ.get("USERNAME", "default"))
+        # Register MCP server in Codex config
+        _inject_memory_mcp_config(db_path, mem_user)
+        # Inject memory guidance into project AGENTS.md
+        agents_md = Path.cwd() / "AGENTS.md"
+        _inject_memory_agents_md(agents_md)
+        # Sync Claude's memories → DB so MCP search finds them
+        try:
+            import asyncio
+            from headroom.memory.backends.local import LocalBackend, LocalBackendConfig
+            from headroom.memory.sync import sync_import
+            from headroom.memory.sync_adapters.claude_code import (
+                ClaudeCodeAdapter,
+                get_claude_memory_dir,
+            )
+            claude_memory_dir = get_claude_memory_dir()
+            async def _import_claude_memories() -> int:
+                config = LocalBackendConfig(db_path=db_path)
+                backend = LocalBackend(config)
+                await backend._ensure_initialized()
+                adapter = ClaudeCodeAdapter(claude_memory_dir)
+                count = await sync_import(backend, adapter, mem_user)
+                await backend.close()
+                return count
+            imported = asyncio.run(_import_claude_memories())
+            if imported:
+                click.echo(f"  Memory: imported {imported} memories from Claude")
+        except Exception as e:
+            click.echo(f"  Warning: Claude memory import failed: {e}")
     if prepare_only:
         _inject_codex_provider_config(port)
         return
     # Inject Headroom provider into Codex config so WebSocket traffic also
     # routes through the proxy.  Codex ignores OPENAI_BASE_URL for its WS
     # transport unless a custom provider declares supports_websockets = true.
+    # NOTE: this must run BEFORE _inject_memory_mcp_config because it rewrites
+    # the config file.  Re-inject MCP config after if memory is enabled.
     _inject_codex_provider_config(port)
+    if memory:
+        mem_dir = Path.cwd() / ".headroom"
+        _inject_memory_mcp_config(
+            str(mem_dir / "memory.db"),
+            os.environ.get("USER", os.environ.get("USERNAME", "default")),
+        )
     _launch_tool(
         binary=codex_bin,

headroom/install/state.py CHANGED Viewed

@@ -3,21 +3,30 @@
 from __future__ import annotations
 import json
 import shutil
 from dataclasses import asdict
 from .models import ArtifactRecord, DeploymentManifest, ManagedMutation, iso_utc_now
 from .paths import deploy_root, manifest_path, profile_root
-def save_manifest(manifest: DeploymentManifest) -> None:
-    """Persist a deployment manifest to disk."""
-    root = profile_root(manifest.profile)
-    root.mkdir(parents=True, exist_ok=True)
-    manifest.updated_at = iso_utc_now()
-    path = manifest_path(manifest.profile)
-    path.write_text(json.dumps(asdict(manifest), indent=2) + "\n")
 def load_manifest(profile: str = "default") -> DeploymentManifest | None:

 from __future__ import annotations
 import json
+import logging
 import shutil
 from dataclasses import asdict
 from .models import ArtifactRecord, DeploymentManifest, ManagedMutation, iso_utc_now
 from .paths import deploy_root, manifest_path, profile_root
+logger = logging.getLogger(__name__)
+def save_manifest(manifest: DeploymentManifest) -> None:
+    """Write a deployment manifest to its profile's manifest file as JSON.
+    The manifest's ``updated_at`` field is refreshed before serialization.
+    An ``OSError`` during directory creation or writing (for example on a
+    read-only filesystem) is logged as a warning instead of being raised.
+    """
+    try:
+        profile_root(manifest.profile).mkdir(parents=True, exist_ok=True)
+        manifest.updated_at = iso_utc_now()
+        target = manifest_path(manifest.profile)
+        serialized = json.dumps(asdict(manifest), indent=2) + "\n"
+        target.write_text(serialized)
+    except OSError as exc:
+        logger.warning("Cannot save deployment manifest: %s — continuing without persistence", exc)
 def load_manifest(profile: str = "default") -> DeploymentManifest | None:

headroom/learn/base.py CHANGED Viewed

@@ -25,7 +25,7 @@ class ConversationScanner(ABC):
         ...
     @abstractmethod
-    def scan_project(self, project: ProjectInfo) -> list[SessionData]:
         """Scan all sessions for a project, returning normalized tool calls."""
         ...
@@ -99,8 +99,14 @@ class LearnPlugin(ABC):
         ...
     @abstractmethod
-    def scan_project(self, project: ProjectInfo) -> list[SessionData]:
-        """Scan all sessions for a project, returning normalized data."""
         ...
     # --- Writing ---

         ...
     @abstractmethod
+    def scan_project(self, project: ProjectInfo, max_workers: int = 1) -> list[SessionData]:
         """Scan all sessions for a project, returning normalized tool calls."""
         ...
         ...
     @abstractmethod
+    def scan_project(self, project: ProjectInfo, max_workers: int = 1) -> list[SessionData]:
+        """Scan all sessions for a project, returning normalized data.
+        Args:
+            project: The project to scan.
+            max_workers: Number of threads for parallel file scanning.
+                         1 (default) = serial.  >1 = concurrent.
+        """
         ...
     # --- Writing ---

headroom/learn/plugins/claude.py CHANGED Viewed

@@ -106,16 +106,24 @@ class ClaudeCodePlugin(LearnPlugin, ConversationScanner):
         return projects
-    def scan_project(self, project: ProjectInfo) -> list[SessionData]:
         """Scan all conversation JSONL files for a project."""
-        sessions = []
         jsonl_files = sorted(project.data_path.glob("*.jsonl"))
-        for jsonl_path in jsonl_files:
-            session = self._scan_session(jsonl_path)
-            if session and session.tool_calls:
-                sessions.append(session)
         return sessions
     def _scan_session(self, jsonl_path: Path) -> SessionData | None:
@@ -375,9 +383,9 @@ def _component_tokenizations(component: str) -> list[list[str]]:
     add([component])
-    for separator in ("-", ".", None):
         if separator is None:
-            tokens = [token for token in re.split(r"[-.]", component) if token]
         else:
             tokens = [token for token in component.split(separator) if token]
         add(tokens)
@@ -385,9 +393,9 @@ def _component_tokenizations(component: str) -> list[list[str]]:
     if component.startswith(".") and len(component) > 1:
         hidden_component = component[1:]
         add(["", hidden_component])
-        for separator in ("-", ".", None):
             if separator is None:
-                tokens = [token for token in re.split(r"[-.]", hidden_component) if token]
             else:
                 tokens = [token for token in hidden_component.split(separator) if token]
             add(["", *tokens])

         return projects
+    def scan_project(self, project: ProjectInfo, max_workers: int = 1) -> list[SessionData]:
         """Scan all conversation JSONL files for a project."""
         jsonl_files = sorted(project.data_path.glob("*.jsonl"))
+        if not jsonl_files:
+            return []
+        # Serial path: one worker requested, or nothing to parallelize.
+        if max_workers <= 1 or len(jsonl_files) <= 1:
+            return [s for f in jsonl_files if (s := self._scan_session(f)) and s.tool_calls]
+        from concurrent.futures import ThreadPoolExecutor
+        # executor.map yields results in submission order, so the parallel path
+        # returns sessions in the same sorted-file order as the serial path
+        # (as_completed would return them in nondeterministic completion order).
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            scanned = list(executor.map(self._scan_session, jsonl_files))
+        # Keep only sessions that parsed and contain at least one tool call.
+        sessions: list[SessionData] = [s for s in scanned if s and s.tool_calls]
         return sessions
     def _scan_session(self, jsonl_path: Path) -> SessionData | None:
     add([component])
+    for separator in ("-", ".", "_", None):
         if separator is None:
+            tokens = [token for token in re.split(r"[-._]", component) if token]
         else:
             tokens = [token for token in component.split(separator) if token]
         add(tokens)
     if component.startswith(".") and len(component) > 1:
         hidden_component = component[1:]
         add(["", hidden_component])
+        for separator in ("-", ".", "_", None):
             if separator is None:
+                tokens = [token for token in re.split(r"[-._]", hidden_component) if token]
             else:
                 tokens = [token for token in hidden_component.split(separator) if token]
             add(["", *tokens])

headroom/learn/plugins/codex.py CHANGED Viewed

@@ -91,13 +91,24 @@ class CodexPlugin(LearnPlugin, ConversationScanner):
             )
         ]
-    def scan_project(self, project: ProjectInfo) -> list[SessionData]:
         """Scan all Codex session JSON files."""
-        sessions = []
-        for json_path in self._iter_session_files(project.data_path):
-            session = self._scan_session(json_path)
-            if session and session.tool_calls:
-                sessions.append(session)
         return sessions
     def _scan_session(self, json_path: Path) -> SessionData | None:

             )
         ]
+    def scan_project(self, project: ProjectInfo, max_workers: int = 1) -> list[SessionData]:
         """Scan all Codex session JSON files."""
+        # Materialize the result: the emptiness check and len() below need a
+        # sized sequence, which would break if _iter_session_files returns an
+        # iterator (NOTE(review): its return type is not visible here).
+        session_files = list(self._iter_session_files(project.data_path))
+        if not session_files:
+            return []
+        # Serial path: one worker requested, or nothing to parallelize.
+        if max_workers <= 1 or len(session_files) <= 1:
+            return [s for f in session_files if (s := self._scan_session(f)) and s.tool_calls]
+        from concurrent.futures import ThreadPoolExecutor
+        # executor.map yields results in submission order, so the parallel path
+        # returns sessions in the same order as the serial path
+        # (as_completed would return them in nondeterministic completion order).
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            scanned = list(executor.map(self._scan_session, session_files))
+        # Keep only sessions that parsed and contain at least one tool call.
+        sessions: list[SessionData] = [s for s in scanned if s and s.tool_calls]
         return sessions
     def _scan_session(self, json_path: Path) -> SessionData | None:

headroom/learn/plugins/gemini.py CHANGED Viewed

@@ -106,18 +106,26 @@ class GeminiPlugin(LearnPlugin, ConversationScanner):
         return projects
-    def scan_project(self, project: ProjectInfo) -> list[SessionData]:
         """Scan all Gemini session files for a project."""
-        sessions = []
         session_files = sorted(project.data_path.glob("session-*.json")) + sorted(
             project.data_path.glob("session-*.jsonl")
         )
-        for session_path in session_files:
-            session = self._scan_session(session_path)
-            if session and session.tool_calls:
-                sessions.append(session)
         return sessions
     def _scan_session(self, session_path: Path) -> SessionData | None:

         return projects
+    def scan_project(self, project: ProjectInfo, max_workers: int = 1) -> list[SessionData]:
         """Scan all Gemini session files for a project."""
         session_files = sorted(project.data_path.glob("session-*.json")) + sorted(
             project.data_path.glob("session-*.jsonl")
         )
+        if not session_files:
+            return []
+        # Serial path: one worker requested, or nothing to parallelize.
+        if max_workers <= 1 or len(session_files) <= 1:
+            return [s for f in session_files if (s := self._scan_session(f)) and s.tool_calls]
+        from concurrent.futures import ThreadPoolExecutor
+        # executor.map yields results in submission order, so the parallel path
+        # returns sessions in the same file order as the serial path
+        # (as_completed would return them in nondeterministic completion order).
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            scanned = list(executor.map(self._scan_session, session_files))
+        # Keep only sessions that parsed and contain at least one tool call.
+        sessions: list[SessionData] = [s for s in scanned if s and s.tool_calls]
         return sessions
     def _scan_session(self, session_path: Path) -> SessionData | None:

headroom/memory/factory.py CHANGED Viewed

@@ -7,6 +7,7 @@ and proper wiring between components.
 from __future__ import annotations
 from typing import TYPE_CHECKING
 from headroom.memory.config import (
@@ -199,12 +200,21 @@ def _create_vector_index(config: MemoryConfig) -> VectorIndex:
         from headroom.memory.adapters.hnsw import HNSWVectorIndex
         return HNSWVectorIndex(
             dimension=config.vector_dimension,
             ef_construction=config.hnsw_ef_construction,
             m=config.hnsw_m,
             ef_search=config.hnsw_ef_search,
             max_entries=config.hnsw_max_entries,
         )
     raise ValueError(f"Unknown vector backend: {config.vector_backend}")

 from __future__ import annotations
+from pathlib import Path
 from typing import TYPE_CHECKING
 from headroom.memory.config import (
         from headroom.memory.adapters.hnsw import HNSWVectorIndex
+        # Derive persistent save path from the main DB path so the HNSW
+        # index survives across process restarts (critical for cross-agent
+        # interop: memories saved by Codex MCP must be searchable by Claude).
+        hnsw_save_path: str | Path | None = None
+        if config.db_path:
+            hnsw_save_path = config.db_path.parent / f"{config.db_path.stem}_hnsw"
         return HNSWVectorIndex(
             dimension=config.vector_dimension,
             ef_construction=config.hnsw_ef_construction,
             m=config.hnsw_m,
             ef_search=config.hnsw_ef_search,
             max_entries=config.hnsw_max_entries,
+            save_path=hnsw_save_path,
+            auto_save=True,
         )
     raise ValueError(f"Unknown vector backend: {config.vector_backend}")

headroom/memory/mcp_server.py ADDED Viewed

	@@ -0,0 +1,375 @@

+"""Headroom Memory MCP Server.
+A stdio MCP server that exposes headroom's memory backend as tools
+that Codex (or any MCP-compatible client) can call natively.
+Tools:
+    memory_search  — semantic search across stored memories
+    memory_save    — persist a new fact/decision/convention
+Design:
+    - Embedder is pre-loaded at startup (no cold-start on first query)
+    - On startup, any memories missing vector embeddings are re-indexed
+      (fixes interop gap when memories were saved via a different path)
+    - Save always generates embeddings inline
+Usage:
+    # Standalone (for testing):
+    python -m headroom.memory.mcp_server --db /path/to/.headroom/memory.db
+    # Registered in Codex config.toml (done by `headroom wrap codex --memory`):
+    [mcp_servers.headroom_memory]
+    command = "python"
+    args = ["-m", "headroom.memory.mcp_server", "--db", ".headroom/memory.db"]
+"""
+from __future__ import annotations
+import argparse
+import asyncio
+import logging
+import os
+import sys
+from pathlib import Path
+from typing import Any
+from mcp.server import Server
+from mcp.server.stdio import stdio_server
+from mcp.types import TextContent, Tool
+from headroom.memory.backends.local import LocalBackend, LocalBackendConfig
+logger = logging.getLogger("headroom.memory.mcp")
+# ---------------------------------------------------------------------------
+# Tool definitions
+# ---------------------------------------------------------------------------
+_TOOLS = [
+    Tool(
+        name="memory_search",
+        description=(
+            "Search persistent memory for relevant knowledge from prior sessions. "
+            "Use this for questions about architecture, conventions, prior decisions, "
+            "project context, user preferences, org info, codenames, debugging history, "
+            "or anything that might have been discussed before."
+        ),
+        inputSchema={
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "Natural-language search query.",
+                },
+                "top_k": {
+                    "type": "integer",
+                    "description": "Max results to return (default 10).",
+                    "default": 10,
+                },
+            },
+            "required": ["query"],
+        },
+    ),
+    Tool(
+        name="memory_save",
+        description=(
+            "Save information to persistent memory for future sessions. "
+            "Use this for decisions, conventions, architecture context, "
+            "user preferences, project facts, or anything worth remembering.\n\n"
+            "IMPORTANT: Break information into atomic facts — one fact per "
+            "entry in the 'facts' array. Each fact should be a single, "
+            "self-contained statement that answers one question. "
+            "Do NOT combine multiple facts into one string.\n\n"
+            "Good:  facts: ['Repo owner is Tejas C.', 'User prefers dark mode']\n"
+            "Bad:   facts: ['Repo owner is Tejas C. Prefers dark mode.']"
+        ),
+        inputSchema={
+            "type": "object",
+            "properties": {
+                "facts": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": (
+                        "Array of atomic facts to save. Each entry should be "
+                        "one self-contained fact. The system stores and indexes "
+                        "each fact separately for precise retrieval."
+                    ),
+                },
+                "importance": {
+                    "type": "number",
+                    "description": "0.0 (low) to 1.0 (critical). Default 0.7.",
+                    "default": 0.7,
+                },
+            },
+            "required": [],
+        },
+    ),
+]
+# ---------------------------------------------------------------------------
+# Startup: pre-load embedder + re-index unembedded memories
+# ---------------------------------------------------------------------------
+async def _warm_up_backend(backend: LocalBackend, user_id: str) -> None:
+    """Load the embedder eagerly and push the user's memories into the vector index.
+    After initializing the backend, one throwaway embedding is computed so the
+    embedder is loaded before the first real query. Then up to 500 of the
+    user's memories are fetched; any memory whose ``embedding`` is ``None`` is
+    embedded and saved back to the store, and every fetched memory is indexed
+    into the vector index (not only the ones that lacked an embedding).
+    Returns early when the backend has no hierarchical memory or the user has
+    no stored memories.
+    """
+    await backend._ensure_initialized()
+    memory = backend._hierarchical_memory
+    if memory is None:
+        return
+    # Throwaway call: forces the embedder to load now rather than on first search.
+    await memory._embedder.embed("warmup")
+    logger.info("Memory MCP: embedder pre-loaded")
+    # Memories written through other paths may be in the store without being
+    # in the vector index, so every fetched memory is (re-)indexed.
+    stored = await backend.get_user_memories(user_id, limit=500)
+    if not stored:
+        return
+    indexed = 0
+    for indexed, entry in enumerate(stored, start=1):
+        if entry.embedding is None:
+            entry.embedding = await memory._embedder.embed(entry.content)
+            await memory._store.save(entry)
+        await memory._vector_index.index(entry)
+    logger.info(f"Memory MCP: indexed {indexed} memories into vector store")
+# ---------------------------------------------------------------------------
+# MCP Server
+# ---------------------------------------------------------------------------
+def create_memory_server(db_path: str, user_id: str = "default") -> Server:
+    """Create an MCP server backed by headroom's local memory.
+    The backend is initialized lazily by a single shared asyncio task, started
+    on the first ``list_tools`` or ``call_tool`` request. Every tool call
+    awaits that task, so no call sees the backend before warm-up has finished
+    and concurrent callers never create a second backend.
+    Args:
+        db_path: Path of the memory database handed to ``LocalBackendConfig``.
+        user_id: User whose memories are warmed up, searched and saved.
+    Returns:
+        The configured ``Server`` with ``memory_search`` / ``memory_save`` handlers.
+    """
+    server = Server("headroom-memory")
+    _backend: LocalBackend | None = None
+    _init_task: asyncio.Task | None = None
+    _warmup_failure_logged = False
+    async def _init_backend() -> LocalBackend:
+        """Initialize backend with ONNX embedder (fast, no PyTorch)."""
+        nonlocal _backend
+        config = LocalBackendConfig(db_path=db_path, embedder_backend="onnx")
+        _backend = LocalBackend(config)
+        await _warm_up_backend(_backend, user_id)
+        logger.info(f"Memory MCP: ready (db={db_path}, user={user_id})")
+        return _backend
+    async def _get_backend() -> LocalBackend:
+        """Return the backend once the shared init task has finished."""
+        nonlocal _init_task, _warmup_failure_logged
+        # Always go through the task: _backend is assigned before warm-up
+        # completes, so checking it directly would bypass the init wait.
+        if _init_task is None:
+            _init_task = asyncio.create_task(_init_backend())
+        try:
+            return await _init_task
+        except Exception:
+            if _backend is None:
+                # Construction itself failed: drop the task so the next
+                # request retries instead of re-raising the stale error.
+                _init_task = None
+                raise
+            # Backend exists but warm-up failed: keep serving with it.
+            if not _warmup_failure_logged:
+                _warmup_failure_logged = True
+                logger.warning("Memory MCP: warm-up failed; continuing", exc_info=True)
+            return _backend
+    @server.list_tools()
+    async def list_tools() -> list[Tool]:
+        # Kick off background init on first list_tools (called at MCP handshake)
+        nonlocal _init_task
+        if _init_task is None:
+            _init_task = asyncio.create_task(_init_backend())
+        return _TOOLS
+    @server.call_tool()
+    async def call_tool(name: str, arguments: dict) -> list[TextContent]:
+        backend = await _get_backend()
+        if name == "memory_search":
+            return await _handle_search(backend, arguments, user_id)
+        elif name == "memory_save":
+            return await _handle_save(backend, arguments, user_id)
+        return [TextContent(type="text", text=f"Unknown tool: {name}")]
+    return server
+async def _handle_search(
+    backend: LocalBackend, arguments: dict[str, Any], user_id: str
+) -> list[TextContent]:
+    """Handle the ``memory_search`` tool: list active memories matching a query.
+    Args:
+        backend: Memory backend used for the search and the per-result re-check.
+        arguments: Tool arguments; reads ``query`` (required) and ``top_k``
+            (optional, default 10, coerced to an int >= 1).
+        user_id: User whose memories are searched.
+    Returns:
+        A one-item list whose text is the numbered results, a
+        "No memories found." notice, or an error message.
+    """
+    query = arguments.get("query", "")
+    if not query:
+        return [TextContent(type="text", text="Error: query is required")]
+    # Tool arguments arrive as untyped JSON: a string would turn
+    # ``top_k * 3`` into string repetition and a non-positive value would
+    # break the final slice, so normalise before use.
+    try:
+        top_k = max(1, int(arguments.get("top_k", 10)))
+    except (TypeError, ValueError, OverflowError):
+        top_k = 10
+    try:
+        # Over-fetch to compensate for filtering out superseded memories
+        results = await backend.search_memories(
+            query=query,
+            user_id=user_id,
+            top_k=top_k * 3,
+            include_related=True,
+        )
+        if not results:
+            return [TextContent(type="text", text="No memories found.")]
+        # Filter out superseded memories — only return current/active ones.
+        # Re-check the store because in-memory HNSW metadata may be stale.
+        active_results = []
+        for r in results:
+            if getattr(r.memory, "superseded_by", None):
+                continue
+            # Double-check against the store for recently superseded memories
+            try:
+                stored = await backend.get_memory(r.memory.id)
+                if stored and getattr(stored, "superseded_by", None):
+                    continue
+            except Exception as e:
+                # Best effort: keep the result, but leave a trace of the failure.
+                logger.debug(f"memory_search: store re-check failed for {r.memory.id}: {e}")
+            active_results.append(r)
+        if not active_results:
+            return [TextContent(type="text", text="No memories found.")]
+        # Trim to requested top_k
+        active_results = active_results[:top_k]
+        lines = []
+        for i, r in enumerate(active_results, 1):
+            score = f"{r.score:.2f}" if hasattr(r, "score") else "?"
+            lines.append(f"{i}. [relevance={score}] {r.memory.content}")
+            if hasattr(r, "related_entities") and r.related_entities:
+                lines.append(f"   Related: {', '.join(r.related_entities[:3])}")
+        return [TextContent(type="text", text="\n".join(lines))]
+    except Exception as e:
+        logger.error(f"memory_search failed: {e}")
+        return [TextContent(type="text", text=f"Search error: {e}")]
+# Similarity threshold for auto-supersession: if a new memory is this
+# similar to an existing one, it replaces (supersedes) the old one.
+_SUPERSEDE_SIMILARITY = 0.70
+async def _handle_save(
+    backend: LocalBackend, arguments: dict[str, Any], user_id: str
+) -> list[TextContent]:
+    """Handle the ``memory_save`` tool: store facts, superseding near-duplicates.
+    For each fact the backend is searched first; if an active memory scores
+    at or above ``_SUPERSEDE_SIMILARITY`` it is updated in place of creating
+    a new entry.
+    Args:
+        backend: Memory backend used for search, update and save.
+        arguments: Tool arguments; reads ``facts`` (list of strings, or a
+            single string), ``content`` (legacy single string) and
+            ``importance`` (default 0.7).
+        user_id: User the memories are stored for.
+    Returns:
+        A one-item list whose text is a summary plus one line per fact, or
+        an error message.
+    """
+    facts = arguments.get("facts", [])
+    importance = arguments.get("importance", 0.7)
+    # A bare string would otherwise be iterated character by character,
+    # saving one memory per letter.
+    if isinstance(facts, str):
+        facts = [facts]
+    # Backward compat: accept single "content" string too
+    if not facts:
+        content = arguments.get("content", "")
+        if content:
+            facts = [content]
+    if not facts:
+        return [TextContent(type="text", text="Error: facts array is required")]
+    try:
+        saved = 0
+        superseded = 0
+        results_lines: list[str] = []
+        for fact in facts:
+            if not isinstance(fact, str):
+                # Skip the bad entry instead of aborting the whole batch.
+                logger.warning(f"memory_save: ignoring non-string fact of type {type(fact).__name__}")
+                continue
+            fact = fact.strip()
+            if not fact:
+                continue
+            # Check for semantically similar existing memory to auto-supersede
+            superseded_id: str | None = None
+            try:
+                existing = await backend.search_memories(
+                    query=fact,
+                    user_id=user_id,
+                    top_k=3,
+                )
+                for r in existing:
+                    if getattr(r.memory, "superseded_by", None):
+                        continue
+                    if r.score >= _SUPERSEDE_SIMILARITY:
+                        superseded_id = r.memory.id
+                        logger.info(
+                            f"Memory MCP: auto-superseding [{r.memory.id[:8]}] "
+                            f"(similarity={r.score:.2f}): {r.memory.content[:60]}"
+                        )
+                        break
+            except Exception as e:
+                # Best effort: fall through to a plain save if the lookup fails.
+                logger.debug(f"memory_save: supersede lookup failed: {e}")
+            if superseded_id:
+                memory = await backend.update_memory(
+                    memory_id=superseded_id,
+                    new_content=fact,
+                )
+                results_lines.append(
+                    f"  updated [{superseded_id[:8]}→{memory.id[:8]}]: {fact[:60]}"
+                )
+                superseded += 1
+            else:
+                memory = await backend.save_memory(
+                    content=fact,
+                    user_id=user_id,
+                    importance=importance,
+                )
+                results_lines.append(f"  saved [{memory.id[:8]}]: {fact[:60]}")
+                saved += 1
+        summary = f"Saved {saved} new, updated {superseded} existing ({saved + superseded} total)"
+        return [TextContent(type="text", text=summary + "\n" + "\n".join(results_lines))]
+    except Exception as e:
+        logger.error(f"memory_save failed: {e}")
+        return [TextContent(type="text", text=f"Save error: {e}")]
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+async def _run(db_path: str, user_id: str) -> None:
+    """Build the memory server and run it over the stdio transport."""
+    mcp_server = create_memory_server(db_path, user_id)
+    async with stdio_server() as streams:
+        reader, writer = streams
+        await mcp_server.run(reader, writer, mcp_server.create_initialization_options())
+def main() -> None:
+    """Parse CLI options, configure the environment/logging, and run the server."""
+    default_db = str(Path.cwd() / ".headroom" / "memory.db")
+    default_user = os.environ.get("USER", os.environ.get("USERNAME", "default"))
+    cli = argparse.ArgumentParser(description="Headroom Memory MCP Server")
+    cli.add_argument("--db", default=default_db, help="Path to memory SQLite database")
+    cli.add_argument("--user", default=default_user, help="User ID for memory scoping")
+    options = cli.parse_args()
+    # Skip HuggingFace model freshness checks — use cached models only.
+    # This eliminates 1-2s of HTTP HEAD requests on every startup.
+    for offline_flag in ("HF_HUB_OFFLINE", "TRANSFORMERS_OFFLINE"):
+        os.environ.setdefault(offline_flag, "1")
+    # Log to stderr (MCP uses stdout for protocol)
+    logging.basicConfig(level=logging.INFO, stream=sys.stderr, format="%(name)s: %(message)s")
+    asyncio.run(_run(options.db, options.user))
+if __name__ == "__main__":
+    main()

headroom/memory/sync.py ADDED Viewed

	@@ -0,0 +1,395 @@

+"""Universal memory sync engine for cross-agent interoperability.
+Provides bidirectional sync between headroom's memory DB and any
+agent's native memory format via pluggable adapters.
+Architecture:
+    DB ← sync_import ← Agent files   (agent's knowledge enters the shared DB)
+    DB → sync_export → Agent files   (shared knowledge flows to the agent)
+    sync() = import + export          (bidirectional, fast no-op when unchanged)
+Usage:
+    from headroom.memory.sync import sync, SyncResult
+    from headroom.memory.sync_adapters.claude_code import ClaudeCodeAdapter
+    adapter = ClaudeCodeAdapter(memory_dir=Path("~/.claude/projects/.../memory"))
+    backend = LocalBackend(config)
+    result: SyncResult = await sync(backend, adapter, user_id="tcms")
+"""
+from __future__ import annotations
+import hashlib
+import json
+import logging
+import time
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+logger = logging.getLogger("headroom.memory.sync")
+# State file for fast no-op detection
+_DEFAULT_STATE_PATH = Path.home() / ".headroom" / "sync_state.json"
+# ---------------------------------------------------------------------------
+# Data models
+# ---------------------------------------------------------------------------
+@dataclass
+class SyncResult:
+    """Result of a sync operation."""
+    # Number of agent-file entries saved into the DB.
+    imported: int = 0  # agent files → DB
+    # Number of DB entries written out to the agent's files.
+    exported: int = 0  # DB → agent files
+    skipped_unchanged: int = 0
+    skipped_dedup: int = 0
+    # Wall-clock duration in milliseconds; a float default matches the annotation.
+    duration_ms: float = 0.0
+@dataclass
+class AgentMemory:
+    """One memory entry as parsed from an agent's own files."""
+    content: str
+    category: str = ""
+    source_file: str = ""
+    content_hash: str = ""
+    metadata: dict[str, Any] = field(default_factory=dict)
+    def __post_init__(self) -> None:
+        # Keep a caller-supplied hash; otherwise derive a 16-hex-char
+        # SHA-256 prefix from the content.
+        if self.content_hash:
+            return
+        digest = hashlib.sha256(self.content.encode())
+        self.content_hash = digest.hexdigest()[:16]
+# ---------------------------------------------------------------------------
+# Adapter interface
+# ---------------------------------------------------------------------------
+class AgentMemoryAdapter(ABC):
+    """Base class for agent memory format adapters.
+    Each agent (Claude Code, Codex, Aider, Cursor) has a subclass
+    that knows how to read/write that agent's native memory format.
+    The sync engine uses ``agent_name`` both as the key for persisted sync
+    state and as the ``source_agent`` lineage value on imported memories.
+    """
+    agent_name: str = "unknown"
+    @abstractmethod
+    async def read_memories(self) -> list[AgentMemory]:
+        """Read memories from the agent's native format.
+        Returns a list of AgentMemory entries found in the agent's files.
+        The sync engine compares each entry's ``content_hash`` against the
+        DB to decide what to import and what to skip on export.
+        """
+        ...
+    @abstractmethod
+    async def write_memories(self, memories: list[dict[str, Any]]) -> int:
+        """Write memories to the agent's native format.
+        Args:
+            memories: List of dicts with keys: content, category, importance,
+                      headroom_id, source_agent, content_hash, created_at.
+        Returns:
+            Count of memories written.
+        """
+        ...
+    @abstractmethod
+    def fingerprint(self) -> str:
+        """Fast hash of the agent's memory state.
+        Used for no-op detection: if the fingerprint hasn't changed
+        since last sync, we can skip the full read/compare cycle.
+        Called synchronously (not awaited) by the sync engine.
+        """
+        ...
+# ---------------------------------------------------------------------------
+# Sync state persistence
+# ---------------------------------------------------------------------------
+def _load_sync_state(state_path: Path) -> dict[str, Any]:
+    """Load sync state from disk.
+    Args:
+        state_path: Location of the JSON state file.
+    Returns:
+        The decoded state mapping, or an empty dict when the file is
+        missing, unreadable, not valid UTF-8/JSON, or not a JSON object.
+    """
+    try:
+        loaded = json.loads(state_path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
+        return {}
+    # Callers index the result with .get(); a list or scalar left behind by
+    # a corrupted file must not reach them.
+    if not isinstance(loaded, dict):
+        return {}
+    return loaded
+def _save_sync_state(state_path: Path, state: dict[str, Any]) -> None:
+    """Save sync state to disk atomically.
+    The JSON is written to a uniquely named temporary file in the target
+    directory and then renamed over ``state_path``, so an interrupted write
+    never leaves a truncated state file behind.
+    Args:
+        state_path: Destination of the JSON state file; parents are created.
+        state: Mapping to serialise.
+    """
+    import tempfile
+    state_path.parent.mkdir(parents=True, exist_ok=True)
+    # Serialise first so an unserialisable state creates no file at all.
+    payload = json.dumps(state, indent=2)
+    with tempfile.NamedTemporaryFile(
+        "w",
+        encoding="utf-8",
+        dir=state_path.parent,
+        prefix=state_path.name + ".",
+        suffix=".tmp",
+        delete=False,
+    ) as tmp:
+        tmp.write(payload)
+        tmp_path = Path(tmp.name)
+    try:
+        tmp_path.replace(state_path)
+    except OSError:
+        tmp_path.unlink(missing_ok=True)
+        raise
+def _db_fingerprint(memories: list[Any]) -> str:
+    """Compute a fast fingerprint of DB state.
+    The fingerprint covers the entry count and every memory id (sorted, so
+    the order in which the backend returns rows does not matter). Edits
+    that keep both the id set and the count unchanged are not detected.
+    Args:
+        memories: Memory objects; each is expected to expose an ``id``.
+    Returns:
+        ``"empty"`` for no memories, otherwise a 16-hex-char SHA-256 prefix.
+    """
+    if not memories:
+        return "empty"
+    # Hash every id rather than a sample of the first few: with a sample,
+    # one memory replaced by another outside it (count unchanged) would
+    # look like "nothing changed" and the sync would be skipped.
+    ids = sorted(str(getattr(m, "id", "") or "") for m in memories)
+    parts = [str(len(ids)), *ids]
+    return hashlib.sha256("|".join(parts).encode()).hexdigest()[:16]
+# ---------------------------------------------------------------------------
+# Sync engine
+# ---------------------------------------------------------------------------
+async def sync(
+    backend: Any,
+    adapter: AgentMemoryAdapter,
+    user_id: str,
+    state_path: Path = _DEFAULT_STATE_PATH,
+    force: bool = False,
+) -> SyncResult:
+    """Run a two-way sync between the headroom DB and one agent's memory.
+    Steps:
+        1. Unless ``force`` is set, compare the stored agent/DB fingerprints
+           with the current ones and return early when both match.
+        2. Import agent entries into the DB.
+        3. Export DB entries to the agent's files.
+        4. Persist the new fingerprints and counts under ``agent:user``.
+    Args:
+        backend: Object providing ``save_memory`` and ``get_user_memories``.
+        adapter: Agent-specific memory adapter.
+        user_id: User ID for memory scoping.
+        state_path: Path to sync state file.
+        force: Skip the fingerprint check and always sync.
+    Returns:
+        SyncResult with import/export counts and timing.
+    """
+    started = time.monotonic()
+    outcome = SyncResult()
+    state_key = f"{adapter.agent_name}:{user_id}"
+    if force:
+        db_memories = await backend.get_user_memories(user_id, limit=500)
+    else:
+        # Fast path: nothing to do when neither side changed since last run.
+        previous = _load_sync_state(state_path).get(state_key, {})
+        agent_fp_now = adapter.fingerprint()
+        db_memories = await backend.get_user_memories(user_id, limit=500)
+        db_fp_now = _db_fingerprint(db_memories)
+        agent_same = previous.get("agent_fingerprint") == agent_fp_now
+        db_same = previous.get("db_fingerprint") == db_fp_now
+        if agent_same and db_same:
+            outcome.duration_ms = (time.monotonic() - started) * 1000
+            logger.info(
+                f"Sync [{adapter.agent_name}]: no-op — nothing changed ({outcome.duration_ms:.1f}ms)"
+            )
+            return outcome
+    # Phase 1: agent files → DB
+    outcome.imported = await sync_import(backend, adapter, user_id, db_memories)
+    # Phase 2: DB → agent files; refresh the snapshot when imports added rows
+    if outcome.imported > 0:
+        db_memories = await backend.get_user_memories(user_id, limit=500)
+    outcome.exported = await sync_export(backend, adapter, user_id, db_memories)
+    # Record the post-sync fingerprints for the next no-op check
+    persisted = _load_sync_state(state_path)
+    persisted[state_key] = {
+        "agent_fingerprint": adapter.fingerprint(),
+        "db_fingerprint": _db_fingerprint(db_memories),
+        "last_sync": datetime.now(timezone.utc).isoformat(),
+        "last_imported": outcome.imported,
+        "last_exported": outcome.exported,
+    }
+    _save_sync_state(state_path, persisted)
+    outcome.duration_ms = (time.monotonic() - started) * 1000
+    logger.info(
+        f"Sync [{adapter.agent_name}]: imported={outcome.imported}, "
+        f"exported={outcome.exported} ({outcome.duration_ms:.1f}ms)"
+    )
+    return outcome
+async def sync_import(
+    backend: Any,
+    adapter: AgentMemoryAdapter,
+    user_id: str,
+    existing_memories: list[Any] | None = None,
+) -> int:
+    """Import: agent files → DB. Returns count imported.
+    Entries whose content hash is already known (from stored metadata or
+    from hashing stored content) are skipped; the rest are saved with
+    lineage metadata.
+    """
+    agent_memories = await adapter.read_memories()
+    if not agent_memories:
+        return 0
+    if existing_memories is None:
+        existing_memories = await backend.get_user_memories(user_id, limit=500)
+    # Collect every hash the DB already covers
+    known_hashes: set[str] = set()
+    for stored in existing_memories:
+        recorded = (stored.metadata or {}).get("content_hash", "")
+        if recorded:
+            known_hashes.add(recorded)
+        # Hash the stored content as well, in case no hash was recorded
+        known_hashes.add(hashlib.sha256(stored.content.encode()).hexdigest()[:16])
+    imported = 0
+    for entry in agent_memories:
+        if entry.content_hash in known_hashes:
+            continue
+        # Adapter-provided metadata is merged last and so wins on key clashes
+        lineage = {
+            "source_agent": adapter.agent_name,
+            "source_file": entry.source_file,
+            "content_hash": entry.content_hash,
+            "synced_at": datetime.now(timezone.utc).isoformat(),
+            "sync_direction": "import",
+            **entry.metadata,
+        }
+        await backend.save_memory(
+            content=entry.content,
+            user_id=user_id,
+            importance=0.6,
+            metadata=lineage,
+        )
+        known_hashes.add(entry.content_hash)
+        imported += 1
+    if imported:
+        logger.info(f"Sync [{adapter.agent_name}]: imported {imported} memories from agent files")
+    return imported
+async def sync_export(
+    backend: Any,
+    adapter: AgentMemoryAdapter,
+    user_id: str,
+    existing_memories: list[Any] | None = None,
+) -> int:
+    """Export: DB → agent files. Returns count exported.
+    A DB memory is exported unless the agent already holds the same content
+    hash, or the memory was itself imported from this agent.
+    """
+    if existing_memories is None:
+        existing_memories = await backend.get_user_memories(user_id, limit=500)
+    if not existing_memories:
+        return 0
+    # Hashes the agent already has, so nothing is written twice
+    agent_memories = await adapter.read_memories()
+    agent_hashes: set[str] = {am.content_hash for am in agent_memories}
+    to_export: list[dict[str, Any]] = []
+    for mem in existing_memories:
+        digest = hashlib.sha256(mem.content.encode()).hexdigest()[:16]
+        if digest in agent_hashes:
+            continue
+        meta = mem.metadata or {}
+        # Echo guard: do not send back what this agent gave us (agent → DB → agent)
+        came_from_this_agent = (
+            meta.get("source_agent") == adapter.agent_name
+            and meta.get("sync_direction") == "import"
+        )
+        if came_from_this_agent:
+            continue
+        if hasattr(mem.created_at, "isoformat"):
+            created_text = mem.created_at.isoformat()
+        else:
+            created_text = str(mem.created_at)
+        to_export.append(
+            {
+                "content": mem.content,
+                "category": getattr(mem, "category", "") or "",
+                "importance": getattr(mem, "importance", 0.5),
+                "headroom_id": mem.id,
+                "source_agent": meta.get("source_agent", "unknown"),
+                "content_hash": digest,
+                "created_at": created_text,
+            }
+        )
+    if not to_export:
+        return 0
+    exported = await adapter.write_memories(to_export)
+    if exported:
+        logger.info(f"Sync [{adapter.agent_name}]: exported {exported} memories to agent files")
+    return exported
+# ---------------------------------------------------------------------------
+# CLI entry point: python -m headroom.memory.sync --db ... --user ... --agent ...
+# ---------------------------------------------------------------------------
+def main() -> None:
+    """CLI entry point for running sync from a subprocess.
+    Parses ``--db``, ``--user``, ``--agent`` and ``--force``, runs one sync,
+    and prints a single JSON object (``imported``, ``exported``, ``ms``) to
+    stdout.
+    """
+    import argparse
+    parser = argparse.ArgumentParser(description="Headroom memory sync")
+    parser.add_argument("--db", required=True, help="Path to memory DB")
+    parser.add_argument("--user", required=True, help="User ID")
+    parser.add_argument("--agent", required=True, choices=["claude", "codex"], help="Agent to sync")
+    parser.add_argument("--force", action="store_true", help="Skip no-op check")
+    args = parser.parse_args()
+    import asyncio
+    import json as _json
+    async def _run() -> None:
+        from headroom.memory.backends.local import LocalBackend, LocalBackendConfig
+        config = LocalBackendConfig(db_path=args.db)
+        backend = LocalBackend(config)
+        await backend._ensure_initialized()
+        # Always release the backend, including when the sync itself raises
+        # or the agent branch returns early.
+        try:
+            if args.agent == "claude":
+                from headroom.memory.sync_adapters.claude_code import (
+                    ClaudeCodeAdapter,
+                    get_claude_memory_dir,
+                )
+                adapter: ClaudeCodeAdapter | Any = ClaudeCodeAdapter(get_claude_memory_dir())
+            elif args.agent == "codex":
+                from headroom.memory.sync_adapters.codex_agent import CodexAdapter
+                adapter = CodexAdapter()
+            else:
+                # Defensive only: argparse ``choices`` already rejects other values.
+                print(_json.dumps({"error": f"Unknown agent: {args.agent}"}))
+                return
+            result = await sync(backend, adapter, args.user, force=args.force)
+        finally:
+            await backend.close()
+        print(
+            _json.dumps(
+                {
+                    "imported": result.imported,
+                    "exported": result.exported,
+                    "ms": round(result.duration_ms),
+                }
+            )
+        )
+    asyncio.run(_run())
+if __name__ == "__main__":
+    main()

headroom/memory/sync_adapters/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Agent memory sync adapters for cross-agent interoperability."""

headroom/memory/sync_adapters/claude_code.py ADDED Viewed

	@@ -0,0 +1,233 @@

+"""Claude Code memory sync adapter.
+Reads/writes Claude Code's native memory format:
+    ~/.claude/projects/<sanitized-path>/memory/
+        MEMORY.md          — index file (first 200 lines always in context)
+        user_role.md       — individual memory files with YAML frontmatter
+        project_codename.md
+        ...
+Each .md file has:
+    ---
+    name: <title>
+    description: <one-line summary>
+    type: <user|project|reference|feedback>
+    headroom_id: <uuid>          (added by sync for cross-reference)
+    source_agent: <agent name>   (added by sync for lineage)
+    ---
+    <body content>
+"""
+from __future__ import annotations
+import hashlib
+import re
+from pathlib import Path
+from typing import Any
+from headroom.memory.sync import AgentMemory, AgentMemoryAdapter
+def _sanitize_for_filename(text: str) -> str:
+    """Turn free text into a lowercase ``[a-z0-9_]`` slug of at most 50 chars.
+    Falls back to ``"memory"`` when nothing usable remains.
+    """
+    lowered = text.lower().strip()
+    collapsed = re.sub(r"[^a-z0-9]+", "_", lowered)
+    trimmed = collapsed.strip("_")[:50]
+    if trimmed:
+        return trimmed
+    return "memory"
+def _parse_frontmatter(content: str) -> tuple[dict[str, str], str]:
+    """Parse YAML-style frontmatter from a markdown file.
+    The frontmatter must open with a ``---`` line and is closed by the next
+    line consisting solely of ``---``. Each ``key: value`` line in between
+    is split on its first colon; surrounding quotes are removed from values.
+    Args:
+        content: Full text of the markdown file.
+    Returns:
+        ``(frontmatter_dict, body)``. When there is no complete frontmatter
+        block the dict is empty and ``content`` is returned unchanged.
+    """
+    if not content.startswith("---"):
+        return {}, content
+    lines = content.split("\n")
+    if lines[0].strip() != "---":
+        return {}, content
+    # Match the closing delimiter as a whole line. A plain substring search
+    # would stop at a "---" inside a value (e.g. a description copied from
+    # the body) and spill the rest of the frontmatter into the body.
+    closing = next(
+        (i for i, line in enumerate(lines[1:], 1) if line.strip() == "---"),
+        None,
+    )
+    if closing is None:
+        return {}, content
+    fm: dict[str, str] = {}
+    for line in lines[1:closing]:
+        if ":" in line:
+            key, _, value = line.partition(":")
+            fm[key.strip()] = value.strip().strip('"').strip("'")
+    body = "\n".join(lines[closing + 1 :]).strip()
+    return fm, body
+def _build_frontmatter(fields: dict[str, str]) -> str:
+    """Render ``fields`` as a ``---``-delimited block, omitting empty values."""
+    rendered = [f"{key}: {value}" for key, value in fields.items() if value]
+    return "\n".join(["---", *rendered, "---"])
+class ClaudeCodeAdapter(AgentMemoryAdapter):
+    """Sync adapter for Claude Code's native memory files.
+    Memories are individual ``*.md`` files with frontmatter inside
+    ``memory_dir``; ``MEMORY.md`` in the same directory is treated as an
+    index and never read as a memory.
+    """
+    agent_name = "claude"
+    def __init__(self, memory_dir: Path | str) -> None:
+        self._memory_dir = Path(memory_dir)
+    async def read_memories(self) -> list[AgentMemory]:
+        """Read all .md memory files (except MEMORY.md index).
+        Files that cannot be read or decoded, and files with an empty body,
+        are skipped.
+        """
+        if not self._memory_dir.exists():
+            return []
+        memories: list[AgentMemory] = []
+        for md_file in sorted(self._memory_dir.glob("*.md")):
+            if md_file.name == "MEMORY.md":
+                continue  # Index file, not a memory
+            try:
+                content = md_file.read_text(encoding="utf-8")
+            except (OSError, UnicodeDecodeError):
+                # One unreadable or non-UTF-8 file must not abort the sync.
+                continue
+            fm, body = _parse_frontmatter(content)
+            if not body.strip():
+                continue
+            memories.append(
+                AgentMemory(
+                    content=body.strip(),
+                    category=fm.get("type", ""),
+                    source_file=md_file.name,
+                    metadata={
+                        "name": fm.get("name", ""),
+                        "description": fm.get("description", ""),
+                        "headroom_id": fm.get("headroom_id", ""),
+                        "source_agent": fm.get("source_agent", "claude"),
+                    },
+                )
+            )
+        return memories
+    def _resolve_target(self, slug: str, body: str, headroom_id: str) -> Path | None:
+        """Pick the file to write for a memory, or None if it is already on disk.
+        Tries ``headroom_<slug>.md`` first, then a name suffixed with a
+        digest of the body, so two different memories that share a slug do
+        not overwrite each other.
+        """
+        digest = hashlib.sha256(body.encode()).hexdigest()[:8]
+        candidates = (
+            self._memory_dir / f"headroom_{slug}.md",
+            self._memory_dir / f"headroom_{slug}_{digest}.md",
+        )
+        for candidate in candidates:
+            if not candidate.exists():
+                return candidate
+            try:
+                existing_fm, existing_body = _parse_frontmatter(
+                    candidate.read_text(encoding="utf-8")
+                )
+            except (OSError, UnicodeDecodeError):
+                continue
+            if existing_body.strip() == body:
+                return None  # Identical content already written
+            if headroom_id and existing_fm.get("headroom_id") == headroom_id:
+                return candidate  # Same memory with new content: update in place
+        return candidates[-1]
+    async def write_memories(self, memories: list[dict[str, Any]]) -> int:
+        """Write memories as individual .md files with frontmatter.
+        Also updates MEMORY.md index.
+        Args:
+            memories: Dicts with at least ``content``; ``category``,
+                ``headroom_id`` and ``source_agent`` are used when present.
+        Returns:
+            Number of files written (memories already on disk are skipped).
+        """
+        if not memories:
+            return 0
+        self._memory_dir.mkdir(parents=True, exist_ok=True)
+        written = 0
+        new_index_entries: list[str] = []
+        for mem in memories:
+            content = mem["content"]
+            # Compare stripped text: that is what reading the file back yields.
+            body = content.strip()
+            category = mem.get("category", "project")
+            headroom_id = mem.get("headroom_id", "")
+            source_agent = mem.get("source_agent", "unknown")
+            # Generate filename from content
+            first_line = body.split("\n")[0][:60].strip()
+            slug = _sanitize_for_filename(first_line)
+            target = self._resolve_target(slug, body, headroom_id)
+            if target is None:
+                continue
+            # Build description (first 100 chars)
+            description = content.replace("\n", " ")[:100]
+            # Write file
+            fm = _build_frontmatter(
+                {
+                    "name": first_line[:60],
+                    "description": description,
+                    "type": category or "project",
+                    "headroom_id": headroom_id,
+                    "source_agent": source_agent,
+                }
+            )
+            target.write_text(f"{fm}\n\n{content}\n", encoding="utf-8")
+            written += 1
+            # Track for MEMORY.md index
+            new_index_entries.append(f"- [{first_line[:60]}]({target.name}) — {description[:80]}")
+        # Update MEMORY.md index
+        if new_index_entries:
+            self._update_memory_md_index(new_index_entries)
+        return written
+    def _update_memory_md_index(self, new_entries: list[str]) -> None:
+        """Append new entries to MEMORY.md under a Headroom section.
+        Entries already present as a line in MEMORY.md are not added again.
+        """
+        memory_md = self._memory_dir / "MEMORY.md"
+        section_marker = "## Headroom Shared Memory"
+        # Drop duplicates within this batch while keeping order
+        new_entries = list(dict.fromkeys(new_entries))
+        if memory_md.exists():
+            content = memory_md.read_text(encoding="utf-8")
+            # Rewriting an existing file must not pile up identical index lines
+            present = set(content.splitlines())
+            new_entries = [entry for entry in new_entries if entry not in present]
+            if not new_entries:
+                return
+            addition = "\n".join(new_entries)
+            if section_marker in content:
+                # Append to existing section (before next ## or end)
+                idx = content.index(section_marker)
+                next_section = content.find("\n## ", idx + len(section_marker))
+                if next_section == -1:
+                    content = content.rstrip() + "\n" + addition + "\n"
+                else:
+                    content = (
+                        content[:next_section].rstrip()
+                        + "\n"
+                        + addition
+                        + "\n"
+                        + content[next_section:]
+                    )
+            else:
+                content = content.rstrip() + "\n" + f"\n{section_marker}\n" + addition + "\n"
+        else:
+            content = "# Memory\n" + f"\n{section_marker}\n" + "\n".join(new_entries) + "\n"
+        memory_md.write_text(content, encoding="utf-8")
+    def fingerprint(self) -> str:
+        """Hash of all .md filenames + mtimes for fast change detection."""
+        if not self._memory_dir.exists():
+            return "empty"
+        parts: list[str] = []
+        for md_file in sorted(self._memory_dir.glob("*.md")):
+            try:
+                stat = md_file.stat()
+                parts.append(f"{md_file.name}:{stat.st_mtime_ns}")
+            except OSError:
+                continue
+        if not parts:
+            return "empty"
+        return hashlib.sha256("|".join(parts).encode()).hexdigest()[:16]
+def get_claude_memory_dir(project_path: Path | None = None) -> Path:
+    """Return the Claude Code memory directory for a project.
+    The project path (current working directory by default) has every
+    ``/`` and ``\\`` replaced with ``-`` and is placed under
+    ``~/.claude/projects/<sanitized>/memory``.
+    """
+    project = project_path or Path.cwd()
+    # Map both Unix and Windows separators to "-" (Claude Code does the same)
+    separators = str.maketrans({"/": "-", "\\": "-"})
+    sanitized = str(project).translate(separators)
+    projects_root = Path.home() / ".claude" / "projects"
+    return projects_root / sanitized / "memory"

headroom/memory/sync_adapters/codex_agent.py ADDED Viewed

	@@ -0,0 +1,106 @@

+"""Codex CLI memory sync adapter.
+Syncs memories to/from a headroom-managed section in AGENTS.md.
+Codex reads AGENTS.md automatically before every task.
+Note: Codex primarily uses the MCP server for memory (memory_search/save).
+This adapter provides supplementary context injection via AGENTS.md so
+Codex has key memories even without explicit tool calls.
+Format in AGENTS.md:
+    <!-- headroom:memory:start -->
+    ## Headroom Shared Memory
+    - fact 1
+    - fact 2
+    <!-- headroom:memory:end -->
+"""
+from __future__ import annotations
+import hashlib
+import re
+from pathlib import Path
+from typing import Any
+from headroom.memory.sync import AgentMemory, AgentMemoryAdapter
+_MARKER_START = "<!-- headroom:memory:start -->"
+_MARKER_END = "<!-- headroom:memory:end -->"
+_MARKER_PATTERN = re.compile(
+    re.escape(_MARKER_START) + r"(.*?)" + re.escape(_MARKER_END),
+    re.DOTALL,
+)
+class CodexAdapter(AgentMemoryAdapter):
+    """Sync adapter for Codex's AGENTS.md.
+    Memories live as ``- fact`` bullets between the headroom start/end
+    markers; everything outside the markers is left untouched.
+    """
+    agent_name = "codex"
+    def __init__(self, agents_md_path: Path | str | None = None) -> None:
+        self._path = Path(agents_md_path) if agents_md_path else Path.cwd() / "AGENTS.md"
+    @staticmethod
+    def _section_facts(text: str) -> list[str]:
+        """Return the non-empty bullet facts inside the headroom section of ``text``."""
+        match = _MARKER_PATTERN.search(text)
+        if not match:
+            return []
+        facts: list[str] = []
+        for line in match.group(1).strip().split("\n"):
+            line = line.strip()
+            if line.startswith("- "):
+                fact = line[2:].strip()
+                if fact:
+                    facts.append(fact)
+        return facts
+    async def read_memories(self) -> list[AgentMemory]:
+        """Read memories from the headroom section of AGENTS.md."""
+        if not self._path.exists():
+            return []
+        content = self._path.read_text(encoding="utf-8")
+        return [
+            AgentMemory(content=fact, source_file=self._path.name)
+            for fact in self._section_facts(content)
+        ]
+    async def write_memories(self, memories: list[dict[str, Any]]) -> int:
+        """Merge memories into the headroom section of AGENTS.md.
+        Only the first line of each memory is written. Facts already in the
+        section are kept, and the file is left untouched when nothing new
+        is added.
+        Args:
+            memories: Dicts with at least a ``content`` key.
+        Returns:
+            Number of facts newly added to the section.
+        """
+        if not memories:
+            return 0
+        existing_text = self._path.read_text(encoding="utf-8") if self._path.exists() else None
+        # The sync engine only passes memories the agent does not have yet,
+        # so start from what is already in the section; rebuilding it from
+        # ``memories`` alone would drop every previously exported fact.
+        facts = self._section_facts(existing_text) if existing_text is not None else []
+        seen = set(facts)
+        added = 0
+        for mem in memories:
+            fact = mem["content"].strip().split("\n")[0].strip()  # First line only
+            if not fact or fact in seen:
+                continue
+            facts.append(fact)
+            seen.add(fact)
+            added += 1
+        if added == 0:
+            return 0
+        # Build section content
+        lines = ["## Headroom Shared Memory", ""]
+        lines.extend(f"- {fact}" for fact in facts)
+        lines.append("")
+        section = f"{_MARKER_START}\n" + "\n".join(lines) + f"{_MARKER_END}"
+        # Merge into AGENTS.md
+        if existing_text is None:
+            self._path.parent.mkdir(parents=True, exist_ok=True)
+            content = section + "\n"
+        elif _MARKER_PATTERN.search(existing_text):
+            # Callable replacement: a plain string would have backslashes in
+            # memory text (e.g. Windows paths) parsed as regex escapes.
+            content = _MARKER_PATTERN.sub(lambda _m: section, existing_text, count=1)
+        else:
+            # No complete section. Remove a dangling start marker so the new
+            # section does not later pair with it and swallow the text between.
+            base = existing_text.replace(_MARKER_START, "").rstrip()
+            content = base + "\n\n" + section + "\n"
+        self._path.write_text(content, encoding="utf-8")
+        return added
+    def fingerprint(self) -> str:
+        """Hash of AGENTS.md mtime."""
+        if not self._path.exists():
+            return "empty"
+        try:
+            stat = self._path.stat()
+            return hashlib.sha256(f"{self._path.name}:{stat.st_mtime_ns}".encode()).hexdigest()[:16]
+        except OSError:
+            return "error"

headroom/memory/writers/claude_writer.py CHANGED Viewed

@@ -64,7 +64,8 @@ class ClaudeCodeMemoryWriter(AgentWriter):
         project_path = self._project_path
         # Claude Code stores per-project memory at:
         # ~/.claude/projects/-<sanitized-path>/memory/MEMORY.md
-        sanitized = str(project_path).replace("/", "-")
         claude_memory_dir = Path.home() / ".claude" / "projects" / sanitized / "memory"
         return claude_memory_dir / "MEMORY.md"

         project_path = self._project_path
         # Claude Code stores per-project memory at:
         # ~/.claude/projects/-<sanitized-path>/memory/MEMORY.md
+        # Replace both Unix and Windows path separators
+        sanitized = str(project_path).replace("/", "-").replace("\\", "-")
         claude_memory_dir = Path.home() / ".claude" / "projects" / sanitized / "memory"
         return claude_memory_dir / "MEMORY.md"

headroom/proxy/handlers/openai.py CHANGED Viewed

@@ -672,7 +672,9 @@ class OpenAIHandlerMixin:
                         uncached_tokens=uncached_input_tokens,
                     )
-                # Memory: handle memory tool calls in OpenAI response
                 if (
                     self.memory_handler
                     and memory_user_id
@@ -684,10 +686,29 @@ class OpenAIHandlerMixin:
                         tool_results = await self.memory_handler.handle_memory_tool_calls(
                             resp_json, memory_user_id, "openai"
                         )
-                        logger.info(
-                            f"[{request_id}] Memory: Handled {len(tool_results)} "
-                            f"tool call(s) for user {memory_user_id}"
-                        )
                     except Exception as e:
                         logger.warning(f"[{request_id}] Memory tool handling failed: {e}")
@@ -963,14 +984,29 @@ class OpenAIHandlerMixin:
                             f"of context into instructions for user {memory_user_id}"
                         )
-                # Inject memory tools
                 if self.memory_handler.config.inject_tools:
                     resp_tools = body.get("tools") or []
                     resp_tools, mem_tools_injected = self.memory_handler.inject_tools(
                         resp_tools, "openai"
                     )
                     if mem_tools_injected:
-                        body["tools"] = resp_tools
                         logger.info(
                             f"[{request_id}] Memory: Injected memory tools (openai/responses)"
                         )
@@ -1037,13 +1073,67 @@ class OpenAIHandlerMixin:
                     and self.memory_handler.has_memory_tool_calls(resp_json, "openai")
                 ):
                     try:
-                        tool_results = await self.memory_handler.handle_memory_tool_calls(
-                            resp_json, memory_user_id, "openai"
-                        )
-                        logger.info(
-                            f"[{request_id}] Memory: Handled {len(tool_results)} "
-                            f"tool call(s) for user {memory_user_id} (responses)"
-                        )
                     except Exception as e:
                         logger.warning(
                             f"[{request_id}] Memory tool handling failed (responses): {e}"
@@ -1279,6 +1369,116 @@ class OpenAIHandlerMixin:
                 # Not JSON — pass through as-is
                 tokens_saved = 0
             # --- Connect to upstream OpenAI WebSocket ---
             logger.info(f"[{request_id}] WS /v1/responses connecting to {upstream_url}")
@@ -1303,7 +1503,7 @@ class OpenAIHandlerMixin:
                     # Send (potentially compressed) first message
                     await upstream.send(first_msg_raw)
-                    # Bidirectional relay
                     async def _client_to_upstream() -> None:
                         try:
                             while True:
@@ -1318,14 +1518,164 @@ class OpenAIHandlerMixin:
                                 await upstream.close()
                     async def _upstream_to_client() -> None:
                         try:
                             async for msg in upstream:
-                                if isinstance(msg, str):
-                                    await websocket.send_text(msg)
-                                elif isinstance(msg, bytes):
                                     await websocket.send_bytes(msg)
-                                else:
-                                    await websocket.send_text(str(msg))
                         except Exception as relay_err:
                             if "WebSocketDisconnect" not in type(relay_err).__name__:
                                 logger.debug(

                         uncached_tokens=uncached_input_tokens,
                     )
+                # Memory: handle memory tool calls in OpenAI Chat Completions response.
+                # After executing tools, send a continuation request so the model
+                # can produce a final user-facing response (not just tool_calls).
                 if (
                     self.memory_handler
                     and memory_user_id
                         tool_results = await self.memory_handler.handle_memory_tool_calls(
                             resp_json, memory_user_id, "openai"
                         )
+                        if tool_results:
+                            # Build continuation: original messages + assistant tool_calls + tool results
+                            assistant_msg = resp_json.get("choices", [{}])[0].get("message", {})
+                            continuation_messages = list(optimized_messages)
+                            continuation_messages.append(assistant_msg)
+                            continuation_messages.extend(tool_results)
+                            continuation_body = {
+                                **body,
+                                "messages": continuation_messages,
+                            }
+                            cont_response = await self._retry_request(
+                                "POST", url, headers, continuation_body
+                            )
+                            if cont_response.status_code == 200:
+                                resp_json = cont_response.json()
+                                response = cont_response
+                            logger.info(
+                                f"[{request_id}] Memory: Handled {len(tool_results)} "
+                                f"tool call(s) with continuation for user {memory_user_id}"
+                            )
                     except Exception as e:
                         logger.warning(f"[{request_id}] Memory tool handling failed: {e}")
                             f"of context into instructions for user {memory_user_id}"
                         )
+                # Inject memory tools (Responses API format)
                 if self.memory_handler.config.inject_tools:
                     resp_tools = body.get("tools") or []
                     resp_tools, mem_tools_injected = self.memory_handler.inject_tools(
                         resp_tools, "openai"
                     )
                     if mem_tools_injected:
+                        # Convert Chat Completions format to Responses API format
+                        converted_tools = []
+                        for t in resp_tools:
+                            if t.get("type") == "function" and "function" in t:
+                                fn = t["function"]
+                                converted_tools.append(
+                                    {
+                                        "type": "function",
+                                        "name": fn.get("name"),
+                                        "description": fn.get("description", ""),
+                                        "parameters": fn.get("parameters", {}),
+                                    }
+                                )
+                            else:
+                                converted_tools.append(t)
+                        body["tools"] = converted_tools
                         logger.info(
                             f"[{request_id}] Memory: Injected memory tools (openai/responses)"
                         )
                     and self.memory_handler.has_memory_tool_calls(resp_json, "openai")
                 ):
                     try:
+                        # Extract function_call items from output
+                        from headroom.proxy.memory_handler import MEMORY_TOOL_NAMES
+                        output_items = resp_json.get("output", [])
+                        memory_fc_items = [
+                            item
+                            for item in output_items
+                            if isinstance(item, dict)
+                            and item.get("type") == "function_call"
+                            and item.get("name") in MEMORY_TOOL_NAMES
+                        ]
+                        # Execute memory tool calls
+                        tool_outputs: list[dict[str, Any]] = []
+                        for fc in memory_fc_items:
+                            call_id = fc.get("call_id", fc.get("id", ""))
+                            name = fc.get("name", "")
+                            args_str = fc.get("arguments", "{}")
+                            try:
+                                args = json.loads(args_str)
+                            except json.JSONDecodeError:
+                                args = {}
+                            await self.memory_handler._ensure_initialized()
+                            if self.memory_handler._backend:
+                                result = await self.memory_handler._execute_memory_tool(
+                                    name, args, memory_user_id, "openai"
+                                )
+                            else:
+                                result = json.dumps({"error": "Memory backend not initialized"})
+                            tool_outputs.append(
+                                {
+                                    "type": "function_call_output",
+                                    "call_id": call_id,
+                                    "output": result,
+                                }
+                            )
+                        if tool_outputs:
+                            # Make continuation request with tool results
+                            response_id = resp_json.get("id")
+                            continuation_body = {
+                                "model": model,
+                                "input": tool_outputs,
+                            }
+                            if response_id:
+                                continuation_body["previous_response_id"] = response_id
+                            existing_tools = body.get("tools")
+                            if existing_tools:
+                                continuation_body["tools"] = existing_tools
+                            cont_response = await self._retry_request(
+                                "POST", url, headers, continuation_body
+                            )
+                            resp_json = cont_response.json()
+                            response = cont_response
+                            logger.info(
+                                f"[{request_id}] Memory: Handled {len(tool_outputs)} "
+                                f"tool call(s) with continuation for user {memory_user_id} (responses)"
+                            )
                     except Exception as e:
                         logger.warning(
                             f"[{request_id}] Memory tool handling failed (responses): {e}"
                 # Not JSON — pass through as-is
                 tokens_saved = 0
+            # --- Memory: inject context, tools, and instructions ---
+            memory_user_id: str | None = None
+            if self.memory_handler and body:
+                memory_user_id = ws_headers.get(
+                    "x-headroom-user-id",
+                    os.environ.get("USER", os.environ.get("USERNAME", "default")),
+                )
+                try:
+                    # Unwrap response.create envelope to access the response body
+                    ws_response_body = body.get("response", body)
+                    # Debug: log what Codex sends so we can see the full tool list
+                    existing_tool_names = [
+                        t.get("name") or t.get("function", {}).get("name", "?")
+                        for t in (ws_response_body.get("tools") or [])
+                    ]
+                    instr_preview = (ws_response_body.get("instructions") or "")[:200]
+                    logger.info(
+                        f"[{request_id}] WS Memory: Codex tools={existing_tool_names}, "
+                        f"instructions_len={len(ws_response_body.get('instructions') or '')}, "
+                        f"instructions_preview={instr_preview!r}"
+                    )
+                    # Inject memory context into instructions
+                    if self.memory_handler.config.inject_context:
+                        ws_input = ws_response_body.get("input", "")
+                        ws_instructions = ws_response_body.get("instructions")
+                        ws_msgs: list[dict[str, Any]] = []
+                        if ws_instructions:
+                            ws_msgs.append({"role": "system", "content": ws_instructions})
+                        if isinstance(ws_input, str) and ws_input:
+                            ws_msgs.append({"role": "user", "content": ws_input})
+                        elif isinstance(ws_input, list):
+                            from headroom.proxy.responses_converter import (
+                                responses_items_to_messages,
+                            )
+                            converted_msgs, _ = responses_items_to_messages(ws_input)
+                            ws_msgs.extend(converted_msgs)
+                        memory_context = await self.memory_handler.search_and_format_context(
+                            memory_user_id, ws_msgs
+                        )
+                        if memory_context:
+                            existing = ws_response_body.get("instructions") or ""
+                            if existing:
+                                ws_response_body["instructions"] = f"{existing}\n\n{memory_context}"
+                            else:
+                                ws_response_body["instructions"] = memory_context
+                            logger.info(
+                                f"[{request_id}] WS Memory: Injected {len(memory_context)} chars "
+                                f"of context into instructions"
+                            )
+                    # Inject memory tools (Responses API format)
+                    if self.memory_handler.config.inject_tools:
+                        ws_tools = ws_response_body.get("tools") or []
+                        ws_tools, mem_injected = self.memory_handler.inject_tools(
+                            ws_tools, "openai"
+                        )
+                        if mem_injected:
+                            converted_tools = []
+                            for t in ws_tools:
+                                if t.get("type") == "function" and "function" in t:
+                                    fn = t["function"]
+                                    converted_tools.append(
+                                        {
+                                            "type": "function",
+                                            "name": fn.get("name"),
+                                            "description": fn.get("description", ""),
+                                            "parameters": fn.get("parameters", {}),
+                                        }
+                                    )
+                                else:
+                                    converted_tools.append(t)
+                            ws_response_body["tools"] = converted_tools
+                            # Add memory instruction so the model uses
+                            # memory tools as persistent cross-session knowledge.
+                            mem_instruction = (
+                                "\n\n## Memory\n"
+                                "You have persistent memory via memory_search and "
+                                "memory_save tools. Memory stores knowledge across "
+                                "sessions — user info, project details, org context, "
+                                "decisions, architecture, conventions, anything worth "
+                                "remembering.\n\n"
+                                "- ALWAYS call memory_search BEFORE searching files "
+                                "when the user asks a question that could be answered "
+                                "from prior knowledge.\n"
+                                "- Call memory_save to store important facts, decisions, "
+                                "or context that would be useful in future sessions.\n"
+                                "- Memory is your first source of truth for anything "
+                                "not visible in the current conversation."
+                            )
+                            existing_instr = ws_response_body.get("instructions") or ""
+                            ws_response_body["instructions"] = existing_instr + mem_instruction
+                            logger.info(
+                                f"[{request_id}] WS Memory: Injected memory tools + instruction"
+                            )
+                    # Write back into envelope if it was wrapped
+                    if "response" in body and isinstance(body["response"], dict):
+                        body["response"] = ws_response_body
+                    else:
+                        body = ws_response_body
+                    first_msg_raw = json.dumps(body)
+                except Exception as e:
+                    logger.warning(f"[{request_id}] WS Memory injection failed: {e}")
             # --- Connect to upstream OpenAI WebSocket ---
             logger.info(f"[{request_id}] WS /v1/responses connecting to {upstream_url}")
                     # Send (potentially compressed) first message
                     await upstream.send(first_msg_raw)
+                    # Bidirectional relay with memory tool interception
                     async def _client_to_upstream() -> None:
                         try:
                             while True:
                                 await upstream.close()
                     async def _upstream_to_client() -> None:
+                        """Relay upstream→client with transparent memory tool handling.
+                        Uses a buffer-then-decide approach, per response:
+                        1. Buffer events until the first output item arrives
+                        2. If first output is a memory tool → suppress entire response,
+                           execute tools silently, send continuation upstream
+                        3. If first output is non-memory → flush buffer, stream normally
+                        4. Continuation response events are relayed to the client
+                        The per-response state is re-armed when a response ends in ANY
+                        mode, so later responses are inspected again. Failure events are
+                        always forwarded so the client is never left waiting.
+                        """
+                        from headroom.proxy.memory_handler import MEMORY_TOOL_NAMES
+                        memory_enabled = bool(self.memory_handler and memory_user_id)
+                        # Event types treated as the end of the current response.
+                        response_end_events = (
+                            "response.completed",
+                            "response.failed",
+                            "response.incomplete",
+                            "error",
+                        )
+                        # Per-response state (reset whenever a response ends)
+                        event_buffer: list[str] = []
+                        decided = False
+                        suppress_response = False
+                        pending_fcs: list[dict[str, Any]] = []
+                        def _reset() -> None:
+                            nonlocal decided, suppress_response
+                            event_buffer.clear()
+                            decided = False
+                            suppress_response = False
+                            pending_fcs.clear()
+                        def _is_memory_call(item: Any) -> bool:
+                            # True for a function_call item naming one of the memory tools.
+                            return (
+                                isinstance(item, dict)
+                                and item.get("type") == "function_call"
+                                and item.get("name") in MEMORY_TOOL_NAMES
+                            )
+                        async def _run_memory_tools() -> list[dict[str, Any]]:
+                            # Execute every pending memory call. A failing tool yields an
+                            # error output instead of tearing down the whole relay.
+                            outputs: list[dict[str, Any]] = []
+                            for fc in pending_fcs:
+                                call_id = fc.get("call_id", fc.get("id", ""))
+                                fc_name = fc.get("name", "")
+                                try:
+                                    fc_args = json.loads(fc.get("arguments", "{}"))
+                                except (json.JSONDecodeError, TypeError):
+                                    fc_args = {}
+                                try:
+                                    await self.memory_handler._ensure_initialized()
+                                    if self.memory_handler._backend:
+                                        result = await self.memory_handler._execute_memory_tool(
+                                            fc_name, fc_args, memory_user_id, "openai"
+                                        )
+                                    else:
+                                        result = json.dumps({"error": "backend not ready"})
+                                except Exception as tool_err:
+                                    logger.warning(
+                                        f"[{request_id}] WS Memory: {fc_name} failed: {tool_err}"
+                                    )
+                                    result = json.dumps({"error": str(tool_err)})
+                                else:
+                                    logger.info(
+                                        f"[{request_id}] WS Memory: Executed "
+                                        f"{fc_name} for user {memory_user_id}"
+                                    )
+                                outputs.append(
+                                    {
+                                        "type": "function_call_output",
+                                        "call_id": call_id,
+                                        "output": result,
+                                    }
+                                )
+                            return outputs
                         try:
                             async for msg in upstream:
+                                if isinstance(msg, bytes):
                                     await websocket.send_bytes(msg)
+                                    continue
+                                msg_str = msg if isinstance(msg, str) else str(msg)
+                                if not memory_enabled:
+                                    await websocket.send_text(msg_str)
+                                    continue
+                                # Parse event; anything that is not a JSON object is relayed as-is
+                                try:
+                                    event = json.loads(msg_str)
+                                except (json.JSONDecodeError, TypeError):
+                                    event = None
+                                if not isinstance(event, dict):
+                                    await websocket.send_text(msg_str)
+                                    continue
+                                event_type = event.get("type", "")
+                                # --- Phase 1: Buffer until first output item ---
+                                if not decided:
+                                    event_buffer.append(msg_str)
+                                    if event_type == "response.output_item.added":
+                                        item = event.get("item")
+                                        decided = True
+                                        if _is_memory_call(item):
+                                            # Memory tool first → suppress entire response
+                                            suppress_response = True
+                                            event_buffer.clear()
+                                            logger.info(
+                                                f"[{request_id}] WS Memory: Detected "
+                                                f"{item.get('name')} — suppressing response"
+                                            )
+                                        else:
+                                            # Non-memory first → flush buffer, pass through
+                                            for buf in event_buffer:
+                                                await websocket.send_text(buf)
+                                            event_buffer.clear()
+                                    elif event_type in response_end_events:
+                                        # Response ended (or failed) before any output
+                                        # item — flush so the client sees the outcome.
+                                        for buf in event_buffer:
+                                            await websocket.send_text(buf)
+                                        _reset()
+                                    continue
+                                # --- Phase 2a: Suppress mode (memory response) ---
+                                if suppress_response:
+                                    if event_type == "response.output_item.done":
+                                        item = event.get("item")
+                                        if _is_memory_call(item):
+                                            pending_fcs.append(item)
+                                    elif event_type == "response.completed":
+                                        if pending_fcs:
+                                            logger.info(
+                                                f"[{request_id}] WS Memory: Executing "
+                                                f"{len(pending_fcs)} tool(s) transparently"
+                                            )
+                                            tool_outputs = await _run_memory_tools()
+                                            # Send continuation upstream
+                                            cont: dict[str, Any] = {
+                                                "type": "response.create",
+                                                "response": {"input": tool_outputs},
+                                            }
+                                            resp_id = (event.get("response") or {}).get("id")
+                                            if resp_id:
+                                                cont["response"]["previous_response_id"] = resp_id
+                                            await upstream.send(json.dumps(cont))
+                                            logger.info(
+                                                f"[{request_id}] WS Memory: Sent continuation "
+                                                f"with {len(tool_outputs)} result(s)"
+                                            )
+                                        _reset()
+                                    elif event_type in response_end_events:
+                                        # The suppressed response failed: no continuation
+                                        # will follow, so surface the failure and re-arm
+                                        # instead of suppressing forever.
+                                        await websocket.send_text(msg_str)
+                                        _reset()
+                                    # All other events are suppressed in this mode
+                                    continue
+                                # --- Phase 2b: Pass-through mode ---
+                                await websocket.send_text(msg_str)
+                                if event_type in response_end_events:
+                                    # Re-arm so the next response is inspected again.
+                                    _reset()
                         except Exception as relay_err:
                             if "WebSocketDisconnect" not in type(relay_err).__name__:
                                 logger.debug(

headroom/proxy/handlers/streaming.py CHANGED Viewed

@@ -614,15 +614,18 @@ class StreamingMixin:
                             f"[{request_id}] Memory: Detected tool calls in streaming response"
                         )
-                        # Execute memory tool calls silently — response already
-                        # streamed so we cannot make a continuation request.
                         tool_results = await self.memory_handler.handle_memory_tool_calls(
                             parsed_response, memory_user_id, provider
                         )
                         if tool_results:
                             logger.info(
-                                f"[{request_id}] Memory: Tool calls executed silently "
-                                "(streaming mode — no continuation)"
                             )
                 # CCR Feedback: Record headroom_retrieve tool calls for TOIN learning.

                             f"[{request_id}] Memory: Detected tool calls in streaming response"
                         )
+                        # Execute memory tool calls — response already streamed
+                        # so results are saved but continuation is not possible
+                        # in SSE streaming mode. The WS and non-streaming paths
+                        # handle continuation properly.
                         tool_results = await self.memory_handler.handle_memory_tool_calls(
                             parsed_response, memory_user_id, provider
                         )
                         if tool_results:
                             logger.info(
+                                f"[{request_id}] Memory: Tool calls executed "
+                                f"({len(tool_results)} results saved, SSE streaming — "
+                                "continuation handled by client)"
                             )
                 # CCR Feedback: Record headroom_retrieve tool calls for TOIN learning.

headroom/proxy/memory_handler.py CHANGED Viewed

@@ -380,7 +380,14 @@ class MemoryHandler:
                     entities_str = ", ".join(result.related_entities[:3])
                     memory_lines.append(f"   (Related: {entities_str})")
-            context = f"""## Relevant Memories for This User
 The following information was previously saved about this user:
@@ -388,15 +395,11 @@ The following information was previously saved about this user:
 Use this context to provide personalized and contextually relevant responses."""
-            logger.info(
-                f"Memory: Injecting {len(filtered_results)} memories "
-                f"({len(context)} chars) for user {user_id}"
-            )
-            return context
-        except Exception as e:
-            logger.warning(f"Memory: Search failed for user {user_id}: {e}")
-            return None
     def _extract_user_query(self, messages: list[dict[str, Any]]) -> str:
         """Extract the user query from the last user message."""
@@ -445,10 +448,25 @@ Use this context to provide personalized and contextually relevant responses."""
             return []
         elif provider == "openai":
             choices = response.get("choices", [])
             if choices:
                 message = choices[0].get("message", {})
-                return list(message.get("tool_calls", []) or [])
             return []
         return []
@@ -474,13 +492,15 @@ Use this context to provide personalized and contextually relevant responses."""
         for tc in tool_calls:
             tool_name = tc.get("name") or tc.get("function", {}).get("name")
-            tool_id = tc.get("id", "")
             # Parse input data
             if provider == "anthropic":
                 input_data = tc.get("input", {})
             else:
-                args_str = tc.get("function", {}).get("arguments", "{}")
                 try:
                     input_data = json.loads(args_str)
                 except json.JSONDecodeError:

                     entities_str = ", ".join(result.related_entities[:3])
                     memory_lines.append(f"   (Related: {entities_str})")
+        except Exception as e:
+            logger.warning(f"Memory: Search failed for user {user_id}: {e}")
+            return None
+        if not memory_lines:
+            return None
+        context = f"""## Relevant Memories for This User
 The following information was previously saved about this user:
 Use this context to provide personalized and contextually relevant responses."""
+        logger.info(
+            f"Memory: Injecting {len(memory_lines)} memories "
+            f"({len(context)} chars) for user {user_id}"
+        )
+        return context
     def _extract_user_query(self, messages: list[dict[str, Any]]) -> str:
         """Extract the user query from the last user message."""
             return []
         elif provider == "openai":
+            # Chat Completions format: choices[0].message.tool_calls
             choices = response.get("choices", [])
             if choices:
                 message = choices[0].get("message", {})
+                tc_list = list(message.get("tool_calls", []) or [])
+                if tc_list:
+                    return tc_list
+            # Responses API format: output[] with type=function_call
+            output = response.get("output", [])
+            if isinstance(output, list):
+                fc_items = [
+                    item
+                    for item in output
+                    if isinstance(item, dict) and item.get("type") == "function_call"
+                ]
+                if fc_items:
+                    return fc_items
             return []
         return []
         for tc in tool_calls:
             tool_name = tc.get("name") or tc.get("function", {}).get("name")
+            tool_id = tc.get("id") or tc.get("call_id", "")
             # Parse input data
             if provider == "anthropic":
                 input_data = tc.get("input", {})
             else:
+                # Chat Completions format: function.arguments
+                # Responses API format: arguments (top-level string)
+                args_str = tc.get("arguments") or tc.get("function", {}).get("arguments") or "{}"
                 try:
                     input_data = json.loads(args_str)
                 except json.JSONDecodeError:

headroom/proxy/models.py CHANGED Viewed

@@ -207,3 +207,6 @@ class ProxyConfig:
     subscription_tracking_enabled: bool = True
     subscription_poll_interval_s: int = 10
     subscription_active_window_s: int = 60

     subscription_tracking_enabled: bool = True
     subscription_poll_interval_s: int = 10
     subscription_active_window_s: int = 60
+    # Stateless mode — disable all filesystem writes for read-only / container deployments
+    stateless: bool = False

headroom/proxy/request_logger.py CHANGED Viewed

@@ -8,6 +8,7 @@ Extracted from server.py for maintainability.
 from __future__ import annotations
 import json
 import sys
 from collections import deque
 from dataclasses import asdict
@@ -19,11 +20,15 @@ if TYPE_CHECKING:
 from headroom.proxy.models import RequestLog
 class RequestLogger:
     """Log requests to JSONL file.
     Uses a deque with max 10,000 entries to prevent unbounded memory growth.
     """
     MAX_LOG_ENTRIES = 10_000
@@ -35,19 +40,30 @@ class RequestLogger:
         self._logs: deque[RequestLog] = deque(maxlen=self.MAX_LOG_ENTRIES)
         if self.log_file:
-            self.log_file.parent.mkdir(parents=True, exist_ok=True)
     def log(self, entry: RequestLog):
         """Log a request. Oldest entries are automatically removed when limit reached."""
         self._logs.append(entry)
         if self.log_file:
-            with open(self.log_file, "a") as f:
-                log_dict = asdict(entry)
-                if not self.log_full_messages:
-                    log_dict.pop("request_messages", None)
-                    log_dict.pop("response_content", None)
-                f.write(json.dumps(log_dict) + "\n")
     def get_recent(self, n: int = 100) -> list[dict]:
         """Get recent log entries."""

 from __future__ import annotations
 import json
+import logging
 import sys
 from collections import deque
 from dataclasses import asdict
 from headroom.proxy.models import RequestLog
+logger = logging.getLogger(__name__)
 class RequestLogger:
     """Log requests to JSONL file.
     Uses a deque with max 10,000 entries to prevent unbounded memory growth.
+    Gracefully degrades to in-memory-only if the log file cannot be written
+    (read-only filesystem, permissions error, etc.).
     """
     MAX_LOG_ENTRIES = 10_000
         self._logs: deque[RequestLog] = deque(maxlen=self.MAX_LOG_ENTRIES)
         if self.log_file:
+            try:
+                self.log_file.parent.mkdir(parents=True, exist_ok=True)
+            except OSError as e:
+                logger.warning(
+                    "Cannot create log directory %s: %s — logging to memory only",
+                    self.log_file.parent,
+                    e,
+                )
+                self.log_file = None
     def log(self, entry: RequestLog):
         """Log a request. Oldest entries are automatically removed when limit reached."""
         self._logs.append(entry)
         if self.log_file:
+            try:
+                with open(self.log_file, "a") as f:
+                    log_dict = asdict(entry)
+                    if not self.log_full_messages:
+                        log_dict.pop("request_messages", None)
+                        log_dict.pop("response_content", None)
+                    f.write(json.dumps(log_dict) + "\n")
+            except OSError:
+                pass  # Graceful degradation: memory-only logging continues
     def get_recent(self, n: int = 100) -> list[dict]:
         """Get recent log entries."""

headroom/telemetry/toin.py CHANGED Viewed

@@ -1174,11 +1174,11 @@ class ToolIntelligenceNetwork:
             canonical = "null"
         elif isinstance(value, bool):
             canonical = "true" if value else "false"
-        elif isinstance(value, (int, float)):
             canonical = str(value)
         elif isinstance(value, str):
             canonical = value
-        elif isinstance(value, (list, dict)):
             # For complex types, use JSON serialization
             try:
                 canonical = json.dumps(value, sort_keys=True, default=str)
@@ -1573,6 +1573,8 @@ def _create_default_toin_backend() -> Any:
     backend_type = (os.environ.get(TOIN_BACKEND_ENV_VAR) or "").strip().lower()
     if not backend_type or backend_type == "filesystem":
         return None
     try:
         from importlib.metadata import entry_points

             canonical = "null"
         elif isinstance(value, bool):
             canonical = "true" if value else "false"
+        elif isinstance(value, int | float):
             canonical = str(value)
         elif isinstance(value, str):
             canonical = value
+        elif isinstance(value, list | dict):
             # For complex types, use JSON serialization
             try:
                 canonical = json.dumps(value, sort_keys=True, default=str)
     backend_type = (os.environ.get(TOIN_BACKEND_ENV_VAR) or "").strip().lower()
     if not backend_type or backend_type == "filesystem":
         return None
+    if backend_type == "none":
+        return None  # Explicit in-memory-only (e.g. --stateless mode)
     try:
         from importlib.metadata import entry_points

headroom/transforms/kompress_compressor.py CHANGED Viewed

@@ -25,12 +25,12 @@ from .base import Transform
 logger = logging.getLogger(__name__)
-# HuggingFace model ID
 HF_MODEL_ID = "chopratejas/kompress-base"
-# Lazy singleton
-_kompress_model = None
-_kompress_tokenizer = None
 _kompress_lock = threading.Lock()
@@ -132,9 +132,6 @@ def _get_model_class() -> type:
 # ── Model Loading ─────────────────────────────────────────────────────
-# Backend tag: "onnx" or "pytorch"
-_kompress_backend: str | None = None
 class _OnnxModel:
     """Thin wrapper so ONNX session has the same interface as PyTorch model."""
@@ -163,48 +160,42 @@ class _OnnxModel:
         return (np.array(scores) > 0.5).tolist()
-def _load_kompress_onnx() -> tuple[Any, Any]:
     """Download ONNX INT8 model from HuggingFace and load with onnxruntime."""
     import onnxruntime as ort
     from transformers import AutoTokenizer
-    global _kompress_model, _kompress_tokenizer, _kompress_backend
     with _kompress_lock:
-        if _kompress_model is not None:
-            return _kompress_model, _kompress_tokenizer
         from huggingface_hub import hf_hub_download
-        logger.info("Downloading Kompress ONNX model from %s ...", HF_MODEL_ID)
-        onnx_path = hf_hub_download(HF_MODEL_ID, "onnx/kompress-int8.onnx")
         session = ort.InferenceSession(onnx_path)
         model = _OnnxModel(session)
         tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
-        _kompress_model = model
-        _kompress_tokenizer = tokenizer
-        _kompress_backend = "onnx"
-        logger.info("Kompress ONNX INT8 loaded (no torch dependency)")
-        return model, tokenizer
-def _load_kompress_pytorch(device: str = "auto") -> tuple[Any, Any]:
     """Download PyTorch model from HuggingFace and load with torch."""
     import torch
     from transformers import AutoTokenizer
-    global _kompress_model, _kompress_tokenizer, _kompress_backend
     with _kompress_lock:
-        if _kompress_model is not None:
-            return _kompress_model, _kompress_tokenizer
         from huggingface_hub import hf_hub_download
-        logger.info("Downloading Kompress PyTorch model from %s ...", HF_MODEL_ID)
-        weights_path = hf_hub_download(HF_MODEL_ID, "model.safetensors")
         HeadroomCompressorModel = _get_model_class()
         model = HeadroomCompressorModel()
@@ -227,50 +218,60 @@ def _load_kompress_pytorch(device: str = "auto") -> tuple[Any, Any]:
         tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
-        _kompress_model = model
-        _kompress_tokenizer = tokenizer
-        _kompress_backend = "pytorch"
-        logger.info("Kompress PyTorch loaded on %s (%s)", device, HF_MODEL_ID)
-        return model, tokenizer
-def _load_kompress(device: str = "auto") -> tuple[Any, Any]:
-    """Load Kompress model: try ONNX first (lightweight), fall back to PyTorch."""
-    global _kompress_model
-    if _kompress_model is not None:
-        return _kompress_model, _kompress_tokenizer
     # Prefer ONNX (50MB onnxruntime vs 800MB torch)
     if _is_onnx_available():
         try:
-            return _load_kompress_onnx()
         except Exception as e:
-            logger.warning("ONNX load failed, trying PyTorch: %s", e)
     if _is_pytorch_available():
-        return _load_kompress_pytorch(device)
     raise ImportError(
         "Kompress requires onnxruntime or torch. Install with: pip install headroom-ai[proxy]"
     )
-def unload_kompress_model() -> bool:
-    """Unload the Kompress model to free memory."""
-    global _kompress_model, _kompress_tokenizer
     with _kompress_lock:
-        if _kompress_model is not None:
-            _kompress_model = None
-            _kompress_tokenizer = None
-            try:
-                import torch
-                if torch.cuda.is_available():
-                    torch.cuda.empty_cache()
-            except ImportError:
-                pass
-            return True
-    return False
 # ── Compressor ────────────────────────────────────────────────────────
@@ -278,10 +279,26 @@ def unload_kompress_model() -> bool:
 @dataclass
 class KompressConfig:
-    """Minimal config. The model decides what's important — not us."""
     device: str = "auto"
     enable_ccr: bool = True
 @dataclass
@@ -308,9 +325,10 @@ class KompressResult:
 class KompressCompressor(Transform):
-    """Kompress: ModernBERT token compressor for structured tool outputs.
-    Auto-downloads chopratejas/kompress-base from HuggingFace on first use.
     """
     name: str = "kompress_compressor"
@@ -347,11 +365,10 @@ class KompressCompressor(Transform):
             return self._passthrough(content, n_words)
         try:
-            model, tokenizer = _load_kompress(self.config.device)
-            is_onnx = _kompress_backend == "onnx"
-            # Chunk at 512 tokens ≈ 350 words (matches training max_length)
-            max_chunk_words = 350
             kept_ids: set[int] = set()
             for chunk_start in range(0, n_words, max_chunk_words):
@@ -423,6 +440,7 @@ class KompressCompressor(Transform):
                 original_tokens=n_words,
                 compressed_tokens=compressed_count,
                 compression_ratio=ratio,
             )
             # CCR marker
@@ -542,7 +560,7 @@ class KompressCompressor(Transform):
         word_lists: list[list[str]] = [c.split() for c in contents]
         # Short texts short-circuit to passthrough — no model call needed.
-        max_chunk_words = 350
         chunk_queue: list[tuple[int, int, list[str], float | None]] = []
         for i, (words, ratio) in enumerate(zip(word_lists, ratios, strict=True)):
             if len(words) < 10:
@@ -558,7 +576,7 @@ class KompressCompressor(Transform):
         # Load model once for the whole batch.
         try:
-            model, tokenizer = _load_kompress(self.config.device)
         except Exception as e:
             logger.warning("Kompress load failed for batch: %s — passthrough all", e)
             for i in range(n):
@@ -566,7 +584,7 @@ class KompressCompressor(Transform):
                     results[i] = self._passthrough(contents[i], len(word_lists[i]))
             return [r for r in results if r is not None]
-        is_onnx = _kompress_backend == "onnx"
         kept_ids_per_text: dict[int, set[int]] = {i: set() for i in range(n) if results[i] is None}
         for batch_start in range(0, len(chunk_queue), batch_size):
@@ -620,9 +638,9 @@ class KompressCompressor(Transform):
                         for wid in sorted_wids[:num_keep]:
                             kept_ids_per_text[text_idx].add(wid + chunk_start)
                     else:
-                        # Threshold at 0.5 (matches ONNX get_keep_mask behavior).
                         for wid, score in word_scores.items():
-                            if score > 0.5:
                                 kept_ids_per_text[text_idx].add(wid + chunk_start)
             except Exception as e:
@@ -659,6 +677,7 @@ class KompressCompressor(Transform):
                 original_tokens=n_words,
                 compressed_tokens=compressed_count,
                 compression_ratio=comp_ratio,
             )
             if self.config.enable_ccr and comp_ratio < 0.8:
@@ -692,28 +711,28 @@ class KompressCompressor(Transform):
         If the model isn't loaded yet, we trigger loading so the backend
         is known. This is a no-op if the model is already in cache.
         """
-        global _kompress_model, _kompress_backend
-        if _kompress_model is None:
             try:
-                _load_kompress(self.config.device)
             except Exception:
-                # If load fails, caller will see the error downstream.
                 return True
-        if _kompress_backend == "onnx":
             return True  # ONNX CPU provider doesn't parallelize batch dim
-        if _kompress_backend == "pytorch":
             try:
                 import torch
-                # Check the model's actual device
-                if _kompress_model is not None and hasattr(_kompress_model, "parameters"):
-                    device = next(_kompress_model.parameters()).device
-                    if device.type == "cuda":
-                        return False  # GPU benefits from batching
-                    if device.type == "mps":
-                        return False  # MPS (Apple Silicon) also benefits
-                    # Fall through for CPU
                 _ = torch
             except ImportError:
                 return True

 logger = logging.getLogger(__name__)
+# Default HuggingFace model ID
 HF_MODEL_ID = "chopratejas/kompress-base"
+# Model cache: model_id -> (model, tokenizer, backend)
+# Supports multiple models loaded simultaneously.
+_kompress_cache: dict[str, tuple[Any, Any, str]] = {}
 _kompress_lock = threading.Lock()
 # ── Model Loading ─────────────────────────────────────────────────────
 class _OnnxModel:
     """Thin wrapper so ONNX session has the same interface as PyTorch model."""
         return (np.array(scores) > 0.5).tolist()
+def _load_kompress_onnx(model_id: str) -> tuple[Any, Any, str]:
     """Download ONNX INT8 model from HuggingFace and load with onnxruntime."""
     import onnxruntime as ort
     from transformers import AutoTokenizer
     with _kompress_lock:
+        if model_id in _kompress_cache:
+            return _kompress_cache[model_id]
         from huggingface_hub import hf_hub_download
+        logger.info("Downloading Kompress ONNX model from %s ...", model_id)
+        onnx_path = hf_hub_download(model_id, "onnx/kompress-int8.onnx")
         session = ort.InferenceSession(onnx_path)
         model = _OnnxModel(session)
         tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
+        _kompress_cache[model_id] = (model, tokenizer, "onnx")
+        logger.info("Kompress ONNX INT8 loaded: %s", model_id)
+        return model, tokenizer, "onnx"
+def _load_kompress_pytorch(model_id: str, device: str = "auto") -> tuple[Any, Any, str]:
     """Download PyTorch model from HuggingFace and load with torch."""
     import torch
     from transformers import AutoTokenizer
     with _kompress_lock:
+        if model_id in _kompress_cache:
+            return _kompress_cache[model_id]
         from huggingface_hub import hf_hub_download
+        logger.info("Downloading Kompress PyTorch model from %s ...", model_id)
+        weights_path = hf_hub_download(model_id, "model.safetensors")
         HeadroomCompressorModel = _get_model_class()
         model = HeadroomCompressorModel()
         tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
+        _kompress_cache[model_id] = (model, tokenizer, "pytorch")
+        logger.info("Kompress PyTorch loaded on %s (%s)", device, model_id)
+        return model, tokenizer, "pytorch"
+def _load_kompress(model_id: str = HF_MODEL_ID, device: str = "auto") -> tuple[Any, Any, str]:
+    """Load Kompress model, returns (model, tokenizer, backend).
+    Try ONNX first (lightweight), fall back to PyTorch.
+    Models are cached by model_id — multiple models can coexist.
+    """
+    if model_id in _kompress_cache:
+        return _kompress_cache[model_id]
     # Prefer ONNX (50MB onnxruntime vs 800MB torch)
     if _is_onnx_available():
         try:
+            return _load_kompress_onnx(model_id)
         except Exception as e:
+            logger.warning("ONNX load failed for %s, trying PyTorch: %s", model_id, e)
     if _is_pytorch_available():
+        return _load_kompress_pytorch(model_id, device)
     raise ImportError(
         "Kompress requires onnxruntime or torch. Install with: pip install headroom-ai[proxy]"
     )
+def unload_kompress_model(model_id: str | None = None) -> bool:
+    """Unload Kompress model(s) to free memory.
+    Args:
+        model_id: Specific model to unload. If None, unloads all cached models.
+    """
     with _kompress_lock:
+        if model_id is not None:
+            if model_id in _kompress_cache:
+                del _kompress_cache[model_id]
+            else:
+                return False
+        elif _kompress_cache:
+            _kompress_cache.clear()
+        else:
+            return False
+        try:
+            import torch
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+        except ImportError:
+            pass
+        return True
 # ── Compressor ────────────────────────────────────────────────────────
 @dataclass
 class KompressConfig:
+    """Configuration for Kompress compression.
+    The model_id, chunk_words, and score_threshold are coupled: a model
+    trained on 50-word chunks needs chunk_words=50 at inference. The
+    defaults match kompress-base. For domain-specific models, set all three.
+    Example — financial documents::
+        KompressConfig(
+            model_id="chopratejas/kompress-finance",
+            chunk_words=50,
+            score_threshold=0.5,
+        )
+    """
     device: str = "auto"
     enable_ccr: bool = True
+    model_id: str = HF_MODEL_ID
+    chunk_words: int = 350
+    score_threshold: float = 0.5
 @dataclass
 class KompressCompressor(Transform):
+    """Kompress: ModernBERT token compressor.
+    Auto-downloads the model from HuggingFace on first use.
+    Configure via KompressConfig to select model, chunk size, and threshold.
     """
     name: str = "kompress_compressor"
             return self._passthrough(content, n_words)
         try:
+            model, tokenizer, backend = _load_kompress(self.config.model_id, self.config.device)
+            is_onnx = backend == "onnx"
+            max_chunk_words = self.config.chunk_words
             kept_ids: set[int] = set()
             for chunk_start in range(0, n_words, max_chunk_words):
                 original_tokens=n_words,
                 compressed_tokens=compressed_count,
                 compression_ratio=ratio,
+                model_used=self.config.model_id,
             )
             # CCR marker
         word_lists: list[list[str]] = [c.split() for c in contents]
         # Short texts short-circuit to passthrough — no model call needed.
+        max_chunk_words = self.config.chunk_words
         chunk_queue: list[tuple[int, int, list[str], float | None]] = []
         for i, (words, ratio) in enumerate(zip(word_lists, ratios, strict=True)):
             if len(words) < 10:
         # Load model once for the whole batch.
         try:
+            model, tokenizer, backend = _load_kompress(self.config.model_id, self.config.device)
         except Exception as e:
             logger.warning("Kompress load failed for batch: %s — passthrough all", e)
             for i in range(n):
                     results[i] = self._passthrough(contents[i], len(word_lists[i]))
             return [r for r in results if r is not None]
+        is_onnx = backend == "onnx"
         kept_ids_per_text: dict[int, set[int]] = {i: set() for i in range(n) if results[i] is None}
         for batch_start in range(0, len(chunk_queue), batch_size):
                         for wid in sorted_wids[:num_keep]:
                             kept_ids_per_text[text_idx].add(wid + chunk_start)
                     else:
+                        # Threshold from config (default 0.5, matches ONNX get_keep_mask).
                         for wid, score in word_scores.items():
+                            if score > self.config.score_threshold:
                                 kept_ids_per_text[text_idx].add(wid + chunk_start)
             except Exception as e:
                 original_tokens=n_words,
                 compressed_tokens=compressed_count,
                 compression_ratio=comp_ratio,
+                model_used=self.config.model_id,
             )
             if self.config.enable_ccr and comp_ratio < 0.8:
         If the model isn't loaded yet, we trigger loading so the backend
         is known. This is a no-op if the model is already in cache.
         """
+        model_id = self.config.model_id
+        if model_id not in _kompress_cache:
             try:
+                _load_kompress(model_id, self.config.device)
             except Exception:
                 return True
+        if model_id not in _kompress_cache:
+            return True
+        model, _tokenizer, backend = _kompress_cache[model_id]
+        if backend == "onnx":
             return True  # ONNX CPU provider doesn't parallelize batch dim
+        if backend == "pytorch":
             try:
                 import torch
+                if hasattr(model, "parameters"):
+                    device = next(model.parameters()).device
+                    if device.type in ("cuda", "mps"):
+                        return False  # GPU/MPS benefits from batching
                 _ = torch
             except ImportError:
                 return True

headroom/transforms/smart_crusher.py CHANGED Viewed

@@ -180,27 +180,31 @@ def _hash_field_name(field_name: str) -> str:
 # Minimum chars for a text field to be worth compressing within an item
 _MIN_FIELD_CHARS_FOR_WITHIN = 200
-# Lazy-loaded compressor for within-item text compression
 _within_compressor: Any = None
 _within_compressor_checked = False
 def _get_within_compressor() -> Any:
     """Get a text compressor for within-item field compression.
     Returns Kompress if available (requires [ml] extra), else None.
     """
     global _within_compressor, _within_compressor_checked
     if not _within_compressor_checked:
-        _within_compressor_checked = True
-        try:
-            from .kompress_compressor import KompressCompressor, is_kompress_available
-            if is_kompress_available():
-                _within_compressor = KompressCompressor()
-                logger.debug("Within-item compression: using Kompress")
-        except ImportError:
-            pass
     return _within_compressor
@@ -435,7 +439,7 @@ def _detect_sequential_pattern(values: list[Any], check_order: bool = True) -> b
     # Get numeric values
     nums = []
     for v in values:
-        if isinstance(v, (int, float)) and not isinstance(v, bool):
             nums.append(v)
         elif isinstance(v, str):
             try:
@@ -546,7 +550,6 @@ def _detect_score_field_statistically(stats: FieldStats, items: list[dict]) -> t
     confidence = 0.0
     # Check for bounded range typical of scores
-    stats.max_val - stats.min_val
     min_val, max_val = stats.min_val, stats.max_val
     # Common score ranges: [0,1], [0,10], [0,100], [-1,1], [0,5]
@@ -578,7 +581,7 @@ def _detect_score_field_statistically(stats: FieldStats, items: list[dict]) -> t
     for item in items:
         if stats.name in item:
             val = item.get(stats.name)
-            if isinstance(val, (int, float)) and math.isfinite(val):
                 values_in_order.append(float(val))
     if len(values_in_order) >= 5:
         # Check for descending sort
@@ -804,11 +807,11 @@ def _detect_items_by_learned_semantics(
                 value_canonical = "null"
             elif isinstance(value, bool):
                 value_canonical = "true" if value else "false"
-            elif isinstance(value, (int, float)):
                 value_canonical = str(value)
             elif isinstance(value, str):
                 value_canonical = value
-            elif isinstance(value, (list, dict)):
                 try:
                     value_canonical = json.dumps(value, sort_keys=True, default=str)
                 except (TypeError, ValueError):
@@ -1030,7 +1033,7 @@ class SmartAnalyzer:
         first_val = non_null_values[0]
         if isinstance(first_val, bool):
             field_type = "boolean"
-        elif isinstance(first_val, (int, float)):
             field_type = "numeric"
         elif isinstance(first_val, str):
             field_type = "string"
@@ -1064,7 +1067,7 @@ class SmartAnalyzer:
         # Numeric-specific analysis
         if field_type == "numeric":
             # Filter out NaN and Infinity which break statistics functions
-            nums = [v for v in non_null_values if isinstance(v, (int, float)) and math.isfinite(v)]
             if nums:
                 try:
                     stats.min_val = min(nums)
@@ -1283,7 +1286,7 @@ class SmartAnalyzer:
                     threshold = self.config.variance_threshold * std
                     for i, item in enumerate(items):
                         val = item.get(stats.name)
-                        if isinstance(val, (int, float)):
                             if abs(val - stats.mean_val) > threshold:
                                 anomaly_indices.add(i)
@@ -1953,9 +1956,9 @@ class SmartCrusher(Transform):
         if len(keep_indices) <= effective_max:
             return keep_indices
-        # Use provided field_semantics or fall back to instance variable (set by crush())
         effective_field_semantics = field_semantics or getattr(
-            self, "_current_field_semantics", None
         )
         # Identify error items using KEYWORD detection (preservation guarantee)
@@ -1976,7 +1979,7 @@ class SmartCrusher(Transform):
                         threshold = self.config.variance_threshold * std
                         for i, item in enumerate(items):
                             val = item.get(field_name)
-                            if isinstance(val, (int, float)):
                                 if abs(val - stats.mean_val) > threshold:
                                     anomaly_indices.add(i)
@@ -2297,6 +2300,10 @@ class SmartCrusher(Transform):
         return result, was_modified, info
     def _process_value(
         self,
         value: Any,
@@ -2311,6 +2318,10 @@ class SmartCrusher(Transform):
             Tuple of (processed_value, info_string, ccr_markers).
             ccr_markers is a list of (hash, original_count, compressed_count, summary) tuples.
         """
         info_parts = []
         ccr_markers: list[tuple] = []
@@ -2495,9 +2506,12 @@ class SmartCrusher(Transform):
             )
         # === TOIN Evolution: Extract field semantics for signal detection ===
-        # Store temporarily on instance for use in _prioritize_indices
         # This enables learned signal detection without changing all method signatures
-        self._current_field_semantics = (
             toin_hint.field_semantics if toin_hint.field_semantics else None
         )
@@ -2661,12 +2675,14 @@ class SmartCrusher(Transform):
             )
             # Clean up temporary instance variable
-            self._current_field_semantics = None
             return result, strategy_info, ccr_hash, dropped_summary
         except Exception:
             # Clean up temporary instance variable
-            self._current_field_semantics = None
             # Re-raise any exceptions (removed finally block since we no longer mutate config)
             raise
@@ -2814,7 +2830,7 @@ class SmartCrusher(Transform):
             return items, "number:passthrough"
         # Filter out non-finite values for statistics
-        finite = [x for x in items if isinstance(x, (int, float)) and math.isfinite(x)]
         if not finite:
             return items, "number:no_finite"
@@ -2832,7 +2848,7 @@ class SmartCrusher(Transform):
         outlier_indices: set[int] = set()
         if std_val > 0:
             for i, val in enumerate(items):
-                if isinstance(val, (int, float)) and math.isfinite(val):
                     if abs(val - mean_val) > self.config.variance_threshold * std_val:
                         outlier_indices.add(i)
@@ -2844,12 +2860,12 @@ class SmartCrusher(Transform):
                 left = [
                     items[j]
                     for j in range(i - window, i)
-                    if isinstance(items[j], (int, float)) and math.isfinite(items[j])
                 ]
                 right = [
                     items[j]
                     for j in range(i, i + window)
-                    if isinstance(items[j], (int, float)) and math.isfinite(items[j])
                 ]
                 if left and right:
                     left_mean = statistics.mean(left)
@@ -2877,27 +2893,23 @@ class SmartCrusher(Transform):
                 if i not in keep_indices:
                     keep_indices.add(i)
-        # Build output: summary string + kept values in original order
-        stats_summary = (
-            f"[{n} numbers: min={min(finite)}, max={max(finite)}, "
-            f"mean={mean_val:.4g}, median={median_val:.4g}, "
-            f"stddev={std_val:.4g}, p25={p25:.4g}, p75={p75:.4g}"
-        )
-        if outlier_indices:
-            stats_summary += f", outliers={len(outlier_indices)}"
-        if change_indices:
-            stats_summary += f", change_points={len(change_indices)}"
-        stats_summary += "]"
         kept_values = [items[i] for i in sorted(keep_indices)]
-        result: list = [stats_summary] + kept_values
-        strategy = f"number:adaptive({n}->{len(kept_values)}"
         if outlier_indices:
             strategy += f",outliers={len(outlier_indices)}"
         strategy += ")"
-        return result, strategy
     def _crush_mixed_array(
         self,
@@ -2930,7 +2942,7 @@ class SmartCrusher(Transform):
                 key = "str"
             elif isinstance(item, bool):
                 key = "bool"
-            elif isinstance(item, (int, float)):
                 key = "number"
             elif isinstance(item, list):
                 key = "list"
@@ -2979,13 +2991,13 @@ class SmartCrusher(Transform):
                 last_idx = set(indices[-k_last:])
                 keep_indices.update(first_idx | last_idx)
                 # Outliers
-                finite = [v for v in values if isinstance(v, (int, float)) and math.isfinite(v)]
                 if len(finite) > 1:
                     mean_v = statistics.mean(finite)
                     std_v = statistics.stdev(finite)
                     if std_v > 0:
                         for idx, val in group_items:
-                            if isinstance(val, (int, float)) and math.isfinite(val):
                                 if abs(val - mean_v) > self.config.variance_threshold * std_v:
                                     keep_indices.add(idx)
                 strategy_parts.append(f"num:{len(values)}")
@@ -3553,7 +3565,7 @@ class SmartCrusher(Transform):
                     threshold = self.config.variance_threshold * std
                     for i, item in enumerate(items):
                         val = item.get(name)
-                        if isinstance(val, (int, float)):
                             if abs(val - stats.mean_val) > threshold:
                                 keep_indices.add(i)

 # Minimum chars for a text field to be worth compressing within an item
 _MIN_FIELD_CHARS_FOR_WITHIN = 200
+# Lazy-loaded compressor for within-item text compression (thread-safe)
 _within_compressor: Any = None
 _within_compressor_checked = False
+_within_compressor_lock = threading.Lock()
 def _get_within_compressor() -> Any:
     """Get a text compressor for within-item field compression.
     Returns Kompress if available (requires [ml] extra), else None.
+    Thread-safe via double-checked locking.
     """
     global _within_compressor, _within_compressor_checked
     if not _within_compressor_checked:
+        with _within_compressor_lock:
+            if not _within_compressor_checked:
+                try:
+                    from .kompress_compressor import KompressCompressor, is_kompress_available
+                    if is_kompress_available():
+                        _within_compressor = KompressCompressor()
+                        logger.debug("Within-item compression: using Kompress")
+                except ImportError:
+                    pass
+                _within_compressor_checked = True
     return _within_compressor
     # Get numeric values
     nums = []
     for v in values:
+        if isinstance(v, int | float) and not isinstance(v, bool):
             nums.append(v)
         elif isinstance(v, str):
             try:
     confidence = 0.0
     # Check for bounded range typical of scores
     min_val, max_val = stats.min_val, stats.max_val
     # Common score ranges: [0,1], [0,10], [0,100], [-1,1], [0,5]
     for item in items:
         if stats.name in item:
             val = item.get(stats.name)
+            if isinstance(val, int | float) and math.isfinite(val):
                 values_in_order.append(float(val))
     if len(values_in_order) >= 5:
         # Check for descending sort
                 value_canonical = "null"
             elif isinstance(value, bool):
                 value_canonical = "true" if value else "false"
+            elif isinstance(value, int | float):
                 value_canonical = str(value)
             elif isinstance(value, str):
                 value_canonical = value
+            elif isinstance(value, list | dict):
                 try:
                     value_canonical = json.dumps(value, sort_keys=True, default=str)
                 except (TypeError, ValueError):
         first_val = non_null_values[0]
         if isinstance(first_val, bool):
             field_type = "boolean"
+        elif isinstance(first_val, int | float):
             field_type = "numeric"
         elif isinstance(first_val, str):
             field_type = "string"
         # Numeric-specific analysis
         if field_type == "numeric":
             # Filter out NaN and Infinity which break statistics functions
+            nums = [v for v in non_null_values if isinstance(v, int | float) and math.isfinite(v)]
             if nums:
                 try:
                     stats.min_val = min(nums)
                     threshold = self.config.variance_threshold * std
                     for i, item in enumerate(items):
                         val = item.get(stats.name)
+                        if isinstance(val, int | float):
                             if abs(val - stats.mean_val) > threshold:
                                 anomaly_indices.add(i)
         if len(keep_indices) <= effective_max:
             return keep_indices
+        # Use provided field_semantics or fall back to thread-local (set by _crush_array)
         effective_field_semantics = field_semantics or getattr(
+            getattr(self, "_thread_local", None), "field_semantics", None
         )
         # Identify error items using KEYWORD detection (preservation guarantee)
                         threshold = self.config.variance_threshold * std
                         for i, item in enumerate(items):
                             val = item.get(field_name)
+                            if isinstance(val, int | float):
                                 if abs(val - stats.mean_val) > threshold:
                                     anomaly_indices.add(i)
         return result, was_modified, info
+    # Maximum recursion depth for nested JSON processing.
+    # Prevents RecursionError on adversarial/deeply-nested input.
+    _MAX_PROCESS_DEPTH = 50
     def _process_value(
         self,
         value: Any,
             Tuple of (processed_value, info_string, ccr_markers).
             ccr_markers is a list of (hash, original_count, compressed_count, summary) tuples.
         """
+        # Guard against deeply nested JSON causing RecursionError
+        if depth >= self._MAX_PROCESS_DEPTH:
+            return value, "", []
         info_parts = []
         ccr_markers: list[tuple] = []
             )
         # === TOIN Evolution: Extract field semantics for signal detection ===
+        # Store in thread-local storage for use in _prioritize_indices.
         # This enables learned signal detection without changing all method signatures
+        # while remaining thread-safe (no cross-thread contamination).
+        if not hasattr(self, "_thread_local"):
+            self._thread_local = threading.local()
+        self._thread_local.field_semantics = (
             toin_hint.field_semantics if toin_hint.field_semantics else None
         )
             )
             # Clear the thread-local field semantics set earlier in this call
+            if hasattr(self, "_thread_local"):
+                self._thread_local.field_semantics = None
             return result, strategy_info, ccr_hash, dropped_summary
         except Exception:
             # Clear the thread-local field semantics before propagating the error
+            if hasattr(self, "_thread_local"):
+                self._thread_local.field_semantics = None
             # Re-raise any exceptions (removed finally block since we no longer mutate config)
             raise
             return items, "number:passthrough"
         # Filter out non-finite values for statistics
+        finite = [x for x in items if isinstance(x, int | float) and math.isfinite(x)]
         if not finite:
             return items, "number:no_finite"
         outlier_indices: set[int] = set()
         if std_val > 0:
             for i, val in enumerate(items):
+                if isinstance(val, int | float) and math.isfinite(val):
                     if abs(val - mean_val) > self.config.variance_threshold * std_val:
                         outlier_indices.add(i)
                 left = [
                     items[j]
                     for j in range(i - window, i)
+                    if isinstance(items[j], int | float) and math.isfinite(items[j])
                 ]
                 right = [
                     items[j]
                     for j in range(i, i + window)
+                    if isinstance(items[j], int | float) and math.isfinite(items[j])
                 ]
                 if left and right:
                     left_mean = statistics.mean(left)
                 if i not in keep_indices:
                     keep_indices.add(i)
+        # Build output: kept values only (schema-preserving — no generated text)
         kept_values = [items[i] for i in sorted(keep_indices)]
+        # Encode statistics into the strategy string (not the array itself)
+        strategy = (
+            f"number:adaptive({n}->{len(kept_values)}"
+            f",min={min(finite)},max={max(finite)}"
+            f",mean={mean_val:.4g},median={median_val:.4g}"
+            f",stddev={std_val:.4g},p25={p25:.4g},p75={p75:.4g}"
+        )
         if outlier_indices:
             strategy += f",outliers={len(outlier_indices)}"
+        if change_indices:
+            strategy += f",change_points={len(change_indices)}"
         strategy += ")"
+        return kept_values, strategy
     def _crush_mixed_array(
         self,
                 key = "str"
             elif isinstance(item, bool):
                 key = "bool"
+            elif isinstance(item, int | float):
                 key = "number"
             elif isinstance(item, list):
                 key = "list"
                 last_idx = set(indices[-k_last:])
                 keep_indices.update(first_idx | last_idx)
                 # Outliers
+                finite = [v for v in values if isinstance(v, int | float) and math.isfinite(v)]
                 if len(finite) > 1:
                     mean_v = statistics.mean(finite)
                     std_v = statistics.stdev(finite)
                     if std_v > 0:
                         for idx, val in group_items:
+                            if isinstance(val, int | float) and math.isfinite(val):
                                 if abs(val - mean_v) > self.config.variance_threshold * std_v:
                                     keep_indices.add(idx)
                 strategy_parts.append(f"num:{len(values)}")
                     threshold = self.config.variance_threshold * std
                     for i, item in enumerate(items):
                         val = item.get(name)
+                        if isinstance(val, int | float):
                             if abs(val - stats.mean_val) > threshold:
                                 keep_indices.add(i)

plugins/openclaw/package.json CHANGED Viewed

@@ -1,54 +1,54 @@
-{
-  "name": "headroom-openclaw",
-  "version": "0.1.0",
-  "description": "Headroom context compression plugin for OpenClaw — 70-90% token savings with zero LLM calls",
-  "type": "module",
-  "main": "./dist/index.js",
-  "types": "./dist/index.d.ts",
-  "files": [
-    "dist",
-    "hook-shim",
-    "openclaw.plugin.json",
-    "README.md"
-  ],
-  "scripts": {
-    "build": "tsup && node prepare-dist.mjs",
-    "test": "vitest run",
-    "test:watch": "vitest",
-    "typecheck": "tsc --noEmit"
-  },
-  "dependencies": {
-    "headroom-ai": "^0.1.0"
-  },
-  "peerDependencies": {
-    "openclaw": "*"
-  },
-  "peerDependenciesMeta": {
-    "openclaw": {
-      "optional": true
-    }
-  },
-  "devDependencies": {
-    "@types/node": "^22.10.0",
-    "tsup": "^8.0.0",
-    "typescript": "^5.5.0",
-    "vitest": "^2.0.0"
-  },
-  "openclaw": {
-    "hooks": [
-      "./hook-shim"
-    ],
-    "extensions": [
-      "./dist/index.js"
-    ],
-    "capabilities": {
-      "network": {
-        "allow": [
-          "http://*:*",
-          "https://*:*"
-        ]
-      }
-    }
-  },
-  "license": "Apache-2.0"
-}

+{
+  "name": "headroom-openclaw",
+  "version": "0.1.1",
+  "description": "Headroom context compression plugin for OpenClaw — 70-90% token savings with zero LLM calls",
+  "type": "module",
+  "main": "./dist/index.js",
+  "types": "./dist/index.d.ts",
+  "files": [
+    "dist",
+    "hook-shim",
+    "openclaw.plugin.json",
+    "README.md"
+  ],
+  "scripts": {
+    "build": "tsup && node prepare-dist.mjs",
+    "test": "vitest run",
+    "test:watch": "vitest",
+    "typecheck": "tsc --noEmit"
+  },
+  "dependencies": {
+    "headroom-ai": "^0.1.0"
+  },
+  "peerDependencies": {
+    "openclaw": "*"
+  },
+  "peerDependenciesMeta": {
+    "openclaw": {
+      "optional": true
+    }
+  },
+  "devDependencies": {
+    "@types/node": "^22.10.0",
+    "tsup": "^8.0.0",
+    "typescript": "^5.5.0",
+    "vitest": "^2.0.0"
+  },
+  "openclaw": {
+    "hooks": [
+      "./hook-shim"
+    ],
+    "extensions": [
+      "./dist/index.js"
+    ],
+    "capabilities": {
+      "network": {
+        "allow": [
+          "http://*:*",
+          "https://*:*"
+        ]
+      }
+    }
+  },
+  "license": "Apache-2.0"
+}

tests/test_cli/test_wrap_copilot.py CHANGED Viewed

@@ -21,6 +21,7 @@ def test_wrap_copilot_auto_anthropic_injects_instructions(
     runner: CliRunner, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
 ) -> None:
     monkeypatch.chdir(tmp_path)
     captured: dict[str, object] = {}
     def fake_launch_tool(**kwargs):  # noqa: ANN003
@@ -51,7 +52,10 @@ def test_wrap_copilot_auto_anthropic_injects_instructions(
     assert captured["args"] == ("--model", "claude-sonnet-4-20250514")
-def test_wrap_copilot_openai_backend_sets_completions_env(runner: CliRunner) -> None:
     captured: dict[str, object] = {}
     def fake_launch_tool(**kwargs):  # noqa: ANN003
@@ -90,7 +94,10 @@ def test_wrap_copilot_openai_backend_sets_completions_env(runner: CliRunner) ->
     assert captured["args"] == ("--model", "gpt-4o")
-def test_wrap_copilot_auto_detects_running_proxy_backend(runner: CliRunner) -> None:
     captured: dict[str, object] = {}
     def fake_launch_tool(**kwargs):  # noqa: ANN003
@@ -153,7 +160,10 @@ def test_wrap_copilot_rejects_responses_for_translated_backends(runner: CliRunne
     assert "not supported with translated backends" in result.output
-def test_wrap_copilot_clears_stale_wire_api_in_anthropic_mode(runner: CliRunner) -> None:
     captured: dict[str, object] = {}
     def fake_launch_tool(**kwargs):  # noqa: ANN003
@@ -164,7 +174,10 @@ def test_wrap_copilot_clears_stale_wire_api_in_anthropic_mode(runner: CliRunner)
             result = runner.invoke(
                 main,
                 ["wrap", "copilot", "--no-rtk", "--", "--model", "claude-sonnet-4-20250514"],
-                env={"COPILOT_PROVIDER_WIRE_API": "responses"},
             )
     assert result.exit_code == 0, result.output

     runner: CliRunner, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
 ) -> None:
     monkeypatch.chdir(tmp_path)
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test-dummy")
     captured: dict[str, object] = {}
     def fake_launch_tool(**kwargs):  # noqa: ANN003
     assert captured["args"] == ("--model", "claude-sonnet-4-20250514")
+def test_wrap_copilot_openai_backend_sets_completions_env(
+    runner: CliRunner, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    monkeypatch.setenv("OPENAI_API_KEY", "sk-test-dummy")
     captured: dict[str, object] = {}
     def fake_launch_tool(**kwargs):  # noqa: ANN003
     assert captured["args"] == ("--model", "gpt-4o")
+def test_wrap_copilot_auto_detects_running_proxy_backend(
+    runner: CliRunner, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    monkeypatch.setenv("OPENAI_API_KEY", "sk-test-dummy")
     captured: dict[str, object] = {}
     def fake_launch_tool(**kwargs):  # noqa: ANN003
     assert "not supported with translated backends" in result.output
+def test_wrap_copilot_clears_stale_wire_api_in_anthropic_mode(
+    runner: CliRunner, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test-dummy")
     captured: dict[str, object] = {}
     def fake_launch_tool(**kwargs):  # noqa: ANN003
             result = runner.invoke(
                 main,
                 ["wrap", "copilot", "--no-rtk", "--", "--model", "claude-sonnet-4-20250514"],
+                env={
+                    "COPILOT_PROVIDER_WIRE_API": "responses",
+                    "ANTHROPIC_API_KEY": "sk-test-dummy",
+                },
             )
     assert result.exit_code == 0, result.output

tests/test_learn/test_scanner.py CHANGED Viewed

@@ -103,6 +103,38 @@ class TestGreedyPathDecode:
         result = _greedy_path_decode(tmp_path, ["my", "cool", "project", "nosync", "headroom"])
         assert result == tmp_path / "my-cool-project.nosync" / "headroom"
     def test_nonexistent_path_returns_none(self, tmp_path: Path) -> None:
         result = _greedy_path_decode(tmp_path, ["does", "not", "exist"])
         assert result is None
@@ -236,6 +268,38 @@ class TestDecodeProjectPath:
         else:
             assert result is None or result == project
     def test_windows_drive_letter_pattern(self) -> None:
         """Encoded name -C-MQ2-macros should detect Windows drive letter."""
         import sys

         result = _greedy_path_decode(tmp_path, ["my", "cool", "project", "nosync", "headroom"])
         assert result == tmp_path / "my-cool-project.nosync" / "headroom"
+    # ---- Underscore tests (issue #159) ----
+    def test_single_underscore_in_dirname(self, tmp_path: Path) -> None:
+        """Directory name contains one literal underscore (e.g. my_project)."""
+        _make_dirs(tmp_path, "my_project")
+        result = _greedy_path_decode(tmp_path, ["my", "project"])
+        assert result == tmp_path / "my_project"
+    def test_multiple_underscores_in_dirname(self, tmp_path: Path) -> None:
+        """Directory name contains multiple underscores (e.g. my_cool_project)."""
+        _make_dirs(tmp_path, "my_cool_project")
+        result = _greedy_path_decode(tmp_path, ["my", "cool", "project"])
+        assert result == tmp_path / "my_cool_project"
+    def test_underscore_nested_path(self, tmp_path: Path) -> None:
+        """Nested path like org/my_project should decode correctly."""
+        _make_dirs(tmp_path, "org/my_project")
+        result = _greedy_path_decode(tmp_path, ["org", "my", "project"])
+        assert result == tmp_path / "org" / "my_project"
+    def test_mixed_underscore_and_hyphen_in_dirname(self, tmp_path: Path) -> None:
+        """Directory with both hyphens and underscores (e.g. my-cool_project)."""
+        _make_dirs(tmp_path, "my-cool_project")
+        result = _greedy_path_decode(tmp_path, ["my", "cool", "project"])
+        assert result == tmp_path / "my-cool_project"
+    def test_underscore_dir_containing_hyphen_subdir(self, tmp_path: Path) -> None:
+        """Path like my_app/sub-module — underscore parent + hyphen child."""
+        _make_dirs(tmp_path, "my_app/sub-module")
+        result = _greedy_path_decode(tmp_path, ["my", "app", "sub", "module"])
+        assert result == tmp_path / "my_app" / "sub-module"
     def test_nonexistent_path_returns_none(self, tmp_path: Path) -> None:
         result = _greedy_path_decode(tmp_path, ["does", "not", "exist"])
         assert result is None
         else:
             assert result is None or result == project
+    def test_underscore_dirname_via_greedy(self, users_tmp: Path) -> None:
+        """my_project — underscore in directory name (issue #159).
+        Claude Code encodes /Users/foo/org/my_project as
+        -Users-foo-org-my-project.  Simple replace gives
+        …/org/my/project which does not exist, so the greedy decoder
+        must reconstruct my_project from tokens ['my', 'project'].
+        """
+        project = users_tmp / "org" / "my_project"
+        project.mkdir(parents=True)
+        # Mirror the encoding described in the docstring: "/" AND "_" both
+        # collapse to "-".  Replacing only "/" leaves "my_project" intact in
+        # the encoded name, so a plain "-" -> "/" decode already finds the
+        # directory and the underscore reconstruction is never exercised.
+        encoded = "-" + str(project)[1:].replace("/", "-").replace("_", "-")
+        result = _decode_project_path(encoded)
+        if str(users_tmp).startswith("/Users/"):
+            assert result == project
+        else:
+            # Outside /Users/ the decoder may decline to resolve the path.
+            assert result is None or result == project
+    def test_multi_underscore_dirname_via_greedy(self, users_tmp: Path) -> None:
+        """my_cool_project — multiple underscores (issue #159).
+        The encoded name must have every "_" flattened to "-" so that the
+        decoder has to rebuild my_cool_project from ['my', 'cool', 'project'].
+        """
+        project = users_tmp / "my_cool_project"
+        project.mkdir(parents=True)
+        # Flatten "_" as well as "/" to "-"; replacing only "/" would keep the
+        # underscores in the encoded name and the test would pass without
+        # touching the underscore-reconstruction path.
+        encoded = "-" + str(project)[1:].replace("/", "-").replace("_", "-")
+        result = _decode_project_path(encoded)
+        if str(users_tmp).startswith("/Users/"):
+            assert result == project
+        else:
+            # Outside /Users/ the decoder may decline to resolve the path.
+            assert result is None or result == project
     def test_windows_drive_letter_pattern(self) -> None:
         """Encoded name -C-MQ2-macros should detect Windows drive letter."""
         import sys

tests/test_memory_sync.py ADDED Viewed

	@@ -0,0 +1,647 @@

+"""Comprehensive tests for the universal memory sync engine.
+Tests cover:
+- Core sync: import, export, bidirectional
+- Idempotency and deduplication
+- Fast no-op detection
+- Lineage and governance metadata
+- Claude Code adapter: read/write frontmatter files
+- Codex adapter: read/write AGENTS.md sections
+- Cross-agent interop: save in one agent, find in another
+"""
+from __future__ import annotations
+import hashlib
+import json
+import time
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+import pytest
+from headroom.memory.sync import (
+    sync,
+    sync_export,
+    sync_import,
+)
+from headroom.memory.sync_adapters.claude_code import (
+    ClaudeCodeAdapter,
+    _parse_frontmatter,
+)
+from headroom.memory.sync_adapters.codex_agent import CodexAdapter
+# ---------------------------------------------------------------------------
+# Fake backend for testing (no real DB/embeddings needed)
+# ---------------------------------------------------------------------------
+@dataclass
+class FakeMemory:
+    id: str = ""
+    content: str = ""
+    user_id: str = ""
+    category: str = ""
+    importance: float = 0.5
+    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
+    metadata: dict[str, Any] = field(default_factory=dict)
+class FakeBackend:
+    """In-memory backend for testing sync without real DB."""
+    def __init__(self) -> None:
+        self._memories: list[FakeMemory] = []
+        self._next_id = 1
+    async def get_user_memories(self, user_id: str, limit: int = 500) -> list[FakeMemory]:
+        return [m for m in self._memories if m.user_id == user_id][:limit]
+    async def save_memory(
+        self,
+        content: str,
+        user_id: str,
+        importance: float = 0.5,
+        metadata: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> FakeMemory:
+        mem = FakeMemory(
+            id=f"mem_{self._next_id:04d}",
+            content=content,
+            user_id=user_id,
+            importance=importance,
+            metadata=metadata or {},
+        )
+        self._next_id += 1
+        self._memories.append(mem)
+        return mem
+    def add_memory(self, content: str, user_id: str = "tcms", **kwargs: Any) -> FakeMemory:
+        """Sync helper to pre-populate memories."""
+        mem = FakeMemory(
+            id=f"mem_{self._next_id:04d}",
+            content=content,
+            user_id=user_id,
+            metadata=kwargs.get("metadata", {}),
+            importance=kwargs.get("importance", 0.5),
+        )
+        self._next_id += 1
+        self._memories.append(mem)
+        return mem
+# ---------------------------------------------------------------------------
+# Core sync tests
+# ---------------------------------------------------------------------------
+class TestSyncImport:
+    """Test importing from agent files into DB."""
+    @pytest.fixture
+    def backend(self):
+        return FakeBackend()
+    @pytest.fixture
+    def claude_dir(self, tmp_path):
+        d = tmp_path / "memory"
+        d.mkdir()
+        return d
+    def _write_claude_memory(
+        self, memory_dir: Path, name: str, content: str, **fm_fields: str
+    ) -> None:
+        slug = name.lower().replace(" ", "_")
+        fields = {"name": name, "description": content[:80], "type": "project", **fm_fields}
+        fm_lines = ["---"]
+        for k, v in fields.items():
+            fm_lines.append(f"{k}: {v}")
+        fm_lines.append("---")
+        (memory_dir / f"{slug}.md").write_text("\n".join(fm_lines) + f"\n\n{content}\n")
+    @pytest.mark.asyncio
+    async def test_import_claude_files_to_db(self, backend, claude_dir):
+        self._write_claude_memory(claude_dir, "Project codename", "The secret name is TC")
+        self._write_claude_memory(claude_dir, "Dark mode", "User prefers dark mode")
+        adapter = ClaudeCodeAdapter(claude_dir)
+        imported = await sync_import(backend, adapter, "tcms")
+        assert imported == 2
+        mems = await backend.get_user_memories("tcms")
+        contents = {m.content for m in mems}
+        assert "The secret name is TC" in contents
+        assert "User prefers dark mode" in contents
+    @pytest.mark.asyncio
+    async def test_import_skips_existing(self, backend, claude_dir):
+        """Memories already in DB are not re-imported."""
+        backend.add_memory(
+            "The secret name is TC",
+            metadata={"content_hash": hashlib.sha256(b"The secret name is TC").hexdigest()[:16]},
+        )
+        self._write_claude_memory(claude_dir, "Project codename", "The secret name is TC")
+        self._write_claude_memory(claude_dir, "New fact", "Something new")
+        adapter = ClaudeCodeAdapter(claude_dir)
+        imported = await sync_import(backend, adapter, "tcms")
+        assert imported == 1  # Only "Something new"
+    @pytest.mark.asyncio
+    async def test_import_preserves_lineage(self, backend, claude_dir):
+        self._write_claude_memory(claude_dir, "Fact", "Important fact")
+        adapter = ClaudeCodeAdapter(claude_dir)
+        await sync_import(backend, adapter, "tcms")
+        mems = await backend.get_user_memories("tcms")
+        assert len(mems) == 1
+        assert mems[0].metadata["source_agent"] == "claude"
+        assert mems[0].metadata["source_file"] == "fact.md"
+        assert "content_hash" in mems[0].metadata
+        assert mems[0].metadata["sync_direction"] == "import"
+class TestSyncExport:
+    """Test exporting from DB to agent files."""
+    @pytest.fixture
+    def backend(self):
+        return FakeBackend()
+    @pytest.fixture
+    def claude_dir(self, tmp_path):
+        d = tmp_path / "memory"
+        d.mkdir()
+        return d
+    @pytest.mark.asyncio
+    async def test_export_new_memory_to_claude_files(self, backend, claude_dir):
+        backend.add_memory(
+            "Project uses Python 3.12",
+            metadata={
+                "source_agent": "codex",
+                "sync_direction": "export",  # Not from claude import
+            },
+        )
+        adapter = ClaudeCodeAdapter(claude_dir)
+        exported = await sync_export(backend, adapter, "tcms")
+        assert exported == 1
+        # Check file was created
+        md_files = list(claude_dir.glob("headroom_*.md"))
+        assert len(md_files) == 1
+        content = md_files[0].read_text()
+        assert "Python 3.12" in content
+        assert "headroom_id: mem_0001" in content
+        assert "source_agent: codex" in content
+    @pytest.mark.asyncio
+    async def test_export_skips_claude_originated(self, backend, claude_dir):
+        """Don't re-export memories that were imported FROM claude (anti-echo)."""
+        backend.add_memory(
+            "From claude",
+            metadata={
+                "source_agent": "claude",
+                "sync_direction": "import",
+            },
+        )
+        backend.add_memory(
+            "From codex",
+            metadata={
+                "source_agent": "codex",
+            },
+        )
+        adapter = ClaudeCodeAdapter(claude_dir)
+        exported = await sync_export(backend, adapter, "tcms")
+        assert exported == 1  # Only "From codex"
+    @pytest.mark.asyncio
+    async def test_export_updates_memory_md_index(self, backend, claude_dir):
+        # Create an existing MEMORY.md
+        (claude_dir / "MEMORY.md").write_text("# Memory\n\n## User\n- Some existing entry\n")
+        backend.add_memory("New fact from codex", metadata={"source_agent": "codex"})
+        adapter = ClaudeCodeAdapter(claude_dir)
+        await sync_export(backend, adapter, "tcms")
+        memory_md = (claude_dir / "MEMORY.md").read_text()
+        assert "Headroom Shared Memory" in memory_md
+        assert "New fact from codex" in memory_md
+        assert "Some existing entry" in memory_md  # Preserved
+class TestBidirectionalSync:
+    """Test full bidirectional sync."""
+    # Fixtures: a fresh FakeBackend, an empty "memory" directory under tmp_path
+    # (handed to ClaudeCodeAdapter), and a sync-state file path under tmp_path.
+    @pytest.fixture
+    def backend(self):
+        return FakeBackend()
+    @pytest.fixture
+    def claude_dir(self, tmp_path):
+        d = tmp_path / "memory"
+        d.mkdir()
+        return d
+    @pytest.fixture
+    def state_path(self, tmp_path):
+        return tmp_path / "sync_state.json"
+    # Helper: writes "<slug>.md" into memory_dir, where slug is the lower-cased
+    # name with spaces replaced by underscores. The file is a frontmatter block
+    # (name, description = first 80 chars of content, type: project) followed by
+    # a blank line and the content itself.
+    def _write_claude_memory(self, memory_dir: Path, name: str, content: str) -> None:
+        slug = name.lower().replace(" ", "_")
+        fm = f"---\nname: {name}\ndescription: {content[:80]}\ntype: project\n---"
+        (memory_dir / f"{slug}.md").write_text(f"{fm}\n\n{content}\n")
+    @pytest.mark.asyncio
+    async def test_bidirectional_sync(self, backend, claude_dir, state_path):
+        # Claude has a memory file
+        self._write_claude_memory(claude_dir, "Convention", "Always use ruff for linting")
+        # DB has a memory from Codex
+        backend.add_memory("Secret name is TC", metadata={"source_agent": "codex"})
+        adapter = ClaudeCodeAdapter(claude_dir)
+        # force=True is passed on every initial sync in this class; presumably it
+        # bypasses the fingerprint short-circuit — confirm against sync().
+        result = await sync(backend, adapter, "tcms", state_path=state_path, force=True)
+        assert result.imported == 1  # Claude file → DB
+        assert result.exported == 1  # Codex memory → Claude file
+        # Verify DB has both
+        mems = await backend.get_user_memories("tcms")
+        contents = {m.content for m in mems}
+        assert "Always use ruff for linting" in contents
+        assert "Secret name is TC" in contents
+        # Verify Claude dir has the exported file
+        all_files = list(claude_dir.glob("headroom_*.md"))
+        assert len(all_files) >= 1
+        exported_content = " ".join(f.read_text() for f in all_files)
+        assert "TC" in exported_content
+    @pytest.mark.asyncio
+    async def test_sync_idempotent(self, backend, claude_dir, state_path):
+        """Running sync twice produces no duplicates."""
+        self._write_claude_memory(claude_dir, "Fact", "Python 3.12 is required")
+        backend.add_memory("Port 8787 is default", metadata={"source_agent": "codex"})
+        adapter = ClaudeCodeAdapter(claude_dir)
+        r1 = await sync(backend, adapter, "tcms", state_path=state_path, force=True)
+        assert r1.imported == 1
+        assert r1.exported == 1
+        # The second run is also forced, so idempotence must come from the
+        # import/export logic itself rather than from a skipped sync.
+        r2 = await sync(backend, adapter, "tcms", state_path=state_path, force=True)
+        assert r2.imported == 0  # Already imported
+        assert r2.exported == 0  # Already exported
+        # No duplicates in DB
+        mems = await backend.get_user_memories("tcms")
+        assert len(mems) == 2
+    @pytest.mark.asyncio
+    async def test_fast_noop_when_unchanged(self, backend, claude_dir, state_path):
+        """Second sync with no changes is a fast no-op (asserted bound: < 50 ms)."""
+        self._write_claude_memory(claude_dir, "Fact", "Some fact")
+        adapter = ClaudeCodeAdapter(claude_dir)
+        # First sync (populates state)
+        await sync(backend, adapter, "tcms", state_path=state_path, force=True)
+        # Second sync (should be fast no-op)
+        # Note: this call omits force=True, unlike every other sync in this class.
+        start = time.monotonic()
+        r = await sync(backend, adapter, "tcms", state_path=state_path)
+        elapsed = (time.monotonic() - start) * 1000
+        assert r.imported == 0
+        assert r.exported == 0
+        # NOTE(review): wall-clock assertions can be flaky on loaded runners;
+        # the imported/exported == 0 checks above are the deterministic part.
+        assert elapsed < 50  # Generous threshold for CI
+class TestLineageAndGovernance:
+    """Test metadata tracking for audit and lineage."""
+    # Fixtures: a fresh FakeBackend and an empty "memory" directory under tmp_path.
+    @pytest.fixture
+    def backend(self):
+        return FakeBackend()
+    @pytest.fixture
+    def claude_dir(self, tmp_path):
+        d = tmp_path / "memory"
+        d.mkdir()
+        return d
+    @pytest.mark.asyncio
+    async def test_lineage_tracks_source_agent(self, backend, claude_dir):
+        # A memory imported through ClaudeCodeAdapter must be tagged with
+        # source_agent == "claude" in the backend metadata.
+        fm = "---\nname: test\ndescription: test\ntype: project\n---"
+        (claude_dir / "test.md").write_text(f"{fm}\n\nClaude discovered this\n")
+        adapter = ClaudeCodeAdapter(claude_dir)
+        await sync_import(backend, adapter, "tcms")
+        mems = await backend.get_user_memories("tcms")
+        assert mems[0].metadata["source_agent"] == "claude"
+    @pytest.mark.asyncio
+    async def test_exported_files_have_headroom_id(self, backend, claude_dir):
+        # Exporting one backend memory must yield exactly one headroom_*.md file
+        # whose text contains a "headroom_id:" entry.
+        backend.add_memory("From codex", metadata={"source_agent": "codex"})
+        adapter = ClaudeCodeAdapter(claude_dir)
+        await sync_export(backend, adapter, "tcms")
+        md_files = list(claude_dir.glob("headroom_*.md"))
+        assert len(md_files) == 1
+        content = md_files[0].read_text()
+        assert "headroom_id:" in content
+    @pytest.mark.asyncio
+    async def test_sync_state_records_timestamps(self, backend, claude_dir, tmp_path):
+        # After a sync the state file must be JSON with a "claude:tcms" entry
+        # holding last_sync, agent_fingerprint and db_fingerprint. Only the
+        # presence of the keys is checked, not their values.
+        state_path = tmp_path / "state.json"
+        fm = "---\nname: t\ndescription: t\ntype: project\n---"
+        (claude_dir / "t.md").write_text(f"{fm}\n\nFact\n")
+        adapter = ClaudeCodeAdapter(claude_dir)
+        await sync(backend, adapter, "tcms", state_path=state_path, force=True)
+        state = json.loads(state_path.read_text())
+        key = "claude:tcms"
+        assert key in state
+        assert "last_sync" in state[key]
+        assert "agent_fingerprint" in state[key]
+        assert "db_fingerprint" in state[key]
+# ---------------------------------------------------------------------------
+# Claude Code adapter tests
+# ---------------------------------------------------------------------------
+class TestClaudeCodeAdapter:
+    """Test Claude Code adapter read/write."""
+    # Fixture: an empty "memory" directory under tmp_path.
+    @pytest.fixture
+    def memory_dir(self, tmp_path):
+        d = tmp_path / "memory"
+        d.mkdir()
+        return d
+    def test_parse_frontmatter(self):
+        # Frontmatter keys come back as a dict; the body is returned without the
+        # blank line that separates it from the closing "---".
+        content = "---\nname: Test\ntype: project\n---\n\nBody content here."
+        fm, body = _parse_frontmatter(content)
+        assert fm["name"] == "Test"
+        assert fm["type"] == "project"
+        assert body == "Body content here."
+    def test_parse_frontmatter_no_frontmatter(self):
+        # Without a frontmatter block: empty dict, and the input is the body.
+        content = "Just plain content."
+        fm, body = _parse_frontmatter(content)
+        assert fm == {}
+        assert body == "Just plain content."
+    @pytest.mark.asyncio
+    async def test_read_memories_skips_memory_md(self, memory_dir):
+        # MEMORY.md must not be returned as a memory; only fact.md is expected.
+        (memory_dir / "MEMORY.md").write_text("# Index\n- entry")
+        (memory_dir / "fact.md").write_text(
+            "---\nname: Fact\ntype: project\n---\n\nImportant fact."
+        )
+        adapter = ClaudeCodeAdapter(memory_dir)
+        mems = await adapter.read_memories()
+        assert len(mems) == 1
+        assert mems[0].content == "Important fact."
+        assert mems[0].source_file == "fact.md"
+    @pytest.mark.asyncio
+    async def test_write_creates_valid_md(self, memory_dir):
+        # write_memories returns the number written and produces a headroom_*.md
+        # file that round-trips through _parse_frontmatter; "category" is
+        # expected to surface as the frontmatter "type".
+        adapter = ClaudeCodeAdapter(memory_dir)
+        written = await adapter.write_memories(
+            [
+                {
+                    "content": "Project uses FastAPI",
+                    "category": "architecture",
+                    "headroom_id": "mem_001",
+                    "source_agent": "codex",
+                    "content_hash": "abc123",
+                }
+            ]
+        )
+        assert written == 1
+        files = list(memory_dir.glob("headroom_*.md"))
+        assert len(files) == 1
+        content = files[0].read_text()
+        fm, body = _parse_frontmatter(content)
+        assert fm["type"] == "architecture"
+        assert fm["headroom_id"] == "mem_001"
+        assert fm["source_agent"] == "codex"
+        assert "FastAPI" in body
+    def test_fingerprint_changes_on_modification(self, memory_dir):
+        # NOTE(review): both writes have the same byte length and happen
+        # back-to-back; if fingerprint() is derived only from size/mtime this
+        # could be timing-sensitive on coarse-mtime filesystems — confirm.
+        (memory_dir / "test.md").write_text("content 1")
+        adapter = ClaudeCodeAdapter(memory_dir)
+        fp1 = adapter.fingerprint()
+        (memory_dir / "test.md").write_text("content 2")
+        fp2 = adapter.fingerprint()
+        assert fp1 != fp2
+    def test_fingerprint_stable_when_unchanged(self, memory_dir):
+        # Two consecutive calls with no intervening write must agree.
+        (memory_dir / "test.md").write_text("stable content")
+        adapter = ClaudeCodeAdapter(memory_dir)
+        assert adapter.fingerprint() == adapter.fingerprint()
+    def test_fingerprint_empty_dir(self, tmp_path):
+        # A directory with no files yields the literal sentinel "empty".
+        empty = tmp_path / "empty"
+        empty.mkdir()
+        adapter = ClaudeCodeAdapter(empty)
+        assert adapter.fingerprint() == "empty"
+# ---------------------------------------------------------------------------
+# Codex adapter tests
+# ---------------------------------------------------------------------------
+class TestCodexAdapter:
+    """Test Codex AGENTS.md adapter."""
+    # Fixture: a path to AGENTS.md under tmp_path. The file is NOT created here;
+    # each test writes it (or deliberately leaves it missing).
+    @pytest.fixture
+    def agents_md(self, tmp_path):
+        return tmp_path / "AGENTS.md"
+    @pytest.mark.asyncio
+    async def test_read_from_agents_md(self, agents_md):
+        # Bullets between the headroom:memory start/end markers are read as one
+        # memory each, in file order, with the leading "- " removed.
+        agents_md.write_text(
+            "# Instructions\n\n"
+            "<!-- headroom:memory:start -->\n"
+            "## Headroom Shared Memory\n\n"
+            "- Secret name is TC\n"
+            "- Uses Python 3.12\n"
+            "<!-- headroom:memory:end -->\n"
+        )
+        adapter = CodexAdapter(agents_md)
+        mems = await adapter.read_memories()
+        assert len(mems) == 2
+        assert mems[0].content == "Secret name is TC"
+        assert mems[1].content == "Uses Python 3.12"
+    @pytest.mark.asyncio
+    async def test_write_to_agents_md(self, agents_md):
+        # Writing into a file with no memory section adds the marker section and
+        # keeps the pre-existing text.
+        agents_md.write_text("# Existing instructions\n")
+        adapter = CodexAdapter(agents_md)
+        written = await adapter.write_memories(
+            [
+                {"content": "Port 8787 is default"},
+                {"content": "Uses ruff for linting"},
+            ]
+        )
+        assert written == 2
+        content = agents_md.read_text()
+        assert "headroom:memory:start" in content
+        assert "Port 8787 is default" in content
+        assert "Uses ruff for linting" in content
+        assert "Existing instructions" in content  # Preserved
+    @pytest.mark.asyncio
+    async def test_write_replaces_existing_section(self, agents_md):
+        # An existing marker section is replaced, not appended to: the old
+        # bullet must be gone after the write.
+        agents_md.write_text(
+            "# Instructions\n\n"
+            "<!-- headroom:memory:start -->\n"
+            "## Old\n- old fact\n"
+            "<!-- headroom:memory:end -->\n"
+        )
+        adapter = CodexAdapter(agents_md)
+        await adapter.write_memories([{"content": "new fact"}])
+        content = agents_md.read_text()
+        assert "new fact" in content
+        assert "old fact" not in content
+    @pytest.mark.asyncio
+    async def test_read_empty_agents_md(self, agents_md):
+        # A file without the marker section yields no memories.
+        agents_md.write_text("# No memory section\n")
+        adapter = CodexAdapter(agents_md)
+        mems = await adapter.read_memories()
+        assert mems == []
+    @pytest.mark.asyncio
+    async def test_read_nonexistent_file(self, tmp_path):
+        # A missing file yields an empty list instead of raising.
+        adapter = CodexAdapter(tmp_path / "nonexistent.md")
+        mems = await adapter.read_memories()
+        assert mems == []
+# ---------------------------------------------------------------------------
+# Cross-agent integration tests
+# ---------------------------------------------------------------------------
+class TestCrossAgentInterop:
+    """Test that memories flow between agents via sync."""
+    # Fixtures: a fresh FakeBackend, an empty "claude_memory" directory, a
+    # not-yet-created AGENTS.md path, and a state-file path, all under tmp_path.
+    @pytest.fixture
+    def backend(self):
+        return FakeBackend()
+    @pytest.fixture
+    def claude_dir(self, tmp_path):
+        d = tmp_path / "claude_memory"
+        d.mkdir()
+        return d
+    @pytest.fixture
+    def agents_md(self, tmp_path):
+        return tmp_path / "AGENTS.md"
+    @pytest.fixture
+    def state_path(self, tmp_path):
+        return tmp_path / "state.json"
+    @pytest.mark.asyncio
+    async def test_codex_saves_claude_finds(self, backend, claude_dir, state_path):
+        """Memory saved via Codex MCP appears in Claude's files after sync."""
+        # Simulate Codex saving via MCP (directly to backend)
+        backend.add_memory(
+            "Secret name is TC",
+            metadata={"source_agent": "codex", "content_hash": "x"},
+        )
+        # Sync to Claude
+        adapter = ClaudeCodeAdapter(claude_dir)
+        result = await sync(backend, adapter, "tcms", state_path=state_path, force=True)
+        assert result.exported == 1
+        # Claude's memory dir should have the file
+        files = list(claude_dir.glob("headroom_*.md"))
+        assert len(files) == 1
+        assert "TC" in files[0].read_text()
+    @pytest.mark.asyncio
+    async def test_claude_saves_codex_finds(self, backend, claude_dir, agents_md, state_path):
+        """Memory saved in Claude's files appears in Codex AGENTS.md after sync."""
+        # Claude has a memory
+        fm = "---\nname: Linting\ndescription: use ruff\ntype: project\n---"
+        (claude_dir / "linting.md").write_text(f"{fm}\n\nAlways use ruff for linting\n")
+        # Sync Claude → DB
+        claude_adapter = ClaudeCodeAdapter(claude_dir)
+        await sync(backend, claude_adapter, "tcms", state_path=state_path, force=True)
+        # Sync DB → Codex AGENTS.md
+        # Both adapters write to the same state file in this test.
+        codex_adapter = CodexAdapter(agents_md)
+        result = await sync(backend, codex_adapter, "tcms", state_path=state_path, force=True)
+        assert result.exported >= 1
+        assert "ruff" in agents_md.read_text()
+    @pytest.mark.asyncio
+    async def test_full_round_trip(self, backend, claude_dir, agents_md, state_path):
+        """Full round trip: Claude → DB → Codex, Codex → DB → Claude."""
+        # Claude has a memory
+        fm = "---\nname: Framework\ntype: project\n---"
+        (claude_dir / "framework.md").write_text(f"{fm}\n\nUses FastAPI\n")
+        # Codex has a memory (in DB via MCP)
+        backend.add_memory("Port is 8787", metadata={"source_agent": "codex"})
+        # Sync both adapters
+        claude_adapter = ClaudeCodeAdapter(claude_dir)
+        codex_adapter = CodexAdapter(agents_md)
+        await sync(backend, claude_adapter, "tcms", state_path=state_path, force=True)
+        await sync(backend, codex_adapter, "tcms", state_path=state_path, force=True)
+        # DB has both memories
+        mems = await backend.get_user_memories("tcms")
+        contents = {m.content for m in mems}
+        assert "Uses FastAPI" in contents
+        assert "Port is 8787" in contents
+        # Claude files have Codex's memory
+        all_claude = " ".join(f.read_text() for f in claude_dir.glob("headroom_*.md"))
+        assert "8787" in all_claude
+        # AGENTS.md has at least one of the two memories (from DB). The check is
+        # an `or`, so it does not require both to be present.
+        # NOTE(review): presumably the Codex-origin memory is not exported back
+        # to Codex; if so, asserting on "FastAPI" alone would be stricter — confirm.
+        agents_content = agents_md.read_text()
+        assert "FastAPI" in agents_content or "8787" in agents_content

tests/test_package_init_lazy.py CHANGED Viewed

@@ -33,7 +33,8 @@ def test_headroom_import_stays_lazy() -> None:
     )
     data = json.loads(result.stdout.strip())
-    assert data["version"] == "0.5.21"
     assert data["cache_loaded"] is False
     assert data["models_registry_loaded"] is False
     assert data["memory_loaded"] is False

     )
     data = json.loads(result.stdout.strip())
+    # Version is a non-empty string; don't hardcode a specific value.
+    assert isinstance(data["version"], str) and data["version"]
     assert data["cache_loaded"] is False
     assert data["models_registry_loaded"] is False
     assert data["memory_loaded"] is False

tests/test_transforms/test_kompress_compressor.py CHANGED Viewed

@@ -352,9 +352,8 @@ class TestUnloadKompressModel:
         import headroom.transforms.kompress_compressor as kmod
         from headroom.transforms.kompress_compressor import unload_kompress_model
-        # Ensure no model is loaded (previous tests may have set the global)
-        kmod._kompress_model = None
-        kmod._kompress_tokenizer = None
         # Should return False when no model is loaded
         assert unload_kompress_model() is False

         import headroom.transforms.kompress_compressor as kmod
         from headroom.transforms.kompress_compressor import unload_kompress_model
+        # Ensure no model is loaded (previous tests may have set the cache)
+        kmod._kompress_cache.clear()
         # Should return False when no model is loaded
         assert unload_kompress_model() is False

tests/test_transforms/test_smart_crusher_bugs.py ADDED Viewed

	@@ -0,0 +1,212 @@

+"""Regression tests for SmartCrusher bugs.
+Bug 1: _crush_number_array mixes types (string summary + numbers),
+       violating the schema-preserving guarantee.
+Bug 2: _current_field_semantics is shared instance state, creating
+       a race condition when crushing concurrently.
+Issue 7: _process_value must not crash on deeply nested JSON
+       (recursion depth limit).
+"""
+from __future__ import annotations
+import json
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from headroom import SmartCrusherConfig
+from headroom.transforms.smart_crusher import SmartCrusher
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+# Builds a SmartCrusher for these tests. min_tokens_to_crush=0 removes the
+# token-count floor so even tiny payloads are eligible for crushing;
+# max_items / min_items map to max_items_after_crush / min_items_to_analyze.
+def _make_crusher(max_items: int = 10, min_items: int = 3) -> SmartCrusher:
+    config = SmartCrusherConfig(
+        enabled=True,
+        min_items_to_analyze=min_items,
+        min_tokens_to_crush=0,
+        max_items_after_crush=max_items,
+        variance_threshold=2.0,
+    )
+    return SmartCrusher(config=config)
+# ---------------------------------------------------------------------------
+# Bug 1: Number array type mixing
+# ---------------------------------------------------------------------------
+class TestNumberArraySchemaPreservation:
+    """_crush_number_array must return only original numeric values.
+    Previously it prepended a stats summary string, producing
+    [string, int, int, ...] which violates the schema-preserving
+    guarantee and breaks type-aware JSON consumers.
+    """
+    def test_crushed_number_array_contains_only_numbers(self) -> None:
+        """Every element of the crushed array must be int or float."""
+        crusher = _make_crusher(max_items=10)
+        numbers = list(range(50))  # 0..49, well above the n<=8 passthrough
+        crushed, strategy = crusher._crush_number_array(numbers)
+        # isinstance with an `int | float` union requires Python 3.10+.
+        for i, item in enumerate(crushed):
+            assert isinstance(item, int | float), (
+                f"Item {i} is {type(item).__name__} = {item!r}, expected int/float. "
+                f"Schema-preserving guarantee violated."
+            )
+    def test_crushed_number_array_subset_of_original(self) -> None:
+        """Every value in the crushed array must exist in the original."""
+        crusher = _make_crusher(max_items=10)
+        numbers = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120]
+        crushed, _ = crusher._crush_number_array(numbers)
+        original_set = set(numbers)
+        for item in crushed:
+            assert item in original_set, (
+                f"Value {item!r} not in original array — generated content detected"
+            )
+    def test_stats_summary_in_strategy_not_in_array(self) -> None:
+        """Statistics should be communicated via strategy string, not array content."""
+        crusher = _make_crusher(max_items=5)
+        numbers = list(range(100))
+        crushed, strategy = crusher._crush_number_array(numbers)
+        # Strategy should contain stats info
+        assert "number:" in strategy
+        # Array should not contain any strings
+        strings_in_result = [x for x in crushed if isinstance(x, str)]
+        assert strings_in_result == [], f"Found string(s) in numeric array: {strings_in_result}"
+    def test_number_array_passthrough_for_small(self) -> None:
+        """Arrays with n <= 8 should pass through unchanged."""
+        crusher = _make_crusher()
+        small = [1, 2, 3, 4, 5]
+        crushed, strategy = crusher._crush_number_array(small)
+        assert crushed == small
+        assert strategy == "number:passthrough"
+    def test_number_array_preserves_outliers(self) -> None:
+        """Outlier values should be preserved in the crushed output."""
+        crusher = _make_crusher(max_items=10)
+        # Normal range + extreme outlier
+        numbers = [10] * 20 + [10000]
+        crushed, strategy = crusher._crush_number_array(numbers)
+        assert 10000 in crushed, "Outlier value 10000 was dropped"
+    def test_number_array_preserves_boundaries(self) -> None:
+        """First and last values should always be kept."""
+        crusher = _make_crusher(max_items=5)
+        numbers = list(range(100))
+        crushed, strategy = crusher._crush_number_array(numbers)
+        # The first value is checked by position; the last only by membership.
+        assert crushed[0] == 0, "First value not preserved"
+        assert numbers[-1] in crushed, "Last value not preserved"
+    def test_non_finite_passthrough(self) -> None:
+        """All-NaN/Inf arrays should return unchanged."""
+        crusher = _make_crusher()
+        nans = [float("nan")] * 10
+        crushed, strategy = crusher._crush_number_array(nans)
+        assert strategy == "number:no_finite"
+        # Only the length is compared: NaN != NaN, so `crushed == nans` would
+        # not be a meaningful equality check.
+        assert len(crushed) == 10
+    def test_full_crush_pipeline_number_array_types(self) -> None:
+        """End-to-end: crushing a JSON number array via the public API."""
+        # NOTE(review): despite the docstring, this calls the underscore-prefixed
+        # _smart_crush_content rather than a public entry point.
+        crusher = _make_crusher(max_items=10)
+        content = json.dumps(list(range(50)))
+        result, was_modified, info = crusher._smart_crush_content(content)
+        # NOTE(review): if was_modified is False the test passes without
+        # asserting anything, so a regression that stops crushing goes unnoticed.
+        if was_modified:
+            parsed = json.loads(result)
+            assert isinstance(parsed, list)
+            for item in parsed:
+                assert isinstance(item, int | float), (
+                    f"Public API returned non-numeric item {item!r} in number array"
+                )
+# ---------------------------------------------------------------------------
+# Bug 2: Race condition on _current_field_semantics
+# ---------------------------------------------------------------------------
+class TestFieldSemanticsThreadSafety:
+    """_current_field_semantics must not leak between concurrent crushes.
+    Previously it was stored as instance state (self._current_field_semantics)
+    which created a race condition when the same SmartCrusher instance
+    was used from multiple threads.
+    """
+    def test_concurrent_crushes_no_cross_contamination(self) -> None:
+        """Two concurrent crushes must not share field_semantics state."""
+        crusher = _make_crusher(max_items=5)
+        # Two different array payloads
+        payload_a = json.dumps([{"name": f"item_{i}", "value": i} for i in range(20)])
+        payload_b = json.dumps([{"key": f"k_{i}", "score": i * 0.1} for i in range(20)])
+        results: dict[str, str] = {}
+        errors: list[Exception] = []
+        leaks: list[str] = []
+        def leftover_semantics() -> object:
+            # Reads the calling thread's view of the crusher's thread-local
+            # state; returns None when the attribute is absent or already reset.
+            tl = getattr(crusher, "_thread_local", None)
+            if tl is None:
+                return None
+            return getattr(tl, "field_semantics", None)
+        def crush_task(label: str, content: str) -> None:
+            try:
+                result, _modified, _info = crusher._smart_crush_content(content)
+                results[label] = result
+                # Thread-local values are only visible from the thread that set
+                # them, so the leak check has to run here in the worker; the
+                # main thread never crushes and would always see a clean slate.
+                leaked = leftover_semantics()
+                if leaked is not None:
+                    leaks.append(f"{label}: {leaked!r}")
+            except Exception as e:
+                errors.append(e)
+        with ThreadPoolExecutor(max_workers=4) as executor:
+            futures = []
+            # Run many concurrent crushes to increase race probability
+            for i in range(20):
+                futures.append(executor.submit(crush_task, f"a_{i}", payload_a))
+                futures.append(executor.submit(crush_task, f"b_{i}", payload_b))
+            for f in as_completed(futures):
+                f.result()  # Re-raise exceptions
+        assert not errors, f"Concurrent crushes raised errors: {errors}"
+        # Every submitted task must have produced a result (20 per payload).
+        assert len(results) == 40, f"Expected 40 results, got {len(results)}"
+        # After each crush, the worker's thread-local state must be clean
+        assert not leaks, f"field_semantics leaked in thread-local: {leaks}"
+        # The main thread's view must be clean as well
+        semantics = leftover_semantics()
+        assert semantics is None, f"field_semantics leaked in thread-local: {semantics}"
+# ---------------------------------------------------------------------------
+# Issue 7: Recursion depth limit
+# ---------------------------------------------------------------------------
+class TestRecursionDepthLimit:
+    """_process_value must not crash on deeply nested JSON."""
+    def test_deeply_nested_json_does_not_crash(self) -> None:
+        """Nesting deeper than _MAX_PROCESS_DEPTH should return value unchanged."""
+        crusher = _make_crusher()
+        # Build a 100-level nested structure
+        nested: dict = {"leaf": "value"}
+        for _i in range(100):
+            nested = {"level": nested}
+        content = json.dumps(nested)
+        result, was_modified, info = crusher._smart_crush_content(content)
+        # Should not raise RecursionError
+        parsed = json.loads(result)
+        # The deep structure should be preserved (returned as-is past depth limit)
+        # NOTE(review): only the top-level type is asserted; `parsed == nested`
+        # would be needed to verify the "unchanged" claim in the docstring.
+        assert isinstance(parsed, dict)
+    def test_deeply_nested_list_does_not_crash(self) -> None:
+        """Deeply nested lists should also be handled safely."""
+        crusher = _make_crusher()
+        # Same shape as the dict test: 100 levels of single-element lists.
+        nested: list = ["leaf"]
+        for _i in range(100):
+            nested = [nested]
+        content = json.dumps(nested)
+        result, was_modified, info = crusher._smart_crush_content(content)
+        parsed = json.loads(result)
+        assert isinstance(parsed, list)

tests/test_transforms/test_universal_json_crush.py CHANGED Viewed

@@ -172,14 +172,15 @@ class TestCrushNumberArray:
         assert len(crushed) < len(numbers)
         assert "number:adaptive" in strategy
-    def test_summary_prepended(self, crusher):
         numbers = list(range(100))
         crushed, strategy = crusher._crush_number_array(numbers)
-        # First element should be the stats summary string
-        assert isinstance(crushed[0], str)
-        assert "numbers:" in crushed[0]
-        assert "min=" in crushed[0]
-        assert "max=" in crushed[0]
     def test_outliers_preserved(self, crusher):
         # Normal values around 50 with one extreme outlier
@@ -193,13 +194,13 @@ class TestCrushNumberArray:
         crushed, strategy = crusher._crush_number_array(numbers)
         # With all identical, should compress heavily
         # Summary + a few representatives
-        numeric_values = [v for v in crushed if isinstance(v, (int, float))]
         assert all(v == 42.0 for v in numeric_values)
     def test_first_last_kept(self, crusher):
         numbers = list(range(50))
         crushed, strategy = crusher._crush_number_array(numbers)
-        numeric_values = [v for v in crushed if isinstance(v, (int, float))]
         assert 0 in numeric_values  # First
         assert 49 in numeric_values  # Last
@@ -207,7 +208,7 @@ class TestCrushNumberArray:
         # Stable at 10, then jumps to 100
         numbers = [10.0] * 50 + [100.0] * 50
         crushed, strategy = crusher_large_k._crush_number_array(numbers)
-        numeric_values = [v for v in crushed if isinstance(v, (int, float))]
         # Both 10.0 and 100.0 should be present
         assert 10.0 in numeric_values
         assert 100.0 in numeric_values
@@ -215,23 +216,24 @@ class TestCrushNumberArray:
     def test_nan_inf_filtered(self, crusher):
         numbers = [1.0, 2.0, float("nan"), float("inf"), 3.0] * 10
         crushed, strategy = crusher._crush_number_array(numbers)
-        # Should not crash; stats should be based on finite values
-        assert isinstance(crushed[0], str)
     def test_integers_preserved_as_int(self, crusher):
         numbers = list(range(50))
         crushed, strategy = crusher._crush_number_array(numbers)
-        numeric_values = [v for v in crushed if isinstance(v, (int, float))]
         # Integers should remain integers (not converted to float)
         assert any(isinstance(v, int) for v in numeric_values)
     def test_statistics_accuracy(self, crusher):
         numbers = list(range(1, 101))  # 1 to 100
         crushed, strategy = crusher._crush_number_array(numbers)
-        summary = crushed[0]
-        assert "min=1" in summary
-        assert "max=100" in summary
-        assert "mean=50.5" in summary
 # =====================================================================
@@ -382,7 +384,7 @@ class TestSafetyGuarantees:
             assert items[-1] in crushed
         else:
             crushed, _ = crusher._crush_number_array(items)
-            numeric = [v for v in crushed if isinstance(v, (int, float))]
             assert items[0] in numeric
             assert items[-1] in numeric
@@ -399,7 +401,7 @@ class TestSafetyGuarantees:
         """Arrays below min_items_to_analyze pass through unchanged."""
         if all(isinstance(i, str) for i in items):
             crushed, strategy = crusher._crush_string_array(items)
-        elif all(isinstance(i, (int, float)) for i in items):
             crushed, strategy = crusher._crush_number_array(items)
         else:
             crushed, strategy = crusher._crush_mixed_array(items)

         assert len(crushed) < len(numbers)
         assert "number:adaptive" in strategy
+    def test_stats_in_strategy_not_array(self, crusher):
         numbers = list(range(100))
         crushed, strategy = crusher._crush_number_array(numbers)
+        # Stats should be in the strategy string, not in the array
+        assert "min=" in strategy
+        assert "max=" in strategy
+        # Array should contain only numbers (schema-preserving)
+        for item in crushed:
+            assert isinstance(item, int | float)
     def test_outliers_preserved(self, crusher):
         # Normal values around 50 with one extreme outlier
         crushed, strategy = crusher._crush_number_array(numbers)
         # With all identical, should compress heavily
         # Summary + a few representatives
+        numeric_values = [v for v in crushed if isinstance(v, int | float)]
         assert all(v == 42.0 for v in numeric_values)
     def test_first_last_kept(self, crusher):
         numbers = list(range(50))
         crushed, strategy = crusher._crush_number_array(numbers)
+        numeric_values = [v for v in crushed if isinstance(v, int | float)]
         assert 0 in numeric_values  # First
         assert 49 in numeric_values  # Last
         # Stable at 10, then jumps to 100
         numbers = [10.0] * 50 + [100.0] * 50
         crushed, strategy = crusher_large_k._crush_number_array(numbers)
+        numeric_values = [v for v in crushed if isinstance(v, int | float)]
         # Both 10.0 and 100.0 should be present
         assert 10.0 in numeric_values
         assert 100.0 in numeric_values
     def test_nan_inf_filtered(self, crusher):
         numbers = [1.0, 2.0, float("nan"), float("inf"), 3.0] * 10
         crushed, strategy = crusher._crush_number_array(numbers)
+        # Should not crash; stats in strategy based on finite values
+        assert "min=" in strategy
+        assert "max=" in strategy
     def test_integers_preserved_as_int(self, crusher):
         numbers = list(range(50))
         crushed, strategy = crusher._crush_number_array(numbers)
+        numeric_values = [v for v in crushed if isinstance(v, int | float)]
         # Integers should remain integers (not converted to float)
         assert any(isinstance(v, int) for v in numeric_values)
     def test_statistics_accuracy(self, crusher):
         numbers = list(range(1, 101))  # 1 to 100
         crushed, strategy = crusher._crush_number_array(numbers)
+        # Stats are in the strategy string
+        assert "min=1" in strategy
+        assert "max=100" in strategy
+        assert "mean=50.5" in strategy
 # =====================================================================
             assert items[-1] in crushed
         else:
             crushed, _ = crusher._crush_number_array(items)
+            numeric = [v for v in crushed if isinstance(v, int | float)]
             assert items[0] in numeric
             assert items[-1] in numeric
         """Arrays below min_items_to_analyze pass through unchanged."""
         if all(isinstance(i, str) for i in items):
             crushed, strategy = crusher._crush_string_array(items)
+        elif all(isinstance(i, int | float) for i in items):
             crushed, strategy = crusher._crush_number_array(items)
         else:
             crushed, strategy = crusher._crush_mixed_array(items)

tests/test_ws_memory_relay.py ADDED Viewed

	@@ -0,0 +1,523 @@

+"""Tests for WebSocket memory tool interception in the Codex Responses API relay.
+Verifies that:
+1. Memory tool events are suppressed from reaching Codex
+2. response.created is buffered and only flushed for non-memory responses
+3. Tool execution happens and continuation is sent upstream
+4. Non-memory responses pass through with normal streaming latency
+"""
+from __future__ import annotations
+import json
+from dataclasses import dataclass, field
+from typing import Any
+from headroom.proxy.memory_handler import MEMORY_TOOL_NAMES
+# ---------------------------------------------------------------------------
+# Minimal WS relay state machine (mirrors the logic in openai.py)
+# ---------------------------------------------------------------------------
+@dataclass
+class WSMemoryRelayState:
+    """State machine for WS event processing with memory tool interception.
+    This mirrors the logic in ``_upstream_to_client`` but is decoupled from
+    actual WebSocket I/O so it can be unit-tested.
+    """
+    memory_tool_names: set[str] = field(default_factory=lambda: set(MEMORY_TOOL_NAMES))
+    # Per-response state (reset after each response.completed)
+    event_buffer: list[str] = field(default_factory=list)
+    decided: bool = False
+    suppress_response: bool = False
+    pending_function_calls: list[dict[str, Any]] = field(default_factory=list)
+    last_response_id: str | None = None
+    def process_event(self, msg_str: str) -> dict[str, Any]:
+        """Process a single upstream WS event.
+        Returns a dict with possible keys:
+            relay: list[str]        — events to send to Codex
+            execute_tools: list     — function_call items to execute
+            send_continuation: dict — continuation payload to send upstream
+        """
+        result: dict[str, Any] = {"relay": [], "execute_tools": [], "send_continuation": None}
+        try:
+            event = json.loads(msg_str)
+        except (json.JSONDecodeError, TypeError):
+            # Not JSON — always relay
+            result["relay"].append(msg_str)
+            return result
+        event_type = event.get("type", "")
+        # ---- Phase 1: Buffering (before first output item) ----
+        if not self.decided:
+            self.event_buffer.append(msg_str)
+            if event_type == "response.output_item.added":
+                item = event.get("item", {})
+                if (
+                    item.get("type") == "function_call"
+                    and item.get("name") in self.memory_tool_names
+                ):
+                    # Memory tool is first output → suppress entire response
+                    self.suppress_response = True
+                    self.decided = True
+                    self.event_buffer.clear()
+                else:
+                    # Non-memory item → flush buffer and pass through
+                    self.decided = True
+                    result["relay"].extend(self.event_buffer)
+                    self.event_buffer.clear()
+            elif event_type == "response.completed":
+                # Response completed with no output items — flush all
+                self.decided = True
+                result["relay"].extend(self.event_buffer)
+                self.event_buffer.clear()
+            return result
+        # ---- Phase 2a: Suppress mode (memory tool response) ----
+        if self.suppress_response:
+            # Capture completed function_call items
+            if event_type == "response.output_item.done":
+                item = event.get("item", {})
+                if (
+                    item.get("type") == "function_call"
+                    and item.get("name") in self.memory_tool_names
+                ):
+                    self.pending_function_calls.append(item)
+            if event_type == "response.completed":
+                resp = event.get("response", {})
+                self.last_response_id = resp.get("id")
+                if self.pending_function_calls:
+                    result["execute_tools"] = list(self.pending_function_calls)
+                    # Build continuation payload
+                    # (actual tool execution + output building done by caller)
+                    result["send_continuation"] = {
+                        "response_id": self.last_response_id,
+                        "function_calls": list(self.pending_function_calls),
+                    }
+                # Reset for next response (continuation)
+                self._reset_response_state()
+            return result  # Nothing relayed in suppress mode
+        # ---- Phase 2b: Pass-through mode (normal response) ----
+        result["relay"].append(msg_str)
+        return result
+    def _reset_response_state(self) -> None:
+        """Reset per-response state for the next response."""
+        self.event_buffer.clear()
+        self.decided = False
+        self.suppress_response = False
+        self.pending_function_calls.clear()
+        self.last_response_id = None
+# ---------------------------------------------------------------------------
+# Test helpers
+# ---------------------------------------------------------------------------
+def _make_event(event_type: str, **kwargs: Any) -> str:
+    data: dict[str, Any] = {"type": event_type}
+    data.update(kwargs)
+    return json.dumps(data)
def _response_created(response_id: str = "resp_A") -> str:
    """Build a ``response.created`` event carrying the given response id."""
    response_body = {"id": response_id}
    return _make_event("response.created", response=response_body)
def _output_item_added_text(index: int = 0) -> str:
    """Build a ``response.output_item.added`` event for an assistant message item."""
    message_item = {"type": "message", "role": "assistant"}
    return _make_event("response.output_item.added", output_index=index, item=message_item)
def _output_item_added_function_call(name: str, index: int = 0, call_id: str = "call_1") -> str:
    """Build a ``response.output_item.added`` event for a function_call item."""
    call_item = {"type": "function_call", "name": name, "call_id": call_id}
    return _make_event("response.output_item.added", output_index=index, item=call_item)
def _function_call_args_delta(index: int = 0, delta: str = '{"qu') -> str:
    """Build a ``response.function_call_arguments.delta`` event."""
    kind = "response.function_call_arguments.delta"
    return _make_event(kind, output_index=index, delta=delta)
def _function_call_args_done(index: int = 0, arguments: str = '{"query": "codename"}') -> str:
    """Build a ``response.function_call_arguments.done`` event."""
    kind = "response.function_call_arguments.done"
    return _make_event(kind, output_index=index, arguments=arguments)
def _output_item_done_function_call(
    name: str,
    index: int = 0,
    call_id: str = "call_1",
    arguments: str = '{"query": "codename"}',
) -> str:
    """Build a ``response.output_item.done`` event for a finished function_call item."""
    finished_call = {
        "type": "function_call",
        "name": name,
        "call_id": call_id,
        "arguments": arguments,
    }
    return _make_event("response.output_item.done", output_index=index, item=finished_call)
def _output_text_delta(index: int = 0, text: str = "Hello") -> str:
    """Build a ``response.output_text.delta`` event whose ``delta`` is ``text``."""
    kind = "response.output_text.delta"
    return _make_event(kind, output_index=index, delta=text)
def _output_item_done_text(index: int = 0) -> str:
    """Build a ``response.output_item.done`` event for an assistant message item."""
    message_item = {"type": "message", "role": "assistant"}
    return _make_event("response.output_item.done", output_index=index, item=message_item)
def _response_completed(response_id: str = "resp_A") -> str:
    """Build a ``response.completed`` event with status ``completed``."""
    response_body = {"id": response_id, "status": "completed"}
    return _make_event("response.completed", response=response_body)
def _output_item_added_shell(index: int = 0) -> str:
    """Simulate a Codex built-in tool (shell) call, which is expected to pass through."""
    shell_call = {"type": "function_call", "name": "shell", "call_id": "call_shell"}
    return _make_event("response.output_item.added", output_index=index, item=shell_call)
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
class TestWSMemoryRelayNonMemory:
    """Pass-through behaviour for responses that never call a memory tool."""

    def test_text_response_relayed_immediately(self):
        """A text-only response reaches the client complete and in order."""
        state = WSMemoryRelayState()
        stream = [
            _response_created(),
            _output_item_added_text(),
            _output_text_delta(text="The answer is 42"),
            _output_item_done_text(),
            _response_completed(),
        ]
        outcomes = [state.process_event(raw) for raw in stream]

        # No step may ask for tool execution or a continuation.
        for outcome in outcomes:
            assert outcome["execute_tools"] == []
            assert outcome["send_continuation"] is None

        forwarded = [msg for outcome in outcomes for msg in outcome["relay"]]
        # All 5 events should be relayed
        assert len(forwarded) == 5

        # response.created is held back and flushed together with the first
        # output item, so the original ordering is preserved.
        seen_types = [json.loads(msg)["type"] for msg in forwarded]
        assert seen_types == [
            "response.created",
            "response.output_item.added",
            "response.output_text.delta",
            "response.output_item.done",
            "response.completed",
        ]

    def test_shell_tool_relayed(self):
        """A built-in (shell) function call is not treated as a memory tool."""
        state = WSMemoryRelayState()
        stream = [
            _response_created(),
            _output_item_added_shell(),
            _response_completed(),
        ]
        outcomes = [state.process_event(raw) for raw in stream]

        for outcome in outcomes:
            assert outcome["execute_tools"] == []
            assert outcome["send_continuation"] is None

        forwarded = [msg for outcome in outcomes for msg in outcome["relay"]]
        assert len(forwarded) == 3

    def test_empty_response_relayed(self):
        """Created + completed with no output items are both relayed."""
        state = WSMemoryRelayState()
        stream = [
            _response_created(),
            _response_completed(),
        ]
        forwarded = [msg for raw in stream for msg in state.process_event(raw)["relay"]]
        assert len(forwarded) == 2
class TestWSMemoryRelayMemoryTool:
    """Memory tool calls are intercepted and hidden from the client."""

    def test_memory_search_fully_suppressed(self):
        """A memory_search response relays nothing and requests a continuation."""
        relay = WSMemoryRelayState()
        stream = [
            _response_created("resp_A"),
            _output_item_added_function_call("memory_search", index=0),
            _function_call_args_delta(index=0),
            _function_call_args_done(index=0),
            _output_item_done_function_call("memory_search", index=0),
            _response_completed("resp_A"),
        ]
        outcomes = [relay.process_event(raw) for raw in stream]

        all_relayed: list[str] = [msg for o in outcomes for msg in o["relay"]]
        tool_executions: list[Any] = [call for o in outcomes for call in o["execute_tools"]]
        continuations: list[Any] = [
            o["send_continuation"] for o in outcomes if o["send_continuation"]
        ]

        # ZERO events relayed to Codex
        assert len(all_relayed) == 0, (
            f"Expected 0 relayed events, got {len(all_relayed)}: "
            f"{[json.loads(e)['type'] for e in all_relayed]}"
        )
        # Tool execution triggered
        assert len(tool_executions) == 1
        assert tool_executions[0]["name"] == "memory_search"
        # Continuation requested
        assert len(continuations) == 1
        assert continuations[0]["response_id"] == "resp_A"

    def test_memory_save_also_suppressed(self):
        """A memory_save response is intercepted the same way."""
        state = WSMemoryRelayState()
        save_args = '{"content": "user likes dark mode"}'
        stream = [
            _response_created("resp_B"),
            _output_item_added_function_call("memory_save", index=0, call_id="call_save"),
            _function_call_args_done(index=0, arguments=save_args),
            _output_item_done_function_call(
                "memory_save",
                index=0,
                call_id="call_save",
                arguments=save_args,
            ),
            _response_completed("resp_B"),
        ]
        outcomes = [state.process_event(raw) for raw in stream]

        forwarded = [msg for o in outcomes for msg in o["relay"]]
        executed = [call for o in outcomes for call in o["execute_tools"]]

        assert len(forwarded) == 0
        assert len(executed) == 1
        assert executed[0]["name"] == "memory_save"

    def test_continuation_response_relayed_normally(self):
        """The response that follows a suppressed memory call passes through."""
        state = WSMemoryRelayState()

        # --- First response: memory_search (suppressed) ---
        suppressed_stream = [
            _response_created("resp_A"),
            _output_item_added_function_call("memory_search", index=0),
            _function_call_args_done(index=0),
            _output_item_done_function_call("memory_search", index=0),
            _response_completed("resp_A"),
        ]
        for raw in suppressed_stream:
            state.process_event(raw)

        # --- Second response: continuation text (relayed) ---
        followup_stream = [
            _response_created("resp_B"),
            _output_item_added_text(index=0),
            _output_text_delta(index=0, text="The codename is Pegasus-2"),
            _output_item_done_text(index=0),
            _response_completed("resp_B"),
        ]
        outcomes = [state.process_event(raw) for raw in followup_stream]
        for outcome in outcomes:
            assert outcome["execute_tools"] == []
            assert outcome["send_continuation"] is None

        forwarded = [msg for o in outcomes for msg in o["relay"]]
        # All continuation events relayed
        assert len(forwarded) == 5

        decoded = [json.loads(msg) for msg in forwarded]
        assert decoded[0]["type"] == "response.created"
        assert decoded[-1]["type"] == "response.completed"

        # Verify the text content
        text_events = [ev for ev in decoded if ev["type"] == "response.output_text.delta"]
        assert len(text_events) == 1
        assert text_events[0]["delta"] == "The codename is Pegasus-2"

    def test_non_json_message_always_relayed(self):
        """A message that is not valid JSON is relayed unchanged."""
        state = WSMemoryRelayState()
        outcome = state.process_event("not valid json {{{")
        forwarded = outcome["relay"]
        assert len(forwarded) == 1
        assert forwarded[0] == "not valid json {{{"

    def test_multiple_memory_tools_in_one_response(self):
        """Two memory tool calls in one response are both captured and hidden."""
        state = WSMemoryRelayState()
        stream = [
            _response_created("resp_multi"),
            _output_item_added_function_call("memory_search", index=0, call_id="call_1"),
            _output_item_done_function_call("memory_search", index=0, call_id="call_1"),
            # The model decides to save something too
            _output_item_added_function_call("memory_save", index=1, call_id="call_2"),
            _output_item_done_function_call(
                "memory_save",
                index=1,
                call_id="call_2",
                arguments='{"content": "test"}',
            ),
            _response_completed("resp_multi"),
        ]
        outcomes = [state.process_event(raw) for raw in stream]

        forwarded = [msg for o in outcomes for msg in o["relay"]]
        executed = [call for o in outcomes for call in o["execute_tools"]]
        requested = [o["send_continuation"] for o in outcomes if o["send_continuation"]]

        assert len(forwarded) == 0
        assert len(executed) == 2
        assert {call["name"] for call in executed} == {"memory_search", "memory_save"}
        assert len(requested) == 1
class TestWSMemoryRelayStateReset:
    """Per-response state is cleared between consecutive responses."""

    def test_state_resets_after_memory_response(self):
        """Once a memory response completes, every per-response field is cleared."""
        state = WSMemoryRelayState()

        # Memory response
        memory_stream = [
            _response_created("resp_A"),
            _output_item_added_function_call("memory_search"),
            _output_item_done_function_call("memory_search"),
            _response_completed("resp_A"),
        ]
        for raw in memory_stream:
            state.process_event(raw)

        # State should be reset
        assert state.decided is False
        assert state.suppress_response is False
        assert len(state.pending_function_calls) == 0
        assert len(state.event_buffer) == 0

    def test_alternating_memory_and_normal(self):
        """A memory response followed by two normal responses all behave correctly."""
        state = WSMemoryRelayState()

        # 1. Memory response (suppressed)
        memory_stream = [
            _response_created("resp_A"),
            _output_item_added_function_call("memory_search"),
            _output_item_done_function_call("memory_search"),
            _response_completed("resp_A"),
        ]
        for raw in memory_stream:
            state.process_event(raw)

        # 2. Continuation text response (relayed)
        text_stream = [
            _response_created("resp_B"),
            _output_item_added_text(),
            _output_text_delta(text="Pegasus-2"),
            _output_item_done_text(),
            _response_completed("resp_B"),
        ]
        text_forwarded = [msg for raw in text_stream for msg in state.process_event(raw)["relay"]]
        assert len(text_forwarded) == 5

        # 3. Another normal response should also work
        shell_stream = [
            _response_created("resp_C"),
            _output_item_added_shell(),
            _response_completed("resp_C"),
        ]
        shell_forwarded = [msg for raw in shell_stream for msg in state.process_event(raw)["relay"]]
        assert len(shell_forwarded) == 3