| """ |
| test_alive_loop_e2e.py -- A-Z end-to-end simulation of the alive skill system. |
| |
| This is the canary. If any handoff in the alive loop regresses, this |
| test fails. It chains every module the loop depends on in a single |
| pytest function, against a synthetic wiki fixture, with zero network |
| and zero touches to ~/.claude/. |
| |
| The chain mirrors the product-intent user journey: |
| |
| 1. User writes code (simulated: three tool-use invocations carrying |
| unmatched signals). |
| 2. context_monitor accumulates those signals across invocations |
| and, once the cumulative threshold fires, writes |
| pending-skills.json with a ranked graph-derived bundle. This is |
| the regression-pinned behaviour of code-reviewer BLOCKER fixed |
| in commit 6569387 -- every individual invocation below the |
| threshold does NOT write, only the cumulative state does. |
| 3. bundle_orchestrator reads pending-skills.json and renders a |
| user-facing message with the top-K bundle categorised by |
| Skills / Agents / MCPs (contract pinned in |
| test_bundle_orchestrator.py; here we only assert the shape is sane). |
| 4. User approves a skill -> ctx-skill-install copies the body into |
| the live skills dir, bumps entity status to `installed`, and adds |
| a manifest load entry. The A-Z test also covers agent install and a |
| mocked ctx-mcp-install registration through the Claude MCP wrapper. |
| 5. The manifest reconciles install entries across entity types |
| via the install_utils tuple-based dedup (pinned in P1.3 |
| regression suite). |
| 6. User decides the skill is stale -> skill_unload.unload_skill |
| drops the load entry and adds an unload entry for the typed entity. |
| |
| When this test passes, the install/unload leg of the alive loop |
| works against a representative corpus. When it fails, something |
| between the modules broke -- CI should gate a PR landing on this |
| test going green. |
| |
| NOT covered here (by design): |
| - The lifecycle auto-demote threshold firing (tested in |
| test_ctx_lifecycle.py); would require a time-mock setup that's |
| cleaner in its own test. |
| - Real sentence-transformer embeddings. We use a tiny synthetic |
| graph and don't exercise the embedding cache. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import json |
| import sys |
| from pathlib import Path |
|
|
| import networkx as nx |
| import pytest |
|
|
# Put the directory one level above this file's directory at the front of
# sys.path so the imports performed inside the fixture and tests resolve
# (presumably this is where the `ctx` package and `ctx_config` live --
# confirm against the repository layout).
sys.path.insert(0, str(Path(__file__).parents[1]))
|
|
|
|
| |
| |
| |
|
|
|
|
def _write_skill_entity(wiki_dir: Path, slug: str, *, tags: list[str]) -> Path:
    """Write a synthetic skill entity card and return its path.

    The card is written to ``<wiki_dir>/entities/skills/<slug>.md`` (parent
    directories are created as needed). Its frontmatter carries ``title``,
    ``type: skill``, ``status: cataloged`` and one ``tags`` list item per
    entry in ``tags``.

    Args:
        wiki_dir: Root of the synthetic wiki.
        slug: Entity slug; used for the file name, the title and the heading.
        tags: Tags to list in the frontmatter, in order.

    Returns:
        Path of the entity file that was written.
    """
    entity = wiki_dir / "entities" / "skills" / f"{slug}.md"
    entity.parent.mkdir(parents=True, exist_ok=True)
    tags_block = "\n".join(f" - {t}" for t in tags)
    entity.write_text(
        "---\n"
        f"title: {slug}\n"
        "type: skill\n"
        "status: cataloged\n"
        f"tags:\n{tags_block}\n"
        "---\n"
        f"# {slug}\n\n"
        "Synthetic skill body for E2E test.\n",
        encoding="utf-8",
    )
    return entity
|
|
|
|
def _write_skill_body(wiki_dir: Path, slug: str, body: str) -> Path:
    """Drop `converted/<slug>/SKILL.md` so ctx-skill-install finds content.

    Args:
        wiki_dir: Root of the synthetic wiki.
        slug: Skill slug; names the directory under ``converted/``.
        body: Text written verbatim (UTF-8) as the SKILL.md content.

    Returns:
        Path of the SKILL.md file that was written.
    """
    path = wiki_dir / "converted" / slug / "SKILL.md"
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(body, encoding="utf-8")
    return path
|
|
|
|
def _write_agent_entity(wiki_dir: Path, slug: str, *, tags: list[str]) -> Path:
    """Write a synthetic agent entity card and return its path.

    The card is written to ``<wiki_dir>/entities/agents/<slug>.md`` (parent
    directories are created as needed). Its frontmatter carries ``title``,
    ``type: agent``, ``status: cataloged`` and one ``tags`` list item per
    entry in ``tags``.

    Args:
        wiki_dir: Root of the synthetic wiki.
        slug: Entity slug; used for the file name, the title and the heading.
        tags: Tags to list in the frontmatter, in order.

    Returns:
        Path of the entity file that was written.
    """
    entity = wiki_dir / "entities" / "agents" / f"{slug}.md"
    entity.parent.mkdir(parents=True, exist_ok=True)
    tags_block = "\n".join(f" - {t}" for t in tags)
    entity.write_text(
        "---\n"
        f"title: {slug}\n"
        "type: agent\n"
        "status: cataloged\n"
        f"tags:\n{tags_block}\n"
        "---\n"
        f"# {slug}\n\nSynthetic agent card for E2E test.\n",
        encoding="utf-8",
    )
    return entity
|
|
|
|
def _write_agent_body(wiki_dir: Path, slug: str, body: str) -> Path:
    """Drop `converted-agents/<slug>.md` so ctx-agent-install finds content.

    Args:
        wiki_dir: Root of the synthetic wiki.
        slug: Agent slug; names the file under ``converted-agents/``.
        body: Text written verbatim (UTF-8) as the agent file content.

    Returns:
        Path of the agent file that was written.
    """
    path = wiki_dir / "converted-agents" / f"{slug}.md"
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(body, encoding="utf-8")
    return path
|
|
|
|
def _write_mcp_entity(wiki_dir: Path, slug: str, *, tags: list[str]) -> Path:
    """Write a synthetic MCP-server entity card and return its path.

    MCP cards are sharded by the first character of the slug: an alphabetic
    first character is used as the shard directory name as-is, anything else
    (including an empty slug) goes under ``0-9``. The card is written to
    ``<wiki_dir>/entities/mcp-servers/<shard>/<slug>.md`` with frontmatter
    carrying ``type: mcp-server``, ``slug``, ``name``, ``status: cataloged``
    and one ``tags`` list item per entry in ``tags``.

    Args:
        wiki_dir: Root of the synthetic wiki.
        slug: Entity slug; used for the shard, file name, slug/name fields
            and the heading.
        tags: Tags to list in the frontmatter, in order.

    Returns:
        Path of the entity file that was written.
    """
    shard = slug[0] if slug and slug[0].isalpha() else "0-9"
    entity = wiki_dir / "entities" / "mcp-servers" / shard / f"{slug}.md"
    entity.parent.mkdir(parents=True, exist_ok=True)
    tags_block = "\n".join(f" - {t}" for t in tags)
    entity.write_text(
        "---\n"
        "type: mcp-server\n"
        f"slug: {slug}\n"
        f"name: {slug}\n"
        "status: cataloged\n"
        f"tags:\n{tags_block}\n"
        "---\n"
        f"# {slug}\n\nSynthetic MCP card for E2E test.\n",
        encoding="utf-8",
    )
    return entity
|
|
|
|
def _write_graph_json(wiki_dir: Path, nodes: list[dict], edges: list[dict]) -> Path:
    """Build a graph.json that context_monitor.graph_suggest can walk.

    Args:
        wiki_dir: Root of the synthetic wiki; the graph is written to
            ``<wiki_dir>/graphify-out/graph.json``.
        nodes: Node dicts. ``id`` is the node key; every other key becomes a
            node attribute.
        edges: Edge dicts. ``source`` and ``target`` name the endpoints;
            every other key becomes an edge attribute.

    Returns:
        Path of the graph.json file that was written.
    """
    graph = nx.Graph()
    # Graph-level metadata serialised alongside the nodes and edges.
    # NOTE(review): presumably read by the graph consumer -- confirm which
    # of these keys it actually requires.
    graph.graph["semantic_build_floor"] = 0.5
    graph.graph["external_catalog_nodes"] = {"skills.sh": 1}
    graph.graph["source_catalog_nodes"] = {"skills.sh": 1}

    for node in nodes:
        attrs = {k: v for k, v in node.items() if k != "id"}
        graph.add_node(node["id"], **attrs)

    endpoint_keys = {"source", "target"}
    for edge in edges:
        attrs = {k: v for k, v in edge.items() if k not in endpoint_keys}
        graph.add_edge(edge["source"], edge["target"], **attrs)

    # Serialise in node-link form with the edge list stored under "edges".
    data = nx.node_link_data(graph, edges="edges")
    path = wiki_dir / "graphify-out" / "graph.json"
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(data), encoding="utf-8")
    return path
|
|
|
|
@pytest.fixture()
def e2e_world(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> dict[str, Path]:
    """A hermetic synthetic world for the alive-loop E2E test.

    Builds a fake ``claude`` directory tree under ``tmp_path`` (wiki, skills
    dir, agents dir), seeds it with two skills, two agents, two MCP cards and
    a small graph, then monkeypatches the path constants of
    ``context_monitor``, ``bundle_orchestrator`` and ``install_utils`` (plus
    ``ctx_config.cfg.graph_semantic_cache_dir``) to point inside ``tmp_path``
    so no real ~/.claude/ content is touched.

    Returns:
        A dict of paths the tests use to drive each module, keyed by
        ``claude``, ``wiki``, ``skills``, ``agents``, ``manifest``,
        ``intent_log`` and ``pending_skills``.
    """
    claude_dir = tmp_path / "claude"
    wiki_dir = claude_dir / "skill-wiki"
    skills_dir = claude_dir / "skills"
    agents_dir = claude_dir / "agents"
    for d in (claude_dir, wiki_dir, skills_dir, agents_dir):
        d.mkdir(parents=True)

    # State files; none of them is created here, only their paths are fixed.
    manifest = claude_dir / "skill-manifest.json"
    intent_log = claude_dir / "intent-log.jsonl"
    pending_skills = claude_dir / "pending-skills.json"

    # Skills: an entity card plus a converted body for each.
    _write_skill_entity(wiki_dir, "python-patterns", tags=["python", "patterns"])
    _write_skill_body(
        wiki_dir, "python-patterns",
        "# python-patterns\n\nBest practices for idiomatic Python.\n",
    )
    _write_skill_entity(wiki_dir, "security-basics", tags=["security"])
    _write_skill_body(
        wiki_dir, "security-basics",
        "# security-basics\n\nSecurity fundamentals.\n",
    )

    # Agents: an entity card plus a converted agent file for each.
    _write_agent_entity(wiki_dir, "code-reviewer", tags=["python", "review"])
    _write_agent_body(
        wiki_dir, "code-reviewer",
        "---\nname: code-reviewer\ndescription: review code\n---\n\n"
        "Review code for quality and security issues.\n",
    )
    _write_agent_entity(wiki_dir, "devops-engineer", tags=["devops", "docker"])
    _write_agent_body(
        wiki_dir, "devops-engineer",
        "---\nname: devops-engineer\ndescription: devops\n---\n\n"
        "DevOps tooling and CI/CD.\n",
    )

    # MCP servers: entity cards only.
    _write_mcp_entity(wiki_dir, "anthropic-python-sdk", tags=["python", "sdk"])
    _write_mcp_entity(wiki_dir, "atlassian-cloud", tags=["saas"])

    # Graph: all six entities are nodes, but only the three python-tagged
    # ones (skill, agent, MCP) are connected to each other; the remaining
    # three are isolated nodes.
    _write_graph_json(
        wiki_dir,
        nodes=[
            {"id": "skill:python-patterns", "label": "python-patterns",
             "type": "skill", "tags": ["python", "patterns"]},
            {"id": "skill:security-basics", "label": "security-basics",
             "type": "skill", "tags": ["security"]},
            {"id": "agent:code-reviewer", "label": "code-reviewer",
             "type": "agent", "tags": ["python", "review"]},
            {"id": "agent:devops-engineer", "label": "devops-engineer",
             "type": "agent", "tags": ["devops", "docker"]},
            {"id": "mcp-server:anthropic-python-sdk",
             "label": "anthropic-python-sdk",
             "type": "mcp-server", "tags": ["python", "sdk"]},
            {"id": "mcp-server:atlassian-cloud",
             "label": "atlassian-cloud",
             "type": "mcp-server", "tags": ["saas"]},
        ],
        edges=[
            {"source": "skill:python-patterns", "target": "agent:code-reviewer",
             "weight": 1, "shared_tags": ["python"]},
            {"source": "skill:python-patterns",
             "target": "mcp-server:anthropic-python-sdk",
             "weight": 1, "shared_tags": ["python"]},
            {"source": "agent:code-reviewer",
             "target": "mcp-server:anthropic-python-sdk",
             "weight": 1, "shared_tags": ["python"]},
        ],
    )

    # Redirect each module's path constants into the temp tree. The modules
    # are imported here, inside the fixture, rather than at file top.
    from ctx.adapters.claude_code.hooks import context_monitor as _cm
    monkeypatch.setattr(_cm, "CLAUDE_DIR", claude_dir)
    monkeypatch.setattr(_cm, "INTENT_LOG", intent_log)
    monkeypatch.setattr(_cm, "PENDING_SKILLS", pending_skills)
    monkeypatch.setattr(_cm, "MANIFEST_PATH", manifest)

    from ctx.adapters.claude_code.hooks import bundle_orchestrator as _bo
    monkeypatch.setattr(_bo, "CLAUDE_DIR", claude_dir)
    monkeypatch.setattr(_bo, "PENDING_SKILLS", pending_skills)
    monkeypatch.setattr(_bo, "PENDING_UNLOAD", claude_dir / "pending-unload.json")
    monkeypatch.setattr(_bo, "SHOWN_FLAG", claude_dir / ".bundle-shown")

    from ctx.adapters.claude_code.install import install_utils as _iu
    monkeypatch.setattr(_iu, "MANIFEST_PATH", manifest)

    # Keep the semantic cache directory setting inside tmp_path as well.
    import ctx_config
    monkeypatch.setattr(
        ctx_config.cfg,
        "graph_semantic_cache_dir",
        tmp_path / "semantic-cache",
    )

    return {
        "claude": claude_dir,
        "wiki": wiki_dir,
        "skills": skills_dir,
        "agents": agents_dir,
        "manifest": manifest,
        "intent_log": intent_log,
        "pending_skills": pending_skills,
    }
|
|
|
|
| |
| |
| |
|
|
|
|
class TestAliveLoopE2E:
    """One test per chain link + one test for the full journey."""

    # The three simulated tool-use invocations used to cross the cumulative
    # signal threshold (two file reads and one shell command).
    _SIGNAL_INVOCATIONS = (
        ("Read", {"file_path": "App.tsx"}),
        ("Read", {"file_path": "Dockerfile"}),
        ("Bash", {"command": "pip install fastapi"}),
    )

    @staticmethod
    def _run_monitor(monkeypatch, tool, tool_input):
        """Run context_monitor.main() once with the given tool and input.

        The hook takes its arguments from ``sys.argv``, so argv is patched
        for the duration of the test before ``main()`` is called.
        """
        from ctx.adapters.claude_code.hooks import context_monitor as _cm

        monkeypatch.setattr(sys, "argv", [
            "context_monitor.py", "--tool", tool,
            "--input", json.dumps(tool_input),
        ])
        _cm.main()

    @classmethod
    def _fire_cumulative_threshold(cls, monkeypatch):
        """Run context_monitor once per entry in ``_SIGNAL_INVOCATIONS``."""
        for tool, tool_input in cls._SIGNAL_INVOCATIONS:
            cls._run_monitor(monkeypatch, tool, tool_input)

    @staticmethod
    def _manifest_pairs(manifest_path, section="load"):
        """Return the ``(skill, entity_type)`` pairs in a manifest section."""
        manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
        return {(e.get("skill"), e.get("entity_type")) for e in manifest[section]}

    @staticmethod
    def _stub_claude_mcp(monkeypatch):
        """Replace ``mcp_install._run_claude_mcp`` with a recording fake.

        The fake always reports success (``0, "ok", ""``). Returns the list
        that accumulates the argument list of every call.
        """
        from ctx.adapters.claude_code.install import mcp_install

        calls: list[list[str]] = []

        def fake_run(args: list[str]) -> tuple[int, str, str]:
            calls.append(args)
            return 0, "ok", ""

        monkeypatch.setattr(mcp_install, "_run_claude_mcp", fake_run)
        return calls

    def test_signals_below_threshold_do_not_fire(
        self, e2e_world, monkeypatch,
    ):
        """Single invocation with one unmatched signal stays below the
        default threshold=3 -- pending-skills.json must NOT be written.

        Pins the regression from code-reviewer BLOCKER (commit 6569387):
        pre-fix, len(unmatched)>=THRESHOLD never fired; post-fix, the
        cumulative check also doesn't fire until enough signals have
        accumulated."""
        self._run_monitor(monkeypatch, "Read", {"file_path": "App.tsx"})

        assert not e2e_world["pending_skills"].exists(), (
            "pending-skills.json fired after 1 signal (threshold=3)"
        )

    def test_cumulative_threshold_fires_pending_write(
        self, e2e_world, monkeypatch,
    ):
        """Cumulative signals across multiple invocations trigger the
        pending-skills write once the threshold is reached. The alive
        loop's suggestion arm now fires in production -- code-reviewer
        BLOCKER verified in-situ."""
        self._fire_cumulative_threshold(monkeypatch)

        assert e2e_world["pending_skills"].exists()
        payload = json.loads(e2e_world["pending_skills"].read_text(encoding="utf-8"))
        # The payload must record at least the three accumulated signals.
        assert len(payload.get("unmatched_signals", [])) >= 3
        # Only the presence of the key is pinned here, not its content.
        assert "graph_suggestions" in payload

    def test_graph_suggest_returns_cross_type_bundle(
        self, e2e_world, monkeypatch,
    ):
        """With a python signal and a graph containing a python skill +
        python agent + python MCP, graph_suggest returns entries
        spanning more than one entity type. Pins the cross-type contract
        the bundle orchestrator depends on."""
        from ctx.adapters.claude_code.hooks import context_monitor as _cm

        sugs = _cm.graph_suggest(["python"])
        assert len(sugs) > 0
        types = {s["type"] for s in sugs}
        # At least two of the three entity types must be represented.
        assert len(types & {"skill", "agent", "mcp-server"}) >= 2, (
            f"expected multi-type bundle, got types={types}"
        )

    def test_bundle_orchestrator_renders_categorised_message(
        self, e2e_world, capsys, monkeypatch,
    ):
        """Fire the cumulative threshold, then run the bundle orchestrator
        hook. Output must be valid Claude-Code-hook JSON whose message
        contains at least two of the Skills / Agents / MCPs headers."""
        from ctx.adapters.claude_code.hooks import bundle_orchestrator as _bo

        self._fire_cumulative_threshold(monkeypatch)

        # Precondition: the monitor must have produced the pending file.
        assert e2e_world["pending_skills"].exists()

        _bo.main()
        out = capsys.readouterr().out.strip()
        assert out, "bundle_orchestrator emitted no output despite pending file"
        envelope = json.loads(out)
        assert envelope["hookSpecificOutput"]["hookEventName"] == "PostToolUse"
        msg = envelope["hookSpecificOutput"]["additionalContext"]

        headers = {h for h in ("Skills:", "Agents:", "MCPs:") if h in msg}
        assert len(headers) >= 2, (
            f"expected multi-type categorisation, got headers={headers}"
        )

    def test_install_skill_copies_body_and_records_manifest(
        self, e2e_world,
    ):
        """User approves a skill from the bundle -> skill_install copies
        wiki body into the live skills dir, flips entity status to
        `installed`, records (slug, 'skill') in the manifest load list."""
        from ctx.adapters.claude_code.install.skill_install import install_skill

        result = install_skill(
            "python-patterns",
            wiki_dir=e2e_world["wiki"],
            skills_dir=e2e_world["skills"],
        )

        assert result.status == "installed"
        assert (e2e_world["skills"] / "python-patterns" / "SKILL.md").is_file()

        loaded = self._manifest_pairs(e2e_world["manifest"])
        assert ("python-patterns", "skill") in loaded

        # The entity card's status field must have been updated.
        entity_text = (
            e2e_world["wiki"] / "entities" / "skills" / "python-patterns.md"
        ).read_text(encoding="utf-8")
        assert "status: installed" in entity_text

    def test_install_agent_writes_body_and_records_manifest_distinct_from_skill(
        self, e2e_world,
    ):
        """Installing an agent after a skill must leave both typed entries
        in the manifest load list -- the manifest is keyed on the
        (slug, entity_type) tuple, not slug alone. Pins the P1.3
        regression.

        Note: the fixture has no slug shared between a skill and an agent,
        so this checks that typed entries coexist rather than exercising
        a true same-slug collision."""
        from ctx.adapters.claude_code.install.skill_install import install_skill
        from ctx.adapters.claude_code.install.agent_install import install_agent

        install_skill(
            "python-patterns",
            wiki_dir=e2e_world["wiki"],
            skills_dir=e2e_world["skills"],
        )
        install_agent(
            "code-reviewer",
            wiki_dir=e2e_world["wiki"],
            agents_dir=e2e_world["agents"],
        )

        pairs = self._manifest_pairs(e2e_world["manifest"])
        assert ("python-patterns", "skill") in pairs
        assert ("code-reviewer", "agent") in pairs

    def test_install_and_uninstall_mcp_records_manifest(
        self, e2e_world, monkeypatch,
    ):
        """MCP approval and removal reconcile typed manifest entries."""
        from ctx.adapters.claude_code.install import mcp_install

        calls = self._stub_claude_mcp(monkeypatch)

        install_result = mcp_install.install_mcp(
            "anthropic-python-sdk",
            wiki_dir=e2e_world["wiki"],
            command="npx -y @anthropic-ai/sdk-mcp",
            auto=True,
        )
        assert install_result.status == "installed"
        # The command string must reach the wrapper split into argv tokens.
        assert calls[0] == [
            "add",
            "anthropic-python-sdk",
            "--",
            "npx",
            "-y",
            "@anthropic-ai/sdk-mcp",
        ]

        loaded = self._manifest_pairs(e2e_world["manifest"])
        assert ("anthropic-python-sdk", "mcp-server") in loaded

        entity = (
            e2e_world["wiki"]
            / "entities"
            / "mcp-servers"
            / "a"
            / "anthropic-python-sdk.md"
        )
        assert "status: installed" in entity.read_text(encoding="utf-8")

        uninstall_result = mcp_install.uninstall_mcp(
            "anthropic-python-sdk",
            wiki_dir=e2e_world["wiki"],
        )
        assert uninstall_result.status == "uninstalled"
        assert calls[-1] == ["remove", "anthropic-python-sdk"]

        # After removal the entry moves from "load" to "unload" and the
        # entity card returns to its cataloged status.
        loaded = self._manifest_pairs(e2e_world["manifest"])
        unloaded = self._manifest_pairs(e2e_world["manifest"], "unload")
        assert ("anthropic-python-sdk", "mcp-server") not in loaded
        assert ("anthropic-python-sdk", "mcp-server") in unloaded
        assert "status: cataloged" in entity.read_text(encoding="utf-8")

    def test_full_user_journey_signals_to_install(
        self, e2e_world, capsys, monkeypatch,
    ):
        """The A-Z.

        A user who:
        1. triggers three tool-use signals (two file reads and a
           pip install)
        2. sees the categorised bundle surfaced via the hook
        3. approves a skill + an agent + an MCP from the bundle

        must end the journey with:
        - pending-skills.json written
        - skill installed at <skills dir>/<slug>/SKILL.md
        - agent installed at <agents dir>/<slug>.md
        - MCP registered through the (stubbed) Claude MCP wrapper
        - manifest carrying all entries keyed on (slug, type)
        - entity status fields flipped to `installed` on all cards

        If any link in the chain regresses, this test fails and CI
        blocks the PR. This is the load-bearing integration contract.
        """
        from ctx.adapters.claude_code.hooks import bundle_orchestrator as _bo
        from ctx.adapters.claude_code.install.skill_install import install_skill
        from ctx.adapters.claude_code.install.agent_install import install_agent
        from ctx.adapters.claude_code.install import mcp_install

        mcp_calls = self._stub_claude_mcp(monkeypatch)

        # Steps 1-2 of the chain: signals accumulate and the pending file
        # is written.
        self._fire_cumulative_threshold(monkeypatch)
        assert e2e_world["pending_skills"].exists(), (
            "alive loop broken: cumulative threshold didn't fire"
        )

        # Step 3: the orchestrator renders the bundle for the user.
        _bo.main()
        out = capsys.readouterr().out.strip()
        assert out, "bundle_orchestrator emitted nothing"
        msg = json.loads(out)["hookSpecificOutput"]["additionalContext"]
        assert "python-patterns" in msg or "code-reviewer" in msg, (
            f"bundle missing expected python entities, got: {msg}"
        )

        # Step 4: the user approves one entity of each type.
        skill_result = install_skill(
            "python-patterns",
            wiki_dir=e2e_world["wiki"],
            skills_dir=e2e_world["skills"],
        )
        agent_result = install_agent(
            "code-reviewer",
            wiki_dir=e2e_world["wiki"],
            agents_dir=e2e_world["agents"],
        )
        mcp_result = mcp_install.install_mcp(
            "anthropic-python-sdk",
            wiki_dir=e2e_world["wiki"],
            command="npx -y @anthropic-ai/sdk-mcp",
            auto=True,
        )
        assert skill_result.status == "installed"
        assert agent_result.status == "installed"
        assert mcp_result.status == "installed"
        assert mcp_calls and mcp_calls[0][0] == "add"

        # Installed bodies exist in the live directories.
        assert (e2e_world["skills"] / "python-patterns" / "SKILL.md").is_file()
        assert (e2e_world["agents"] / "code-reviewer.md").is_file()

        # Step 5: the manifest carries all three typed entries.
        loaded_pairs = self._manifest_pairs(e2e_world["manifest"])
        assert {
            ("python-patterns", "skill"),
            ("code-reviewer", "agent"),
            ("anthropic-python-sdk", "mcp-server"),
        } <= loaded_pairs

        for entity_rel in (
            "entities/skills/python-patterns.md",
            "entities/agents/code-reviewer.md",
            "entities/mcp-servers/a/anthropic-python-sdk.md",
        ):
            text = (e2e_world["wiki"] / entity_rel).read_text(encoding="utf-8")
            assert "status: installed" in text, (
                f"status not flipped on {entity_rel}"
            )
|
|