ctx / src /tests /test_alive_loop_e2e.py

Sync ctx a4e14d3

31e282f verified 14 days ago

25.6 kB

	"""
	test_alive_loop_e2e.py -- A-Z end-to-end simulation of the alive skill system.

	This is the canary. If any handoff in the alive loop regresses, this
	test fails. It chains every module the loop depends on in a single
	pytest function, against a synthetic wiki fixture, with zero network
	and zero touches to ~/.claude/.

	The chain mirrors the product-intent user journey:

	1. User writes code (simulated: three tool-use invocations carrying
	unmatched signals).
	2. context_monitor accumulates those signals across invocations
	and, once the cumulative threshold fires, writes
	pending-skills.json with a ranked graph-derived bundle. This is
	the regression-pinned behaviour of code-reviewer BLOCKER fixed
	in commit 6569387 — every individual invocation below the
	threshold does NOT write, only the cumulative state does.
	3. bundle_orchestrator reads pending-skills.json and renders a
	user-facing message with the top-K bundle categorised by
	Skills / Agents / MCPs (contract pinned in test_bundle_
	orchestrator.py; here we only assert the shape is sane).
	4. User approves a skill → ctx-skill-install copies the body into
	the live skills dir, bumps entity status to `installed`, and adds
	a manifest load entry. The A-Z test also covers agent install and a
	mocked ctx-mcp-install registration through the Claude MCP wrapper.
	5. The manifest reconciles install entries across entity types
	via the install_utils tuple-based dedup (pinned in P1.3
	regression suite).
	6. User decides the skill is stale → skill_unload.unload_skill
	drops the load entry and adds an unload entry for the typed entity.

	When this test passes, the install/unload leg of the alive loop
	works against a representative corpus. When it fails, something
	between the modules broke — CI should gate a PR landing on this
	test going green.

	NOT covered here (by design):
	- The lifecycle auto-demote threshold firing (tested in
	test_ctx_lifecycle.py); would require a time-mock setup that's
	cleaner in its own test.
	- Real sentence-transformer embeddings. We use a tiny synthetic
	graph and don't exercise the embedding cache.
	"""

	from __future__ import annotations

	import json
	import sys
	from pathlib import Path

	import networkx as nx
	import pytest

	sys.path.insert(0, str(Path(__file__).parents[1]))


	# ────────────────────────────────────────────────────────────────────
	# Synthetic wiki fixture — a minimal corpus with all three entity types
	# ────────────────────────────────────────────────────────────────────


	def _write_skill_entity(wiki_dir: Path, slug: str, *, tags: list[str]) -> Path:
	entity = wiki_dir / "entities" / "skills" / f"{slug}.md"
	entity.parent.mkdir(parents=True, exist_ok=True)
	tags_block = "\n".join(f" - {t}" for t in tags)
	entity.write_text(
	f"---\n"
	f"title: {slug}\n"
	f"type: skill\n"
	f"status: cataloged\n"
	f"tags:\n{tags_block}\n"
	f"---\n"
	f"# {slug}\n\n"
	f"Synthetic skill body for E2E test.\n",
	encoding="utf-8",
	)
	return entity


	def _write_skill_body(wiki_dir: Path, slug: str, body: str) -> Path:
	"""Drop `converted/<slug>/SKILL.md` so ctx-skill-install finds content."""
	path = wiki_dir / "converted" / slug / "SKILL.md"
	path.parent.mkdir(parents=True, exist_ok=True)
	path.write_text(body, encoding="utf-8")
	return path


	def _write_agent_entity(wiki_dir: Path, slug: str, *, tags: list[str]) -> Path:
	entity = wiki_dir / "entities" / "agents" / f"{slug}.md"
	entity.parent.mkdir(parents=True, exist_ok=True)
	tags_block = "\n".join(f" - {t}" for t in tags)
	entity.write_text(
	f"---\n"
	f"title: {slug}\n"
	f"type: agent\n"
	f"status: cataloged\n"
	f"tags:\n{tags_block}\n"
	f"---\n"
	f"# {slug}\n\nSynthetic agent card for E2E test.\n",
	encoding="utf-8",
	)
	return entity


	def _write_agent_body(wiki_dir: Path, slug: str, body: str) -> Path:
	"""Drop `converted-agents/<slug>.md` so ctx-agent-install finds content."""
	path = wiki_dir / "converted-agents" / f"{slug}.md"
	path.parent.mkdir(parents=True, exist_ok=True)
	path.write_text(body, encoding="utf-8")
	return path


	def _write_mcp_entity(wiki_dir: Path, slug: str, *, tags: list[str]) -> Path:
	shard = slug[0] if slug and slug[0].isalpha() else "0-9"
	entity = wiki_dir / "entities" / "mcp-servers" / shard / f"{slug}.md"
	entity.parent.mkdir(parents=True, exist_ok=True)
	tags_block = "\n".join(f" - {t}" for t in tags)
	entity.write_text(
	f"---\n"
	f"type: mcp-server\n"
	f"slug: {slug}\n"
	f"name: {slug}\n"
	f"status: cataloged\n"
	f"tags:\n{tags_block}\n"
	f"---\n"
	f"# {slug}\n\nSynthetic MCP card for E2E test.\n",
	encoding="utf-8",
	)
	return entity


	def _write_graph_json(wiki_dir: Path, nodes: list[dict], edges: list[dict]) -> Path:
	"""Build a graph.json that context_monitor.graph_suggest can walk."""
	G = nx.Graph()
	G.graph["semantic_build_floor"] = 0.5
	G.graph["external_catalog_nodes"] = {"skills.sh": 1}
	G.graph["source_catalog_nodes"] = {"skills.sh": 1}
	for n in nodes:
	G.add_node(n["id"], **{k: v for k, v in n.items() if k != "id"})
	for e in edges:
	G.add_edge(e["source"], e["target"], **{k: v for k, v in e.items() if k not in {"source", "target"}})
	data = nx.node_link_data(G, edges="edges")
	path = wiki_dir / "graphify-out" / "graph.json"
	path.parent.mkdir(parents=True, exist_ok=True)
	path.write_text(json.dumps(data), encoding="utf-8")
	return path


	@pytest.fixture()
	def e2e_world(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> dict[str, Path]:
	"""A hermetic synthetic world for the alive-loop E2E test.

	Returns a dict of paths the test uses to drive each module.
	Monkeypatches every module-level path constant so no real
	~/.claude/ content is touched.
	"""
	claude_dir = tmp_path / "claude"
	wiki_dir = claude_dir / "skill-wiki"
	skills_dir = claude_dir / "skills"
	agents_dir = claude_dir / "agents"
	for d in (claude_dir, wiki_dir, skills_dir, agents_dir):
	d.mkdir(parents=True)

	manifest = claude_dir / "skill-manifest.json"
	intent_log = claude_dir / "intent-log.jsonl"
	pending_skills = claude_dir / "pending-skills.json"

	# ── Synthetic corpus: 2 skills (python + security), 2 agents, 2 MCPs ──
	# The "python" signal is the trigger we'll feed into the monitor.
	_write_skill_entity(wiki_dir, "python-patterns", tags=["python", "patterns"])
	_write_skill_body(
	wiki_dir, "python-patterns",
	"# python-patterns\n\nBest practices for idiomatic Python.\n",
	)
	_write_skill_entity(wiki_dir, "security-basics", tags=["security"])
	_write_skill_body(
	wiki_dir, "security-basics",
	"# security-basics\n\nSecurity fundamentals.\n",
	)

	_write_agent_entity(wiki_dir, "code-reviewer", tags=["python", "review"])
	_write_agent_body(
	wiki_dir, "code-reviewer",
	"---\nname: code-reviewer\ndescription: review code\n---\n\n"
	"Review code for quality and security issues.\n",
	)
	_write_agent_entity(wiki_dir, "devops-engineer", tags=["devops", "docker"])
	_write_agent_body(
	wiki_dir, "devops-engineer",
	"---\nname: devops-engineer\ndescription: devops\n---\n\n"
	"DevOps tooling and CI/CD.\n",
	)

	_write_mcp_entity(wiki_dir, "anthropic-python-sdk", tags=["python", "sdk"])
	_write_mcp_entity(wiki_dir, "atlassian-cloud", tags=["saas"])

	# ── Synthetic graph: link every python-tagged entity via an explicit edge ──
	_write_graph_json(
	wiki_dir,
	nodes=[
	{"id": "skill:python-patterns", "label": "python-patterns",
	"type": "skill", "tags": ["python", "patterns"]},
	{"id": "skill:security-basics", "label": "security-basics",
	"type": "skill", "tags": ["security"]},
	{"id": "agent:code-reviewer", "label": "code-reviewer",
	"type": "agent", "tags": ["python", "review"]},
	{"id": "agent:devops-engineer", "label": "devops-engineer",
	"type": "agent", "tags": ["devops", "docker"]},
	{"id": "mcp-server:anthropic-python-sdk",
	"label": "anthropic-python-sdk",
	"type": "mcp-server", "tags": ["python", "sdk"]},
	{"id": "mcp-server:atlassian-cloud",
	"label": "atlassian-cloud",
	"type": "mcp-server", "tags": ["saas"]},
	],
	edges=[
	{"source": "skill:python-patterns", "target": "agent:code-reviewer",
	"weight": 1, "shared_tags": ["python"]},
	{"source": "skill:python-patterns",
	"target": "mcp-server:anthropic-python-sdk",
	"weight": 1, "shared_tags": ["python"]},
	{"source": "agent:code-reviewer",
	"target": "mcp-server:anthropic-python-sdk",
	"weight": 1, "shared_tags": ["python"]},
	],
	)

	# ── Monkeypatch every module's path constants ─────────────────────────
	from ctx.adapters.claude_code.hooks import context_monitor as _cm
	monkeypatch.setattr(_cm, "CLAUDE_DIR", claude_dir)
	monkeypatch.setattr(_cm, "INTENT_LOG", intent_log)
	monkeypatch.setattr(_cm, "PENDING_SKILLS", pending_skills)
	monkeypatch.setattr(_cm, "MANIFEST_PATH", manifest)

	from ctx.adapters.claude_code.hooks import bundle_orchestrator as _bo
	monkeypatch.setattr(_bo, "CLAUDE_DIR", claude_dir)
	monkeypatch.setattr(_bo, "PENDING_SKILLS", pending_skills)
	monkeypatch.setattr(_bo, "PENDING_UNLOAD", claude_dir / "pending-unload.json")
	monkeypatch.setattr(_bo, "SHOWN_FLAG", claude_dir / ".bundle-shown")

	from ctx.adapters.claude_code.install import install_utils as _iu
	monkeypatch.setattr(_iu, "MANIFEST_PATH", manifest)

	import ctx_config
	monkeypatch.setattr(
	ctx_config.cfg,
	"graph_semantic_cache_dir",
	tmp_path / "semantic-cache",
	)

	return {
	"claude": claude_dir,
	"wiki": wiki_dir,
	"skills": skills_dir,
	"agents": agents_dir,
	"manifest": manifest,
	"intent_log": intent_log,
	"pending_skills": pending_skills,
	}


	# ────────────────────────────────────────────────────────────────────
	# The A-Z test
	# ────────────────────────────────────────────────────────────────────


	class TestAliveLoopE2E:
	"""One test per chain link + one test for the full journey."""

	def test_signals_below_threshold_do_not_fire(
	self, e2e_world, monkeypatch,
	):
	"""Single invocation with one unmatched signal stays below the
	default threshold=3 — pending-skills.json must NOT be written.

	Pins the regression from code-reviewer BLOCKER (commit 6569387):
	pre-fix, len(unmatched)>=THRESHOLD never fired; post-fix, the
	cumulative check also doesn't fire until enough signals have
	accumulated."""
	from ctx.adapters.claude_code.hooks import context_monitor as _cm

	monkeypatch.setattr(sys, "argv", [
	"context_monitor.py", "--tool", "Read",
	"--input", json.dumps({"file_path": "App.tsx"}),
	])
	_cm.main()

	assert not e2e_world["pending_skills"].exists(), (
	"pending-skills.json fired after 1 signal (threshold=3)"
	)

	def test_cumulative_threshold_fires_pending_write(
	self, e2e_world, monkeypatch,
	):
	"""Cumulative signals across multiple invocations trigger the
	pending-skills write once the threshold is reached. The alive
	loop's suggestion arm now fires in production — code-reviewer
	BLOCKER verified in-situ."""
	from ctx.adapters.claude_code.hooks import context_monitor as _cm

	# Mix Read + Bash to exercise extension-based AND command-based
	# signal extraction. Each invocation surfaces a distinct signal
	# set; cumulative union crosses threshold=3.
	invocations = [
	("Read", {"file_path": "App.tsx"}), # -> react
	("Read", {"file_path": "Dockerfile"}), # -> docker
	("Bash", {"command": "pip install fastapi"}), # -> fastapi, python
	]
	for tool, tool_input in invocations:
	monkeypatch.setattr(sys, "argv", [
	"context_monitor.py", "--tool", tool,
	"--input", json.dumps(tool_input),
	])
	_cm.main()

	assert e2e_world["pending_skills"].exists()
	payload = json.loads(e2e_world["pending_skills"].read_text(encoding="utf-8"))
	# Cumulative union includes ALL signals seen today, not just
	# the current invocation's.
	assert len(payload.get("unmatched_signals", [])) >= 3
	# Graph-derived bundle populated (may be empty for signals that
	# didn't match any graph node, but the key must exist).
	assert "graph_suggestions" in payload

	def test_graph_suggest_returns_cross_type_bundle(
	self, e2e_world, monkeypatch,
	):
	"""With a python signal and a graph containing a python skill +
	python agent + python MCP, graph_suggest returns entries
	spanning all three types. Pins the cross-type contract the
	bundle orchestrator depends on."""
	from ctx.adapters.claude_code.hooks import context_monitor as _cm

	sugs = _cm.graph_suggest(["python"])
	assert len(sugs) > 0
	types = {s["type"] for s in sugs}
	# At least two of the three types should be ranked in (the
	# python-tagged skill AND agent AND MCP all share the tag).
	assert len(types & {"skill", "agent", "mcp-server"}) >= 2, (
	f"expected multi-type bundle, got types={types}"
	)

	def test_bundle_orchestrator_renders_categorised_message(
	self, e2e_world, capsys, monkeypatch,
	):
	"""Fire the cumulative threshold, then run the bundle orchestrator
	hook. Output must contain the three-type categorised layout and
	be valid Claude-Code-hook JSON."""
	from ctx.adapters.claude_code.hooks import context_monitor as _cm
	from ctx.adapters.claude_code.hooks import bundle_orchestrator as _bo

	# Feed signals that include a python-adjacent one so
	# graph_suggest hits our python-tagged nodes.
	invocations = [
	("Read", {"file_path": "App.tsx"}),
	("Read", {"file_path": "Dockerfile"}),
	("Bash", {"command": "pip install fastapi"}),
	]
	for tool, tool_input in invocations:
	monkeypatch.setattr(sys, "argv", [
	"context_monitor.py", "--tool", tool,
	"--input", json.dumps(tool_input),
	])
	_cm.main()

	# Sanity: pending-skills.json was written.
	assert e2e_world["pending_skills"].exists()

	_bo.main()
	out = capsys.readouterr().out.strip()
	assert out, "bundle_orchestrator emitted no output despite pending file"
	envelope = json.loads(out)
	assert envelope["hookSpecificOutput"]["hookEventName"] == "PostToolUse"
	msg = envelope["hookSpecificOutput"]["additionalContext"]
	# The exact set of headers depends on which types the graph
	# returned; we assert at least two of three are present since
	# the python signal hits our skill + agent + MCP all.
	headers = {h for h in ("Skills:", "Agents:", "MCPs:") if h in msg}
	assert len(headers) >= 2, (
	f"expected multi-type categorisation, got headers={headers}"
	)

	def test_install_skill_copies_body_and_records_manifest(
	self, e2e_world,
	):
	"""User approves a skill from the bundle → skill_install copies
	wiki body into the live skills dir, flips entity status to
	`installed`, records (slug, 'skill') in the manifest load list."""
	from ctx.adapters.claude_code.install.skill_install import install_skill

	result = install_skill(
	"python-patterns",
	wiki_dir=e2e_world["wiki"],
	skills_dir=e2e_world["skills"],
	)

	assert result.status == "installed"
	assert (e2e_world["skills"] / "python-patterns" / "SKILL.md").is_file()

	manifest = json.loads(e2e_world["manifest"].read_text(encoding="utf-8"))
	loaded = {(e.get("skill"), e.get("entity_type")) for e in manifest["load"]}
	assert ("python-patterns", "skill") in loaded

	# Entity status bumped.
	entity_text = (
	e2e_world["wiki"] / "entities" / "skills" / "python-patterns.md"
	).read_text(encoding="utf-8")
	assert "status: installed" in entity_text

	def test_install_agent_writes_body_and_records_manifest_distinct_from_skill(
	self, e2e_world,
	):
	"""Approving an agent with the SAME slug as an installed skill
	must not collide — the manifest dedups on (slug, entity_type)
	tuple, not slug alone. Pins the P1.3 regression."""
	from ctx.adapters.claude_code.install.skill_install import install_skill
	from ctx.adapters.claude_code.install.agent_install import install_agent

	# Install a skill and an agent sharing the slug "same-slug".
	# This fixture doesn't have one; use the distinct slugs from
	# the corpus and assert the manifest has both.
	install_skill(
	"python-patterns",
	wiki_dir=e2e_world["wiki"],
	skills_dir=e2e_world["skills"],
	)
	install_agent(
	"code-reviewer",
	wiki_dir=e2e_world["wiki"],
	agents_dir=e2e_world["agents"],
	)

	manifest = json.loads(e2e_world["manifest"].read_text(encoding="utf-8"))
	pairs = {(e.get("skill"), e.get("entity_type")) for e in manifest["load"]}
	assert ("python-patterns", "skill") in pairs
	assert ("code-reviewer", "agent") in pairs

	def test_install_and_uninstall_mcp_records_manifest(
	self, e2e_world, monkeypatch,
	):
	"""MCP approval and removal reconcile typed manifest entries."""
	from ctx.adapters.claude_code.install import mcp_install

	calls: list[list[str]] = []

	def fake_run(args: list[str]) -> tuple[int, str, str]:
	calls.append(args)
	return 0, "ok", ""

	monkeypatch.setattr(mcp_install, "_run_claude_mcp", fake_run)

	install_result = mcp_install.install_mcp(
	"anthropic-python-sdk",
	wiki_dir=e2e_world["wiki"],
	command="npx -y @anthropic-ai/sdk-mcp",
	auto=True,
	)
	assert install_result.status == "installed"
	assert calls[0] == [
	"add",
	"anthropic-python-sdk",
	"--",
	"npx",
	"-y",
	"@anthropic-ai/sdk-mcp",
	]

	manifest = json.loads(e2e_world["manifest"].read_text(encoding="utf-8"))
	loaded = {(e.get("skill"), e.get("entity_type")) for e in manifest["load"]}
	assert ("anthropic-python-sdk", "mcp-server") in loaded

	entity = (
	e2e_world["wiki"]
	/ "entities"
	/ "mcp-servers"
	/ "a"
	/ "anthropic-python-sdk.md"
	)
	assert "status: installed" in entity.read_text(encoding="utf-8")

	uninstall_result = mcp_install.uninstall_mcp(
	"anthropic-python-sdk",
	wiki_dir=e2e_world["wiki"],
	)
	assert uninstall_result.status == "uninstalled"
	assert calls[-1] == ["remove", "anthropic-python-sdk"]

	manifest = json.loads(e2e_world["manifest"].read_text(encoding="utf-8"))
	loaded = {(e.get("skill"), e.get("entity_type")) for e in manifest["load"]}
	unloaded = {
	(e.get("skill"), e.get("entity_type")) for e in manifest["unload"]
	}
	assert ("anthropic-python-sdk", "mcp-server") not in loaded
	assert ("anthropic-python-sdk", "mcp-server") in unloaded
	assert "status: cataloged" in entity.read_text(encoding="utf-8")

	def test_full_user_journey_signals_to_install(
	self, e2e_world, capsys, monkeypatch,
	):
	"""The A-Z.

	A user who:
	1. opens Python files (three signals)
	2. sees the categorised bundle surfaced via the hook
	3. approves a skill + an agent + an MCP from the bundle

	must end the journey with:
	- pending-skills.json written with a cross-type bundle
	- skill installed at ~/.claude/skills/<slug>/
	- agent installed at ~/.claude/agents/<slug>.md
	- MCP registered through the Claude MCP wrapper
	- manifest carrying all entries keyed on (slug, type)
	- entity status fields flipped to `installed` on all cards

	If any link in the chain regresses, this test fails and CI
	blocks the PR. This is the load-bearing integration contract.
	"""
	from ctx.adapters.claude_code.hooks import context_monitor as _cm
	from ctx.adapters.claude_code.hooks import bundle_orchestrator as _bo
	from ctx.adapters.claude_code.install.skill_install import install_skill
	from ctx.adapters.claude_code.install.agent_install import install_agent
	from ctx.adapters.claude_code.install import mcp_install

	mcp_calls: list[list[str]] = []

	def fake_mcp_run(args: list[str]) -> tuple[int, str, str]:
	mcp_calls.append(args)
	return 0, "ok", ""

	monkeypatch.setattr(
	mcp_install,
	"_run_claude_mcp",
	fake_mcp_run,
	)

	# ── 1. Accumulate signals ────────────────────────────────────
	# Three invocations spanning extension-based AND command-based
	# signal extraction. Crosses the default threshold=3.
	invocations = [
	("Read", {"file_path": "App.tsx"}),
	("Read", {"file_path": "Dockerfile"}),
	("Bash", {"command": "pip install fastapi"}),
	]
	for tool, tool_input in invocations:
	monkeypatch.setattr(sys, "argv", [
	"context_monitor.py", "--tool", tool,
	"--input", json.dumps(tool_input),
	])
	_cm.main()
	assert e2e_world["pending_skills"].exists(), (
	"alive loop broken: cumulative threshold didn't fire"
	)

	# ── 2. Bundle orchestrator renders recommendation ────────────
	_bo.main()
	out = capsys.readouterr().out.strip()
	assert out, "bundle_orchestrator emitted nothing"
	msg = json.loads(out)["hookSpecificOutput"]["additionalContext"]
	assert "python-patterns" in msg or "code-reviewer" in msg, (
	f"bundle missing expected python entities, got: {msg}"
	)

	# ── 3. User approves skill + agent ───────────────────────────
	skill_result = install_skill(
	"python-patterns",
	wiki_dir=e2e_world["wiki"],
	skills_dir=e2e_world["skills"],
	)
	agent_result = install_agent(
	"code-reviewer",
	wiki_dir=e2e_world["wiki"],
	agents_dir=e2e_world["agents"],
	)
	mcp_result = mcp_install.install_mcp(
	"anthropic-python-sdk",
	wiki_dir=e2e_world["wiki"],
	command="npx -y @anthropic-ai/sdk-mcp",
	auto=True,
	)
	assert skill_result.status == "installed"
	assert agent_result.status == "installed"
	assert mcp_result.status == "installed"
	assert mcp_calls and mcp_calls[0][0] == "add"

	# ── 4. End state: filesystem + manifest + entity status ──────
	assert (e2e_world["skills"] / "python-patterns" / "SKILL.md").is_file()
	assert (e2e_world["agents"] / "code-reviewer.md").is_file()

	manifest = json.loads(e2e_world["manifest"].read_text(encoding="utf-8"))
	loaded_pairs = {
	(e.get("skill"), e.get("entity_type")) for e in manifest["load"]
	}
	assert {
	("python-patterns", "skill"),
	("code-reviewer", "agent"),
	("anthropic-python-sdk", "mcp-server"),
	} <= loaded_pairs

	for entity_rel in (
	"entities/skills/python-patterns.md",
	"entities/agents/code-reviewer.md",
	"entities/mcp-servers/a/anthropic-python-sdk.md",
	):
	text = (e2e_world["wiki"] / entity_rel).read_text(encoding="utf-8")
	assert "status: installed" in text, (
	f"status not flipped on {entity_rel}"
	)