Spaces:

minhtudragon
/

headroom

Running

headroom / tests /test_memory /test_memory_bridge.py

tudragon154203

fix: route count_tokens to api.anthropic.com, not proxy base_url

0adb431 27 days ago

18.4 kB

	"""Tests for the Memory Bridge (markdown <-> Headroom bidirectional sync).

	Parser tests are pure functions (no backend needed).
	Bridge tests use a temp LocalBackend with a temporary database.

	Run with: pytest tests/test_memory/test_memory_bridge.py -v
	"""

	from __future__ import annotations

	import json
	import uuid

	import pytest

	from headroom.memory.bridge_config import BridgeConfig, MarkdownFormat
	from headroom.memory.bridge_parsers import (
	ParsedSection,
	detect_format,
	extract_entities_from_text,
	extract_relationships_from_section,
	parse_chatgpt_facts,
	parse_claude_code_memory,
	parse_generic_markdown,
	parse_markdown,
	)

	# Sample content for testing
	# Claude Code style sample: one H1 title followed by three H2 sections,
	# each holding two "- Name: description" bullets.
	CLAUDE_CODE_MEMORY = """\
	# Project Memory

	## Project Overview
	- Headroom: Context optimization layer for LLM applications
	- Repos: OSS at ~/claude-projects/headroom

	## Key Architecture
	- 186 Python files, 34 packages, 100K+ lines
	- 6 compression algorithms: SmartCrusher, CacheAligner, ContentRouter

	## Competitors
	- Direct: Compresr (YC W26), Token Company
	- Gateways: Portkey, Helicone, LiteLLM
	"""

	# ChatGPT style sample: a flat list of facts with no headings; the last
	# line carries a "- " bullet prefix, the other three do not.
	CHATGPT_FACTS = """\
	User prefers Python over JavaScript
	User works at Netflix
	User likes dark mode
	- User has a cat named Luna
	"""

	# Generic markdown sample: an H1, then two H2 sections; "Architecture"
	# mixes one plain prose line with two bullets, "TODO" has bullets only.
	GENERIC_MARKDOWN = """\
	# Notes

	## Architecture
	The system uses FastAPI for the proxy layer.
	- SQLite for storage
	- HNSW for vector search

	## TODO
	- Add caching layer
	- Improve error handling
	"""


	# =============================================================================
	# Parser Tests (pure functions, no backend)
	# =============================================================================


	class TestClaudeCodeParser:
	    """Checks for ``parse_claude_code_memory`` run on ``CLAUDE_CODE_MEMORY``."""

	    def test_parse_sections(self):
	        """The sample yields at least three sections tagged ``claude_code``."""
	        parsed = parse_claude_code_memory(CLAUDE_CODE_MEMORY)
	        # H1 + 3 H2 sections
	        assert len(parsed.sections) >= 3
	        assert parsed.format == "claude_code"

	    def test_heading_levels(self):
	        """Each ``##`` heading in the sample is reported with level 2."""
	        parsed = parse_claude_code_memory(CLAUDE_CODE_MEMORY)
	        headings = {s.heading: s.heading_level for s in parsed.sections if s.heading}
	        assert headings.get("Project Overview") == 2
	        assert headings.get("Key Architecture") == 2
	        assert headings.get("Competitors") == 2

	    def test_bullets_become_facts(self):
	        """The two bullets under "Project Overview" become two facts."""
	        parsed = parse_claude_code_memory(CLAUDE_CODE_MEMORY)
	        overview = next(s for s in parsed.sections if s.heading == "Project Overview")
	        assert len(overview.facts) == 2
	        assert any("Headroom" in f for f in overview.facts)
	        assert any("Repos" in f for f in overview.facts)

	    def test_bold_text_extracted_as_entities(self):
	        """The leading bullet names show up in the section's entities."""
	        # NOTE(review): the sample bullets as visible here carry no **bold**
	        # markers even though the test name mentions bold text -- confirm the
	        # fixture text matches the intent.
	        parsed = parse_claude_code_memory(CLAUDE_CODE_MEMORY)
	        overview = next(s for s in parsed.sections if s.heading == "Project Overview")
	        assert "Headroom" in overview.entities
	        assert "Repos" in overview.entities

	    def test_content_hash_computed(self):
	        """Every section with content carries a 64-character content hash."""
	        parsed = parse_claude_code_memory(CLAUDE_CODE_MEMORY)
	        for section in parsed.sections:
	            if section.content:
	                assert section.content_hash
	                assert len(section.content_hash) == 64  # SHA-256

	    def test_content_hash_deterministic(self):
	        """Parsing the same text twice gives identical per-section hashes."""
	        parsed1 = parse_claude_code_memory(CLAUDE_CODE_MEMORY)
	        parsed2 = parse_claude_code_memory(CLAUDE_CODE_MEMORY)
	        # zip() stops at the shorter input, so a run that produced fewer
	        # sections would otherwise go unnoticed; compare the counts first.
	        assert len(parsed1.sections) == len(parsed2.sections)
	        for s1, s2 in zip(parsed1.sections, parsed2.sections):
	            assert s1.content_hash == s2.content_hash

	    def test_file_hash_computed(self):
	        """The parse result carries a 64-character whole-file hash."""
	        parsed = parse_claude_code_memory(CLAUDE_CODE_MEMORY)
	        assert parsed.file_hash
	        assert len(parsed.file_hash) == 64


	class TestChatGPTParser:
	    """Checks for ``parse_chatgpt_facts`` on flat, heading-less fact lists."""

	    def test_parse_flat_facts(self):
	        """The four sample lines land in a single section as four facts."""
	        result = parse_chatgpt_facts(CHATGPT_FACTS)
	        assert result.format == "chatgpt"
	        assert len(result.sections) == 1
	        only_section = result.sections[0]
	        assert len(only_section.facts) == 4

	    def test_bullet_prefix_stripped(self):
	        """The "- " prefix on the last sample line is not part of the fact."""
	        result = parse_chatgpt_facts(CHATGPT_FACTS)
	        assert "User has a cat named Luna" in result.sections[0].facts

	    def test_empty_lines_skipped(self):
	        """Blank lines between and after facts do not produce facts."""
	        result = parse_chatgpt_facts("Fact 1\n\n\nFact 2\n\n")
	        fact_count = len(result.sections[0].facts)
	        assert fact_count == 2

	    def test_empty_content(self):
	        """An empty string produces no sections at all."""
	        result = parse_chatgpt_facts("")
	        section_count = len(result.sections)
	        assert section_count == 0


	class TestGenericParser:
	    """Checks for ``parse_generic_markdown`` run on ``GENERIC_MARKDOWN``."""

	    def test_parse_multi_level_headers(self):
	        """Both H2 headings of the sample appear among the parsed sections."""
	        result = parse_generic_markdown(GENERIC_MARKDOWN)
	        assert result.format == "generic"
	        named = [sec.heading for sec in result.sections if sec.heading]
	        for expected in ("Architecture", "TODO"):
	            assert expected in named

	    def test_non_bullet_lines_are_facts(self):
	        """Plain prose lines count as facts alongside the bullets."""
	        result = parse_generic_markdown(GENERIC_MARKDOWN)
	        architecture = next(
	            sec for sec in result.sections if sec.heading == "Architecture"
	        )
	        # "The system uses FastAPI..." and bullets should all be facts
	        fact_total = len(architecture.facts)
	        assert fact_total >= 3


	class TestFormatDetection:
	    """Checks for ``detect_format`` on each sample shape and on empty input."""

	    def test_detect_claude_code(self):
	        """The headed, bulleted sample is classified as ``claude_code``."""
	        detected = detect_format(CLAUDE_CODE_MEMORY)
	        assert detected == "claude_code"

	    def test_detect_chatgpt(self):
	        """The flat fact list is classified as ``chatgpt``."""
	        detected = detect_format(CHATGPT_FACTS)
	        assert detected == "chatgpt"

	    def test_detect_generic(self):
	        """Two long prose lines are accepted as either generic or chatgpt."""
	        prose = (
	            "Some long paragraph without headers or bullet points that goes on and on "
	            "describing things in great detail.\n"
	            "Another very long line that describes more things in this generic format."
	        )
	        accepted = ("generic", "chatgpt")
	        assert detect_format(prose) in accepted

	    def test_empty_content(self):
	        """An empty string is classified as ``generic``."""
	        detected = detect_format("")
	        assert detected == "generic"


	class TestAutoParser:
	    """Checks for ``parse_markdown``, with and without an explicit format."""

	    def test_auto_parses_claude_code(self):
	        """Without a format argument the Claude Code sample is recognised."""
	        result = parse_markdown(CLAUDE_CODE_MEMORY)
	        assert result.format == "claude_code"

	    def test_auto_parses_chatgpt(self):
	        """Without a format argument the ChatGPT sample is recognised."""
	        result = parse_markdown(CHATGPT_FACTS)
	        assert result.format == "chatgpt"

	    def test_force_format(self):
	        """Passing ``format="generic"`` is reflected in the result's format."""
	        result = parse_markdown(CLAUDE_CODE_MEMORY, format="generic")
	        assert result.format == "generic"


	class TestEntityExtraction:
	    """Checks for ``extract_entities_from_text`` on short sentences."""

	    def test_bold_text(self):
	        """Both technology names in the sentence are returned as entities."""
	        found = extract_entities_from_text("I use Python and FastAPI")
	        for name in ("Python", "FastAPI"):
	            assert name in found

	    def test_camel_case(self):
	        """CamelCase identifiers are returned as entities."""
	        found = extract_entities_from_text("Using SmartCrusher and CacheAligner")
	        for name in ("SmartCrusher", "CacheAligner"):
	            assert name in found

	    def test_no_false_positives_on_stop_words(self):
	        """A capitalised stop word at sentence start is not an entity."""
	        found = extract_entities_from_text("The system is very important and useful")
	        # "The" and other stop words should not appear
	        assert "The" not in found

	    def test_all_caps(self):
	        """An all-caps token is returned as an entity."""
	        found = extract_entities_from_text("Using HNSW and SQLite")
	        assert "HNSW" in found


	class TestRelationshipExtraction:
	    """Checks for ``extract_relationships_from_section`` on hand-built sections."""

	    def test_bold_colon_pattern(self):
	        """A "Name: description" fact yields an "is" relationship from Name."""
	        fact = "Headroom: Context optimization layer"
	        sample = ParsedSection(
	            heading="Test",
	            heading_level=2,
	            content="- Headroom: Context optimization layer",
	            facts=[fact],
	        )
	        found = extract_relationships_from_section(sample)
	        assert len(found) >= 1
	        first = found[0]
	        assert first["source"] == "Headroom"
	        assert first["relationship"] == "is"

	    def test_verb_patterns(self):
	        """A "X uses Y" fact yields at least one "uses" relationship."""
	        sentence = "Headroom uses SQLite for storage"
	        sample = ParsedSection(
	            heading="Test",
	            heading_level=2,
	            content=sentence,
	            facts=[sentence],
	        )
	        found = extract_relationships_from_section(sample)
	        matching = list(filter(lambda rel: rel["relationship"] == "uses", found))
	        assert len(matching) >= 1


	# =============================================================================
	# Bridge Tests (require backend)
	# =============================================================================


	@pytest.fixture
	def tmp_dir(tmp_path):
	    """Expose the ``tmp_path`` fixture's directory under the name ``tmp_dir``."""
	    scratch = tmp_path
	    return scratch


	@pytest.fixture
	def user_id():
	    """Return ``test_bridge_`` plus eight random hex characters from a UUID4."""
	    suffix = uuid.uuid4().hex[:8]
	    return f"test_bridge_{suffix}"


	@pytest.fixture
	def bridge_config(tmp_dir):
	    """Build a ``BridgeConfig`` whose sync-state file lives inside ``tmp_dir``."""
	    state_file = tmp_dir / "bridge_state.json"
	    return BridgeConfig(
	        user_id="test_user",
	        sync_state_path=state_file,
	        dedup_similarity_threshold=0.95,
	    )


	@pytest.fixture
	async def backend(tmp_dir):
	    """Yield an initialised ``LocalBackend`` on a database file in ``tmp_dir``.

	    The backend is closed once the consuming test has finished.
	    """
	    # NOTE(review): an async generator behind plain ``@pytest.fixture`` relies
	    # on the async plugin's configuration to be awaited -- confirm the
	    # project's pytest-asyncio mode.
	    from headroom.memory.backends.local import LocalBackend, LocalBackendConfig

	    db_file = tmp_dir / "test_memory.db"
	    store = LocalBackend(LocalBackendConfig(db_path=str(db_file)))
	    await store._ensure_initialized()
	    yield store
	    await store.close()


	@pytest.fixture
	def bridge(bridge_config, backend):
	    """Build a ``MemoryBridge`` from the ``bridge_config`` and ``backend`` fixtures."""
	    from headroom.memory.bridge import MemoryBridge

	    instance = MemoryBridge(bridge_config, backend)
	    return instance


	class TestMemoryBridgeImport:
	    """Markdown-to-backend import behaviour of ``MemoryBridge.import_from_markdown``."""

	    @pytest.mark.asyncio
	    async def test_import_claude_code_memory(self, bridge, tmp_dir, backend):
	        """Import a MEMORY.md file and verify memories are stored."""
	        md_path = tmp_dir / "MEMORY.md"
	        md_path.write_text(CLAUDE_CODE_MEMORY, encoding="utf-8")

	        stats = await bridge.import_from_markdown(paths=[md_path], user_id="test_user")

	        assert stats.files_processed == 1
	        assert stats.sections_imported > 0
	        assert stats.total_facts > 0

	        # Verify memories exist in backend
	        memories = await backend.get_user_memories("test_user", limit=100)
	        assert len(memories) > 0

	    @pytest.mark.asyncio
	    async def test_import_skips_unchanged_file(self, bridge, tmp_dir):
	        """Second import of same file should skip (hash unchanged)."""
	        md_path = tmp_dir / "MEMORY.md"
	        md_path.write_text(CLAUDE_CODE_MEMORY, encoding="utf-8")

	        stats1 = await bridge.import_from_markdown(paths=[md_path], user_id="test_user")
	        assert stats1.sections_imported > 0

	        stats2 = await bridge.import_from_markdown(paths=[md_path], user_id="test_user")
	        assert stats2.files_skipped_unchanged == 1
	        assert stats2.sections_imported == 0

	    @pytest.mark.asyncio
	    async def test_import_detects_changes(self, bridge, tmp_dir):
	        """Modified file should re-import changed sections."""
	        md_path = tmp_dir / "MEMORY.md"
	        md_path.write_text(CLAUDE_CODE_MEMORY, encoding="utf-8")

	        await bridge.import_from_markdown(paths=[md_path], user_id="test_user")

	        # Modify file
	        modified = CLAUDE_CODE_MEMORY + "\n## New Section\n- Brand new fact\n"
	        md_path.write_text(modified, encoding="utf-8")

	        stats = await bridge.import_from_markdown(paths=[md_path], user_id="test_user")
	        assert stats.files_processed == 1
	        assert stats.sections_imported >= 1  # At least the new section

	    @pytest.mark.asyncio
	    async def test_import_force(self, bridge, tmp_dir):
	        """Force import should re-import even if unchanged."""
	        md_path = tmp_dir / "MEMORY.md"
	        md_path.write_text(CLAUDE_CODE_MEMORY, encoding="utf-8")

	        await bridge.import_from_markdown(paths=[md_path], user_id="test_user")

	        stats = await bridge.import_from_markdown(paths=[md_path], user_id="test_user", force=True)
	        # Force should process the file, though sections may be deduped by semantic search
	        assert stats.files_processed == 1

	    @pytest.mark.asyncio
	    async def test_import_chatgpt_facts(self, bridge, tmp_dir, backend):
	        """Import ChatGPT-style facts."""
	        md_path = tmp_dir / "chatgpt.txt"
	        md_path.write_text(CHATGPT_FACTS, encoding="utf-8")

	        # The format is set on the bridge's config rather than passed per call.
	        bridge._config.md_format = MarkdownFormat.CHATGPT
	        stats = await bridge.import_from_markdown(paths=[md_path], user_id="test_user")
	        assert stats.sections_imported > 0

	    @pytest.mark.asyncio
	    async def test_import_missing_file(self, bridge, tmp_dir):
	        """Missing file should be skipped gracefully."""
	        # tmp_dir is pytest's tmp_path, already a pathlib.Path, so the joined
	        # path needs no extra Path() wrapper.
	        stats = await bridge.import_from_markdown(
	            paths=[tmp_dir / "nonexistent.md"], user_id="test_user"
	        )
	        assert stats.files_processed == 0

	    @pytest.mark.asyncio
	    async def test_metadata_preserved(self, bridge, tmp_dir, backend):
	        """Imported memories should have bridge metadata."""
	        md_path = tmp_dir / "MEMORY.md"
	        md_path.write_text(CLAUDE_CODE_MEMORY, encoding="utf-8")

	        await bridge.import_from_markdown(paths=[md_path], user_id="test_user")

	        memories = await backend.get_user_memories("test_user", limit=100)
	        # Guard against a vacuous pass: with no memories the loop below would
	        # never execute a single assertion.
	        assert len(memories) > 0
	        for memory in memories:
	            metadata = memory.metadata or {}
	            assert metadata.get("source") == "memory_bridge"
	            assert "source_file" in metadata


	class TestMemoryBridgeExport:
	    """Backend-to-markdown export behaviour of ``MemoryBridge.export_to_markdown``."""

	    @pytest.mark.asyncio
	    async def test_export_claude_code_style(self, bridge, tmp_dir, backend):
	        """Two memories with section headings export as headed markdown."""
	        # Seed the backend: (content, importance, section heading)
	        seeds = (
	            ("Headroom is a context optimization layer", 0.8, "Overview"),
	            ("Uses SQLite for storage", 0.7, "Architecture"),
	        )
	        for text, weight, heading in seeds:
	            await backend.save_memory(
	                content=text,
	                user_id="test_user",
	                importance=weight,
	                metadata={"section_heading": heading},
	            )

	        target = tmp_dir / "export.md"
	        rendered = await bridge.export_to_markdown(
	            path=target,
	            user_id="test_user",
	            format=MarkdownFormat.CLAUDE_CODE,
	        )

	        for fragment in ("# Memory", "## Overview", "## Architecture", "Headroom"):
	            assert fragment in rendered
	        assert target.exists()

	    @pytest.mark.asyncio
	    async def test_export_chatgpt_style(self, bridge, backend):
	        """The ChatGPT format exports the fact text without ``##`` headings."""
	        await backend.save_memory(
	            content="User prefers Python",
	            user_id="test_user",
	            importance=0.7,
	        )

	        rendered = await bridge.export_to_markdown(
	            user_id="test_user",
	            format=MarkdownFormat.CHATGPT,
	        )

	        assert "User prefers Python" in rendered
	        # Should NOT have headers
	        assert "## " not in rendered

	    @pytest.mark.asyncio
	    async def test_export_empty(self, bridge):
	        """Exporting for a user without memories yields a "No memories" text."""
	        rendered = await bridge.export_to_markdown(user_id="nonexistent_user")
	        assert "No memories" in rendered


	class TestMemoryBridgeSync:
	    """Two-way behaviour of ``MemoryBridge.sync`` against a single markdown file."""

	    @pytest.mark.asyncio
	    async def test_sync_imports_and_exports(self, bridge, tmp_dir, backend):
	        """First sync imports the file; a later sync exports a new organic memory."""
	        memory_file = tmp_dir / "MEMORY.md"
	        memory_file.write_text("## Facts\n- User likes Python\n", encoding="utf-8")
	        bridge._config.md_paths = [memory_file]

	        # Round one: the file's content is pulled into the backend
	        first_run = await bridge.sync(user_id="test_user")
	        assert first_run.import_stats.sections_imported > 0

	        # Store a memory directly in the backend (empty metadata = organic)
	        await backend.save_memory(
	            content="User also likes Rust",
	            user_id="test_user",
	            importance=0.7,
	            metadata={},
	        )

	        # Round two: the organic memory should be written out
	        second_run = await bridge.sync(user_id="test_user")
	        assert second_run.memories_exported >= 1

	        # The markdown file must now mention the new memory
	        assert "Rust" in memory_file.read_text(encoding="utf-8")

	    @pytest.mark.asyncio
	    async def test_source_tag_prevents_reexport(self, bridge, tmp_dir, backend):
	        """A second sync exports nothing when all memories came from the file."""
	        memory_file = tmp_dir / "MEMORY.md"
	        memory_file.write_text("## Facts\n- Imported fact\n", encoding="utf-8")
	        bridge._config.md_paths = [memory_file]

	        # Initial sync performs the import
	        await bridge.sync(user_id="test_user")

	        # Repeat sync: every memory carries the source tag, so no export
	        repeat_run = await bridge.sync(user_id="test_user")
	        assert repeat_run.memories_exported == 0


	class TestSyncStatePersistence:
	    """Sync-state file handling across separate ``MemoryBridge`` instances."""

	    @pytest.mark.asyncio
	    async def test_state_saved_and_loaded(self, tmp_dir, backend):
	        """Sync state should persist across bridge instances."""
	        from headroom.memory.bridge import MemoryBridge

	        state_path = tmp_dir / "state.json"
	        config = BridgeConfig(
	            user_id="test_user",
	            sync_state_path=state_path,
	        )

	        md_path = tmp_dir / "MEMORY.md"
	        md_path.write_text(CLAUDE_CODE_MEMORY, encoding="utf-8")

	        # First bridge instance: import
	        bridge1 = MemoryBridge(config, backend)
	        await bridge1.import_from_markdown(paths=[md_path], user_id="test_user")

	        # Verify state file exists
	        assert state_path.exists()
	        # Read with an explicit encoding, like every other text read/write in
	        # this module, instead of depending on the platform default.
	        state = json.loads(state_path.read_text(encoding="utf-8"))
	        assert "files" in state
	        assert str(md_path) in state["files"]

	        # Second bridge instance: should detect unchanged file
	        bridge2 = MemoryBridge(config, backend)
	        stats = await bridge2.import_from_markdown(paths=[md_path], user_id="test_user")
	        assert stats.files_skipped_unchanged == 1


	class TestRoundTrip:
	    """Import followed by export of the Claude Code sample."""

	    @pytest.mark.asyncio
	    async def test_import_export_preserves_facts(self, bridge, tmp_dir, backend):
	        """Key terms from the imported sample reappear in the exported markdown."""
	        source_file = tmp_dir / "MEMORY.md"
	        source_file.write_text(CLAUDE_CODE_MEMORY, encoding="utf-8")

	        # Markdown -> backend
	        await bridge.import_from_markdown(paths=[source_file], user_id="test_user")

	        # Backend -> markdown
	        rendered = await bridge.export_to_markdown(
	            path=tmp_dir / "exported.md",
	            user_id="test_user",
	            format=MarkdownFormat.CLAUDE_CODE,
	        )

	        # One representative term per sample section must survive
	        assert "Headroom" in rendered
	        assert "compression" in rendered.lower() or "SmartCrusher" in rendered
	        assert "Compresr" in rendered or "Portkey" in rendered