Spaces:

minhtudragon
/

headroom

Running

headroom / tests /test_memory /test_memory_integration.py

tudragon154203

fix: route count_tokens to api.anthropic.com, not proxy base_url

0adb431 24 days ago

24.9 kB

	"""Integration tests for Headroom Memory System.

	These tests use REAL API calls - no mocks.
	Tests verify the full flow from LLM tool calls to memory storage.

	Requirements:
	- OPENAI_API_KEY environment variable must be set
	- Run with: pytest tests/test_memory_integration.py -v -s
	"""

	from __future__ import annotations

	import os
	import tempfile
	import uuid

	import pytest
	from openai import OpenAI

	# API keys must be set externally via environment variables
	# Tests will be skipped if OPENAI_API_KEY is not available


	@pytest.mark.skipif(
	not os.environ.get("OPENAI_API_KEY"),
	reason="OPENAI_API_KEY environment variable not set",
	)
	class TestMemoryIntegration:
	"""Integration tests for the memory system with real LLM calls."""

	@pytest.fixture
	def openai_client(self):
	"""Create an OpenAI client."""
	return OpenAI()

	@pytest.fixture
	def temp_db_path(self):
	"""Create a temporary database path."""
	with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
	yield f.name
	# Cleanup
	try:
	os.unlink(f.name)
	except OSError:
	pass

	@pytest.fixture
	def user_id(self):
	"""Generate a unique user ID for test isolation."""
	return f"test_user_{uuid.uuid4().hex[:8]}"

	# =========================================================================
	# Test 1: Verify optimized tools include pre-extraction fields
	# =========================================================================

	def test_optimized_tools_have_extraction_fields(self):
	"""Verify that optimized tools include pre-extraction fields."""
	from headroom.memory.tools import get_memory_tools, get_memory_tools_optimized

	# Standard tools should NOT have facts/extracted_entities
	standard_tools = get_memory_tools()
	memory_save = next(t for t in standard_tools if t["function"]["name"] == "memory_save")
	props = memory_save["function"]["parameters"]["properties"]
	assert "facts" not in props, "Standard tools should not have 'facts'"
	assert "extracted_entities" not in props, (
	"Standard tools should not have 'extracted_entities'"
	)

	# Optimized tools SHOULD have facts/extracted_entities/extracted_relationships
	optimized_tools = get_memory_tools_optimized()
	memory_save_opt = next(t for t in optimized_tools if t["function"]["name"] == "memory_save")
	props_opt = memory_save_opt["function"]["parameters"]["properties"]
	assert "facts" in props_opt, "Optimized tools should have 'facts'"
	assert "extracted_entities" in props_opt, "Optimized tools should have 'extracted_entities'"
	assert "extracted_relationships" in props_opt, (
	"Optimized tools should have 'extracted_relationships'"
	)
	assert "background" in props_opt, "Optimized tools should have 'background'"

	# =========================================================================
	# Test 2: Verify wrapper uses correct tools based on optimized flag
	# =========================================================================

	def test_wrapper_uses_correct_tools(self, openai_client, temp_db_path, user_id):
	"""Verify wrapper uses standard vs optimized tools correctly."""
	from headroom.memory import with_memory_tools
	from headroom.memory.backends.local import LocalBackend, LocalBackendConfig

	config = LocalBackendConfig(db_path=temp_db_path)
	backend = LocalBackend(config)

	# Create non-optimized wrapper
	wrapper_standard = with_memory_tools(
	openai_client, backend=backend, user_id=user_id, optimized=False
	)

	# Create optimized wrapper
	wrapper_optimized = with_memory_tools(
	openai_client, backend=backend, user_id=user_id, optimized=True
	)

	# Verify internal flags are set correctly
	assert wrapper_standard._optimized is False
	assert wrapper_optimized._optimized is True
	assert wrapper_optimized._inject_extraction_prompt is True

	# =========================================================================
	# Test 3: Verify extraction prompt is injected in optimized mode
	# =========================================================================

	def test_extraction_prompt_injection(self, openai_client, temp_db_path, user_id):
	"""Verify extraction prompt is injected into system message."""
	from headroom.memory import with_memory_tools
	from headroom.memory.backends.local import LocalBackend, LocalBackendConfig
	from headroom.memory.extraction import EXTRACTION_SYSTEM_PROMPT

	config = LocalBackendConfig(db_path=temp_db_path)
	backend = LocalBackend(config)

	wrapper = with_memory_tools(
	openai_client,
	backend=backend,
	user_id=user_id,
	optimized=True,
	inject_extraction_prompt=True,
	)

	# Get the completions object
	completions = wrapper.chat.completions

	# Test _prepare_messages with existing system message
	messages = [
	{"role": "system", "content": "You are a helpful assistant."},
	{"role": "user", "content": "Hello"},
	]

	prepared = completions._prepare_messages(messages)

	# Verify system message has extraction prompt appended
	assert len(prepared) == 2
	assert EXTRACTION_SYSTEM_PROMPT in prepared[0]["content"]
	assert "You are a helpful assistant." in prepared[0]["content"]

	# Test _prepare_messages without existing system message
	messages_no_system = [{"role": "user", "content": "Hello"}]
	prepared_no_system = completions._prepare_messages(messages_no_system)

	# Verify system message was inserted
	assert len(prepared_no_system) == 2
	assert prepared_no_system[0]["role"] == "system"
	assert EXTRACTION_SYSTEM_PROMPT.strip() in prepared_no_system[0]["content"]

	# =========================================================================
	# Test 4: LocalBackend accepts pre-extraction fields
	# =========================================================================

	@pytest.mark.asyncio
	async def test_local_backend_pre_extraction(self, temp_db_path, user_id):
	"""Test LocalBackend save_memory with pre-extraction fields."""
	from headroom.memory.backends.local import LocalBackend, LocalBackendConfig

	config = LocalBackendConfig(db_path=temp_db_path)
	backend = LocalBackend(config)

	# Save with pre-extraction fields
	# Note: relationships must reference entities that are in extracted_entities
	memory = await backend.save_memory(
	content="John works at Netflix using Python and TensorFlow.",
	user_id=user_id,
	importance=0.8,
	facts=["John works at Netflix", "John uses Python", "John uses TensorFlow"],
	extracted_entities=[
	{"entity": "John", "entity_type": "person"},
	{"entity": "Netflix", "entity_type": "organization"},
	{"entity": "Python", "entity_type": "technology"},
	{"entity": "TensorFlow", "entity_type": "technology"},
	],
	extracted_relationships=[
	{
	"source": "John",
	"relationship": "works_at",
	"destination": "Netflix",
	},
	{"source": "John", "relationship": "uses", "destination": "Python"},
	{
	"source": "John",
	"relationship": "uses",
	"destination": "TensorFlow",
	},
	],
	)

	# Verify memory was created
	assert memory is not None
	assert memory.user_id == user_id
	assert memory.metadata.get("_pre_extracted") is True
	assert memory.metadata.get("_fact_count") == 3

	# Verify entities were added to graph
	graph = await backend.get_graph()
	netflix_entity = await graph.get_entity_by_name(user_id, "Netflix")
	assert netflix_entity is not None
	assert netflix_entity.entity_type == "organization"

	python_entity = await graph.get_entity_by_name(user_id, "Python")
	assert python_entity is not None
	assert python_entity.entity_type == "technology"

	john_entity = await graph.get_entity_by_name(user_id, "John")
	assert john_entity is not None
	assert john_entity.entity_type == "person"

	# Verify relationships were added by querying via public API
	from headroom.memory.adapters.graph_models import RelationshipDirection

	# Verify John has outgoing relationships
	john_id = john_entity.id
	john_rels = await graph.get_relationships(john_id, RelationshipDirection.OUTGOING)
	assert len(john_rels) >= 3, (
	f"Expected John to have at least 3 outgoing relationships, got {len(john_rels)}"
	)

	await backend.close()

	# =========================================================================
	# Test 5: End-to-end with real LLM - Standard Mode
	# =========================================================================

	def test_e2e_standard_mode_llm_call(self, openai_client, temp_db_path, user_id):
	"""Test end-to-end flow with real LLM call in standard mode."""
	from headroom.memory import with_memory_tools
	from headroom.memory.backends.local import LocalBackend, LocalBackendConfig

	config = LocalBackendConfig(db_path=temp_db_path)
	backend = LocalBackend(config)

	client = with_memory_tools(
	openai_client,
	backend=backend,
	user_id=user_id,
	optimized=False, # Standard mode
	)

	# Make a real LLM call that should trigger memory_save
	response = client.chat.completions.create(
	model="gpt-4o-mini",
	messages=[
	{
	"role": "system",
	"content": "You are a helpful assistant that remembers important user information. When the user shares personal information, save it to memory using the memory_save tool.",
	},
	{
	"role": "user",
	"content": "Hi! My name is Alex and I work as a data scientist at Google.",
	},
	],
	)

	# Verify response was generated
	assert response is not None
	assert response.choices is not None
	assert len(response.choices) > 0

	# Check if memory tool was called
	message = response.choices[0].message
	if message.tool_calls:
	# Verify memory_save was called
	tool_names = [tc.function.name for tc in message.tool_calls]
	print(f"Tools called: {tool_names}")

	# Check if auto-handled
	if hasattr(response, "_memory_tool_results"):
	print(f"Memory tool results: {response._memory_tool_results}")
	assert len(response._memory_tool_results) > 0

	# =========================================================================
	# Test 6: End-to-end with real LLM - Optimized Mode
	# =========================================================================

	def test_e2e_optimized_mode_llm_call(self, openai_client, temp_db_path, user_id):
	"""Test end-to-end flow with real LLM call in optimized mode."""
	from headroom.memory import with_memory_tools
	from headroom.memory.backends.local import LocalBackend, LocalBackendConfig

	config = LocalBackendConfig(db_path=temp_db_path)
	backend = LocalBackend(config)

	client = with_memory_tools(
	openai_client,
	backend=backend,
	user_id=user_id,
	optimized=True, # Optimized mode - should extract facts/entities
	)

	# Make a real LLM call
	response = client.chat.completions.create(
	model="gpt-4o-mini",
	messages=[
	{
	"role": "user",
	"content": "I'm Sarah, a software engineer at Microsoft. I use Python, React, and PostgreSQL daily.",
	},
	],
	)

	# Verify response was generated
	assert response is not None
	assert response.choices is not None

	# Check if memory tool was called with pre-extraction
	message = response.choices[0].message
	if message.tool_calls:
	for tc in message.tool_calls:
	if tc.function.name == "memory_save":
	import json

	args = json.loads(tc.function.arguments)
	print(f"memory_save arguments: {json.dumps(args, indent=2)}")

	# In optimized mode, LLM SHOULD include facts/entities
	# (depends on LLM following the extraction prompt)
	if "facts" in args:
	print(f"Pre-extracted facts: {args['facts']}")
	if "extracted_entities" in args:
	print(f"Pre-extracted entities: {args['extracted_entities']}")
	if "extracted_relationships" in args:
	print(f"Pre-extracted relationships: {args['extracted_relationships']}")

	# Check auto-handled results
	if hasattr(response, "_memory_tool_results"):
	print(f"Memory tool results: {response._memory_tool_results}")

	# =========================================================================
	# Test 7: Verify memory search works after save
	# =========================================================================

	@pytest.mark.asyncio
	async def test_memory_search_after_save(self, temp_db_path, user_id):
	"""Test that saved memories can be searched."""
	from headroom.memory.backends.local import LocalBackend, LocalBackendConfig

	config = LocalBackendConfig(db_path=temp_db_path)
	backend = LocalBackend(config)

	# Save some memories
	await backend.save_memory(
	content="User prefers Python for backend development",
	user_id=user_id,
	importance=0.9,
	entities=["Python"],
	extracted_entities=[{"entity": "Python", "entity_type": "technology"}],
	)

	await backend.save_memory(
	content="User works at Netflix as a senior engineer",
	user_id=user_id,
	importance=0.8,
	entities=["Netflix"],
	extracted_entities=[{"entity": "Netflix", "entity_type": "organization"}],
	)

	# Search for memories
	results = await backend.search_memories(
	query="What programming language does the user prefer?",
	user_id=user_id,
	top_k=5,
	)

	assert len(results) > 0, "Expected at least one search result"
	print(f"Search results: {[(r.memory.content, r.score) for r in results]}")

	# Search with entity filter
	results_netflix = await backend.search_memories(
	query="Where does the user work?",
	user_id=user_id,
	entities=["Netflix"],
	top_k=5,
	)

	# Should find the Netflix-related memory
	assert any("Netflix" in r.memory.content for r in results_netflix), (
	"Expected Netflix in results"
	)

	await backend.close()

	# =========================================================================
	# Test 8: Test include_related graph expansion
	# =========================================================================

	@pytest.mark.asyncio
	async def test_include_related_graph_expansion(self, temp_db_path, user_id):
	"""Test that include_related expands results via graph."""
	from headroom.memory.backends.local import LocalBackend, LocalBackendConfig

	config = LocalBackendConfig(db_path=temp_db_path)
	backend = LocalBackend(config)

	# Save memories with related entities
	await backend.save_memory(
	content="Alice is a data scientist",
	user_id=user_id,
	importance=0.8,
	entities=["Alice"],
	extracted_entities=[{"entity": "Alice", "entity_type": "person"}],
	)

	await backend.save_memory(
	content="Alice works at Acme Corp",
	user_id=user_id,
	importance=0.8,
	entities=["Alice", "Acme Corp"],
	extracted_entities=[
	{"entity": "Alice", "entity_type": "person"},
	{"entity": "Acme Corp", "entity_type": "organization"},
	],
	extracted_relationships=[
	{
	"source": "Alice",
	"relationship": "works_at",
	"destination": "Acme Corp",
	}
	],
	)

	await backend.save_memory(
	content="Acme Corp is a tech company in San Francisco",
	user_id=user_id,
	importance=0.7,
	entities=["Acme Corp", "San Francisco"],
	extracted_entities=[
	{"entity": "Acme Corp", "entity_type": "organization"},
	{"entity": "San Francisco", "entity_type": "location"},
	],
	)

	# Search for Alice - should expand to related memories via graph
	results_with_related = await backend.search_memories(
	query="Tell me about Alice",
	user_id=user_id,
	top_k=10,
	include_related=True,
	)

	# Search without related
	results_without_related = await backend.search_memories(
	query="Tell me about Alice",
	user_id=user_id,
	top_k=10,
	include_related=False,
	)

	print(f"With related: {[r.memory.content for r in results_with_related]}")
	print(f"Without related: {[r.memory.content for r in results_without_related]}")

	# With related should potentially include the Acme Corp memory via Alice connection
	# (This depends on graph expansion finding the connection)
	assert len(results_with_related) >= len(results_without_related), (
	"include_related should return same or more results"
	)

	await backend.close()

	# =========================================================================
	# Test 9: Test MemorySystem tool dispatch
	# =========================================================================

	@pytest.mark.asyncio
	async def test_memory_system_tool_dispatch(self, temp_db_path, user_id):
	"""Test MemorySystem processes tool calls correctly."""
	from headroom.memory.backends.local import LocalBackend, LocalBackendConfig
	from headroom.memory.system import MemorySystem

	config = LocalBackendConfig(db_path=temp_db_path)
	backend = LocalBackend(config)
	system = MemorySystem(backend, user_id=user_id)

	# Test memory_save dispatch
	save_result = await system.process_tool_call(
	"memory_save",
	{
	"content": "User likes dark mode",
	"importance": 0.7,
	"facts": ["Prefers dark mode"],
	"extracted_entities": [{"entity": "dark mode", "entity_type": "preference"}],
	},
	)

	assert save_result["success"] is True
	assert "memory_id" in save_result or "data" in save_result
	print(f"Save result: {save_result}")

	# Test memory_search dispatch
	search_result = await system.process_tool_call(
	"memory_search", {"query": "dark mode preferences", "top_k": 5}
	)

	assert search_result["success"] is True
	print(f"Search result: {search_result}")

	await backend.close()

	# =========================================================================
	# Test 10: Full flow - LLM saves, then retrieves via search
	# =========================================================================

	def test_full_flow_save_then_search(self, openai_client, temp_db_path, user_id):
	"""Test complete flow: LLM saves memory, then searches for it."""
	import json

	from headroom.memory import with_memory_tools
	from headroom.memory.backends.local import LocalBackend, LocalBackendConfig

	config = LocalBackendConfig(db_path=temp_db_path)
	backend = LocalBackend(config)

	client = with_memory_tools(
	openai_client,
	backend=backend,
	user_id=user_id,
	optimized=True,
	)

	# First: Have LLM save some information
	save_response = client.chat.completions.create(
	model="gpt-4o-mini",
	messages=[
	{
	"role": "user",
	"content": "Remember this: My favorite programming language is Rust and I'm working on a CLI tool called headroom.",
	},
	],
	)

	print(f"Save response: {save_response.choices[0].message}")

	# Process tool calls if any
	if save_response.choices[0].message.tool_calls:
	print(
	f"Tool calls made: {[tc.function.name for tc in save_response.choices[0].message.tool_calls]}"
	)
	if hasattr(save_response, "_memory_tool_results"):
	print(f"Results: {save_response._memory_tool_results}")

	# Second: Ask LLM to recall the information
	recall_response = client.chat.completions.create(
	model="gpt-4o-mini",
	messages=[
	{
	"role": "user",
	"content": "What is my favorite programming language? Search your memory.",
	},
	],
	)

	print(f"Recall response: {recall_response.choices[0].message}")

	# Check if search was invoked
	if recall_response.choices[0].message.tool_calls:
	for tc in recall_response.choices[0].message.tool_calls:
	print(f"Tool: {tc.function.name}, Args: {tc.function.arguments}")
	if hasattr(recall_response, "_memory_tool_results"):
	results = recall_response._memory_tool_results.get(tc.id, {})
	print(f"Tool result: {json.dumps(results, indent=2, default=str)}")


	class TestExtractionPrompts:
	"""Tests for extraction prompt templates."""

	def test_extraction_prompts_exist_and_valid(self):
	"""Verify extraction prompts are defined and non-empty."""
	from headroom.memory.extraction import (
	ENTITY_EXTRACTION_PROMPT,
	EXTRACTION_SYSTEM_PROMPT,
	FACT_EXTRACTION_PROMPT,
	RELATIONSHIP_EXTRACTION_PROMPT,
	)

	assert len(EXTRACTION_SYSTEM_PROMPT) > 100, "System prompt should be substantial"
	assert len(FACT_EXTRACTION_PROMPT) > 100, "Fact prompt should be substantial"
	assert len(ENTITY_EXTRACTION_PROMPT) > 100, "Entity prompt should be substantial"
	assert len(RELATIONSHIP_EXTRACTION_PROMPT) > 100, (
	"Relationship prompt should be substantial"
	)

	# Verify they mention key concepts
	assert "facts" in EXTRACTION_SYSTEM_PROMPT.lower()
	assert "entities" in EXTRACTION_SYSTEM_PROMPT.lower()
	assert "relationships" in EXTRACTION_SYSTEM_PROMPT.lower()


	class TestWrapperToolsModule:
	"""Tests for wrapper_tools.py module."""

	def test_wrapper_tools_imports(self):
	"""Verify all necessary imports work."""
	from headroom.memory.wrapper_tools import (
	MemoryToolsChatCompletions,
	MemoryToolsCompletions,
	MemoryToolsWrapper,
	with_memory_tools,
	)

	assert with_memory_tools is not None
	assert MemoryToolsWrapper is not None
	assert MemoryToolsChatCompletions is not None
	assert MemoryToolsCompletions is not None

	def test_with_memory_tools_accepts_optimized_param(self):
	"""Verify with_memory_tools accepts optimized parameter."""
	import inspect

	from headroom.memory.wrapper_tools import with_memory_tools

	sig = inspect.signature(with_memory_tools)
	params = list(sig.parameters.keys())

	assert "optimized" in params, "with_memory_tools should accept 'optimized' param"
	assert "inject_extraction_prompt" in params, (
	"with_memory_tools should accept 'inject_extraction_prompt' param"
	)


	if __name__ == "__main__":
	pytest.main([__file__, "-v", "-s"])