Spaces:

minhtudragon
/

headroom

Running

File size: 24,940 Bytes

"""Integration tests for Headroom Memory System.

These tests use REAL API calls - no mocks.
Tests verify the full flow from LLM tool calls to memory storage.

Requirements:
    - OPENAI_API_KEY environment variable must be set
    - Run with: pytest tests/test_memory_integration.py -v -s
"""

from __future__ import annotations

import os
import tempfile
import uuid

import pytest
from openai import OpenAI

# API keys must be set externally via environment variables
# Tests will be skipped if OPENAI_API_KEY is not available


@pytest.mark.skipif(
    not os.environ.get("OPENAI_API_KEY"),
    reason="OPENAI_API_KEY environment variable not set",
)
class TestMemoryIntegration:
    """Integration tests for the memory system with real LLM calls."""

    @pytest.fixture
    def openai_client(self):
        """Create an OpenAI client."""
        return OpenAI()

    @pytest.fixture
    def temp_db_path(self):
        """Create a temporary database path."""
        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
            yield f.name
        # Cleanup
        try:
            os.unlink(f.name)
        except OSError:
            pass

    @pytest.fixture
    def user_id(self):
        """Generate a unique user ID for test isolation."""
        return f"test_user_{uuid.uuid4().hex[:8]}"

    # =========================================================================
    # Test 1: Verify optimized tools include pre-extraction fields
    # =========================================================================

    def test_optimized_tools_have_extraction_fields(self):
        """Verify that optimized tools include pre-extraction fields."""
        from headroom.memory.tools import get_memory_tools, get_memory_tools_optimized

        # Standard tools should NOT have facts/extracted_entities
        standard_tools = get_memory_tools()
        memory_save = next(t for t in standard_tools if t["function"]["name"] == "memory_save")
        props = memory_save["function"]["parameters"]["properties"]
        assert "facts" not in props, "Standard tools should not have 'facts'"
        assert "extracted_entities" not in props, (
            "Standard tools should not have 'extracted_entities'"
        )

        # Optimized tools SHOULD have facts/extracted_entities/extracted_relationships
        optimized_tools = get_memory_tools_optimized()
        memory_save_opt = next(t for t in optimized_tools if t["function"]["name"] == "memory_save")
        props_opt = memory_save_opt["function"]["parameters"]["properties"]
        assert "facts" in props_opt, "Optimized tools should have 'facts'"
        assert "extracted_entities" in props_opt, "Optimized tools should have 'extracted_entities'"
        assert "extracted_relationships" in props_opt, (
            "Optimized tools should have 'extracted_relationships'"
        )
        assert "background" in props_opt, "Optimized tools should have 'background'"

    # =========================================================================
    # Test 2: Verify wrapper uses correct tools based on optimized flag
    # =========================================================================

    def test_wrapper_uses_correct_tools(self, openai_client, temp_db_path, user_id):
        """Verify wrapper uses standard vs optimized tools correctly."""
        from headroom.memory import with_memory_tools
        from headroom.memory.backends.local import LocalBackend, LocalBackendConfig

        config = LocalBackendConfig(db_path=temp_db_path)
        backend = LocalBackend(config)

        # Create non-optimized wrapper
        wrapper_standard = with_memory_tools(
            openai_client, backend=backend, user_id=user_id, optimized=False
        )

        # Create optimized wrapper
        wrapper_optimized = with_memory_tools(
            openai_client, backend=backend, user_id=user_id, optimized=True
        )

        # Verify internal flags are set correctly
        assert wrapper_standard._optimized is False
        assert wrapper_optimized._optimized is True
        assert wrapper_optimized._inject_extraction_prompt is True

    # =========================================================================
    # Test 3: Verify extraction prompt is injected in optimized mode
    # =========================================================================

    def test_extraction_prompt_injection(self, openai_client, temp_db_path, user_id):
        """Verify extraction prompt is injected into system message."""
        from headroom.memory import with_memory_tools
        from headroom.memory.backends.local import LocalBackend, LocalBackendConfig
        from headroom.memory.extraction import EXTRACTION_SYSTEM_PROMPT

        config = LocalBackendConfig(db_path=temp_db_path)
        backend = LocalBackend(config)

        wrapper = with_memory_tools(
            openai_client,
            backend=backend,
            user_id=user_id,
            optimized=True,
            inject_extraction_prompt=True,
        )

        # Get the completions object
        completions = wrapper.chat.completions

        # Test _prepare_messages with existing system message
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello"},
        ]

        prepared = completions._prepare_messages(messages)

        # Verify system message has extraction prompt appended
        assert len(prepared) == 2
        assert EXTRACTION_SYSTEM_PROMPT in prepared[0]["content"]
        assert "You are a helpful assistant." in prepared[0]["content"]

        # Test _prepare_messages without existing system message
        messages_no_system = [{"role": "user", "content": "Hello"}]
        prepared_no_system = completions._prepare_messages(messages_no_system)

        # Verify system message was inserted
        assert len(prepared_no_system) == 2
        assert prepared_no_system[0]["role"] == "system"
        assert EXTRACTION_SYSTEM_PROMPT.strip() in prepared_no_system[0]["content"]

    # =========================================================================
    # Test 4: LocalBackend accepts pre-extraction fields
    # =========================================================================

    @pytest.mark.asyncio
    async def test_local_backend_pre_extraction(self, temp_db_path, user_id):
        """Test LocalBackend save_memory with pre-extraction fields."""
        from headroom.memory.backends.local import LocalBackend, LocalBackendConfig

        config = LocalBackendConfig(db_path=temp_db_path)
        backend = LocalBackend(config)

        # Save with pre-extraction fields
        # Note: relationships must reference entities that are in extracted_entities
        memory = await backend.save_memory(
            content="John works at Netflix using Python and TensorFlow.",
            user_id=user_id,
            importance=0.8,
            facts=["John works at Netflix", "John uses Python", "John uses TensorFlow"],
            extracted_entities=[
                {"entity": "John", "entity_type": "person"},
                {"entity": "Netflix", "entity_type": "organization"},
                {"entity": "Python", "entity_type": "technology"},
                {"entity": "TensorFlow", "entity_type": "technology"},
            ],
            extracted_relationships=[
                {
                    "source": "John",
                    "relationship": "works_at",
                    "destination": "Netflix",
                },
                {"source": "John", "relationship": "uses", "destination": "Python"},
                {
                    "source": "John",
                    "relationship": "uses",
                    "destination": "TensorFlow",
                },
            ],
        )

        # Verify memory was created
        assert memory is not None
        assert memory.user_id == user_id
        assert memory.metadata.get("_pre_extracted") is True
        assert memory.metadata.get("_fact_count") == 3

        # Verify entities were added to graph
        graph = await backend.get_graph()
        netflix_entity = await graph.get_entity_by_name(user_id, "Netflix")
        assert netflix_entity is not None
        assert netflix_entity.entity_type == "organization"

        python_entity = await graph.get_entity_by_name(user_id, "Python")
        assert python_entity is not None
        assert python_entity.entity_type == "technology"

        john_entity = await graph.get_entity_by_name(user_id, "John")
        assert john_entity is not None
        assert john_entity.entity_type == "person"

        # Verify relationships were added by querying via public API
        from headroom.memory.adapters.graph_models import RelationshipDirection

        # Verify John has outgoing relationships
        john_id = john_entity.id
        john_rels = await graph.get_relationships(john_id, RelationshipDirection.OUTGOING)
        assert len(john_rels) >= 3, (
            f"Expected John to have at least 3 outgoing relationships, got {len(john_rels)}"
        )

        await backend.close()

    # =========================================================================
    # Test 5: End-to-end with real LLM - Standard Mode
    # =========================================================================

    def test_e2e_standard_mode_llm_call(self, openai_client, temp_db_path, user_id):
        """Test end-to-end flow with real LLM call in standard mode."""
        from headroom.memory import with_memory_tools
        from headroom.memory.backends.local import LocalBackend, LocalBackendConfig

        config = LocalBackendConfig(db_path=temp_db_path)
        backend = LocalBackend(config)

        client = with_memory_tools(
            openai_client,
            backend=backend,
            user_id=user_id,
            optimized=False,  # Standard mode
        )

        # Make a real LLM call that should trigger memory_save
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant that remembers important user information. When the user shares personal information, save it to memory using the memory_save tool.",
                },
                {
                    "role": "user",
                    "content": "Hi! My name is Alex and I work as a data scientist at Google.",
                },
            ],
        )

        # Verify response was generated
        assert response is not None
        assert response.choices is not None
        assert len(response.choices) > 0

        # Check if memory tool was called
        message = response.choices[0].message
        if message.tool_calls:
            # Verify memory_save was called
            tool_names = [tc.function.name for tc in message.tool_calls]
            print(f"Tools called: {tool_names}")

            # Check if auto-handled
            if hasattr(response, "_memory_tool_results"):
                print(f"Memory tool results: {response._memory_tool_results}")
                assert len(response._memory_tool_results) > 0

    # =========================================================================
    # Test 6: End-to-end with real LLM - Optimized Mode
    # =========================================================================

    def test_e2e_optimized_mode_llm_call(self, openai_client, temp_db_path, user_id):
        """Test end-to-end flow with real LLM call in optimized mode."""
        from headroom.memory import with_memory_tools
        from headroom.memory.backends.local import LocalBackend, LocalBackendConfig

        config = LocalBackendConfig(db_path=temp_db_path)
        backend = LocalBackend(config)

        client = with_memory_tools(
            openai_client,
            backend=backend,
            user_id=user_id,
            optimized=True,  # Optimized mode - should extract facts/entities
        )

        # Make a real LLM call
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": "I'm Sarah, a software engineer at Microsoft. I use Python, React, and PostgreSQL daily.",
                },
            ],
        )

        # Verify response was generated
        assert response is not None
        assert response.choices is not None

        # Check if memory tool was called with pre-extraction
        message = response.choices[0].message
        if message.tool_calls:
            for tc in message.tool_calls:
                if tc.function.name == "memory_save":
                    import json

                    args = json.loads(tc.function.arguments)
                    print(f"memory_save arguments: {json.dumps(args, indent=2)}")

                    # In optimized mode, LLM SHOULD include facts/entities
                    # (depends on LLM following the extraction prompt)
                    if "facts" in args:
                        print(f"Pre-extracted facts: {args['facts']}")
                    if "extracted_entities" in args:
                        print(f"Pre-extracted entities: {args['extracted_entities']}")
                    if "extracted_relationships" in args:
                        print(f"Pre-extracted relationships: {args['extracted_relationships']}")

        # Check auto-handled results
        if hasattr(response, "_memory_tool_results"):
            print(f"Memory tool results: {response._memory_tool_results}")

    # =========================================================================
    # Test 7: Verify memory search works after save
    # =========================================================================

    @pytest.mark.asyncio
    async def test_memory_search_after_save(self, temp_db_path, user_id):
        """Test that saved memories can be searched."""
        from headroom.memory.backends.local import LocalBackend, LocalBackendConfig

        config = LocalBackendConfig(db_path=temp_db_path)
        backend = LocalBackend(config)

        # Save some memories
        await backend.save_memory(
            content="User prefers Python for backend development",
            user_id=user_id,
            importance=0.9,
            entities=["Python"],
            extracted_entities=[{"entity": "Python", "entity_type": "technology"}],
        )

        await backend.save_memory(
            content="User works at Netflix as a senior engineer",
            user_id=user_id,
            importance=0.8,
            entities=["Netflix"],
            extracted_entities=[{"entity": "Netflix", "entity_type": "organization"}],
        )

        # Search for memories
        results = await backend.search_memories(
            query="What programming language does the user prefer?",
            user_id=user_id,
            top_k=5,
        )

        assert len(results) > 0, "Expected at least one search result"
        print(f"Search results: {[(r.memory.content, r.score) for r in results]}")

        # Search with entity filter
        results_netflix = await backend.search_memories(
            query="Where does the user work?",
            user_id=user_id,
            entities=["Netflix"],
            top_k=5,
        )

        # Should find the Netflix-related memory
        assert any("Netflix" in r.memory.content for r in results_netflix), (
            "Expected Netflix in results"
        )

        await backend.close()

    # =========================================================================
    # Test 8: Test include_related graph expansion
    # =========================================================================

    @pytest.mark.asyncio
    async def test_include_related_graph_expansion(self, temp_db_path, user_id):
        """Test that include_related expands results via graph."""
        from headroom.memory.backends.local import LocalBackend, LocalBackendConfig

        config = LocalBackendConfig(db_path=temp_db_path)
        backend = LocalBackend(config)

        # Save memories with related entities
        await backend.save_memory(
            content="Alice is a data scientist",
            user_id=user_id,
            importance=0.8,
            entities=["Alice"],
            extracted_entities=[{"entity": "Alice", "entity_type": "person"}],
        )

        await backend.save_memory(
            content="Alice works at Acme Corp",
            user_id=user_id,
            importance=0.8,
            entities=["Alice", "Acme Corp"],
            extracted_entities=[
                {"entity": "Alice", "entity_type": "person"},
                {"entity": "Acme Corp", "entity_type": "organization"},
            ],
            extracted_relationships=[
                {
                    "source": "Alice",
                    "relationship": "works_at",
                    "destination": "Acme Corp",
                }
            ],
        )

        await backend.save_memory(
            content="Acme Corp is a tech company in San Francisco",
            user_id=user_id,
            importance=0.7,
            entities=["Acme Corp", "San Francisco"],
            extracted_entities=[
                {"entity": "Acme Corp", "entity_type": "organization"},
                {"entity": "San Francisco", "entity_type": "location"},
            ],
        )

        # Search for Alice - should expand to related memories via graph
        results_with_related = await backend.search_memories(
            query="Tell me about Alice",
            user_id=user_id,
            top_k=10,
            include_related=True,
        )

        # Search without related
        results_without_related = await backend.search_memories(
            query="Tell me about Alice",
            user_id=user_id,
            top_k=10,
            include_related=False,
        )

        print(f"With related: {[r.memory.content for r in results_with_related]}")
        print(f"Without related: {[r.memory.content for r in results_without_related]}")

        # With related should potentially include the Acme Corp memory via Alice connection
        # (This depends on graph expansion finding the connection)
        assert len(results_with_related) >= len(results_without_related), (
            "include_related should return same or more results"
        )

        await backend.close()

    # =========================================================================
    # Test 9: Test MemorySystem tool dispatch
    # =========================================================================

    @pytest.mark.asyncio
    async def test_memory_system_tool_dispatch(self, temp_db_path, user_id):
        """Test MemorySystem processes tool calls correctly."""
        from headroom.memory.backends.local import LocalBackend, LocalBackendConfig
        from headroom.memory.system import MemorySystem

        config = LocalBackendConfig(db_path=temp_db_path)
        backend = LocalBackend(config)
        system = MemorySystem(backend, user_id=user_id)

        # Test memory_save dispatch
        save_result = await system.process_tool_call(
            "memory_save",
            {
                "content": "User likes dark mode",
                "importance": 0.7,
                "facts": ["Prefers dark mode"],
                "extracted_entities": [{"entity": "dark mode", "entity_type": "preference"}],
            },
        )

        assert save_result["success"] is True
        assert "memory_id" in save_result or "data" in save_result
        print(f"Save result: {save_result}")

        # Test memory_search dispatch
        search_result = await system.process_tool_call(
            "memory_search", {"query": "dark mode preferences", "top_k": 5}
        )

        assert search_result["success"] is True
        print(f"Search result: {search_result}")

        await backend.close()

    # =========================================================================
    # Test 10: Full flow - LLM saves, then retrieves via search
    # =========================================================================

    def test_full_flow_save_then_search(self, openai_client, temp_db_path, user_id):
        """Test complete flow: LLM saves memory, then searches for it."""
        import json

        from headroom.memory import with_memory_tools
        from headroom.memory.backends.local import LocalBackend, LocalBackendConfig

        config = LocalBackendConfig(db_path=temp_db_path)
        backend = LocalBackend(config)

        client = with_memory_tools(
            openai_client,
            backend=backend,
            user_id=user_id,
            optimized=True,
        )

        # First: Have LLM save some information
        save_response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": "Remember this: My favorite programming language is Rust and I'm working on a CLI tool called headroom.",
                },
            ],
        )

        print(f"Save response: {save_response.choices[0].message}")

        # Process tool calls if any
        if save_response.choices[0].message.tool_calls:
            print(
                f"Tool calls made: {[tc.function.name for tc in save_response.choices[0].message.tool_calls]}"
            )
            if hasattr(save_response, "_memory_tool_results"):
                print(f"Results: {save_response._memory_tool_results}")

        # Second: Ask LLM to recall the information
        recall_response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": "What is my favorite programming language? Search your memory.",
                },
            ],
        )

        print(f"Recall response: {recall_response.choices[0].message}")

        # Check if search was invoked
        if recall_response.choices[0].message.tool_calls:
            for tc in recall_response.choices[0].message.tool_calls:
                print(f"Tool: {tc.function.name}, Args: {tc.function.arguments}")
                if hasattr(recall_response, "_memory_tool_results"):
                    results = recall_response._memory_tool_results.get(tc.id, {})
                    print(f"Tool result: {json.dumps(results, indent=2, default=str)}")


class TestExtractionPrompts:
    """Tests for extraction prompt templates."""

    def test_extraction_prompts_exist_and_valid(self):
        """Verify extraction prompts are defined and non-empty."""
        from headroom.memory.extraction import (
            ENTITY_EXTRACTION_PROMPT,
            EXTRACTION_SYSTEM_PROMPT,
            FACT_EXTRACTION_PROMPT,
            RELATIONSHIP_EXTRACTION_PROMPT,
        )

        assert len(EXTRACTION_SYSTEM_PROMPT) > 100, "System prompt should be substantial"
        assert len(FACT_EXTRACTION_PROMPT) > 100, "Fact prompt should be substantial"
        assert len(ENTITY_EXTRACTION_PROMPT) > 100, "Entity prompt should be substantial"
        assert len(RELATIONSHIP_EXTRACTION_PROMPT) > 100, (
            "Relationship prompt should be substantial"
        )

        # Verify they mention key concepts
        assert "facts" in EXTRACTION_SYSTEM_PROMPT.lower()
        assert "entities" in EXTRACTION_SYSTEM_PROMPT.lower()
        assert "relationships" in EXTRACTION_SYSTEM_PROMPT.lower()


class TestWrapperToolsModule:
    """Tests for wrapper_tools.py module."""

    def test_wrapper_tools_imports(self):
        """Verify all necessary imports work."""
        from headroom.memory.wrapper_tools import (
            MemoryToolsChatCompletions,
            MemoryToolsCompletions,
            MemoryToolsWrapper,
            with_memory_tools,
        )

        assert with_memory_tools is not None
        assert MemoryToolsWrapper is not None
        assert MemoryToolsChatCompletions is not None
        assert MemoryToolsCompletions is not None

    def test_with_memory_tools_accepts_optimized_param(self):
        """Verify with_memory_tools accepts optimized parameter."""
        import inspect

        from headroom.memory.wrapper_tools import with_memory_tools

        sig = inspect.signature(with_memory_tools)
        params = list(sig.parameters.keys())

        assert "optimized" in params, "with_memory_tools should accept 'optimized' param"
        assert "inject_extraction_prompt" in params, (
            "with_memory_tools should accept 'inject_extraction_prompt' param"
        )


if __name__ == "__main__":
    pytest.main([__file__, "-v", "-s"])