# Source: Hugging Face Space "minhtudragon/headroom" (status: Running)
# File size: 9,928 Bytes
# Revisions shown in the file viewer: 7a05808, e4a41fa

"""Tests for SemanticCache and SemanticCacheLayer."""

import time

import pytest

from headroom.cache import (
    AnthropicCacheOptimizer,
    OptimizationContext,
    SemanticCache,
    SemanticCacheLayer,
)
from headroom.cache.semantic import SemanticCacheConfig


class TestSemanticCacheConfig:
    """Checks on the defaults exposed by SemanticCacheConfig."""

    def test_default_values(self):
        """A config built with no arguments carries the expected defaults."""
        defaults = SemanticCacheConfig()

        # One assertion per field so a failure points at the exact default.
        assert defaults.similarity_threshold == 0.95
        assert defaults.max_entries == 1000
        assert defaults.ttl_seconds == 300
        assert defaults.use_exact_matching is True


class TestSemanticCache:
    """Exercise SemanticCache: lookups, eviction, expiry and bookkeeping."""

    @pytest.fixture
    def cache(self):
        """Provide a cache configured with max_entries=10 and ttl_seconds=60."""
        return SemanticCache(SemanticCacheConfig(max_entries=10, ttl_seconds=60))

    def test_put_and_get_exact_match(self, cache):
        """An entry stored under a messages hash comes back for that hash."""
        stored = {"text": "Hello, how can I help?"}
        cache.put("What is the weather?", stored, messages_hash="hash123")

        found = cache.get("What is the weather?", messages_hash="hash123")

        assert found is not None
        assert found.response == stored

    def test_get_miss(self, cache):
        """Looking up something that was never stored yields None."""
        assert cache.get("Unknown query", messages_hash="unknown") is None

    def test_lru_eviction(self):
        """With max_entries=3, a fourth put drops the least recently used entry."""
        cache = SemanticCache(SemanticCacheConfig(max_entries=3))

        # Bring the cache up to its configured capacity.
        for query, answer, digest in (
            ("query1", "response1", "h1"),
            ("query2", "response2", "h2"),
            ("query3", "response3", "h3"),
        ):
            cache.put(query, answer, messages_hash=digest)

        # Touch query1 so that query2 becomes the least recently used entry.
        cache.get("query1", messages_hash="h1")

        # One more insert pushes the cache over capacity.
        cache.put("query4", "response4", messages_hash="h4")

        # The recently touched entry survives ...
        assert cache.get("query1", messages_hash="h1") is not None
        # ... the untouched oldest entry does not ...
        assert cache.get("query2", messages_hash="h2") is None
        # ... and the two newest entries are still present.
        assert cache.get("query3", messages_hash="h3") is not None
        assert cache.get("query4", messages_hash="h4") is not None

    def test_ttl_expiration(self):
        """An entry stops being returned once ttl_seconds has elapsed."""
        cache = SemanticCache(SemanticCacheConfig(ttl_seconds=1))
        cache.put("expiring query", "response", messages_hash="exp1")

        # Right after the put the entry must still be served.
        assert cache.get("expiring query", messages_hash="exp1") is not None

        # Sleep slightly longer than the 1 second TTL configured above.
        time.sleep(1.1)

        # The same lookup now has to miss.
        assert cache.get("expiring query", messages_hash="exp1") is None

    def test_invalidate(self, cache):
        """Invalidating by the key returned from put() removes the entry."""
        entry_key = cache.put("query", "response", messages_hash="inv1")
        assert cache.get("query", messages_hash="inv1") is not None

        cache.invalidate(entry_key)

        assert cache.get("query", messages_hash="inv1") is None

    def test_clear(self, cache):
        """clear() leaves the cache reporting zero entries."""
        cache.put("query1", "response1", messages_hash="c1")
        cache.put("query2", "response2", messages_hash="c2")

        cache.clear()

        snapshot = cache.get_stats()
        assert snapshot["entries"] == 0

    def test_stats(self, cache):
        """get_stats() reports entries, hits, misses and the hit rate."""
        cache.put("query", "response", messages_hash="s1")
        # Exactly one hit followed by exactly one miss.
        cache.get("query", messages_hash="s1")
        cache.get("unknown", messages_hash="unknown")

        snapshot = cache.get_stats()

        assert snapshot["entries"] == 1
        assert snapshot["hits"] == 1
        assert snapshot["misses"] == 1
        assert snapshot["hit_rate"] == 0.5

    def test_access_count(self, cache):
        """Each get() bumps the access_count of the returned entry."""
        cache.put("query", "response", messages_hash="ac1")

        # Five lookups in a row; only the last result is inspected.
        lookups = [cache.get("query", messages_hash="ac1") for _ in range(5)]

        # The count starts at 1 on insert, so five reads bring it to 6.
        assert lookups[-1].access_count == 6

    def test_semantic_similarity_with_embedding_fn(self):
        """Lookups without a hash match through a custom embedding function."""

        def fake_embedding(text: str) -> list[float]:
            # Three mutually orthogonal vectors: one per topic keyword plus a
            # catch-all, so texts sharing a keyword get identical embeddings.
            lowered = text.lower()
            if "weather" in lowered:
                return [1.0, 0.0, 0.0]
            if "time" in lowered:
                return [0.0, 1.0, 0.0]
            return [0.0, 0.0, 1.0]

        cache = SemanticCache(
            SemanticCacheConfig(similarity_threshold=0.9),
            embedding_fn=fake_embedding,
        )

        # Seed the cache with a weather question.
        cache.put("What is the weather today?", "It's sunny", messages_hash="w1")

        # A differently worded weather question maps to the same embedding.
        similar = cache.get("How is the weather?")
        assert similar is not None
        assert similar.response == "It's sunny"

        # A question about time maps to an orthogonal embedding and must miss.
        unrelated = cache.get("What time is it?")
        assert unrelated is None


class TestSemanticCacheLayer:
    """Exercise SemanticCacheLayer on top of an AnthropicCacheOptimizer."""

    @pytest.fixture
    def layer(self):
        """Provide a cache layer wrapping a fresh AnthropicCacheOptimizer."""
        return SemanticCacheLayer(
            AnthropicCacheOptimizer(),
            similarity_threshold=0.95,
            max_entries=100,
            ttl_seconds=60,
        )

    @pytest.fixture
    def context(self):
        """Provide an OptimizationContext for anthropic / claude-3-opus."""
        return OptimizationContext(provider="anthropic", model="claude-3-opus")

    def test_process_no_cache_hit(self, layer, context):
        """Processing messages that were never stored reports no cache hit."""
        conversation = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hello!"},
        ]

        outcome = layer.process(conversation, context)

        assert outcome.semantic_cache_hit is False
        assert outcome.cached_response is None

    def test_process_with_cache_hit(self, layer, context):
        """Processing previously stored messages returns the stored response."""
        conversation = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "What is 2+2?"},
        ]

        # Seed the layer, then replay the identical conversation.
        layer.store_response(conversation, {"text": "4"}, context)
        outcome = layer.process(conversation, context)

        assert outcome.semantic_cache_hit is True
        assert outcome.cached_response == {"text": "4"}

    def test_store_response(self, layer, context):
        """store_response() hands back a non-empty key."""
        conversation = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Tell me a joke"},
        ]

        stored_key = layer.store_response(
            conversation, {"text": "Why did..."}, context
        )

        assert stored_key is not None
        assert len(stored_key) > 0

    def test_get_stats(self, layer, context):
        """get_stats() exposes cache stats and the optimizer's name."""
        conversation = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hello"},
        ]
        layer.process(conversation, context)

        report = layer.get_stats()

        assert "semantic_cache" in report
        assert "provider_optimizer" in report
        assert report["provider_optimizer"] == "anthropic-cache-optimizer"

    def test_query_extraction(self, layer, context):
        """A multi-turn conversation can be stored and then hit again."""
        conversation = [
            {"role": "system", "content": "System"},
            {"role": "user", "content": "First question"},
            {"role": "assistant", "content": "Answer"},
            {"role": "user", "content": "Second question"},
        ]

        layer.store_response(conversation, {"text": "Response"}, context)

        # NOTE(review): presumably the layer keys on the last user message;
        # this test only proves that replaying the same conversation hits.
        outcome = layer.process(conversation, context)
        assert outcome.semantic_cache_hit is True

    def test_query_from_context(self, layer):
        """A context that carries its own query still produces a cache hit."""
        conversation = [
            {"role": "user", "content": "Some message"},
        ]
        query_context = OptimizationContext(
            query="Specific query for caching",
        )

        layer.store_response(conversation, {"text": "Response"}, query_context)
        outcome = layer.process(conversation, query_context)

        assert outcome.semantic_cache_hit is True

    def test_provider_optimizer_fallback(self, layer, context):
        """On a miss the result still carries a stable prefix hash."""
        conversation = [
            {"role": "system", "content": "You are helpful. " * 500},
            {"role": "user", "content": "New uncached question"},
        ]

        outcome = layer.process(conversation, context)

        # Nothing was stored beforehand, so this has to be a miss.
        assert outcome.semantic_cache_hit is False
        # A non-empty hash shows the wrapped optimizer processed the messages.
        assert outcome.metrics.stable_prefix_hash != ""

    def test_content_block_query_extraction(self, layer, context):
        """User content given as a list of text blocks can be cached and hit."""
        conversation = [
            {"role": "system", "content": "System"},
            {
                "role": "user",
                "content": [{"type": "text", "text": "Block format question"}],
            },
        ]

        layer.store_response(conversation, {"text": "Response"}, context)
        outcome = layer.process(conversation, context)

        assert outcome.semantic_cache_hit is True