Spaces:

minhtudragon
/

headroom

Running

App Files Files Community

headroom / tests /test_cache /test_semantic.py

chopratejas

Fix all ruff lint and format errors for CI

e4a41fa 5 months ago

raw

history blame contribute delete

9.93 kB

	"""Tests for SemanticCache and SemanticCacheLayer."""

	import time

	import pytest

	from headroom.cache import (
	AnthropicCacheOptimizer,
	OptimizationContext,
	SemanticCache,
	SemanticCacheLayer,
	)
	from headroom.cache.semantic import SemanticCacheConfig


	class TestSemanticCacheConfig:
	    """Checks for the defaults exposed by SemanticCacheConfig."""

	    def test_default_values(self):
	        """A config built with no arguments carries the expected defaults."""
	        defaults = SemanticCacheConfig()

	        # Value comparisons, checked in a fixed order.
	        expected_values = (
	            ("similarity_threshold", 0.95),
	            ("max_entries", 1000),
	            ("ttl_seconds", 300),
	        )
	        for attribute, expected in expected_values:
	            assert getattr(defaults, attribute) == expected

	        # Identity check: the flag must be the bool True, not merely truthy.
	        assert defaults.use_exact_matching is True


	class TestSemanticCache:
	    """Test SemanticCache functionality.

	    Covers exact-hash lookups, misses, LRU eviction, TTL expiry,
	    invalidation, clearing, statistics, access counting and lookups
	    driven by a custom embedding function.
	    """

	    @pytest.fixture
	    def cache(self):
	        """Create a cache instance limited to 10 entries with a 60 second TTL."""
	        config = SemanticCacheConfig(
	            max_entries=10,
	            ttl_seconds=60,
	        )
	        return SemanticCache(config)

	    def test_put_and_get_exact_match(self, cache):
	        """Test storing and retrieving with exact hash matching."""
	        response = {"text": "Hello, how can I help?"}
	        cache.put("What is the weather?", response, messages_hash="hash123")

	        entry = cache.get("What is the weather?", messages_hash="hash123")
	        assert entry is not None
	        assert entry.response == response

	    def test_get_miss(self, cache):
	        """Test that looking up a query that was never stored returns None."""
	        entry = cache.get("Unknown query", messages_hash="unknown")
	        assert entry is None

	    def test_lru_eviction(self):
	        """Test LRU eviction when at capacity."""
	        config = SemanticCacheConfig(max_entries=3)
	        cache = SemanticCache(config)

	        # Fill cache
	        cache.put("query1", "response1", messages_hash="h1")
	        cache.put("query2", "response2", messages_hash="h2")
	        cache.put("query3", "response3", messages_hash="h3")

	        # Access query1 to make it recently used
	        cache.get("query1", messages_hash="h1")

	        # Add new entry, should evict query2 (oldest unused)
	        cache.put("query4", "response4", messages_hash="h4")

	        # query1 should still be there (recently accessed)
	        assert cache.get("query1", messages_hash="h1") is not None
	        # query2 should be evicted
	        assert cache.get("query2", messages_hash="h2") is None
	        # query3 and query4 should be there
	        assert cache.get("query3", messages_hash="h3") is not None
	        assert cache.get("query4", messages_hash="h4") is not None

	    def test_ttl_expiration(self):
	        """Test TTL expiration.

	        Note: this test sleeps for real, so it takes a little over one second.
	        """
	        config = SemanticCacheConfig(ttl_seconds=1)
	        cache = SemanticCache(config)

	        cache.put("expiring query", "response", messages_hash="exp1")

	        # Should be available immediately
	        assert cache.get("expiring query", messages_hash="exp1") is not None

	        # Wait slightly longer than the 1 second TTL
	        time.sleep(1.1)

	        # Should be expired
	        assert cache.get("expiring query", messages_hash="exp1") is None

	    def test_invalidate(self, cache):
	        """Test invalidating an entry via the key returned by put()."""
	        key = cache.put("query", "response", messages_hash="inv1")

	        assert cache.get("query", messages_hash="inv1") is not None

	        cache.invalidate(key)

	        assert cache.get("query", messages_hash="inv1") is None

	    def test_clear(self, cache):
	        """Test that clear() leaves the reported entry count at zero."""
	        cache.put("query1", "response1", messages_hash="c1")
	        cache.put("query2", "response2", messages_hash="c2")

	        cache.clear()

	        stats = cache.get_stats()
	        assert stats["entries"] == 0

	    def test_stats(self, cache):
	        """Test statistics after one stored entry, one hit and one miss."""
	        cache.put("query", "response", messages_hash="s1")
	        cache.get("query", messages_hash="s1")  # hit
	        cache.get("unknown", messages_hash="unknown")  # miss

	        stats = cache.get_stats()

	        assert stats["entries"] == 1
	        assert stats["hits"] == 1
	        assert stats["misses"] == 1
	        # One hit out of two lookups; compare the ratio with a tolerance
	        # rather than relying on exact float equality.
	        assert stats["hit_rate"] == pytest.approx(0.5)

	    def test_access_count(self, cache):
	        """Test that access count is tracked."""
	        cache.put("query", "response", messages_hash="ac1")

	        # Bind the name before the loop and check every lookup, so that a
	        # miss fails here as a clear assertion instead of surfacing below
	        # as an AttributeError on None.
	        entry = None
	        for _ in range(5):
	            entry = cache.get("query", messages_hash="ac1")
	            assert entry is not None

	        # Initial count is 1, plus 5 accesses = 6
	        assert entry.access_count == 6

	    def test_semantic_similarity_with_embedding_fn(self):
	        """Test semantic similarity with custom embedding function."""

	        def mock_embedding(text: str) -> list[float]:
	            """Return one of three fixed vectors chosen by keyword in *text*."""
	            # Simple mock: return consistent embedding for similar queries
	            if "weather" in text.lower():
	                return [1.0, 0.0, 0.0]
	            elif "time" in text.lower():
	                return [0.0, 1.0, 0.0]
	            else:
	                return [0.0, 0.0, 1.0]

	        config = SemanticCacheConfig(similarity_threshold=0.9)
	        cache = SemanticCache(config, embedding_fn=mock_embedding)

	        # Store a weather query
	        cache.put("What is the weather today?", "It's sunny", messages_hash="w1")

	        # Similar weather query should hit. No messages_hash is passed, so
	        # the match is presumably made through the embedding function.
	        entry = cache.get("How is the weather?")
	        assert entry is not None
	        assert entry.response == "It's sunny"

	        # Different query should miss
	        entry = cache.get("What time is it?")
	        assert entry is None


	class TestSemanticCacheLayer:
	    """Exercise SemanticCacheLayer wrapped around an AnthropicCacheOptimizer."""

	    @staticmethod
	    def _conversation(system_text, user_content):
	        """Build a two-message chat: a system prompt followed by one user turn."""
	        return [
	            {"role": "system", "content": system_text},
	            {"role": "user", "content": user_content},
	        ]

	    @pytest.fixture
	    def layer(self):
	        """Provide a cache layer backed by the Anthropic optimizer."""
	        layer_options = {
	            "similarity_threshold": 0.95,
	            "max_entries": 100,
	            "ttl_seconds": 60,
	        }
	        return SemanticCacheLayer(AnthropicCacheOptimizer(), **layer_options)

	    @pytest.fixture
	    def context(self):
	        """Provide an optimization context for the anthropic provider."""
	        return OptimizationContext(provider="anthropic", model="claude-3-opus")

	    def test_process_no_cache_hit(self, layer, context):
	        """Processing messages with nothing stored reports no cache hit."""
	        chat = self._conversation("You are helpful.", "Hello!")

	        outcome = layer.process(chat, context)

	        assert outcome.semantic_cache_hit is False
	        assert outcome.cached_response is None

	    def test_process_with_cache_hit(self, layer, context):
	        """Processing messages whose response was stored reports a hit."""
	        chat = self._conversation("You are helpful.", "What is 2+2?")

	        # Seed the layer, then replay the very same messages.
	        layer.store_response(chat, {"text": "4"}, context)
	        outcome = layer.process(chat, context)

	        assert outcome.semantic_cache_hit is True
	        assert outcome.cached_response == {"text": "4"}

	    def test_store_response(self, layer, context):
	        """Storing a response hands back a non-empty key."""
	        chat = self._conversation("You are helpful.", "Tell me a joke")

	        cache_key = layer.store_response(chat, {"text": "Why did..."}, context)

	        assert cache_key is not None
	        assert len(cache_key) > 0

	    def test_get_stats(self, layer, context):
	        """Layer statistics expose both cache and optimizer sections."""
	        layer.process(self._conversation("You are helpful.", "Hello"), context)

	        layer_stats = layer.get_stats()

	        for section in ("semantic_cache", "provider_optimizer"):
	            assert section in layer_stats
	        assert layer_stats["provider_optimizer"] == "anthropic-cache-optimizer"

	    def test_query_extraction(self, layer, context):
	        """A multi-turn conversation that was stored is found again."""
	        history = self._conversation("System", "First question")
	        history += [
	            {"role": "assistant", "content": "Answer"},
	            {"role": "user", "content": "Second question"},
	        ]

	        layer.store_response(history, {"text": "Response"}, context)

	        # The query is expected to come from the last user message.
	        outcome = layer.process(history, context)
	        assert outcome.semantic_cache_hit is True

	    def test_query_from_context(self, layer):
	        """A context that carries an explicit query still yields a hit."""
	        chat = [{"role": "user", "content": "Some message"}]
	        query_context = OptimizationContext(query="Specific query for caching")

	        layer.store_response(chat, {"text": "Response"}, query_context)
	        outcome = layer.process(chat, query_context)

	        assert outcome.semantic_cache_hit is True

	    def test_provider_optimizer_fallback(self, layer, context):
	        """On a cache miss the provider optimizer still processes the messages."""
	        long_system_prompt = "You are helpful. " * 500
	        chat = self._conversation(long_system_prompt, "New uncached question")

	        outcome = layer.process(chat, context)

	        # Nothing was stored, so this must be a miss ...
	        assert outcome.semantic_cache_hit is False
	        # ... and the optimizer must have filled in a prefix hash.
	        assert outcome.metrics.stable_prefix_hash != ""

	    def test_content_block_query_extraction(self, layer, context):
	        """User content given as a list of text blocks is cached and found."""
	        block_content = [{"type": "text", "text": "Block format question"}]
	        chat = self._conversation("System", block_content)

	        layer.store_response(chat, {"text": "Response"}, context)
	        outcome = layer.process(chat, context)

	        assert outcome.semantic_cache_hit is True