Spaces:
Running
Running
File size: 9,928 Bytes
"""Tests for SemanticCache and SemanticCacheLayer."""
import time
import pytest
from headroom.cache import (
AnthropicCacheOptimizer,
OptimizationContext,
SemanticCache,
SemanticCacheLayer,
)
from headroom.cache.semantic import SemanticCacheConfig
class TestSemanticCacheConfig:
    """Checks on a SemanticCacheConfig built without any arguments."""

    def test_default_values(self):
        """A no-argument config exposes the expected default settings."""
        defaults = SemanticCacheConfig()

        # Value-compared settings, checked in a fixed order.
        expected_values = (
            ("similarity_threshold", 0.95),
            ("max_entries", 1000),
            ("ttl_seconds", 300),
        )
        for field_name, expected in expected_values:
            assert getattr(defaults, field_name) == expected

        # Identity check: the flag must be the bool True, not merely truthy.
        assert defaults.use_exact_matching is True
class TestSemanticCache:
    """Exercise SemanticCache: exact-hash lookups, eviction, TTL, stats, similarity."""

    @pytest.fixture
    def cache(self):
        """Return a SemanticCache built with max_entries=10 and ttl_seconds=60."""
        config = SemanticCacheConfig(
            max_entries=10,
            ttl_seconds=60,
        )
        return SemanticCache(config)

    def test_put_and_get_exact_match(self, cache):
        """An entry stored under a messages_hash is returned for the same query and hash."""
        response = {"text": "Hello, how can I help?"}
        cache.put("What is the weather?", response, messages_hash="hash123")

        entry = cache.get("What is the weather?", messages_hash="hash123")

        assert entry is not None
        assert entry.response == response

    def test_get_miss(self, cache):
        """Looking up a query that was never stored returns None."""
        entry = cache.get("Unknown query", messages_hash="unknown")
        assert entry is None

    def test_lru_eviction(self):
        """With max_entries=3, a fourth put evicts the least recently used entry."""
        config = SemanticCacheConfig(max_entries=3)
        cache = SemanticCache(config)

        # Fill cache
        cache.put("query1", "response1", messages_hash="h1")
        cache.put("query2", "response2", messages_hash="h2")
        cache.put("query3", "response3", messages_hash="h3")

        # Access query1 to make it recently used
        cache.get("query1", messages_hash="h1")

        # Add new entry, should evict query2 (oldest unused)
        cache.put("query4", "response4", messages_hash="h4")

        # query1 should still be there (recently accessed)
        assert cache.get("query1", messages_hash="h1") is not None
        # query2 should be evicted
        assert cache.get("query2", messages_hash="h2") is None
        # query3 and query4 should be there
        assert cache.get("query3", messages_hash="h3") is not None
        assert cache.get("query4", messages_hash="h4") is not None

    def test_ttl_expiration(self):
        """An entry is retrievable right after put and gone once ttl_seconds elapses."""
        config = SemanticCacheConfig(ttl_seconds=1)
        cache = SemanticCache(config)

        cache.put("expiring query", "response", messages_hash="exp1")

        # Should be available immediately
        assert cache.get("expiring query", messages_hash="exp1") is not None

        # Wait for TTL. A real sleep is used because the cache's clock source
        # is not visible from this test module, so it cannot be patched safely.
        time.sleep(1.1)

        # Should be expired
        assert cache.get("expiring query", messages_hash="exp1") is None

    def test_invalidate(self, cache):
        """Invalidating the key returned by put makes the entry unretrievable."""
        key = cache.put("query", "response", messages_hash="inv1")
        assert cache.get("query", messages_hash="inv1") is not None

        cache.invalidate(key)

        assert cache.get("query", messages_hash="inv1") is None

    def test_clear(self, cache):
        """clear() resets the entry count to zero and drops the stored entries."""
        cache.put("query1", "response1", messages_hash="c1")
        cache.put("query2", "response2", messages_hash="c2")

        cache.clear()

        stats = cache.get_stats()
        assert stats["entries"] == 0
        # Looked up after the stats assertion so these gets cannot influence it.
        assert cache.get("query1", messages_hash="c1") is None
        assert cache.get("query2", messages_hash="c2") is None

    def test_stats(self, cache):
        """get_stats() reports entries, hits, misses and the resulting hit rate."""
        cache.put("query", "response", messages_hash="s1")
        cache.get("query", messages_hash="s1")  # hit
        cache.get("unknown", messages_hash="unknown")  # miss

        stats = cache.get_stats()

        assert stats["entries"] == 1
        assert stats["hits"] == 1
        assert stats["misses"] == 1
        # hit_rate is a float ratio; compare with a tolerance rather than ==.
        assert stats["hit_rate"] == pytest.approx(0.5)

    def test_access_count(self, cache):
        """Each successful get increments the entry's access_count."""
        cache.put("query", "response", messages_hash="ac1")

        # Access multiple times
        entry = None
        for _ in range(5):
            entry = cache.get("query", messages_hash="ac1")

        # Fail with a clear assertion (not an AttributeError) on a cache miss.
        assert entry is not None
        # Initial count is 1, plus 5 accesses = 6
        assert entry.access_count == 6

    def test_semantic_similarity_with_embedding_fn(self):
        """A custom embedding_fn lets a differently worded query hit a stored entry."""

        def mock_embedding(text: str) -> list[float]:
            # Simple mock: return consistent embedding for similar queries.
            # Each topic maps to its own axis, so "weather" texts share one
            # vector and "time" texts get a different one.
            lowered = text.lower()
            if "weather" in lowered:
                return [1.0, 0.0, 0.0]
            if "time" in lowered:
                return [0.0, 1.0, 0.0]
            return [0.0, 0.0, 1.0]

        config = SemanticCacheConfig(similarity_threshold=0.9)
        cache = SemanticCache(config, embedding_fn=mock_embedding)

        # Store a weather query
        cache.put("What is the weather today?", "It's sunny", messages_hash="w1")

        # Similar weather query should hit. No messages_hash is passed, so the
        # match presumably comes from embedding similarity -- TODO confirm.
        entry = cache.get("How is the weather?")
        assert entry is not None
        assert entry.response == "It's sunny"

        # Different query should miss
        entry = cache.get("What time is it?")
        assert entry is None
class TestSemanticCacheLayer:
    """Round-trip tests for SemanticCacheLayer wrapped around AnthropicCacheOptimizer."""

    @pytest.fixture
    def layer(self):
        """Build a SemanticCacheLayer on top of a fresh AnthropicCacheOptimizer."""
        layer_settings = {
            "similarity_threshold": 0.95,
            "max_entries": 100,
            "ttl_seconds": 60,
        }
        return SemanticCacheLayer(AnthropicCacheOptimizer(), **layer_settings)

    @pytest.fixture
    def context(self):
        """Build an OptimizationContext for provider "anthropic", model "claude-3-opus"."""
        return OptimizationContext(provider="anthropic", model="claude-3-opus")

    def test_process_no_cache_hit(self, layer, context):
        """Processing a conversation that was never stored reports no cache hit."""
        system_turn = {"role": "system", "content": "You are helpful."}
        user_turn = {"role": "user", "content": "Hello!"}

        outcome = layer.process([system_turn, user_turn], context)

        assert outcome.semantic_cache_hit is False
        assert outcome.cached_response is None

    def test_process_with_cache_hit(self, layer, context):
        """Processing a conversation after storing its response returns that response."""
        conversation = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "What is 2+2?"},
        ]

        # Seed the layer, then replay the identical conversation.
        layer.store_response(conversation, {"text": "4"}, context)
        outcome = layer.process(conversation, context)

        assert outcome.semantic_cache_hit is True
        assert outcome.cached_response == {"text": "4"}

    def test_store_response(self, layer, context):
        """store_response hands back a non-empty key."""
        conversation = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Tell me a joke"},
        ]

        stored_key = layer.store_response(conversation, {"text": "Why did..."}, context)

        assert stored_key is not None
        assert len(stored_key) > 0

    def test_get_stats(self, layer, context):
        """get_stats exposes both the semantic cache and provider optimizer sections."""
        conversation = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hello"},
        ]
        layer.process(conversation, context)

        report = layer.get_stats()

        for section in ("semantic_cache", "provider_optimizer"):
            assert section in report
        assert report["provider_optimizer"] == "anthropic-cache-optimizer"

    def test_query_extraction(self, layer, context):
        """A multi-turn conversation round-trips through store and process as a hit."""
        conversation = [
            {"role": "system", "content": "System"},
            {"role": "user", "content": "First question"},
            {"role": "assistant", "content": "Answer"},
            {"role": "user", "content": "Second question"},
        ]

        layer.store_response(conversation, {"text": "Response"}, context)

        # The layer is expected to treat the last user message as the query.
        outcome = layer.process(conversation, context)
        assert outcome.semantic_cache_hit is True

    def test_query_from_context(self, layer):
        """A context carrying an explicit query round-trips as a cache hit."""
        conversation = [{"role": "user", "content": "Some message"}]
        query_context = OptimizationContext(query="Specific query for caching")

        layer.store_response(conversation, {"text": "Response"}, query_context)
        outcome = layer.process(conversation, query_context)

        assert outcome.semantic_cache_hit is True

    def test_provider_optimizer_fallback(self, layer, context):
        """On a cache miss the result still carries a non-empty stable prefix hash."""
        long_system_prompt = "You are helpful. " * 500
        conversation = [
            {"role": "system", "content": long_system_prompt},
            {"role": "user", "content": "New uncached question"},
        ]

        outcome = layer.process(conversation, context)

        # Nothing was stored, so the semantic cache must not report a hit...
        assert outcome.semantic_cache_hit is False
        # ...and the provider optimizer should have filled in its metrics.
        assert outcome.metrics.stable_prefix_hash != ""

    def test_content_block_query_extraction(self, layer, context):
        """A user message given as a list of content blocks round-trips as a hit."""
        block_style_turn = {
            "role": "user",
            "content": [{"type": "text", "text": "Block format question"}],
        }
        conversation = [{"role": "system", "content": "System"}, block_style_turn]

        layer.store_response(conversation, {"text": "Response"}, context)
        outcome = layer.process(conversation, context)

        assert outcome.semantic_cache_hit is True
|