Spaces:
Sleeping
Sleeping
File size: 4,525 Bytes
a686b1b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
"""
Testes para sistema de cache de embeddings.
"""
import pytest
import tempfile
import shutil
from pathlib import Path
from src.cache import EmbeddingCache
class TestEmbeddingCache:
    """Tests for the EmbeddingCache class.

    Every test runs against a throwaway directory supplied by the
    ``cache_dir`` fixture.
    """

    @staticmethod
    def _assert_same_embedding(expected, loaded, tolerance=1e-6):
        """Assert that ``loaded`` holds the same values as ``expected``.

        Checks that something was loaded, that both sequences have the same
        length, and that every pair of values differs by less than
        ``tolerance``.
        """
        assert loaded is not None
        assert len(loaded) == len(expected)
        assert all(abs(a - b) < tolerance for a, b in zip(expected, loaded))

    @pytest.fixture
    def cache_dir(self):
        """Yield a fresh temporary directory and remove it after the test."""
        temp_dir = tempfile.mkdtemp()
        yield temp_dir
        # ignore_errors keeps teardown from failing when the directory is
        # already gone by the time the test finishes.
        shutil.rmtree(temp_dir, ignore_errors=True)

    @pytest.fixture
    def cache(self, cache_dir):
        """Return an EmbeddingCache rooted at the temporary directory."""
        return EmbeddingCache(cache_dir=cache_dir)

    def test_cache_initialization(self, cache_dir):
        """The cache exposes its directory as a Path and the directory exists."""
        cache = EmbeddingCache(cache_dir=cache_dir)

        assert cache.cache_dir == Path(cache_dir)
        assert cache.cache_dir.exists()

    def test_get_cache_key(self, cache):
        """The same (text, model) pair always produces the same key."""
        text = "Texto de teste"
        model_id = "model-123"

        key1 = cache._get_cache_key(text, model_id)
        key2 = cache._get_cache_key(text, model_id)

        assert key1 == key2
        assert len(key1) == 64  # length of a SHA-256 hex digest

    def test_get_cache_key_different_inputs(self, cache):
        """Changing either the text or the model changes the key."""
        key1 = cache._get_cache_key("texto1", "model1")
        key2 = cache._get_cache_key("texto2", "model1")
        key3 = cache._get_cache_key("texto1", "model2")

        assert key1 != key2
        assert key1 != key3
        assert key2 != key3

    def test_save_and_load_embedding(self, cache):
        """A saved embedding is loaded back with the same values."""
        text = "Texto de teste"
        model_id = "model-123"
        embedding = [0.1, 0.2, 0.3, 0.4, 0.5]

        cache.save(text, model_id, embedding)
        loaded = cache.load(text, model_id)

        self._assert_same_embedding(embedding, loaded)

    def test_load_nonexistent(self, cache):
        """Loading an entry that was never saved returns None."""
        result = cache.load("texto inexistente", "model-123")

        assert result is None

    def test_has_cache(self, cache):
        """``has`` is false before saving an entry and true afterwards."""
        text = "Texto de teste"
        model_id = "model-123"
        embedding = [0.1, 0.2, 0.3]

        assert not cache.has(text, model_id)

        cache.save(text, model_id, embedding)

        assert cache.has(text, model_id)

    def test_clear_cache(self, cache):
        """``clear`` removes previously saved entries."""
        # Store several embeddings.
        for i in range(5):
            cache.save(f"texto{i}", "model", [float(i)])

        # The first and last entries are present before clearing.
        assert cache.has("texto0", "model")
        assert cache.has("texto4", "model")

        cache.clear()

        # The same entries are gone afterwards.
        assert not cache.has("texto0", "model")
        assert not cache.has("texto4", "model")

    def test_get_cache_stats(self, cache):
        """``get_stats`` reports the file count and total size of the cache."""
        # Empty cache.
        stats = cache.get_stats()
        assert stats['num_files'] == 0
        assert stats['total_size_bytes'] == 0

        # Store three embeddings of 100 values each.
        for i in range(3):
            cache.save(f"texto{i}", "model", [float(i)] * 100)

        stats = cache.get_stats()
        assert stats['num_files'] == 3
        assert stats['total_size_bytes'] > 0

    def test_cache_with_large_embedding(self, cache):
        """A 1000-value embedding round-trips with length and values intact."""
        text = "Texto longo"
        model_id = "model-large"
        embedding = [float(i) for i in range(1000)]

        cache.save(text, model_id, embedding)
        loaded = cache.load(text, model_id)

        # NOTE(review): the value comparison assumes the cache round-trips
        # values to within 1e-6, the same tolerance that
        # test_save_and_load_embedding already requires.
        self._assert_same_embedding(embedding, loaded)

    def test_cache_persistence(self, cache_dir):
        """An entry saved by one instance is readable by a second instance."""
        text = "Texto persistente"
        model_id = "model-123"
        embedding = [0.5, 0.6, 0.7]

        # First instance writes the entry.
        cache1 = EmbeddingCache(cache_dir=cache_dir)
        cache1.save(text, model_id, embedding)

        # Second instance over the same directory reads it back.
        cache2 = EmbeddingCache(cache_dir=cache_dir)
        loaded = cache2.load(text, model_id)

        # Compare the values too, not just the length, so a corrupted
        # entry cannot pass as "persisted".
        self._assert_same_embedding(embedding, loaded)
if __name__ == "__main__":
    # Run this file's tests verbosely when executed as a script, and
    # propagate pytest's exit code so a failing run exits non-zero.
    raise SystemExit(pytest.main([__file__, "-v"]))
|