File size: 4,525 Bytes
a686b1b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
"""
Tests for the embedding cache system.
"""

import pytest
import tempfile
import shutil
from pathlib import Path
from src.cache import EmbeddingCache


class TestEmbeddingCache:
    """Tests for the EmbeddingCache class.

    Every test receives its own temporary cache directory (the fixtures
    below are function-scoped), so no cached state leaks between tests.
    """

    @staticmethod
    def _assert_embeddings_close(expected, actual, tolerance=1e-6):
        """Assert that ``actual`` holds the same values as ``expected``.

        Args:
            expected: The embedding that was handed to ``cache.save``.
            actual: The value returned by ``cache.load``.
            tolerance: Maximum absolute difference allowed per element.
        """
        assert actual is not None
        assert len(actual) == len(expected)
        assert all(abs(a - b) < tolerance for a, b in zip(expected, actual))

    @pytest.fixture
    def cache_dir(self):
        """Yield the path of a temporary directory to be used as cache root.

        The directory is removed when the test finishes; the context manager
        takes care of the cleanup.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            yield temp_dir

    @pytest.fixture
    def cache(self, cache_dir):
        """Return an EmbeddingCache rooted at the temporary directory."""
        return EmbeddingCache(cache_dir=cache_dir)

    def test_cache_initialization(self, cache_dir):
        """Check that construction stores the directory and it exists."""
        cache = EmbeddingCache(cache_dir=cache_dir)
        assert cache.cache_dir == Path(cache_dir)
        assert cache.cache_dir.exists()

    def test_get_cache_key(self, cache):
        """Check that the cache key is deterministic and 64 chars long."""
        text = "Texto de teste"
        model_id = "model-123"

        key1 = cache._get_cache_key(text, model_id)
        key2 = cache._get_cache_key(text, model_id)

        assert key1 == key2
        assert len(key1) == 64  # length of a SHA-256 hex digest

    def test_get_cache_key_different_inputs(self, cache):
        """Check that different text or model ids give different keys."""
        key1 = cache._get_cache_key("texto1", "model1")
        key2 = cache._get_cache_key("texto2", "model1")
        key3 = cache._get_cache_key("texto1", "model2")

        assert key1 != key2
        assert key1 != key3
        assert key2 != key3

    def test_save_and_load_embedding(self, cache):
        """Check that a saved embedding can be loaded back unchanged."""
        text = "Texto de teste"
        model_id = "model-123"
        embedding = [0.1, 0.2, 0.3, 0.4, 0.5]

        cache.save(text, model_id, embedding)
        loaded = cache.load(text, model_id)

        self._assert_embeddings_close(embedding, loaded)

    def test_load_nonexistent(self, cache):
        """Check that loading an entry that was never saved gives None."""
        result = cache.load("texto inexistente", "model-123")
        assert result is None

    def test_has_cache(self, cache):
        """Check that ``has`` reflects whether an entry was saved."""
        text = "Texto de teste"
        model_id = "model-123"
        embedding = [0.1, 0.2, 0.3]

        assert not cache.has(text, model_id)

        cache.save(text, model_id, embedding)

        assert cache.has(text, model_id)

    def test_clear_cache(self, cache):
        """Check that ``clear`` removes previously saved entries."""
        # Populate the cache with several embeddings.
        for i in range(5):
            cache.save(f"texto{i}", "model", [float(i)])

        # Sanity check: first and last entries are present.
        assert cache.has("texto0", "model")
        assert cache.has("texto4", "model")

        cache.clear()

        # Both entries must be gone after clearing.
        assert not cache.has("texto0", "model")
        assert not cache.has("texto4", "model")

    def test_get_cache_stats(self, cache):
        """Check the file count and total size reported by ``get_stats``."""
        # An empty cache reports no files and no bytes.
        stats = cache.get_stats()
        assert stats['num_files'] == 0
        assert stats['total_size_bytes'] == 0

        # Store three embeddings.
        for i in range(3):
            cache.save(f"texto{i}", "model", [float(i)] * 100)

        # The statistics must account for the three new entries.
        stats = cache.get_stats()
        assert stats['num_files'] == 3
        assert stats['total_size_bytes'] > 0

    def test_cache_with_large_embedding(self, cache):
        """Check that a 1000-element embedding round-trips with its values."""
        text = "Texto longo"
        model_id = "model-large"
        embedding = [float(i) for i in range(1000)]

        cache.save(text, model_id, embedding)
        loaded = cache.load(text, model_id)

        assert loaded is not None
        assert len(loaded) == 1000
        # Length alone would not catch corrupted values, so compare them too.
        self._assert_embeddings_close(embedding, loaded)

    def test_cache_persistence(self, cache_dir):
        """Check that a second instance on the same directory sees the data."""
        text = "Texto persistente"
        model_id = "model-123"
        embedding = [0.5, 0.6, 0.7]

        # First instance writes the entry.
        cache1 = EmbeddingCache(cache_dir=cache_dir)
        cache1.save(text, model_id, embedding)

        # Second instance, same directory, must read the same values back.
        cache2 = EmbeddingCache(cache_dir=cache_dir)
        loaded = cache2.load(text, model_id)

        self._assert_embeddings_close(embedding, loaded)


if __name__ == "__main__":
    # Run this module's tests verbosely when the file is executed directly.
    # pytest.main returns the exit status; propagate it so that a failing run
    # does not exit with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))