File size: 5,613 Bytes
1b447de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
"""
Testes para módulo de reranking
"""
import pytest
from src.reranking import Reranker


class TestReranker:
    """Unit tests for the Reranker class."""

    def test_initialization(self):
        """A default Reranker exposes the default model id and no loaded model."""
        instance = Reranker()
        expected_id = "cross-encoder/ms-marco-MiniLM-L-6-v2"
        assert instance.model_id == expected_id
        # The model is expected to be loaded lazily, so it starts out as None.
        assert instance.model is None

    def test_initialization_custom_model(self):
        """A model id handed to the constructor is stored unchanged."""
        wanted = "cross-encoder/ms-marco-TinyBERT-L-2-v2"
        assert Reranker(model_id=wanted).model_id == wanted

    def test_rerank_empty_documents(self):
        """Reranking an empty document list gives back an empty list."""
        assert Reranker().rerank("test query", []) == []

    def test_rerank_preserves_fields(self):
        """Reranked documents keep their input keys and carry the score keys."""
        instance = Reranker()

        first_doc = {
            "id": 1,
            "title": "Doc 1",
            "content": "Machine learning is a subset of artificial intelligence",
            "score": 0.8,
        }
        second_doc = {
            "id": 2,
            "title": "Doc 2",
            "content": "Python is a programming language",
            "score": 0.7,
        }
        candidates = [first_doc, second_doc]

        output = instance.rerank("What is machine learning?", candidates)

        # No document may be dropped when no top_k limit is given.
        assert len(output) == len(candidates)

        # Every returned document must expose the input keys plus both
        # score keys added by the reranker.
        required_keys = (
            "id",
            "title",
            "content",
            "score",
            "rerank_score",
            "original_score",
        )
        for entry in output:
            for key in required_keys:
                assert key in entry

    def test_rerank_with_top_k(self):
        """The top_k argument caps the number of returned documents."""
        instance = Reranker()

        candidates = [
            {"id": i, "title": f"Doc {i}", "content": f"Content {i}", "score": 0.5}
            for i in range(10)
        ]

        output = instance.rerank("test query", candidates, top_k=3)

        assert len(output) == 3

    def test_rerank_scores_are_numeric(self):
        """Both score fields on a reranked document are floats."""
        instance = Reranker()

        only_doc = {
            "id": 1,
            "title": "Test",
            "content": "Machine learning algorithms",
            "score": 0.9,
        }

        output = instance.rerank("machine learning", [only_doc])

        for score_key in ('rerank_score', 'original_score'):
            assert isinstance(output[0][score_key], float)

    def test_get_rerank_comparison(self):
        """Comparison data reports the old rank, new rank and position change."""
        instance = Reranker()

        before = [
            {"id": doc_id, "content": text, "score": score}
            for doc_id, text, score in (
                (1, "First", 0.9),
                (2, "Second", 0.8),
                (3, "Third", 0.7),
            )
        ]

        after = [
            {
                "id": doc_id,
                "content": text,
                "original_score": old_score,
                "rerank_score": new_score,
            }
            for doc_id, text, old_score, new_score in (
                (2, "Second", 0.8, 0.95),
                (1, "First", 0.9, 0.85),
                (3, "Third", 0.7, 0.75),
            )
        ]

        comparison = instance.get_rerank_comparison(before, after)

        assert len(comparison) == 3
        leader = comparison[0]
        assert leader['new_rank'] == 1
        assert leader['original_rank'] == 2
        # Moving from rank 2 to rank 1 counts as moving up one position.
        assert leader['position_change'] == 1

    def test_get_model_info(self):
        """Model info carries the expected keys and the cross-encoder type."""
        details = Reranker().get_model_info()

        for key in ("model_id", "available", "type"):
            assert key in details
        assert details["type"] == "cross-encoder"

    def test_is_available(self):
        """The availability check returns a boolean."""
        instance = Reranker()
        # Per the original note, this may fail if the model is not installed,
        # so only the return type is asserted here, not its value.
        assert isinstance(instance.is_available(), bool)


class TestRerankingIntegration:
    """Integration tests for reranking."""

    def test_reranking_changes_order(self):
        """Reranking moves the most query-relevant document to the top."""
        instance = Reranker()

        # The retrieval scores deliberately disagree with relevance: the first
        # row has the highest score but is off-topic, while the last row has
        # the lowest score but is the most relevant to the query.
        rows = (
            (1, "Python is a snake", "Animals", 0.9),
            (2, "Java is an island", "Geography", 0.8),
            (
                3,
                "Python is a programming language for data science and machine learning",
                "Programming",
                0.7,
            ),
        )
        candidates = [
            {"id": doc_id, "content": text, "title": heading, "score": score}
            for doc_id, text, heading, score in rows
        ]

        output = instance.rerank("What is Python programming?", candidates)

        # The programming document should lead after reranking (assuming the
        # cross-encoder works correctly), strictly ahead of the runner-up.
        assert output[0]['id'] == 3
        assert output[0]['rerank_score'] > output[1]['rerank_score']