Spaces:
Sleeping
Sleeping
File size: 5,613 Bytes
1b447de | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 | """
Testes para módulo de reranking
"""
import pytest
from src.reranking import Reranker
class TestReranker:
    """Unit tests for the Reranker class."""

    def test_initialization(self):
        """A default Reranker exposes the default model id and no loaded model."""
        instance = Reranker()

        assert instance.model_id == "cross-encoder/ms-marco-MiniLM-L-6-v2"
        # The test expects the model attribute to stay None right after
        # construction (the original author labels this "lazy loading").
        assert instance.model is None

    def test_initialization_custom_model(self):
        """A model id passed to the constructor is stored on the instance."""
        custom_model = "cross-encoder/ms-marco-TinyBERT-L-2-v2"

        assert Reranker(model_id=custom_model).model_id == custom_model

    def test_rerank_empty_documents(self):
        """Reranking an empty document list yields an empty list."""
        instance = Reranker()

        assert instance.rerank("test query", []) == []

    def test_rerank_preserves_fields(self):
        """Reranked documents keep their input fields and gain score fields."""
        first_doc = {
            "id": 1,
            "title": "Doc 1",
            "content": "Machine learning is a subset of artificial intelligence",
            "score": 0.8,
        }
        second_doc = {
            "id": 2,
            "title": "Doc 2",
            "content": "Python is a programming language",
            "score": 0.7,
        }
        docs = [first_doc, second_doc]

        reranked = Reranker().rerank("What is machine learning?", docs)

        # No document may be dropped when top_k is not given.
        assert len(reranked) == len(docs)

        # Input fields first, then the two fields expected from reranking.
        expected_fields = (
            "id",
            "title",
            "content",
            "score",
            "rerank_score",
            "original_score",
        )
        for doc in reranked:
            for field in expected_fields:
                assert field in doc

    def test_rerank_with_top_k(self):
        """Passing top_k limits the number of returned documents."""
        docs = []
        for i in range(10):
            docs.append(
                {"id": i, "title": f"Doc {i}", "content": f"Content {i}", "score": 0.5}
            )

        reranked = Reranker().rerank("test query", docs, top_k=3)

        assert len(reranked) == 3

    def test_rerank_scores_are_numeric(self):
        """Both score fields on a reranked document are Python floats."""
        single_doc = {
            "id": 1,
            "title": "Test",
            "content": "Machine learning algorithms",
            "score": 0.9,
        }

        reranked = Reranker().rerank("machine learning", [single_doc])

        top = reranked[0]
        for score_field in ("rerank_score", "original_score"):
            assert isinstance(top[score_field], float)

    def test_get_rerank_comparison(self):
        """The comparison data reports old rank, new rank and position change."""
        before = [
            {"id": 1, "content": "First", "score": 0.9},
            {"id": 2, "content": "Second", "score": 0.8},
            {"id": 3, "content": "Third", "score": 0.7},
        ]
        after = [
            {"id": 2, "content": "Second", "original_score": 0.8, "rerank_score": 0.95},
            {"id": 1, "content": "First", "original_score": 0.9, "rerank_score": 0.85},
            {"id": 3, "content": "Third", "original_score": 0.7, "rerank_score": 0.75},
        ]

        comparison = Reranker().get_rerank_comparison(before, after)

        assert len(comparison) == 3

        # Document 2 was second in `before` and is first in `after`.
        leader = comparison[0]
        assert leader["new_rank"] == 1
        assert leader["original_rank"] == 2
        assert leader["position_change"] == 1  # moved up one position

    def test_get_model_info(self):
        """The model info mapping has the expected keys and a cross-encoder type."""
        info = Reranker().get_model_info()

        for key in ("model_id", "available", "type"):
            assert key in info
        assert info["type"] == "cross-encoder"

    def test_is_available(self):
        """The availability check returns a bool."""
        # The outcome may differ between environments (e.g. the model is
        # not installed), so only the return type is checked.
        outcome = Reranker().is_available()

        assert isinstance(outcome, bool)
class TestRerankingIntegration:
    """Integration tests for reranking."""

    def test_reranking_changes_order(self):
        """Reranking moves the most query-relevant document to the top."""
        # The incoming scores are deliberately at odds with relevance to the
        # query: the only document about programming has the lowest score.
        snake_doc = {
            "id": 1,
            "content": "Python is a snake",
            "title": "Animals",
            "score": 0.9,  # high score, but not relevant
        }
        island_doc = {
            "id": 2,
            "content": "Java is an island",
            "title": "Geography",
            "score": 0.8,
        }
        programming_doc = {
            "id": 3,
            "content": "Python is a programming language for data science and machine learning",
            "title": "Programming",
            "score": 0.7,  # low score, but highly relevant
        }
        docs = [snake_doc, island_doc, programming_doc]

        reranked = Reranker().rerank("What is Python programming?", docs)

        # The programming document should come first after reranking
        # (assuming the cross-encoder behaves correctly).
        best = reranked[0]
        assert best["id"] == 3
        assert best["rerank_score"] > reranked[1]["rerank_score"]
|