rag_template / tests /test_reranking.py
Guilherme Favaron
Major update: Add hybrid search, reranking, multiple LLMs, and UI improvements
1b447de
"""
Testes para módulo de reranking
"""
import pytest
from src.reranking import Reranker
class TestReranker:
    """Unit tests for the ``Reranker`` class."""

    def test_initialization(self):
        """A default-constructed reranker reports the default model id."""
        instance = Reranker()
        assert instance.model_id == "cross-encoder/ms-marco-MiniLM-L-6-v2"
        # The model is expected to stay unset until first use (lazy loading).
        assert instance.model is None

    def test_initialization_custom_model(self):
        """A model id passed to the constructor is stored as given."""
        model_name = "cross-encoder/ms-marco-TinyBERT-L-2-v2"
        assert Reranker(model_id=model_name).model_id == model_name

    def test_rerank_empty_documents(self):
        """Reranking an empty document list yields an empty list."""
        assert Reranker().rerank("test query", []) == []

    def test_rerank_preserves_fields(self):
        """Reranked documents keep their fields and gain the score fields."""
        engine = Reranker()
        corpus = [
            {
                "id": 1,
                "title": "Doc 1",
                "content": "Machine learning is a subset of artificial intelligence",
                "score": 0.8,
            },
            {
                "id": 2,
                "title": "Doc 2",
                "content": "Python is a programming language",
                "score": 0.7,
            },
        ]

        ranked = engine.rerank("What is machine learning?", corpus)

        # Every input document must come back.
        assert len(ranked) == len(corpus)

        # Original keys survive, and the two reranking keys are present.
        required_keys = (
            "id",
            "title",
            "content",
            "score",
            "rerank_score",
            "original_score",
        )
        for entry in ranked:
            for key in required_keys:
                assert key in entry

    def test_rerank_with_top_k(self):
        """``top_k`` caps the number of documents returned."""
        engine = Reranker()
        corpus = []
        for idx in range(10):
            corpus.append(
                {
                    "id": idx,
                    "title": f"Doc {idx}",
                    "content": f"Content {idx}",
                    "score": 0.5,
                }
            )

        limited = engine.rerank("test query", corpus, top_k=3)
        assert len(limited) == 3

    def test_rerank_scores_are_numeric(self):
        """Both score fields of a reranked document are ``float`` values."""
        engine = Reranker()
        single_doc = {
            "id": 1,
            "title": "Test",
            "content": "Machine learning algorithms",
            "score": 0.9,
        }

        ranked = engine.rerank("machine learning", [single_doc])

        assert isinstance(ranked[0]['rerank_score'], float)
        assert isinstance(ranked[0]['original_score'], float)

    def test_get_rerank_comparison(self):
        """The comparison data reports old rank, new rank and the movement."""
        engine = Reranker()
        before = [
            {"id": 1, "content": "First", "score": 0.9},
            {"id": 2, "content": "Second", "score": 0.8},
            {"id": 3, "content": "Third", "score": 0.7},
        ]
        after = [
            {"id": 2, "content": "Second", "original_score": 0.8, "rerank_score": 0.95},
            {"id": 1, "content": "First", "original_score": 0.9, "rerank_score": 0.85},
            {"id": 3, "content": "Third", "original_score": 0.7, "rerank_score": 0.75},
        ]

        rows = engine.get_rerank_comparison(before, after)

        assert len(rows) == 3
        # Document 2 was second originally and is first after reranking.
        assert rows[0]['new_rank'] == 1
        assert rows[0]['original_rank'] == 2
        assert rows[0]['position_change'] == 1  # moved up one position

    def test_get_model_info(self):
        """The model info mapping exposes the expected keys and type."""
        details = Reranker().get_model_info()

        for key in ("model_id", "available", "type"):
            assert key in details
        assert details["type"] == "cross-encoder"

    def test_is_available(self):
        """``is_available`` returns a boolean."""
        engine = Reranker()
        # The model may not be installed in the test environment, so only
        # the return type is checked, not its value.
        assert isinstance(engine.is_available(), bool)
class TestRerankingIntegration:
    """Integration tests for reranking."""

    def test_reranking_changes_order(self):
        """Reranking should promote the most query-relevant document."""
        engine = Reranker()

        # Rows of (id, content, title, score). The incoming scores favour
        # the irrelevant documents: the last row has the lowest score but is
        # the one that actually answers the query.
        rows = (
            (1, "Python is a snake", "Animals", 0.9),
            (2, "Java is an island", "Geography", 0.8),
            (
                3,
                "Python is a programming language for data science and machine learning",
                "Programming",
                0.7,
            ),
        )
        corpus = [
            {"id": doc_id, "content": text, "title": heading, "score": score}
            for doc_id, text, heading, score in rows
        ]

        ranked = engine.rerank("What is Python programming?", corpus)

        # Assuming the cross-encoder works as intended, the programming
        # document ends up first with a strictly higher score than the next.
        assert ranked[0]['id'] == 3
        assert ranked[0]['rerank_score'] > ranked[1]['rerank_score']