""" Testes para módulo de reranking """ import pytest from src.reranking import Reranker class TestReranker: """Testes para classe Reranker""" def test_initialization(self): """Testa inicialização do reranker""" reranker = Reranker() assert reranker.model_id == "cross-encoder/ms-marco-MiniLM-L-6-v2" assert reranker.model is None # Lazy loading def test_initialization_custom_model(self): """Testa inicialização com modelo customizado""" custom_model = "cross-encoder/ms-marco-TinyBERT-L-2-v2" reranker = Reranker(model_id=custom_model) assert reranker.model_id == custom_model def test_rerank_empty_documents(self): """Testa reranking com lista vazia""" reranker = Reranker() result = reranker.rerank("test query", []) assert result == [] def test_rerank_preserves_fields(self): """Testa se reranking preserva campos dos documentos""" reranker = Reranker() docs = [ { "id": 1, "title": "Doc 1", "content": "Machine learning is a subset of artificial intelligence", "score": 0.8 }, { "id": 2, "title": "Doc 2", "content": "Python is a programming language", "score": 0.7 } ] reranked = reranker.rerank("What is machine learning?", docs) # Verifica que todos os documentos foram reordenados assert len(reranked) == len(docs) # Verifica que campos foram preservados for doc in reranked: assert "id" in doc assert "title" in doc assert "content" in doc assert "score" in doc assert "rerank_score" in doc assert "original_score" in doc def test_rerank_with_top_k(self): """Testa reranking com limite top_k""" reranker = Reranker() docs = [ {"id": i, "title": f"Doc {i}", "content": f"Content {i}", "score": 0.5} for i in range(10) ] reranked = reranker.rerank("test query", docs, top_k=3) assert len(reranked) == 3 def test_rerank_scores_are_numeric(self): """Testa se scores de reranking são numéricos""" reranker = Reranker() docs = [ { "id": 1, "title": "Test", "content": "Machine learning algorithms", "score": 0.9 } ] reranked = reranker.rerank("machine learning", docs) assert isinstance(reranked[0]['rerank_score'], float) assert isinstance(reranked[0]['original_score'], float) def test_get_rerank_comparison(self): """Testa geração de dados de comparação""" reranker = Reranker() original = [ {"id": 1, "content": "First", "score": 0.9}, {"id": 2, "content": "Second", "score": 0.8}, {"id": 3, "content": "Third", "score": 0.7} ] reranked = [ {"id": 2, "content": "Second", "original_score": 0.8, "rerank_score": 0.95}, {"id": 1, "content": "First", "original_score": 0.9, "rerank_score": 0.85}, {"id": 3, "content": "Third", "original_score": 0.7, "rerank_score": 0.75} ] comparison = reranker.get_rerank_comparison(original, reranked) assert len(comparison) == 3 assert comparison[0]['new_rank'] == 1 assert comparison[0]['original_rank'] == 2 assert comparison[0]['position_change'] == 1 # Subiu 1 posição def test_get_model_info(self): """Testa obtenção de informações do modelo""" reranker = Reranker() info = reranker.get_model_info() assert "model_id" in info assert "available" in info assert "type" in info assert info["type"] == "cross-encoder" def test_is_available(self): """Testa verificação de disponibilidade""" reranker = Reranker() # Nota: Pode falhar se modelo não estiver instalado # Por isso, apenas testamos que o método retorna bool result = reranker.is_available() assert isinstance(result, bool) class TestRerankingIntegration: """Testes de integração do reranking""" def test_reranking_changes_order(self): """Testa se reranking realmente muda a ordem dos documentos""" reranker = Reranker() # Documentos onde a query é mais relevante para o último docs = [ { "id": 1, "content": "Python is a snake", "title": "Animals", "score": 0.9 # Score alto mas não relevante }, { "id": 2, "content": "Java is an island", "title": "Geography", "score": 0.8 }, { "id": 3, "content": "Python is a programming language for data science and machine learning", "title": "Programming", "score": 0.7 # Score baixo mas muito relevante } ] reranked = reranker.rerank("What is Python programming?", docs) # O documento sobre programação deve estar no topo após reranking # (assumindo que o cross-encoder funciona corretamente) assert reranked[0]['id'] == 3 # Doc sobre programação assert reranked[0]['rerank_score'] > reranked[1]['rerank_score']