Spaces:
Sleeping
Sleeping
Guilherme Favaron
Major update: Add hybrid search, reranking, multiple LLMs, and UI improvements
1b447de
"""
Tests for the reranking module.
"""
| import pytest | |
| from src.reranking import Reranker | |
class TestReranker:
    """Unit tests for the ``Reranker`` class."""

    # Model identifier a default-constructed Reranker is expected to report.
    DEFAULT_MODEL_ID = "cross-encoder/ms-marco-MiniLM-L-6-v2"

    # Keys every reranked document must carry: the input fields plus the
    # two score fields these tests expect ``rerank`` to add.
    EXPECTED_DOC_FIELDS = (
        "id",
        "title",
        "content",
        "score",
        "rerank_score",
        "original_score",
    )

    # Keys expected in the dictionary returned by ``get_model_info``.
    EXPECTED_INFO_KEYS = ("model_id", "available", "type")

    def test_initialization(self):
        """A default Reranker reports the default model id and no loaded model."""
        instance = Reranker()

        assert instance.model_id == self.DEFAULT_MODEL_ID
        # Lazy loading: the model is expected to be unset right after construction.
        assert instance.model is None

    def test_initialization_custom_model(self):
        """A model id passed to the constructor is stored unchanged."""
        requested_id = "cross-encoder/ms-marco-TinyBERT-L-2-v2"

        instance = Reranker(model_id=requested_id)

        assert instance.model_id == requested_id

    def test_rerank_empty_documents(self):
        """Reranking an empty document list yields an empty list."""
        instance = Reranker()

        outcome = instance.rerank("test query", [])

        assert outcome == []

    def test_rerank_preserves_fields(self):
        """Reranked documents keep their input fields and gain score fields."""
        instance = Reranker()
        first_doc = {
            "id": 1,
            "title": "Doc 1",
            "content": "Machine learning is a subset of artificial intelligence",
            "score": 0.8,
        }
        second_doc = {
            "id": 2,
            "title": "Doc 2",
            "content": "Python is a programming language",
            "score": 0.7,
        }
        corpus = [first_doc, second_doc]

        ranked = instance.rerank("What is machine learning?", corpus)

        # Every input document must come back.
        assert len(ranked) == len(corpus)
        # Every returned document must expose all expected fields.
        for entry in ranked:
            for field_name in self.EXPECTED_DOC_FIELDS:
                assert field_name in entry

    def test_rerank_with_top_k(self):
        """``top_k`` caps the number of documents returned."""
        instance = Reranker()
        corpus = [
            {
                "id": idx,
                "title": f"Doc {idx}",
                "content": f"Content {idx}",
                "score": 0.5,
            }
            for idx in range(10)
        ]

        ranked = instance.rerank("test query", corpus, top_k=3)

        assert len(ranked) == 3

    def test_rerank_scores_are_numeric(self):
        """Both score fields on a reranked document are ``float`` instances."""
        instance = Reranker()
        only_doc = {
            "id": 1,
            "title": "Test",
            "content": "Machine learning algorithms",
            "score": 0.9,
        }

        ranked = instance.rerank("machine learning", [only_doc])

        for score_key in ("rerank_score", "original_score"):
            assert isinstance(ranked[0][score_key], float)

    def test_get_rerank_comparison(self):
        """The comparison data reports old rank, new rank and position change."""
        instance = Reranker()
        before = [
            {"id": 1, "content": "First", "score": 0.9},
            {"id": 2, "content": "Second", "score": 0.8},
            {"id": 3, "content": "Third", "score": 0.7},
        ]
        after = [
            {"id": 2, "content": "Second", "original_score": 0.8, "rerank_score": 0.95},
            {"id": 1, "content": "First", "original_score": 0.9, "rerank_score": 0.85},
            {"id": 3, "content": "Third", "original_score": 0.7, "rerank_score": 0.75},
        ]

        rows = instance.get_rerank_comparison(before, after)

        assert len(rows) == 3
        leader = rows[0]
        assert leader["new_rank"] == 1
        assert leader["original_rank"] == 2
        # Document 2 moved up by one position.
        assert leader["position_change"] == 1

    def test_get_model_info(self):
        """``get_model_info`` exposes the expected keys and the cross-encoder type."""
        instance = Reranker()

        details = instance.get_model_info()

        for key in self.EXPECTED_INFO_KEYS:
            assert key in details
        assert details["type"] == "cross-encoder"

    def test_is_available(self):
        """``is_available`` returns a boolean."""
        instance = Reranker()

        # NOTE: the actual value may depend on whether the model is installed,
        # so only the return type is checked here.
        availability = instance.is_available()

        assert isinstance(availability, bool)
class TestRerankingIntegration:
    """Integration tests for reranking."""

    def test_reranking_changes_order(self):
        """Reranking promotes the most relevant document to the top."""
        instance = Reranker()

        # The input order deliberately puts the most relevant document last.
        snake_doc = {
            "id": 1,
            "content": "Python is a snake",
            "title": "Animals",
            "score": 0.9,  # High score, but not relevant to the query
        }
        island_doc = {
            "id": 2,
            "content": "Java is an island",
            "title": "Geography",
            "score": 0.8,
        }
        programming_doc = {
            "id": 3,
            "content": "Python is a programming language for data science and machine learning",
            "title": "Programming",
            "score": 0.7,  # Low score, but highly relevant to the query
        }
        corpus = [snake_doc, island_doc, programming_doc]

        ranked = instance.rerank("What is Python programming?", corpus)

        # The programming document should lead after reranking
        # (assuming the cross-encoder behaves as intended).
        best = ranked[0]
        assert best["id"] == 3
        assert best["rerank_score"] > ranked[1]["rerank_score"]