Spaces:
Runtime error
Runtime error
"""
Unit tests for the processors and validators.

Run with: python -m pytest tests/test_processors.py -v
"""
| import json | |
| import os | |
| import sys | |
| import tempfile | |
| from pathlib import Path | |
| from unittest.mock import MagicMock, patch | |
| import pytest | |
| # Adiciona o diretório pai ao path | |
| sys.path.insert(0, str(Path(__file__).parent.parent)) | |
| import config | |
| from utils.validators import ( | |
| ValidationError, | |
| sanitize_filename, | |
| validate_file_count, | |
| validate_file_size, | |
| ) | |
| # ============================================================================= | |
| # FIXTURES | |
| # ============================================================================= | |
@pytest.fixture
def temp_file():
    """Provide the path of a small temporary ``.pdf`` file.

    The file is written and closed before its path is handed to the test,
    and it is deleted again once the test has finished.

    Yields:
        str: filesystem path of the temporary file.
    """
    with tempfile.NamedTemporaryFile(
        mode="wb",
        suffix=".pdf",
        delete=False,
    ) as f:
        # Minimal PDF-looking content: header, one catalog object, EOF marker.
        f.write(b"%PDF-1.4\n")
        f.write(b"1 0 obj\n<< /Type /Catalog >>\nendobj\n")
        f.write(b"%%EOF\n")
        temp_path = f.name

    yield temp_path

    # Teardown: delete=False means nobody else removes the file for us.
    try:
        os.unlink(temp_path)
    except FileNotFoundError:
        # The test already removed the file; nothing left to clean up.
        pass
@pytest.fixture
def large_temp_file():
    """Provide the path of a temporary ``.pdf`` file that exceeds the size limit.

    The file holds ``config.MAX_FILE_SIZE_BYTES + 1000`` bytes, is closed
    before its path is handed to the test, and is deleted afterwards.

    Yields:
        str: filesystem path of the oversized temporary file.
    """
    with tempfile.NamedTemporaryFile(
        mode="wb",
        suffix=".pdf",
        delete=False,
    ) as f:
        # Write 1000 bytes more than the configured maximum.
        f.write(b"X" * (config.MAX_FILE_SIZE_BYTES + 1000))
        temp_path = f.name

    yield temp_path

    # Teardown: delete=False means nobody else removes the file for us.
    try:
        os.unlink(temp_path)
    except FileNotFoundError:
        # The test already removed the file; nothing left to clean up.
        pass
@pytest.fixture
def empty_temp_file():
    """Provide the path of an empty (zero-byte) temporary ``.pdf`` file.

    Nothing is written to the file; it is closed before its path is handed
    to the test and deleted afterwards.

    Yields:
        str: filesystem path of the empty temporary file.
    """
    with tempfile.NamedTemporaryFile(
        mode="wb",
        suffix=".pdf",
        delete=False,
    ) as f:
        temp_path = f.name

    yield temp_path

    # Teardown: delete=False means nobody else removes the file for us.
    try:
        os.unlink(temp_path)
    except FileNotFoundError:
        # The test already removed the file; nothing left to clean up.
        pass
| # ============================================================================= | |
| # TESTES DE VALIDAÇÃO | |
| # ============================================================================= | |
class TestValidateFileCount:
    """Checks for validate_file_count()."""

    def test_valid_count_single(self):
        """A list with one entry is accepted."""
        outcome = validate_file_count([1])
        assert outcome is True

    def test_valid_count_multiple(self):
        """A list with config.MAX_FILES_PER_SESSION entries is accepted."""
        at_limit = [*range(config.MAX_FILES_PER_SESSION)]
        outcome = validate_file_count(at_limit)
        assert outcome is True

    def test_empty_list_raises(self):
        """An empty list is rejected with the NO_FILES error code."""
        with pytest.raises(ValidationError) as caught:
            validate_file_count([])

        # Inspect the exception only after the raises-block has closed.
        assert caught.value.error_code == "NO_FILES"

    def test_too_many_files_raises(self):
        """One entry more than the limit is rejected with TOO_MANY_FILES."""
        over_limit = [*range(config.MAX_FILES_PER_SESSION + 1)]
        with pytest.raises(ValidationError) as caught:
            validate_file_count(over_limit)

        assert caught.value.error_code == "TOO_MANY_FILES"
class TestValidateFileSize:
    """Checks for validate_file_size()."""

    def test_valid_size(self, temp_file):
        """The path supplied as `temp_file` is accepted."""
        outcome = validate_file_size(temp_file)
        assert outcome is True

    def test_file_too_large(self, large_temp_file):
        """The path supplied as `large_temp_file` is rejected as FILE_TOO_LARGE."""
        with pytest.raises(ValidationError) as caught:
            validate_file_size(large_temp_file)

        # Inspect the exception only after the raises-block has closed.
        assert caught.value.error_code == "FILE_TOO_LARGE"

    def test_empty_file(self, empty_temp_file):
        """The path supplied as `empty_temp_file` is rejected as EMPTY_FILE."""
        with pytest.raises(ValidationError) as caught:
            validate_file_size(empty_temp_file)

        assert caught.value.error_code == "EMPTY_FILE"

    def test_file_not_found(self):
        """A hard-coded, non-existent path is rejected as FILE_NOT_FOUND."""
        missing_path = "/caminho/inexistente/arquivo.pdf"
        with pytest.raises(ValidationError) as caught:
            validate_file_size(missing_path)

        assert caught.value.error_code == "FILE_NOT_FOUND"
class TestSanitizeFilename:
    """Checks for sanitize_filename()."""

    def test_normal_filename(self):
        """A plain file name comes back unchanged."""
        cleaned = sanitize_filename("documento.pdf")
        assert cleaned == "documento.pdf"

    def test_special_characters(self):
        """The characters '<', '>' and ':' do not survive sanitising."""
        cleaned = sanitize_filename("doc<>:test.pdf")
        for forbidden in ("<", ">", ":"):
            assert forbidden not in cleaned

    def test_spaces(self):
        """A space in the name is turned into an underscore."""
        cleaned = sanitize_filename("meu documento.pdf")
        assert cleaned == "meu_documento.pdf"

    def test_multiple_underscores(self):
        """A run of three underscores does not appear in the result."""
        cleaned = sanitize_filename("doc___test.pdf")
        assert "___" not in cleaned

    def test_empty_filename(self):
        """An empty name is replaced by the fallback name."""
        cleaned = sanitize_filename("")
        assert cleaned == "arquivo_sem_nome"

    def test_long_filename(self):
        """A 304-character name is cut to at most config.FILENAME_MAX_LENGTH."""
        oversized = "".join(["a" * 300, ".pdf"])
        cleaned = sanitize_filename(oversized)
        assert len(cleaned) <= config.FILENAME_MAX_LENGTH
| # ============================================================================= | |
| # TESTES DE FORMATAÇÃO JSON | |
| # ============================================================================= | |
class TestJSONFormatter:
    """Checks for json_formatter.py."""

    def test_format_to_json_basic(self):
        """format_to_json returns a JSON string carrying file name, language and timestamp keys."""
        from processors.json_formatter import format_to_json

        # Stand-in for the processed document object.
        fake_doc = MagicMock()
        fake_doc.export_to_dict.return_value = {"content": "teste"}

        payload = dict(
            document=fake_doc,
            metadata={"nome_arquivo": "test.pdf"},
            tables=[],
            language="pt",
        )

        rendered = format_to_json(payload, "test.pdf")
        assert isinstance(rendered, str)

        decoded = json.loads(rendered)
        assert decoded["arquivo"] == "test.pdf"
        assert decoded["idioma"] == "pt"
        assert "processado_em" in decoded

    def test_format_to_json_with_tables(self):
        """A single input table shows up under the "tabelas" key with its index."""
        from processors.json_formatter import format_to_json

        fake_doc = MagicMock()
        fake_doc.export_to_dict.return_value = {}

        only_table = {"indice": 1, "dados": [{"col1": "val1"}]}
        payload = dict(
            document=fake_doc,
            metadata={},
            tables=[only_table],
            language="en",
        )

        decoded = json.loads(format_to_json(payload, "test.pdf"))
        exported_tables = decoded["tabelas"]
        assert len(exported_tables) == 1
        assert exported_tables[0]["indice"] == 1
| # ============================================================================= | |
| # TESTES DE FORMATAÇÃO MARKDOWN | |
| # ============================================================================= | |
class TestMarkdownFormatter:
    """Checks for markdown_formatter.py."""

    def test_format_to_markdown_basic(self):
        """format_to_markdown returns a string that contains a heading marker."""
        from processors.markdown_formatter import format_to_markdown

        # Stand-in for the processed document object.
        fake_doc = MagicMock()
        fake_doc.export_to_markdown.return_value = "# Conteúdo\n\nTexto aqui."

        payload = dict(
            document=fake_doc,
            metadata={"nome_arquivo": "test.pdf", "num_paginas": 3},
            tables=[],
            language="pt",
        )

        rendered = format_to_markdown(payload)
        assert isinstance(rendered, str)
        # At least one heading marker must be present.
        assert any(marker in rendered for marker in ("# ", "## "))

    def test_dict_to_markdown_table(self):
        """A list of row dicts becomes a header row, a separator row and one row per dict."""
        from processors.markdown_formatter import _dict_to_markdown_table

        rows = [
            {"Nome": "Alice", "Idade": 30},
            {"Nome": "Bob", "Idade": 25},
        ]
        rendered = _dict_to_markdown_table(rows)

        expected_lines = (
            "| Nome | Idade |",
            "| --- | --- |",
            "| Alice | 30 |",
            "| Bob | 25 |",
        )
        for line in expected_lines:
            assert line in rendered

    def test_empty_table(self):
        """An empty row list produces text containing "vazia" (case-insensitive)."""
        from processors.markdown_formatter import _dict_to_markdown_table

        rendered = _dict_to_markdown_table([])
        assert "vazia" in rendered.lower()
| # ============================================================================= | |
| # TESTES DE FILE HANDLER | |
| # ============================================================================= | |
class TestFileHandler:
    """Checks for file_handler.py."""

    def test_create_temp_directory(self):
        """create_temp_directory returns an existing directory whose name carries the prefix."""
        from utils.file_handler import create_temp_directory

        workdir = create_temp_directory(prefix="test_")
        try:
            assert workdir.exists()
            assert workdir.is_dir()
            assert "test_" in workdir.name
        finally:
            # Remove the directory regardless of the assertions' outcome.
            if workdir.exists():
                import shutil

                shutil.rmtree(workdir)

    def test_save_output_file(self):
        """save_output_file writes the content to a file that can be read back."""
        from utils.file_handler import create_temp_directory, save_output_file

        workdir = create_temp_directory(prefix="test_")
        try:
            text = "Conteúdo de teste"
            written = save_output_file(text, "teste.txt", workdir)
            assert written.exists()
            assert written.read_text() == text
        finally:
            import shutil

            if workdir.exists():
                shutil.rmtree(workdir)

    def test_format_size(self):
        """format_size labels byte counts with the B, KB, MB and GB units."""
        from utils.file_handler import format_size

        samples = (
            ("B", 500),
            ("KB", 1024 * 5),
            ("MB", 1024 * 1024 * 10),
            ("GB", 1024 * 1024 * 1024 * 2),
        )
        for unit, byte_count in samples:
            assert unit in format_size(byte_count)
| # ============================================================================= | |
| # TESTES DE INTEGRAÇÃO (MOCK) | |
| # ============================================================================= | |
class TestDoclingProcessorMock:
    """Checks for DoclingProcessor using mocks.

    NOTE(review): nothing in the visible file provides `mock_converter_class`;
    it presumably comes from a `@patch(...)` decorator or a fixture that is
    not shown here - confirm and restore it, otherwise pytest cannot
    resolve the argument.
    """

    def test_processor_initialization(self, mock_converter_class):
        """Constructor keyword arguments are stored on the instance unchanged."""
        from processors.docling_processor import DoclingProcessor

        options = dict(
            enable_ocr=True,
            enable_table_detection=True,
            use_gpu=False,
        )
        processor = DoclingProcessor(**options)

        assert processor.enable_ocr is True
        assert processor.enable_table_detection is True
        assert processor.use_gpu is False

    def test_processor_process_document(self, mock_converter_class):
        """process_document returns a mapping with the four expected keys."""
        from processors.docling_processor import DoclingProcessor

        # Wire the mock chain: converter class -> converter -> result -> document.
        fake_doc = MagicMock()
        fake_doc.export_to_markdown.return_value = "# Teste"
        fake_result = MagicMock()
        fake_result.document = fake_doc
        fake_converter = MagicMock()
        fake_converter.convert.return_value = fake_result
        mock_converter_class.return_value = fake_converter

        # A tiny on-disk file to hand to the processor.
        with tempfile.NamedTemporaryFile(
            mode="wb",
            suffix=".pdf",
            delete=False,
        ) as handle:
            handle.write(b"%PDF-1.4\n%%EOF\n")
            pdf_path = handle.name

        try:
            outcome = DoclingProcessor().process_document(pdf_path)
            for key in ("document", "metadata", "tables", "language"):
                assert key in outcome
        finally:
            os.unlink(pdf_path)
| # ============================================================================= | |
| # EXECUTAR TESTES | |
| # ============================================================================= | |
if __name__ == "__main__":
    # pytest.main() returns the run's exit code; hand it to the interpreter
    # so that running this file as a script fails when a test fails.
    sys.exit(pytest.main([__file__, "-v"]))