""" Testes unitários para os processadores e validadores. Execute com: python -m pytest tests/test_processors.py -v """ import json import os import sys import tempfile from pathlib import Path from unittest.mock import MagicMock, patch import pytest # Adiciona o diretório pai ao path sys.path.insert(0, str(Path(__file__).parent.parent)) import config from utils.validators import ( ValidationError, sanitize_filename, validate_file_count, validate_file_size, ) # ============================================================================= # FIXTURES # ============================================================================= @pytest.fixture def temp_file(): """Cria um arquivo temporário para testes.""" with tempfile.NamedTemporaryFile( mode="wb", suffix=".pdf", delete=False ) as f: # Escreve conteúdo mínimo de PDF f.write(b"%PDF-1.4\n") f.write(b"1 0 obj\n<< /Type /Catalog >>\nendobj\n") f.write(b"%%EOF\n") temp_path = f.name yield temp_path # Cleanup if os.path.exists(temp_path): os.unlink(temp_path) @pytest.fixture def large_temp_file(): """Cria um arquivo temporário grande (> limite).""" with tempfile.NamedTemporaryFile( mode="wb", suffix=".pdf", delete=False ) as f: # Escreve mais que o limite f.write(b"X" * (config.MAX_FILE_SIZE_BYTES + 1000)) temp_path = f.name yield temp_path if os.path.exists(temp_path): os.unlink(temp_path) @pytest.fixture def empty_temp_file(): """Cria um arquivo temporário vazio.""" with tempfile.NamedTemporaryFile( mode="wb", suffix=".pdf", delete=False ) as f: temp_path = f.name yield temp_path if os.path.exists(temp_path): os.unlink(temp_path) # ============================================================================= # TESTES DE VALIDAÇÃO # ============================================================================= class TestValidateFileCount: """Testes para validate_file_count().""" def test_valid_count_single(self): """Teste com um arquivo.""" assert validate_file_count([1]) is True def test_valid_count_multiple(self): """Teste com múltiplos arquivos dentro do limite.""" files = list(range(config.MAX_FILES_PER_SESSION)) assert validate_file_count(files) is True def test_empty_list_raises(self): """Teste com lista vazia deve falhar.""" with pytest.raises(ValidationError) as exc_info: validate_file_count([]) assert exc_info.value.error_code == "NO_FILES" def test_too_many_files_raises(self): """Teste com arquivos demais deve falhar.""" files = list(range(config.MAX_FILES_PER_SESSION + 1)) with pytest.raises(ValidationError) as exc_info: validate_file_count(files) assert exc_info.value.error_code == "TOO_MANY_FILES" class TestValidateFileSize: """Testes para validate_file_size().""" def test_valid_size(self, temp_file): """Teste com arquivo de tamanho válido.""" assert validate_file_size(temp_file) is True def test_file_too_large(self, large_temp_file): """Teste com arquivo muito grande.""" with pytest.raises(ValidationError) as exc_info: validate_file_size(large_temp_file) assert exc_info.value.error_code == "FILE_TOO_LARGE" def test_empty_file(self, empty_temp_file): """Teste com arquivo vazio.""" with pytest.raises(ValidationError) as exc_info: validate_file_size(empty_temp_file) assert exc_info.value.error_code == "EMPTY_FILE" def test_file_not_found(self): """Teste com arquivo inexistente.""" with pytest.raises(ValidationError) as exc_info: validate_file_size("/caminho/inexistente/arquivo.pdf") assert exc_info.value.error_code == "FILE_NOT_FOUND" class TestSanitizeFilename: """Testes para sanitize_filename().""" def test_normal_filename(self): """Teste com nome normal.""" assert sanitize_filename("documento.pdf") == "documento.pdf" def test_special_characters(self): """Teste com caracteres especiais.""" result = sanitize_filename("doc<>:test.pdf") assert "<" not in result assert ">" not in result assert ":" not in result def test_spaces(self): """Teste com espaços.""" result = sanitize_filename("meu documento.pdf") assert result == "meu_documento.pdf" def test_multiple_underscores(self): """Teste com underscores múltiplos.""" result = sanitize_filename("doc___test.pdf") assert "___" not in result def test_empty_filename(self): """Teste com nome vazio.""" result = sanitize_filename("") assert result == "arquivo_sem_nome" def test_long_filename(self): """Teste com nome muito longo.""" long_name = "a" * 300 + ".pdf" result = sanitize_filename(long_name) assert len(result) <= config.FILENAME_MAX_LENGTH # ============================================================================= # TESTES DE FORMATAÇÃO JSON # ============================================================================= class TestJSONFormatter: """Testes para json_formatter.py.""" def test_format_to_json_basic(self): """Teste de formatação JSON básica.""" from processors.json_formatter import format_to_json # Mock de dados processados mock_document = MagicMock() mock_document.export_to_dict.return_value = {"content": "teste"} processed_data = { "document": mock_document, "metadata": {"nome_arquivo": "test.pdf"}, "tables": [], "language": "pt", } result = format_to_json(processed_data, "test.pdf") assert isinstance(result, str) parsed = json.loads(result) assert parsed["arquivo"] == "test.pdf" assert parsed["idioma"] == "pt" assert "processado_em" in parsed def test_format_to_json_with_tables(self): """Teste de formatação JSON com tabelas.""" from processors.json_formatter import format_to_json mock_document = MagicMock() mock_document.export_to_dict.return_value = {} processed_data = { "document": mock_document, "metadata": {}, "tables": [ {"indice": 1, "dados": [{"col1": "val1"}]} ], "language": "en", } result = format_to_json(processed_data, "test.pdf") parsed = json.loads(result) assert len(parsed["tabelas"]) == 1 assert parsed["tabelas"][0]["indice"] == 1 # ============================================================================= # TESTES DE FORMATAÇÃO MARKDOWN # ============================================================================= class TestMarkdownFormatter: """Testes para markdown_formatter.py.""" def test_format_to_markdown_basic(self): """Teste de formatação Markdown básica.""" from processors.markdown_formatter import format_to_markdown mock_document = MagicMock() mock_document.export_to_markdown.return_value = "# Conteúdo\n\nTexto aqui." processed_data = { "document": mock_document, "metadata": {"nome_arquivo": "test.pdf", "num_paginas": 3}, "tables": [], "language": "pt", } result = format_to_markdown(processed_data) assert isinstance(result, str) assert "# " in result or "## " in result # Tem headings def test_dict_to_markdown_table(self): """Teste de conversão de dict para tabela MD.""" from processors.markdown_formatter import _dict_to_markdown_table data = [ {"Nome": "Alice", "Idade": 30}, {"Nome": "Bob", "Idade": 25}, ] result = _dict_to_markdown_table(data) assert "| Nome | Idade |" in result assert "| --- | --- |" in result assert "| Alice | 30 |" in result assert "| Bob | 25 |" in result def test_empty_table(self): """Teste com tabela vazia.""" from processors.markdown_formatter import _dict_to_markdown_table result = _dict_to_markdown_table([]) assert "vazia" in result.lower() # ============================================================================= # TESTES DE FILE HANDLER # ============================================================================= class TestFileHandler: """Testes para file_handler.py.""" def test_create_temp_directory(self): """Teste de criação de diretório temporário.""" from utils.file_handler import create_temp_directory temp_dir = create_temp_directory(prefix="test_") try: assert temp_dir.exists() assert temp_dir.is_dir() assert "test_" in temp_dir.name finally: # Cleanup if temp_dir.exists(): import shutil shutil.rmtree(temp_dir) def test_save_output_file(self): """Teste de salvamento de arquivo de saída.""" from utils.file_handler import save_output_file, create_temp_directory temp_dir = create_temp_directory(prefix="test_") try: content = "Conteúdo de teste" output_path = save_output_file(content, "teste.txt", temp_dir) assert output_path.exists() assert output_path.read_text() == content finally: import shutil if temp_dir.exists(): shutil.rmtree(temp_dir) def test_format_size(self): """Teste de formatação de tamanho.""" from utils.file_handler import format_size assert "B" in format_size(500) assert "KB" in format_size(1024 * 5) assert "MB" in format_size(1024 * 1024 * 10) assert "GB" in format_size(1024 * 1024 * 1024 * 2) # ============================================================================= # TESTES DE INTEGRAÇÃO (MOCK) # ============================================================================= class TestDoclingProcessorMock: """Testes do DoclingProcessor com mocks.""" @patch("processors.docling_processor.DocumentConverter") def test_processor_initialization(self, mock_converter_class): """Teste de inicialização do processador.""" from processors.docling_processor import DoclingProcessor processor = DoclingProcessor( enable_ocr=True, enable_table_detection=True, use_gpu=False ) assert processor.enable_ocr is True assert processor.enable_table_detection is True assert processor.use_gpu is False @patch("processors.docling_processor.DocumentConverter") def test_processor_process_document(self, mock_converter_class): """Teste de processamento de documento.""" from processors.docling_processor import DoclingProcessor # Setup mock mock_converter = MagicMock() mock_converter_class.return_value = mock_converter mock_result = MagicMock() mock_document = MagicMock() mock_document.export_to_markdown.return_value = "# Teste" mock_result.document = mock_document mock_converter.convert.return_value = mock_result # Cria arquivo temporário with tempfile.NamedTemporaryFile( mode="wb", suffix=".pdf", delete=False ) as f: f.write(b"%PDF-1.4\n%%EOF\n") temp_path = f.name try: processor = DoclingProcessor() result = processor.process_document(temp_path) assert "document" in result assert "metadata" in result assert "tables" in result assert "language" in result finally: os.unlink(temp_path) # ============================================================================= # EXECUTAR TESTES # ============================================================================= if __name__ == "__main__": pytest.main([__file__, "-v"])