File size: 12,436 Bytes
780413d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
"""
Testes unitários para os processadores e validadores.

Execute com: python -m pytest tests/test_processors.py -v
"""

import json
import os
import sys
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest

# Adiciona o diretório pai ao path
sys.path.insert(0, str(Path(__file__).parent.parent))

import config
from utils.validators import (
    ValidationError,
    sanitize_filename,
    validate_file_count,
    validate_file_size,
)


# =============================================================================
# FIXTURES
# =============================================================================

@pytest.fixture
def temp_file():
    """Yield the path of a temporary ``.pdf`` file with minimal PDF-like content.

    The file is created with ``delete=False`` so it survives being closed;
    it is removed after the test if it still exists.
    """
    # Version header, a single catalog object and the end-of-file marker.
    pdf_bytes = (
        b"%PDF-1.4\n"
        b"1 0 obj\n<< /Type /Catalog >>\nendobj\n"
        b"%%EOF\n"
    )

    handle = tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf", delete=False)
    with handle:
        handle.write(pdf_bytes)
    pdf_path = handle.name

    yield pdf_path

    # Teardown: nothing deletes the file automatically, so do it here.
    if os.path.exists(pdf_path):
        os.unlink(pdf_path)


@pytest.fixture
def large_temp_file():
    """Yield the path of a temporary file 1000 bytes larger than the size limit.

    The limit is read from ``config.MAX_FILE_SIZE_BYTES``. The file is
    removed after the test if it still exists.
    """
    handle = tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf", delete=False)
    with handle:
        # Filler bytes only: the payload just has to exceed the limit.
        oversize = config.MAX_FILE_SIZE_BYTES + 1000
        handle.write(b"X" * oversize)
    big_path = handle.name

    yield big_path

    # Teardown: remove the file unless the test already did.
    if os.path.exists(big_path):
        os.unlink(big_path)


@pytest.fixture
def empty_temp_file():
    """Yield the path of a zero-byte temporary ``.pdf`` file.

    The file is removed after the test if it still exists.
    """
    handle = tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf", delete=False)
    # Nothing is written: closing the handle leaves an empty file on disk.
    handle.close()
    blank_path = handle.name

    yield blank_path

    # Teardown: remove the file unless the test already did.
    if os.path.exists(blank_path):
        os.unlink(blank_path)


# =============================================================================
# TESTES DE VALIDAÇÃO
# =============================================================================

class TestValidateFileCount:
    """Tests for validate_file_count()."""

    def test_valid_count_single(self):
        """A one-element list is accepted."""
        single = [1]
        assert validate_file_count(single) is True

    def test_valid_count_multiple(self):
        """A list with exactly MAX_FILES_PER_SESSION entries is accepted."""
        at_limit = [*range(config.MAX_FILES_PER_SESSION)]
        assert validate_file_count(at_limit) is True

    def test_empty_list_raises(self):
        """An empty list raises ValidationError with code NO_FILES."""
        nothing = []

        with pytest.raises(ValidationError) as caught:
            validate_file_count(nothing)

        assert caught.value.error_code == "NO_FILES"

    def test_too_many_files_raises(self):
        """One entry over the limit raises ValidationError with code TOO_MANY_FILES."""
        over_limit = [*range(config.MAX_FILES_PER_SESSION + 1)]

        with pytest.raises(ValidationError) as caught:
            validate_file_count(over_limit)

        assert caught.value.error_code == "TOO_MANY_FILES"


class TestValidateFileSize:
    """Tests for validate_file_size()."""

    @staticmethod
    def _rejection_code(path):
        """Return the error_code of the ValidationError raised for *path*."""
        with pytest.raises(ValidationError) as caught:
            validate_file_size(path)
        return caught.value.error_code

    def test_valid_size(self, temp_file):
        """The small file from the temp_file fixture is accepted."""
        outcome = validate_file_size(temp_file)
        assert outcome is True

    def test_file_too_large(self, large_temp_file):
        """A file above the configured limit is rejected as FILE_TOO_LARGE."""
        assert self._rejection_code(large_temp_file) == "FILE_TOO_LARGE"

    def test_empty_file(self, empty_temp_file):
        """A zero-byte file is rejected as EMPTY_FILE."""
        assert self._rejection_code(empty_temp_file) == "EMPTY_FILE"

    def test_file_not_found(self):
        """A nonexistent path is rejected as FILE_NOT_FOUND."""
        missing = "/caminho/inexistente/arquivo.pdf"
        assert self._rejection_code(missing) == "FILE_NOT_FOUND"


class TestSanitizeFilename:
    """Tests for sanitize_filename()."""

    def test_normal_filename(self):
        """A plain file name comes back unchanged."""
        plain = "documento.pdf"
        assert sanitize_filename(plain) == "documento.pdf"

    def test_special_characters(self):
        """The characters <, > and : do not appear in the sanitized name."""
        cleaned = sanitize_filename("doc<>:test.pdf")
        for forbidden in ("<", ">", ":"):
            assert forbidden not in cleaned

    def test_spaces(self):
        """A space in the name is replaced by an underscore."""
        assert sanitize_filename("meu documento.pdf") == "meu_documento.pdf"

    def test_multiple_underscores(self):
        """A run of three underscores does not survive sanitizing."""
        cleaned = sanitize_filename("doc___test.pdf")
        assert "___" not in cleaned

    def test_empty_filename(self):
        """An empty name yields the fallback name."""
        assert sanitize_filename("") == "arquivo_sem_nome"

    def test_long_filename(self):
        """A 304-character name is cut down to at most FILENAME_MAX_LENGTH."""
        oversized = f"{'a' * 300}.pdf"
        cleaned = sanitize_filename(oversized)
        assert len(cleaned) <= config.FILENAME_MAX_LENGTH


# =============================================================================
# TESTES DE FORMATAÇÃO JSON
# =============================================================================

class TestJSONFormatter:
    """Tests for json_formatter.py."""

    def test_format_to_json_basic(self):
        """format_to_json returns a JSON string with the expected top-level keys."""
        from processors.json_formatter import format_to_json

        # Stand-in for the processed document; only export_to_dict is configured.
        fake_document = MagicMock(
            **{"export_to_dict.return_value": {"content": "teste"}}
        )
        payload = {
            "document": fake_document,
            "metadata": {"nome_arquivo": "test.pdf"},
            "tables": [],
            "language": "pt",
        }

        rendered = format_to_json(payload, "test.pdf")
        assert isinstance(rendered, str)

        # The output must be parseable JSON and carry the expected fields.
        decoded = json.loads(rendered)
        assert decoded["arquivo"] == "test.pdf"
        assert decoded["idioma"] == "pt"
        assert "processado_em" in decoded

    def test_format_to_json_with_tables(self):
        """A single input table shows up under the "tabelas" key of the output."""
        from processors.json_formatter import format_to_json

        fake_document = MagicMock(**{"export_to_dict.return_value": {}})
        single_table = {"indice": 1, "dados": [{"col1": "val1"}]}
        payload = {
            "document": fake_document,
            "metadata": {},
            "tables": [single_table],
            "language": "en",
        }

        decoded = json.loads(format_to_json(payload, "test.pdf"))
        exported_tables = decoded["tabelas"]

        assert len(exported_tables) == 1
        assert exported_tables[0]["indice"] == 1


# =============================================================================
# TESTES DE FORMATAÇÃO MARKDOWN
# =============================================================================

class TestMarkdownFormatter:
    """Tests for markdown_formatter.py."""

    def test_format_to_markdown_basic(self):
        """format_to_markdown returns a string containing a heading marker."""
        from processors.markdown_formatter import format_to_markdown

        # Stand-in for the processed document; only export_to_markdown is configured.
        fake_document = MagicMock()
        fake_document.export_to_markdown.return_value = "# Conteúdo\n\nTexto aqui."

        rendered = format_to_markdown(
            {
                "document": fake_document,
                "metadata": {"nome_arquivo": "test.pdf", "num_paginas": 3},
                "tables": [],
                "language": "pt",
            }
        )

        assert isinstance(rendered, str)
        # At least one heading marker has to be present.
        assert any(marker in rendered for marker in ("# ", "## "))

    def test_dict_to_markdown_table(self):
        """A list of row dicts renders header, separator and one line per row."""
        from processors.markdown_formatter import _dict_to_markdown_table

        rows = [
            {"Nome": "Alice", "Idade": 30},
            {"Nome": "Bob", "Idade": 25},
        ]
        rendered = _dict_to_markdown_table(rows)

        expected_lines = (
            "| Nome | Idade |",
            "| --- | --- |",
            "| Alice | 30 |",
            "| Bob | 25 |",
        )
        for line in expected_lines:
            assert line in rendered

    def test_empty_table(self):
        """An empty row list yields text mentioning "vazia" (case-insensitive)."""
        from processors.markdown_formatter import _dict_to_markdown_table

        rendered = _dict_to_markdown_table([])
        assert "vazia" in rendered.lower()


# =============================================================================
# TESTES DE FILE HANDLER
# =============================================================================

class TestFileHandler:
    """Tests for file_handler.py."""

    def test_create_temp_directory(self):
        """create_temp_directory returns an existing directory named with the prefix."""
        import shutil

        from utils.file_handler import create_temp_directory

        workdir = create_temp_directory(prefix="test_")

        try:
            assert workdir.exists()
            assert workdir.is_dir()
            assert "test_" in workdir.name
        finally:
            # Remove the directory even when an assertion above failed.
            if workdir.exists():
                shutil.rmtree(workdir)

    def test_save_output_file(self):
        """save_output_file writes the given text to a file that can be read back."""
        import shutil

        from utils.file_handler import create_temp_directory, save_output_file

        workdir = create_temp_directory(prefix="test_")

        try:
            text = "Conteúdo de teste"
            written = save_output_file(text, "teste.txt", workdir)

            assert written.exists()
            # NOTE(review): read_text() decodes with the locale's default
            # encoding; confirm save_output_file writes with the same one,
            # since the text contains a non-ASCII character.
            assert written.read_text() == text
        finally:
            # Remove the directory even when an assertion above failed.
            if workdir.exists():
                shutil.rmtree(workdir)

    def test_format_size(self):
        """format_size output contains the unit expected for each magnitude."""
        from utils.file_handler import format_size

        cases = (
            ("B", 500),
            ("KB", 1024 * 5),
            ("MB", 1024 * 1024 * 10),
            ("GB", 1024 * 1024 * 1024 * 2),
        )
        for unit, size in cases:
            assert unit in format_size(size)


# =============================================================================
# TESTES DE INTEGRAÇÃO (MOCK)
# =============================================================================

class TestDoclingProcessorMock:
    """Tests for DoclingProcessor with DocumentConverter patched out."""

    @patch("processors.docling_processor.DocumentConverter")
    def test_processor_initialization(self, mock_converter_class):
        """Constructor flags are exposed as attributes of the same name."""
        from processors.docling_processor import DoclingProcessor

        options = {
            "enable_ocr": True,
            "enable_table_detection": True,
            "use_gpu": False,
        }
        processor = DoclingProcessor(**options)

        assert processor.enable_ocr is True
        assert processor.enable_table_detection is True
        assert processor.use_gpu is False

    @patch("processors.docling_processor.DocumentConverter")
    def test_processor_process_document(self, mock_converter_class):
        """process_document returns a mapping with the four expected keys."""
        from processors.docling_processor import DoclingProcessor

        # Build the mock chain bottom-up: document -> result -> converter.
        fake_document = MagicMock()
        fake_document.export_to_markdown.return_value = "# Teste"
        fake_result = MagicMock(document=fake_document)
        fake_converter = MagicMock()
        fake_converter.convert.return_value = fake_result
        mock_converter_class.return_value = fake_converter

        # Input file on disk; delete=False keeps it after the handle closes.
        handle = tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf", delete=False)
        with handle:
            handle.write(b"%PDF-1.4\n%%EOF\n")
        pdf_path = handle.name

        try:
            processor = DoclingProcessor()
            outcome = processor.process_document(pdf_path)

            for key in ("document", "metadata", "tables", "language"):
                assert key in outcome
        finally:
            # Always remove the input file.
            os.unlink(pdf_path)


# =============================================================================
# EXECUTAR TESTES
# =============================================================================

if __name__ == "__main__":
    # pytest.main() returns the exit status instead of exiting. Hand it to
    # sys.exit so that running this file as a script ends with a non-zero
    # status when tests fail, rather than always exiting with 0.
    sys.exit(pytest.main([__file__, "-v"]))