"""
Tests for document processing (``src.document_processing.DocumentProcessor``).
"""

import pytest
import tempfile
from pathlib import Path
from src.document_processing import DocumentProcessor


class TestDocumentProcessor:
    """Tests for the DocumentProcessor class.

    Tests that need a real file on disk use pytest's built-in ``tmp_path``
    fixture, so each test gets its own directory and no manual cleanup
    (``try``/``finally`` + ``unlink``) is required.
    """

    @pytest.fixture
    def processor(self):
        """Return a DocumentProcessor instance (function-scoped fixture)."""
        return DocumentProcessor()

    def test_extract_text_from_txt(self, processor, tmp_path):
        """Check that text written to a .txt file is returned by extract_text."""
        txt_file = tmp_path / "sample.txt"
        # Explicit encoding keeps the fixture file independent of the
        # platform's locale default.
        txt_file.write_text(
            "Este e um texto de teste.\nCom multiplas linhas.",
            encoding="utf-8",
        )

        # Pass a plain string path, as a caller holding a filename would.
        text = processor.extract_text(str(txt_file))

        assert "Este e um texto de teste" in text
        assert "Com multiplas linhas" in text

    def test_extract_text_from_nonexistent_file(self, processor):
        """Check that extract_text raises FileNotFoundError for a missing file."""
        with pytest.raises(FileNotFoundError):
            processor.extract_text("/caminho/inexistente.txt")

    def test_detect_file_type_txt(self, processor):
        """Check that .txt and .text names are detected as "TXT"."""
        assert processor.detect_file_type("documento.txt") == "TXT"
        assert processor.detect_file_type("arquivo.text") == "TXT"

    def test_detect_file_type_pdf(self, processor):
        """Check that .pdf names are detected as "PDF", regardless of case."""
        assert processor.detect_file_type("documento.pdf") == "PDF"
        assert processor.detect_file_type("ARQUIVO.PDF") == "PDF"

    def test_detect_file_type_md(self, processor):
        """Check that .md and .markdown names are detected as "MD"."""
        assert processor.detect_file_type("readme.md") == "MD"
        assert processor.detect_file_type("docs.markdown") == "MD"

    def test_detect_file_type_unknown(self, processor):
        """Check that an unrecognized extension is reported as "UNKNOWN"."""
        assert processor.detect_file_type("arquivo.xyz") == "UNKNOWN"

    def test_clean_text(self, processor):
        """Check that clean_text collapses repeated whitespace and trims ends."""
        dirty_text = "  Texto   com    espacos  \n\n\n  multiplos  "
        clean_text = processor.clean_text(dirty_text)

        # No run of two spaces and no run of three newlines may survive.
        assert "  " not in clean_text
        assert "\n\n\n" not in clean_text
        # The result must carry no leading or trailing whitespace.
        assert clean_text.strip() == clean_text

    def test_clean_text_empty(self, processor):
        """Check that empty and whitespace-only input clean to an empty string."""
        assert processor.clean_text("") == ""
        assert processor.clean_text("   ") == ""

    def test_get_text_stats(self, processor):
        """Check the statistics reported for a single-line, non-empty text."""
        text = "Este e um texto de teste. Tem varias palavras e caracteres."

        stats = processor.get_text_stats(text)

        assert stats['num_chars'] > 0
        assert stats['num_words'] > 0
        assert stats['num_lines'] >= 1
        # The character count is expected to be the raw string length.
        assert stats['num_chars'] == len(text)

    def test_get_text_stats_empty(self, processor):
        """Check that every statistic is zero for an empty string."""
        stats = processor.get_text_stats("")

        assert stats['num_chars'] == 0
        assert stats['num_words'] == 0
        assert stats['num_lines'] == 0

    def test_split_into_sentences(self, processor):
        """Check that a three-sentence text is split into three ordered parts."""
        text = "Esta e a primeira sentenca. Esta e a segunda. E esta e a terceira!"

        sentences = processor.split_into_sentences(text)

        assert len(sentences) == 3
        assert "primeira" in sentences[0]
        assert "segunda" in sentences[1]
        assert "terceira" in sentences[2]

    def test_extract_metadata_from_filename(self, processor):
        """Check the metadata derived from a file name."""
        metadata = processor.extract_metadata_from_filename("documento_importante_2026.pdf")

        assert metadata['file_type'] == "PDF"
        # 'filename' is read with a default so a missing key fails the
        # assertion instead of raising KeyError.
        assert '2026' in metadata.get('filename', '')

    def test_process_file_txt(self, processor, tmp_path):
        """Check the result of running process_file on a .txt file."""
        txt_file = tmp_path / "sample.txt"
        txt_file.write_text("Conteudo do arquivo de teste.", encoding="utf-8")

        result = processor.process_file(str(txt_file))

        assert result['text'] is not None
        assert result['file_type'] == "TXT"
        assert result['stats']['num_chars'] > 0
        assert 'Conteudo do arquivo' in result['text']


if __name__ == "__main__":
    # Run this file's tests verbosely when executed as a script, and
    # propagate pytest's exit code so a failing run yields a non-zero
    # process status instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v"]))