"""Tests for document ingestion."""

import pytest
from pathlib import Path
import tempfile
from core.ingest import DocumentLoader, HierarchicalClassifier, DocumentProcessor


def test_document_loader_txt():
    """Check that ``load_txt`` returns the file's text together with metadata.

    A small ``.txt`` fixture is written to disk, loaded through
    ``DocumentLoader.load_txt``, and the returned ``(content, metadata)``
    pair is checked for the written text, a ``"txt"`` format tag and the
    presence of a ``source_name`` entry.
    """
    # delete=False keeps the file on disk once the handle is closed, so the
    # loader can reopen it by path; cleanup happens in the ``finally`` below.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as tmp:
        tmp.write("Test content for document loader.")
    sample_path = tmp.name

    try:
        text, info = DocumentLoader().load_txt(sample_path)

        assert "Test content" in text
        assert info["format"] == "txt"
        assert "source_name" in info
    finally:
        # Remove the fixture whether or not the assertions passed.
        Path(sample_path).unlink()


def test_document_loader_unsupported():
    """Check that ``load`` raises ``ValueError`` for a ``.docx`` path.

    The fixture is an empty file; only its ``.docx`` suffix matters for
    this test's expectation.
    """
    # Create the empty placeholder and close it immediately; delete=False
    # leaves it on disk so the loader receives a path that really exists.
    placeholder = tempfile.NamedTemporaryFile(suffix=".docx", delete=False)
    placeholder.close()
    docx_path = placeholder.name

    try:
        loader = DocumentLoader()
        with pytest.raises(ValueError):
            loader.load(docx_path)
    finally:
        # Always clean up the placeholder file.
        Path(docx_path).unlink()


def test_hierarchical_classifier():
    """Check that ``classify_text`` reports every expected classification key.

    Only the presence of the keys is verified, not the values assigned
    to them.
    """
    sample = "Patient admission procedures and clinical protocols for emergency care."
    result = HierarchicalClassifier("hospital").classify_text(sample)

    # The three hierarchy levels plus the document type must all be present.
    for expected_key in ("level1", "level2", "level3", "doc_type"):
        assert expected_key in result


def test_document_processor():
    """Run a text file through ``process_document`` and inspect the first chunk.

    The test expects at least one chunk, and expects the first chunk to
    carry ``text`` and ``metadata`` entries, with ``chunk_id`` and
    ``level1`` inside the metadata.
    """
    # Repeat the sentence so the input is substantial relative to chunk_size.
    body = "Medical policy for patient care. " * 100
    with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as tmp:
        tmp.write(body)
    doc_path = tmp.name

    try:
        pipeline = DocumentProcessor(
            hierarchy_name="hospital",
            chunk_size=256,
            chunk_overlap=50,
        )
        chunks = pipeline.process_document(doc_path)

        assert len(chunks) > 0

        first = chunks[0]
        assert "text" in first
        assert "metadata" in first

        first_meta = first["metadata"]
        assert "chunk_id" in first_meta
        assert "level1" in first_meta
    finally:
        # delete=False above means the fixture must be removed by hand.
        Path(doc_path).unlink()


def test_chunk_consistency():
    """Check that chunks of one document share an id and are numbered in order.

    Every chunk's metadata must carry the same ``doc_id``, and the
    ``chunk_index`` values must be exactly ``0, 1, ..., len(chunks) - 1``
    in the order the chunks are returned.
    """
    with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as tmp:
        tmp.write("Hospital emergency procedures. " * 50)
    doc_path = tmp.name

    try:
        pipeline = DocumentProcessor("hospital", chunk_size=200, chunk_overlap=20)
        chunks = pipeline.process_document(doc_path)

        # Exactly one distinct doc_id across all chunks.
        distinct_ids = {piece["metadata"]["doc_id"] for piece in chunks}
        assert len(distinct_ids) == 1

        # Indices must count up from zero with no gaps or reordering.
        positions = [piece["metadata"]["chunk_index"] for piece in chunks]
        expected_positions = list(range(len(chunks)))
        assert positions == expected_positions
    finally:
        # Remove the fixture regardless of the test outcome.
        Path(doc_path).unlink()