Spaces:

msse-team-3
/

ai-engineering-project

Sleeping

File size: 5,384 Bytes

f884e6e

"""
Citation validation tests for CI/CD pipeline.

These tests ensure that the citation system works properly and
prevents hallucination of document names in responses.
"""

import os
import sys

import pytest

# Add src to path for imports
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))


@pytest.mark.citation
def test_citation_fix_implementation():
    """Test that citation fix is properly implemented in prompt templates."""
    from llm.prompt_templates import PromptTemplates

    # Test that system prompt contains citation fix
    system_prompt = PromptTemplates.SYSTEM_PROMPT

    # Should contain explicit instructions to prevent document_X.md citations
    assert "EXACT filename" in system_prompt or "exact filename" in system_prompt
    assert "NEVER use generic names" in system_prompt  # Updated to match actual prompt content

    # Should specifically mention avoiding document_1.md style citations
    assert "document_1.md" in system_prompt or "document_X" in system_prompt

    print("✓ Citation fix properly implemented in prompt templates")


@pytest.mark.citation
def test_citation_extraction_accuracy():
    """Test that citation extraction works correctly."""
    from llm.prompt_templates import PromptTemplates

    # Test single citation with proper format
    test_response = "Based on the remote work policy [Source: remote_work_policy.md], " "employees can work remotely."
    citations = PromptTemplates.extract_citations(test_response)
    assert "remote_work_policy.md" in citations

    # Test multiple citations
    test_response_multi = (
        "According to [Source: employee_handbook.md] and " "[Source: remote_work_policy.md], the policies are clear."
    )
    citations = PromptTemplates.extract_citations(test_response_multi)
    assert len(citations) == 2
    assert "employee_handbook.md" in citations
    assert "remote_work_policy.md" in citations

    print("✓ Citation extraction working correctly")


@pytest.mark.citation
def test_citation_hallucination_prevention():
    """Test that citation hallucination prevention is working."""
    from llm.prompt_templates import PromptTemplates

    # Test that system prompt specifically prevents document_1.md style citations
    system_prompt = PromptTemplates.SYSTEM_PROMPT

    # Should contain instructions against generic document names
    assert "document_1.md" in system_prompt
    assert "real filenames" in system_prompt or "exact filename" in system_prompt

    # Test citation validation functionality
    hallucinated_response = "According to [Source: document_1.md] and [Source: document_2.md], " "this is policy."
    available_sources = ["remote_work_policy.md", "employee_handbook.md"]

    validation = PromptTemplates.validate_citations(hallucinated_response, available_sources)

    # Should detect that document_1.md and document_2.md are invalid
    assert validation.get("document_1.md", True) is False
    assert validation.get("document_2.md", True) is False

    print("✓ Citation hallucination prevention working")


@pytest.mark.citation
def test_citation_end_to_end_pipeline():
    """Test that E2E pipeline validation script works."""
    import os
    import subprocess

    script_path = os.path.join(os.path.dirname(__file__), "..", "scripts", "test_e2e_pipeline.py")

    if os.path.exists(script_path):
        # Run the E2E pipeline test script
        result = subprocess.run([sys.executable, script_path], capture_output=True, text=True, timeout=30)

        # Should run without errors (exit code 0 or basic validation pass)
        assert result.returncode in [0, 1], f"E2E pipeline test failed: {result.stderr}"

        print("✓ E2E pipeline validation script executable")
    else:
        print("⚠ E2E pipeline script not found - creating placeholder test")
        # Basic validation that our citation system works
        from llm.prompt_templates import PromptTemplates

        template = PromptTemplates.get_policy_qa_template()
        assert template.system_prompt is not None
        assert "filename" in template.citation_format


@pytest.mark.citation
def test_citation_validation_service():
    """Test that service validation script works."""
    import os
    import subprocess

    script_path = os.path.join(os.path.dirname(__file__), "..", "scripts", "validate_services.py")

    if os.path.exists(script_path):
        # Run the service validation script
        result = subprocess.run([sys.executable, script_path], capture_output=True, text=True, timeout=30)

        # Should run without critical errors
        assert result.returncode in [
            0,
            1,
        ], f"Service validation failed: {result.stderr}"

        print("✓ Service validation script executable")
    else:
        print("⚠ Service validation script not found - creating placeholder test")
        # Basic validation of citation functionality
        from llm.prompt_templates import PromptTemplates

        # Test that format_context works
        mock_results = [
            {
                "content": "Test content",
                "metadata": {"source_file": "test.md"},
                "similarity_score": 0.9,
            }
        ]

        formatted = PromptTemplates.format_context(mock_results)
        assert "test.md" in formatted
        assert "Test content" in formatted