File size: 4,349 Bytes
f884e6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env python3
"""
Service Validation Script for HuggingFace CI/CD

This script validates that all services can be initialized properly
in the HuggingFace environment.
"""

import os
import sys
import traceback
from typing import Tuple

# Put the ``../src`` directory (built relative to this script's own location)
# at the front of the import path so it is searched before any other entry.
# NOTE(review): presumably this is where the ``embedding``, ``llm`` and
# ``services`` packages imported by the validators below live — confirm.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))


def validate_service(service_name: str, init_func) -> Tuple[bool, str]:
    """
    Run one validation callable and report the outcome.

    Args:
        service_name: Human-readable name of the service, used in the message.
        init_func: Zero-argument callable that initializes or checks the
            service. Its return value is ignored; only whether it raises
            matters.

    Returns:
        Tuple of (success, message). ``success`` is True when ``init_func``
        returns normally, and also when it raises an exception whose message
        contains "mock" or "token" (case-insensitive), which is treated as an
        expected condition in CI. Any other exception yields False.
    """
    try:
        init_func()
    except Exception as e:
        error_text = str(e)
        lowered = error_text.lower()
        if "mock" in lowered or "token" in lowered:
            # Expected errors in CI environment
            return (
                True,
                f"⚠️  {service_name}: Expected error in CI (token/auth): {error_text}",
            )
        # Bare ``assert`` failures (and a few other exceptions) have an empty
        # message; fall back to the exception class name so the report always
        # says what went wrong.
        detail = error_text or type(e).__name__
        return False, f"❌ {service_name}: {detail}"
    else:
        return True, f"✅ {service_name}: Initialized successfully"


def validate_hf_embedding_service():
    """
    Check that the HF embedding service can be imported and constructed.

    Returns:
        A freshly constructed ``HFEmbeddingService`` (built with no arguments).
    """
    # Imported lazily so an import failure is raised from this validator
    # rather than when the script itself is loaded.
    from embedding.hf_embedding_service import HFEmbeddingService

    return HFEmbeddingService()


def validate_prompt_templates():
    """
    Validate the policy Q&A prompt template.

    Fetches the template via ``PromptTemplates.get_policy_qa_template()`` and
    checks that it has a system prompt containing the text "CRITICAL".

    Returns:
        The template object returned by ``get_policy_qa_template()``.

    Raises:
        AssertionError: If the system prompt is ``None`` or does not contain
            "CRITICAL".
    """
    from llm.prompt_templates import PromptTemplates

    template = PromptTemplates.get_policy_qa_template()
    system_prompt = template.system_prompt

    # Raise explicitly rather than using ``assert``: assert statements are
    # removed when Python runs with -O, which would silently disable these
    # checks, and a bare assert also fails with an empty message.
    if system_prompt is None:
        raise AssertionError("Policy QA template has no system prompt")
    if "CRITICAL" not in system_prompt:  # Check our citation fix
        raise AssertionError(
            'Policy QA system prompt does not contain the "CRITICAL" marker'
        )
    return template


def validate_search_service():
    """
    Report whether ``SearchService`` can be imported.

    Only the import is attempted; the service is never constructed. A failed
    import is not an error here: it is reported through the return value.

    Returns:
        A status string describing whether the import succeeded.
    """
    try:
        # Note: SearchService may require vector DB, so just check import
        from services.search_service import SearchService  # noqa: F401
    except ImportError:
        return "SearchService not available (expected in some environments)"
    else:
        return "SearchService imported successfully"


def validate_citation_validation():
    """
    Validate citation extraction on a fixed sample response.

    Passes a response containing a single ``[Source: remote_work_policy.md]``
    marker to ``PromptTemplates.extract_citations`` and checks that exactly
    one citation comes back and that it is ``remote_work_policy.md``.

    Returns:
        A status string that includes the extracted citations.

    Raises:
        AssertionError: If the number of citations is not 1, or the expected
            file name is not among them.
    """
    from llm.prompt_templates import PromptTemplates

    expected_source = "remote_work_policy.md"

    # Test citation extraction
    test_response = "Based on the policy [Source: remote_work_policy.md], employees can work from home."
    citations = PromptTemplates.extract_citations(test_response)

    # Raise explicitly rather than using ``assert``: assert statements are
    # removed when Python runs with -O, which would silently disable these
    # checks, and a bare assert also fails with an empty message.
    if len(citations) != 1:
        raise AssertionError(
            f"Expected exactly 1 citation, got {len(citations)}: {citations}"
        )
    if expected_source not in citations:
        raise AssertionError(
            f"Expected citation {expected_source!r} not found in {citations}"
        )

    return f"Citation extraction working: {citations}"


def validate_context_formatting():
    """
    Validate the fixed context formatting.

    Formats one sample search result with ``PromptTemplates.format_context``
    and checks that the output contains ``SOURCE FILE: test_policy.md`` and
    no longer contains the old ``Document 1:`` label.

    Returns:
        A fixed status string when both checks pass.

    Raises:
        AssertionError: If the source-file header is missing or the old
            label is still present.
    """
    from llm.prompt_templates import PromptTemplates

    sample_results = [
        {
            "content": "Test policy content",
            "metadata": {"source_file": "test_policy.md"},
            "similarity_score": 0.95,
        }
    ]

    formatted = PromptTemplates.format_context(sample_results)

    # Raise explicitly rather than using ``assert``: assert statements are
    # removed when Python runs with -O, which would silently disable these
    # checks, and a bare assert also fails with an empty message.
    # Check that our fix is working
    if "SOURCE FILE: test_policy.md" not in formatted:
        raise AssertionError(
            'Formatted context is missing "SOURCE FILE: test_policy.md"'
        )
    if "Document 1:" in formatted:  # Old format should be gone
        raise AssertionError(
            'Formatted context still contains the old "Document 1:" label'
        )

    return "Context formatting fix verified"


def main():
    """
    Run every registered validation and print a report.

    Each check is executed through ``validate_service``; its message is
    printed as soon as the check finishes, followed by a pass-count summary.

    Returns:
        0 when every validation reports success, otherwise 1.
    """
    rule = "=" * 40
    print("🔍 HuggingFace Service Validation")
    print(rule)

    # (display name, zero-argument validator) pairs, run in this order.
    checks = (
        ("HF Embedding Service", validate_hf_embedding_service),
        ("Prompt Templates", validate_prompt_templates),
        ("Search Service", validate_search_service),
        ("Citation Validation", validate_citation_validation),
        ("Context Formatting Fix", validate_context_formatting),
    )

    flags = []
    for label, check in checks:
        ok, text = validate_service(label, check)
        flags.append(ok)
        print(text)

    print("\n" + rule)

    # Summary
    passed = len([flag for flag in flags if flag])
    total = len(flags)
    print(f"Validation Summary: {passed}/{total} passed")

    if passed != total:
        print("⚠️  Some validations failed.")
        return 1

    print("🎉 All service validations passed!")
    return 0


if __name__ == "__main__":
    # Exit with main()'s status code. SystemExit is not an Exception subclass,
    # so the handler below only sees unexpected failures raised by main().
    try:
        sys.exit(main())
    except Exception as exc:
        print(f"❌ Validation script failed: {exc}")
        traceback.print_exc()
        sys.exit(1)