# ai-engineering-project / scripts / validate_services.py
# GitHub Action — "Clean deployment without binary files" (commit f884e6e)
# NOTE(review): these lines were web-export residue above the shebang; kept as
# comments so the file parses, but the shebang on the next line no longer sits
# at byte 0 — confirm the script is invoked via `python scripts/...` in CI.
#!/usr/bin/env python3
"""
Service Validation Script for HuggingFace CI/CD
This script validates that all services can be initialized properly
in the HuggingFace environment.
"""
import os
import sys
import traceback
from typing import Tuple
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
def validate_service(service_name: str, init_func) -> Tuple[bool, str]:
    """Attempt to initialize a service and report the outcome.

    Args:
        service_name: Human-readable name of the service.
        init_func: Zero-argument callable that initializes the service.

    Returns:
        Tuple of (success, message). Failures mentioning "mock" or "token"
        are treated as expected in CI (no secrets available) and reported
        as success with a warning message.
    """
    try:
        init_func()
    except Exception as exc:
        detail = str(exc)
        lowered = detail.lower()
        # Auth/token errors are anticipated when running without credentials.
        if "mock" in lowered or "token" in lowered:
            return (
                True,
                f"⚠️ {service_name}: Expected error in CI (token/auth): {detail}",
            )
        return False, f"❌ {service_name}: {detail}"
    return True, f"βœ… {service_name}: Initialized successfully"
def validate_hf_embedding_service():
    """Validate HF Embedding Service initialization.

    Imports lazily so a missing dependency surfaces as a validation
    failure rather than breaking the whole script at import time.
    """
    from embedding.hf_embedding_service import HFEmbeddingService

    # Constructing the service is the validation; return it for inspection.
    return HFEmbeddingService()
def validate_prompt_templates():
    """Validate Prompt Templates.

    Ensures the policy-QA template has a system prompt and that it carries
    the CRITICAL citation instruction added by the citation fix.
    """
    from llm.prompt_templates import PromptTemplates

    tmpl = PromptTemplates.get_policy_qa_template()
    assert tmpl.system_prompt is not None
    # The citation fix is expected to add a CRITICAL marker to the prompt.
    assert "CRITICAL" in tmpl.system_prompt
    return tmpl
def validate_search_service():
    """Validate Search Service (if available).

    Only checks importability: SearchService may require a vector DB,
    so instantiation is deliberately skipped.
    """
    try:
        from services.search_service import SearchService  # noqa: F401
    except ImportError:
        # Absence is tolerated — some environments ship without it.
        return "SearchService not available (expected in some environments)"
    return "SearchService imported successfully"
def validate_citation_validation():
    """Validate citation validation functionality.

    Runs citation extraction over a canned response and checks that
    exactly one citation is found and it names the expected file.
    """
    from llm.prompt_templates import PromptTemplates

    sample = "Based on the policy [Source: remote_work_policy.md], employees can work from home."
    found = PromptTemplates.extract_citations(sample)
    assert len(found) == 1
    assert "remote_work_policy.md" in found
    return f"Citation extraction working: {found}"
def validate_context_formatting():
    """Validate the fixed context formatting.

    Formats a minimal mock search result and checks the new
    "SOURCE FILE:" header is present while the old "Document N:"
    header is gone.
    """
    from llm.prompt_templates import PromptTemplates

    sample_results = [
        {
            "content": "Test policy content",
            "metadata": {"source_file": "test_policy.md"},
            "similarity_score": 0.95,
        }
    ]
    rendered = PromptTemplates.format_context(sample_results)
    # The formatting fix replaced the "Document N:" header with the source file.
    assert "SOURCE FILE: test_policy.md" in rendered
    assert "Document 1:" not in rendered
    return "Context formatting fix verified"
def main():
    """Run every service validation, print results, and return an exit code.

    Returns:
        0 when all validations pass, 1 otherwise.
    """
    print("πŸ” HuggingFace Service Validation")
    print("=" * 40)
    checks = [
        ("HF Embedding Service", validate_hf_embedding_service),
        ("Prompt Templates", validate_prompt_templates),
        ("Search Service", validate_search_service),
        ("Citation Validation", validate_citation_validation),
        ("Context Formatting Fix", validate_context_formatting),
    ]
    outcomes = []
    for label, check in checks:
        ok, message = validate_service(label, check)
        outcomes.append(ok)
        print(message)
    print("\n" + "=" * 40)
    # Summary line for the CI log.
    passed = sum(outcomes)
    total = len(outcomes)
    print(f"Validation Summary: {passed}/{total} passed")
    if passed == total:
        print("πŸŽ‰ All service validations passed!")
        return 0
    print("⚠️ Some validations failed.")
    return 1
if __name__ == "__main__":
    # Script entry point: run validations and propagate the exit code to CI.
    try:
        exit_code = main()
        sys.exit(exit_code)
    except Exception as e:
        # Catch-all so CI gets a clear failure message plus a traceback
        # instead of a bare unhandled-exception dump, then a nonzero exit.
        print(f"❌ Validation script failed: {e}")
        traceback.print_exc()
        sys.exit(1)