# ai-engineering-project / scripts / validate_services.py
# GitHub Action — "Clean deployment without binary files" (commit f884e6e)
# NOTE(review): these lines were web-export residue above the shebang; kept as
# comments so the file parses, but the shebang on the next line no longer sits
# at byte 0 — confirm the script is invoked via `python scripts/...` in CI.
#!/usr/bin/env python3
"""
Service Validation Script for HuggingFace CI/CD
This script validates that all services can be initialized properly
in the HuggingFace environment.
"""
import os
import sys
import traceback
from typing import Tuple
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
def validate_service(service_name: str, init_func) -> Tuple[bool, str]:
    """Attempt to initialize a service and report the outcome.

    Args:
        service_name: Human-readable name of the service.
        init_func: Zero-argument callable that initializes the service.

    Returns:
        Tuple of (success, message). Failures mentioning "mock" or "token"
        are treated as expected in CI (no secrets available) and reported
        as success with a warning message.
    """
    try:
        init_func()
    except Exception as exc:
        detail = str(exc)
        lowered = detail.lower()
        # Auth/token errors are anticipated when running without credentials.
        if "mock" in lowered or "token" in lowered:
            return (
                True,
                f"⚠️ {service_name}: Expected error in CI (token/auth): {detail}",
            )
        return False, f"❌ {service_name}: {detail}"
    return True, f"βœ… {service_name}: Initialized successfully"
def validate_hf_embedding_service():
    """Validate HF Embedding Service initialization.

    Imports lazily so a missing dependency surfaces as a validation
    failure rather than breaking the whole script at import time.
    """
    from embedding.hf_embedding_service import HFEmbeddingService

    # Constructing the service is the validation; return it for inspection.
    return HFEmbeddingService()
def validate_prompt_templates():
    """Validate Prompt Templates.

    Ensures the policy-QA template has a system prompt and that it carries
    the CRITICAL citation instruction added by the citation fix.
    """
    from llm.prompt_templates import PromptTemplates

    tmpl = PromptTemplates.get_policy_qa_template()
    assert tmpl.system_prompt is not None
    # The citation fix is expected to add a CRITICAL marker to the prompt.
    assert "CRITICAL" in tmpl.system_prompt
    return tmpl
def validate_search_service():
    """Validate Search Service (if available).

    Only checks importability: SearchService may require a vector DB,
    so instantiation is deliberately skipped.
    """
    try:
        from services.search_service import SearchService  # noqa: F401
    except ImportError:
        # Absence is tolerated — some environments ship without it.
        return "SearchService not available (expected in some environments)"
    return "SearchService imported successfully"
def validate_citation_validation():
    """Validate citation validation functionality.

    Runs citation extraction over a canned response and checks that
    exactly one citation is found and it names the expected file.
    """
    from llm.prompt_templates import PromptTemplates

    sample = "Based on the policy [Source: remote_work_policy.md], employees can work from home."
    found = PromptTemplates.extract_citations(sample)
    assert len(found) == 1
    assert "remote_work_policy.md" in found
    return f"Citation extraction working: {found}"
def validate_context_formatting():
    """Validate the fixed context formatting.

    Formats a minimal mock search result and checks the new
    "SOURCE FILE:" header is present while the old "Document N:"
    header is gone.
    """
    from llm.prompt_templates import PromptTemplates

    sample_results = [
        {
            "content": "Test policy content",
            "metadata": {"source_file": "test_policy.md"},
            "similarity_score": 0.95,
        }
    ]
    rendered = PromptTemplates.format_context(sample_results)
    # The formatting fix replaced the "Document N:" header with the source file.
    assert "SOURCE FILE: test_policy.md" in rendered
    assert "Document 1:" not in rendered
    return "Context formatting fix verified"
def main():
    """Run every service validation, print results, and return an exit code.

    Returns:
        0 when all validations pass, 1 otherwise.
    """
    print("πŸ” HuggingFace Service Validation")
    print("=" * 40)
    checks = [
        ("HF Embedding Service", validate_hf_embedding_service),
        ("Prompt Templates", validate_prompt_templates),
        ("Search Service", validate_search_service),
        ("Citation Validation", validate_citation_validation),
        ("Context Formatting Fix", validate_context_formatting),
    ]
    outcomes = []
    for label, check in checks:
        ok, message = validate_service(label, check)
        outcomes.append(ok)
        print(message)
    print("\n" + "=" * 40)
    # Summary line for the CI log.
    passed = sum(outcomes)
    total = len(outcomes)
    print(f"Validation Summary: {passed}/{total} passed")
    if passed == total:
        print("πŸŽ‰ All service validations passed!")
        return 0
    print("⚠️ Some validations failed.")
    return 1
if __name__ == "__main__":
    # Script entry point: run validations and propagate the exit code to CI.
    try:
        exit_code = main()
        sys.exit(exit_code)
    except Exception as e:
        # Catch-all so CI gets a clear failure message plus a traceback
        # instead of a bare unhandled-exception dump, then a nonzero exit.
        print(f"❌ Validation script failed: {e}")
        traceback.print_exc()
        sys.exit(1)