#!/usr/bin/env python3
"""
Test script for Hugging Face Spaces deployment validation.

This script tests all the core functionality that might fail in HF Spaces:
1. Package imports
2. Tool creation and execution
3. Agent system functionality
4. Error handling for missing packages
"""

import sys
import traceback
import asyncio
from typing import List, Dict, Any

def test_imports() -> Dict[str, bool]:
    """Attempt every required and optional dependency import and report it.

    Each import statement is executed on its own so that one missing package
    does not hide the status of the others. A line is printed per dependency.

    Returns:
        Mapping of dependency label to ``True`` if its import statement ran
        without raising, ``False`` otherwise. Required dependencies are
        inserted first, followed by the optional ones.
    """
    print("🧪 Testing Critical Imports")
    print("=" * 50)

    # Dependencies whose failure is reported as FAILED.
    required = [
        ("langchain", "from langchain_core.tools import tool"),
        ("langchain_core", "from langchain_core.messages import BaseMessage"),
        ("langchain_groq", "from langchain_groq import ChatGroq"),
        ("langgraph", "from langgraph.graph import StateGraph"),
        ("pydantic", "from pydantic import BaseModel"),
        ("wikipedia", "import wikipedia"),
        ("arxiv", "import arxiv"),
        ("huggingface_hub", "from huggingface_hub import list_models"),
        ("python_dotenv", "from dotenv import load_dotenv"),
    ]

    # Dependencies whose failure is only reported as MISSING (optional).
    extras = [
        ("langchain_tavily", "from langchain_tavily import TavilySearch"),
        ("langfuse", "from langfuse import get_client"),
    ]

    outcome = {}

    # Walk the required group first, then the optional one, so the report and
    # the returned mapping keep that order.
    for is_optional, group in ((False, required), (True, extras)):
        for label, statement in group:
            try:
                exec(statement)
                outcome[label] = True
                if is_optional:
                    print(f"✅ {label}: OK (optional)")
                else:
                    print(f"✅ {label}: OK")
            except Exception as exc:
                outcome[label] = False
                if is_optional:
                    print(f"⚠️  {label}: MISSING (optional) - {exc}")
                else:
                    print(f"❌ {label}: FAILED - {exc}")

    return outcome

def test_tools_creation() -> bool:
    """Build the research and code tool sets and list what was created.

    Returns:
        ``True`` if both tool factories could be imported and called and every
        tool's name and description could be printed; ``False`` if anything in
        that process raised (the traceback is printed).
    """
    print("\n🔧 Testing Tool Creation")
    print("=" * 50)

    try:
        from langgraph_tools import get_research_tools, get_code_tools

        # Research tools are built and listed before code tools.
        factories = (
            ("Research tools", get_research_tools),
            ("Code tools", get_code_tools),
        )
        for heading, factory in factories:
            created = factory()
            print(f"✅ {heading}: {len(created)} tools created")
            for item in created:
                print(f"   - {item.name}: {item.description}")
    except Exception as exc:
        print(f"❌ Tool creation failed: {exc}")
        traceback.print_exc()
        return False
    else:
        return True

def test_observability() -> bool:
    """Exercise observability setup and callback-handler creation.

    A falsy result from either call is reported as a warning, not a failure.

    Returns:
        ``True`` when both calls complete without raising, whatever they
        return; ``False`` if the import or either call raises (the traceback
        is printed).
    """
    print("\n📊 Testing Observability")
    print("=" * 50)

    try:
        from observability import initialize_observability, get_callback_handler

        # Initialization is expected to cope with missing environment variables.
        initialized = initialize_observability()
        print(
            "✅ Observability initialized successfully"
            if initialized
            else "⚠️  Observability initialization failed (expected without env vars)"
        )

        callback = get_callback_handler()
        print(
            "✅ Callback handler created"
            if callback
            else "⚠️  No callback handler (expected without proper setup)"
        )
    except Exception as exc:
        print(f"❌ Observability test failed: {exc}")
        traceback.print_exc()
        return False
    else:
        return True

async def test_agent_system() -> bool:
    """Run one simple arithmetic question through the agent system.

    Returns:
        ``True`` if the agent returns a non-blank answer other than the
        "No answer could be generated." placeholder; ``False`` if the answer
        is empty, blank, the placeholder, or if anything raises (the traceback
        is printed).
    """
    print("\n🤖 Testing Agent System")
    print("=" * 50)

    try:
        from langgraph_agent_system import run_agent_system

        print("📝 Testing math question: 'What is 15 + 27?'")
        answer = await run_agent_system("What is 15 + 27?", max_iterations=2)
        print(f"📊 Result: {answer}")

        # An answer counts only if it is non-empty, non-blank and not the
        # placeholder text.
        answered = bool(
            answer
            and answer.strip()
            and answer != "No answer could be generated."
        )
        if not answered:
            print("⚠️  Agent system returned no answer")
            return False

        print("✅ Agent system working correctly")
        return True

    except Exception as exc:
        print(f"❌ Agent system test failed: {exc}")
        traceback.print_exc()
        return False

def test_fallback_search() -> bool:
    """Invoke the Wikipedia and web search tools and report what came back.

    A short or empty search result is reported as a warning only; it does not
    make this check fail.

    Returns:
        ``True`` when both tools could be imported and invoked without
        raising, whatever they returned; ``False`` if anything raises (the
        traceback is printed).
    """
    print("\n🔍 Testing Search Fallbacks")
    print("=" * 50)

    try:
        from langgraph_tools import wikipedia_search_tool, get_tavily_search_tool

        print("📚 Testing Wikipedia search...")
        wiki_text = wikipedia_search_tool.invoke({"query": "Python programming"})
        # More than 100 items of output is treated as a real result.
        wiki_ok = bool(wiki_text and len(wiki_text) > 100)
        print(
            "✅ Wikipedia search working"
            if wiki_ok
            else "⚠️  Wikipedia search returned limited results"
        )

        print("🌐 Testing web search...")
        web_tool = get_tavily_search_tool()
        web_hits = web_tool.invoke({"query": "current weather"})
        print(
            "✅ Web search working (with fallback if needed)"
            if web_hits
            else "⚠️  Web search failed"
        )
    except Exception as exc:
        print(f"❌ Search test failed: {exc}")
        traceback.print_exc()
        return False
    else:
        return True

def main():
    """Run every check in order, print a summary, and exit non-zero on failure.

    The checks run in this order: imports, tool creation, observability,
    search, agent system. Only tool creation, search and the agent system
    decide the final verdict; the import counts and the observability result
    are reported but do not affect it. When any deciding check fails the
    process exits with status 1.
    """
    print("🚀 Hugging Face Spaces Deployment Test")
    print("=" * 60)

    # Execute the checks; each one prints its own section.
    import_report = test_imports()
    tools_ok = test_tools_creation()
    observability_ok = test_observability()
    search_ok = test_fallback_search()
    agent_ok = asyncio.run(test_agent_system())

    print("\n📋 TEST SUMMARY")
    print("=" * 60)

    # Import summary: number of successful imports out of all attempted.
    imports_passed = sum(map(bool, import_report.values()))
    print(f"📦 Imports: {imports_passed}/{len(import_report)} successful")

    # One PASS/FAIL line per functional check.
    summary_rows = (
        ("Tools Creation", tools_ok),
        ("Observability", observability_ok),
        ("Search Functions", search_ok),
        ("Agent System", agent_ok),
    )
    for label, ok in summary_rows:
        verdict = "✅ PASS" if ok else "❌ FAIL"
        print(f"{label}: {verdict}")

    # Final verdict: observability is intentionally left out of the gate.
    if not (tools_ok and search_ok and agent_ok):
        print("\n⚠️  Some tests failed - Check logs above")
        sys.exit(1)

    print("\n🎉 ALL CRITICAL TESTS PASSED - Ready for HF Spaces!")

# Run the full validation suite only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()