File size: 7,427 Bytes
df7388a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
#!/usr/bin/env python3
"""
Test script for Hugging Face Spaces deployment validation.

This script tests all the core functionality that might fail in HF Spaces:
1. Package imports
2. Tool creation and execution
3. Agent system functionality
4. Error handling for missing packages
"""

import sys
import traceback
import asyncio
from typing import List, Dict, Any

def test_imports() -> Dict[str, bool]:
    """Try each required and optional import statement and record the outcome.

    Every import statement is executed with ``exec``; any exception raised
    while doing so marks that entry as failed. A status line is printed for
    each entry, with optional packages reported as "MISSING" rather than
    "FAILED".

    Returns:
        Mapping from the label of each import to ``True`` if the statement
        executed without raising, ``False`` otherwise. Required entries come
        first, followed by the optional ones.
    """
    print("πŸ§ͺ Testing Critical Imports")
    print("=" * 50)

    # Packages checked as required imports.
    required = [
        ("langchain", "from langchain_core.tools import tool"),
        ("langchain_core", "from langchain_core.messages import BaseMessage"),
        ("langchain_groq", "from langchain_groq import ChatGroq"),
        ("langgraph", "from langgraph.graph import StateGraph"),
        ("pydantic", "from pydantic import BaseModel"),
        ("wikipedia", "import wikipedia"),
        ("arxiv", "import arxiv"),
        ("huggingface_hub", "from huggingface_hub import list_models"),
        ("python_dotenv", "from dotenv import load_dotenv"),
    ]

    # Packages reported as optional when missing.
    extras = [
        ("langchain_tavily", "from langchain_tavily import TavilySearch"),
        ("langfuse", "from langfuse import get_client"),
    ]

    outcome = {}

    # One pass over both groups; the flag only selects the message wording.
    for group, is_optional in ((required, False), (extras, True)):
        for label, statement in group:
            try:
                exec(statement)
                outcome[label] = True
                if is_optional:
                    print(f"βœ… {label}: OK (optional)")
                else:
                    print(f"βœ… {label}: OK")
            except Exception as err:
                outcome[label] = False
                if is_optional:
                    print(f"⚠️  {label}: MISSING (optional) - {err}")
                else:
                    print(f"❌ {label}: FAILED - {err}")

    return outcome

def test_tools_creation() -> bool:
    """Build the research and code tool lists and print what was created.

    Imports ``get_research_tools`` and ``get_code_tools`` from
    ``langgraph_tools``, calls each, and prints the number of tools together
    with every tool's ``name`` and ``description``.

    Returns:
        ``True`` if the import, both factory calls and the listing complete
        without raising; ``False`` (after printing the traceback) otherwise.
    """
    print("\nπŸ”§ Testing Tool Creation")
    print("=" * 50)

    def _list_tools(tools) -> None:
        # One indented line per tool: "<name>: <description>".
        for entry in tools:
            print(f"   - {entry.name}: {entry.description}")

    try:
        from langgraph_tools import get_research_tools, get_code_tools

        # Research tools first.
        research_tools = get_research_tools()
        print(f"βœ… Research tools: {len(research_tools)} tools created")
        _list_tools(research_tools)

        # Then code tools.
        code_tools = get_code_tools()
        print(f"βœ… Code tools: {len(code_tools)} tools created")
        _list_tools(code_tools)
    except Exception as err:
        print(f"❌ Tool creation failed: {err}")
        traceback.print_exc()
        return False

    return True

def test_observability() -> bool:
    """Exercise observability start-up and callback-handler creation.

    Imports ``initialize_observability`` and ``get_callback_handler`` from
    ``observability`` and calls each once. A falsy result from either call is
    reported as a warning only; it does not fail the test.

    Returns:
        ``True`` if the import and both calls complete without raising;
        ``False`` (after printing the traceback) if anything raises.
    """
    print("\nπŸ“Š Testing Observability")
    print("=" * 50)

    try:
        from observability import initialize_observability, get_callback_handler

        # A falsy result is tolerated here; the warning text itself says it
        # is expected when env vars are absent.
        init_ok = initialize_observability()
        print(
            "βœ… Observability initialized successfully"
            if init_ok
            else "⚠️  Observability initialization failed (expected without env vars)"
        )

        # Likewise, a missing handler is only a warning.
        callback = get_callback_handler()
        print(
            "βœ… Callback handler created"
            if callback
            else "⚠️  No callback handler (expected without proper setup)"
        )
    except Exception as err:
        print(f"❌ Observability test failed: {err}")
        traceback.print_exc()
        return False

    return True

async def test_agent_system() -> bool:
    """Run one simple question through the agent system and check the answer.

    Imports ``run_agent_system`` from ``langgraph_agent_system`` and awaits it
    with the question ``"What is 15 + 27?"`` and ``max_iterations=2``.

    Returns:
        ``True`` if the result is truthy, non-blank after stripping, and not
        the placeholder ``"No answer could be generated."``; ``False`` if the
        result fails that check or if anything raises (the traceback is
        printed in that case).
    """
    print("\nπŸ€– Testing Agent System")
    print("=" * 50)

    try:
        from langgraph_agent_system import run_agent_system

        print("πŸ“ Testing math question: 'What is 15 + 27?'")
        answer = await run_agent_system("What is 15 + 27?", max_iterations=2)
        print(f"πŸ“Š Result: {answer}")

        # Reject empty, whitespace-only, or placeholder answers up front.
        if (
            not answer
            or not answer.strip()
            or answer == "No answer could be generated."
        ):
            print("⚠️  Agent system returned no answer")
            return False

        print("βœ… Agent system working correctly")
        return True

    except Exception as err:
        print(f"❌ Agent system test failed: {err}")
        traceback.print_exc()
        return False

def test_fallback_search() -> bool:
    """Invoke the Wikipedia and web search tools once each.

    Imports ``wikipedia_search_tool`` and ``get_tavily_search_tool`` from
    ``langgraph_tools``. The Wikipedia tool is invoked with the query
    ``"Python programming"``; the tool returned by ``get_tavily_search_tool``
    is invoked with ``"current weather"``. Short or empty results only
    produce a warning line.

    Returns:
        ``True`` if the import and both invocations complete without raising;
        ``False`` (after printing the traceback) if anything raises.
    """
    print("\nπŸ” Testing Search Fallbacks")
    print("=" * 50)

    try:
        from langgraph_tools import wikipedia_search_tool, get_tavily_search_tool

        # Wikipedia: a result longer than 100 items/characters counts as working.
        print("πŸ“š Testing Wikipedia search...")
        wiki_text = wikipedia_search_tool.invoke({"query": "Python programming"})
        print(
            "βœ… Wikipedia search working"
            if wiki_text and len(wiki_text) > 100
            else "⚠️  Wikipedia search returned limited results"
        )

        # Web search: any truthy result counts as working.
        print("🌐 Testing web search...")
        web_tool = get_tavily_search_tool()
        web_hits = web_tool.invoke({"query": "current weather"})
        print(
            "βœ… Web search working (with fallback if needed)"
            if web_hits
            else "⚠️  Web search failed"
        )
    except Exception as err:
        print(f"❌ Search test failed: {err}")
        traceback.print_exc()
        return False

    return True

def main():
    """Run every check in sequence, print a summary, and set the exit status.

    The checks run in this order: imports, tool creation, observability,
    search, agent system. The final verdict depends only on the tool
    creation, search and agent system results; the import counts and the
    observability result are reported but do not affect it.

    Exits the process with status 1 via ``sys.exit`` when any of those three
    checks failed; otherwise returns normally.
    """
    print("πŸš€ Hugging Face Spaces Deployment Test")
    print("=" * 60)

    # Dict values are evaluated top to bottom, so the checks run in this order.
    results = {
        "imports": test_imports(),
        "tools": test_tools_creation(),
        "observability": test_observability(),
        "search": test_fallback_search(),
        "agent_system": asyncio.run(test_agent_system()),
    }

    print("\nπŸ“‹ TEST SUMMARY")
    print("=" * 60)

    # Import results are summarised as a passed/total count.
    import_flags = results["imports"].values()
    import_total = len(results["imports"])
    import_success = sum(1 for flag in import_flags if flag)
    print(f"πŸ“¦ Imports: {import_success}/{import_total} successful")

    # One PASS/FAIL line per functional check.
    report_rows = (
        ("Tools Creation", "tools"),
        ("Observability", "observability"),
        ("Search Functions", "search"),
        ("Agent System", "agent_system"),
    )
    for title, key in report_rows:
        if results[key]:
            status = "βœ… PASS"
        else:
            status = "❌ FAIL"
        print(f"{title}: {status}")

    # Only these three checks decide the verdict.
    critical_keys = ("tools", "search", "agent_system")
    if not all(results[key] for key in critical_keys):
        print("\n⚠️  Some tests failed - Check logs above")
        sys.exit(1)

    print("\nπŸŽ‰ ALL CRITICAL TESTS PASSED - Ready for HF Spaces!")

# Run the full validation suite only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()