Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Test script to verify GAIA agent setup and functionality. | |
| """ | |
| from agent import GAIAAgent | |
| from tools import web_search, read_file, calculate_simple_math | |
def test_api_connection():
    """Check that the xAI API can be reached through the agent.

    Builds a ``GAIAAgent``, calls its ``test_grok()`` method and prints the
    reply. A reply that contains the word "error" (case-insensitive) is
    treated as a failed check.

    Returns:
        bool: ``True`` when a reply came back without "error" in it;
        ``False`` when the reply contains "error" or when any exception was
        raised while creating the agent or talking to the API.
    """
    print("Testing xAI API connection...")
    try:
        # The agent is created inside the ``try`` so that a set-up failure is
        # reported as a failed check (and ``False`` is returned to the
        # caller) instead of aborting the whole script with a traceback.
        # NOTE(review): presumably the constructor can raise on a missing
        # API key -- its code is not visible from this file.
        agent = GAIAAgent()
        response = agent.test_grok()
        print(f"API Response: {response}")

        # Guard clause: the reply itself may describe a failure.
        if "error" in response.lower():
            print("❌ API test failed")
            return False

        print("✅ API connection successful")
        return True
    except Exception as e:
        # Broad on purpose: this is a smoke test and any failure means the
        # remaining tests cannot run.
        print(f"❌ API test error: {e}")
        return False
def test_basic_reasoning():
    """Ask the agent a few trivial questions and report pass/fail per question.

    Each case is a dict with ``task_id``, ``question`` and ``expected``. The
    case is handed to ``agent.process_task`` and the final answer is pulled
    out with ``agent.extract_final_answer``. A case passes when the expected
    text occurs anywhere in the predicted answer, ignoring case.

    Results are only printed; nothing is returned. An exception raised while
    handling one case is printed and does not stop the remaining cases.
    """
    print("\nTesting basic reasoning...")
    agent = GAIAAgent()

    test_cases = [
        {
            "task_id": "test_math",
            "question": "What is 25 + 17?",
            "expected": "42",
        },
        {
            "task_id": "test_general",
            "question": "What is the capital of Japan?",
            "expected": "tokyo",
        },
    ]

    for test_case in test_cases:
        print(f"\nTest: {test_case['question']}")
        try:
            response = agent.process_task(test_case)
            predicted = agent.extract_final_answer(response)
            print(f"Response: {predicted}")

            # Substring match, case-insensitive: lenient on purpose so extra
            # words around the answer do not fail the case.
            if test_case["expected"].lower() in predicted.lower():
                print("✅ Test passed")
            else:
                print("❌ Test failed")
        except Exception as e:
            # Keep going with the next case; this is a smoke test.
            print(f"❌ Test error: {e}")
def test_tools():
    """Call each standalone tool helper once and print what it returns.

    Three helpers are exercised in order: ``calculate_simple_math``,
    ``web_search`` and ``read_file``. Nothing is asserted and nothing is
    returned; the printed output is meant to be inspected by a person.
    """
    print("\nTesting tools...")

    # 1) Arithmetic helper on a fixed expression.
    print("\n1. Testing math calculation:")
    total = calculate_simple_math("15 + 27")
    print(f"15 + 27 = {total}")

    # 2) Web search with ``None`` as the second argument (the fallback path).
    print("\n2. Testing web search:")
    hits = web_search("capital of France", None)
    preview = hits[:100]  # only the first 100 characters are shown
    print(f"Search result: {preview}...")

    # 3) File reader pointed at a path that is expected not to exist.
    print("\n3. Testing file reading:")
    contents = read_file("nonexistent.txt")
    print(f"File read result: {contents}")
def test_sample_task():
    """Run one GAIA-style word problem through the agent and report the result.

    The task dict (``task_id``, ``question``, ``answer``, ``file_name``) is
    passed to ``agent.process_task``; the final answer is extracted with
    ``agent.extract_final_answer`` and must equal the expected answer exactly
    once surrounding whitespace is stripped.

    The outcome is only printed; nothing is returned. Any exception raised
    while processing the task is printed as an error.
    """
    print("\nTesting sample GAIA task...")
    agent = GAIAAgent()

    sample_task = {
        "task_id": "sample_test",
        "question": "If a store has 150 apples and sells 87 of them, how many apples are left?",
        "answer": "63",
        "file_name": None,
    }

    try:
        print(f"Question: {sample_task['question']}")
        response = agent.process_task(sample_task)
        predicted = agent.extract_final_answer(response)
        expected = sample_task["answer"]

        print(f"Expected: {expected}")
        print(f"Predicted: {predicted}")

        # Exact match (after trimming whitespace), unlike the lenient
        # substring comparison used for the basic reasoning cases.
        if predicted.strip() == expected:
            print("✅ Sample task passed")
        else:
            print("❌ Sample task failed")
    except Exception as e:
        print(f"❌ Sample task error: {e}")
def main():
    """Run the whole test suite in order.

    The API connection check runs first and acts as a gate: when it returns a
    falsy value, a short hint is printed and the function returns without
    running anything else. Otherwise the reasoning, tool and sample-task
    checks run one after another, followed by a closing banner.

    Nothing is returned; all results are printed.
    """
    banner = "=" * 50

    print("GAIA Agent Test Suite")
    print(banner)

    # Every later check needs a working API, so stop early without one.
    if not test_api_connection():
        print("\n❌ API connection failed. Cannot proceed with other tests.")
        print("Please check your API key and internet connection.")
        return

    test_basic_reasoning()
    test_tools()
    test_sample_task()

    print("\n" + banner)
    print("Test suite completed!")
    print("If all tests passed, you can run: python evaluate.py")


# Run the suite only when executed as a script, not when imported.
if __name__ == "__main__":
    main()