Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Complete Workflow Test for GAIA Agent System | |
Exercises the simplified workflow (SimpleGAIAWorkflow); GAIAWorkflow is imported but not exercised by the tests in this file.
| """ | |
| import os | |
| import sys | |
| import time | |
| import tempfile | |
| from pathlib import Path | |
| # Add src to path for imports | |
| sys.path.insert(0, str(Path(__file__).parent)) | |
| from workflow.gaia_workflow import GAIAWorkflow, SimpleGAIAWorkflow | |
| from models.qwen_client import QwenClient | |
def test_simple_workflow():
    """Run three basic questions through ``SimpleGAIAWorkflow`` and report the results.

    Builds a ``SimpleGAIAWorkflow`` around a ``QwenClient``, submits each
    question through ``process_question`` and prints the routing decision,
    selected agents, answer preview, confidence and cost. A question passes
    when the returned state is complete and carries a non-empty final answer.

    Returns:
        bool: ``True`` when at least 80% of the questions pass; ``False`` when
        the workflow cannot be initialized or too many questions fail.
    """
    print("π§ͺ Testing Simple GAIA Workflow")
    print("=" * 50)
    # Initialize workflow
    try:
        llm_client = QwenClient()
        workflow = SimpleGAIAWorkflow(llm_client)
    except Exception as e:
        print(f"β Failed to initialize workflow: {e}")
        return False
    # Test cases
    # NOTE(review): "expected_agents" is descriptive only -- it is never
    # compared with the agents the workflow actually selected.
    test_cases = [
        {
            "question": "What is the capital of France?",
            "description": "Simple web research question",
            "expected_agents": ["web_researcher"],
        },
        {
            "question": "Calculate 25% of 200",
            "description": "Mathematical reasoning question",
            "expected_agents": ["reasoning_agent"],
        },
        {
            "question": "What is the average of 10, 15, 20?",
            "description": "Statistical calculation",
            "expected_agents": ["reasoning_agent"],
        },
    ]
    results = []
    total_cost = 0.0
    start_time = time.time()
    for i, test_case in enumerate(test_cases, 1):
        print(f"\nπ Test {i}: {test_case['description']}")
        print(f" Question: {test_case['question']}")
        try:
            # Process question
            result_state = workflow.process_question(
                question=test_case["question"],
                task_id=f"simple_test_{i}",
            )
            # Check results
            success = bool(result_state.is_complete and result_state.final_answer)
            confidence = result_state.final_confidence
            cost = result_state.total_cost
            # A missing answer must be reported as a FAIL below, not surface
            # as a TypeError from slicing None.
            answer_preview = (result_state.final_answer or "")[:100]
            print(f" β Router: {result_state.question_type.value}")
            print(f" β Agents: {[a.value for a in result_state.selected_agents]}")
            print(f" β Final Answer: {answer_preview}...")
            print(f" π Confidence: {confidence:.2f}")
            print(f" π° Cost: ${cost:.4f}")
            print(f" π― Success: {'β PASS' if success else 'β FAIL'}")
            total_cost += cost
            results.append(success)
        except Exception as e:
            # Any failure while processing or reporting counts as a failed case.
            print(f" β Test failed: {e}")
            results.append(False)
    # Summary
    total_time = time.time() - start_time
    passed = sum(results)
    total = len(results)
    print("\nπ Simple Workflow Results:")
    print(f" π― Tests Passed: {passed}/{total} ({passed/total*100:.1f}%)")
    print(f" π° Total Cost: ${total_cost:.4f}")
    print(f" β±οΈ Total Time: {total_time:.2f}s")
    return passed >= total * 0.8  # 80% success rate
def test_complete_workflow_with_files():
    """Ask ``SimpleGAIAWorkflow`` a question about a temporary CSV file.

    Writes a three-row CSV (item, quantity, price) into a temporary
    directory, passes its path and name to ``process_question`` and prints
    the outcome. The temporary directory is removed when the function exits.

    Returns:
        bool: ``True`` when the returned state is complete and has a
        non-empty final answer; ``False`` on initialization failure, on any
        exception during processing, or when no answer was produced.
    """
    print("\nπ§ͺ Testing Complete Workflow with Files")
    print("=" * 50)
    try:
        llm_client = QwenClient()
        workflow = SimpleGAIAWorkflow(llm_client)
    except Exception as e:
        print(f"β Failed to initialize workflow: {e}")
        return False
    # Create test file
    with tempfile.TemporaryDirectory() as temp_dir:
        csv_path = os.path.join(temp_dir, "test_data.csv")
        # Explicit encoding keeps the fixture independent of the platform default.
        with open(csv_path, "w", encoding="utf-8") as f:
            f.write("item,quantity,price\nApple,10,1.50\nBanana,20,0.75\nOrange,15,2.00")
        print(f"π Created test file: {csv_path}")
        try:
            result_state = workflow.process_question(
                question="What is the total value of all items in this data?",
                file_path=csv_path,
                file_name="test_data.csv",
                task_id="file_test",
            )
            success = bool(result_state.is_complete and result_state.final_answer)
            # A missing answer must be reported as a FAIL below, not surface
            # as a TypeError from slicing None.
            answer_preview = (result_state.final_answer or "")[:150]
            print(f" β Router: {result_state.question_type.value}")
            print(f" β Agents: {[a.value for a in result_state.selected_agents]}")
            print(f" β Final Answer: {answer_preview}...")
            print(f" π Confidence: {result_state.final_confidence:.2f}")
            print(f" π° Cost: ${result_state.total_cost:.4f}")
            print(f" π― File Processing: {'β PASS' if success else 'β FAIL'}")
            return success
        except Exception as e:
            print(f" β File test failed: {e}")
            return False
def test_workflow_error_handling():
    """Feed problematic inputs to ``SimpleGAIAWorkflow`` and check it degrades gracefully.

    Three inputs are submitted: an empty question, a 5000-character question
    and a question that references a non-existent file. An input counts as
    gracefully handled when the returned state is complete and its final
    answer is non-empty and does not start with ``"Traceback"``. An exception
    escaping ``process_question`` counts as a failure.

    Returns:
        bool: ``True`` when at least 80% of the inputs are handled
        gracefully; ``False`` otherwise or when initialization fails.
    """
    print("\nπ§ͺ Testing Workflow Error Handling")
    print("=" * 50)
    try:
        llm_client = QwenClient()
        workflow = SimpleGAIAWorkflow(llm_client)
    except Exception as e:
        print(f"β Failed to initialize workflow: {e}")
        return False
    # Test cases that might cause errors
    error_test_cases = [
        {
            "question": "",  # Empty question
            "description": "Empty question",
        },
        {
            "question": "x" * 5000,  # Very long question
            "description": "Extremely long question",
        },
        {
            "question": "What is this file about?",
            "file_path": "/nonexistent/file.txt",  # Non-existent file
            "description": "Non-existent file",
        },
    ]
    results = []
    for i, test_case in enumerate(error_test_cases, 1):
        print(f"\nπ Error Test {i}: {test_case['description']}")
        try:
            result_state = workflow.process_question(
                question=test_case["question"],
                file_path=test_case.get("file_path"),
                task_id=f"error_test_{i}",
            )
            final_answer = result_state.final_answer
            # Check if error was handled gracefully.
            # bool() matters: an `and` chain yields its last evaluated operand,
            # so an empty or missing answer would otherwise put "" / None into
            # `results` and make sum(results) raise TypeError after the loop.
            graceful_handling = bool(
                result_state.is_complete
                and final_answer
                and not final_answer.startswith("Traceback")
            )
            print(f" β Graceful Handling: {'β PASS' if graceful_handling else 'β FAIL'}")
            print(f" β Error Messages: {len(result_state.error_messages)}")
            # Guard the preview so a None answer is shown as a FAIL above
            # rather than raising while slicing.
            print(f" β Final Answer: {(final_answer or '')[:100]}...")
            results.append(graceful_handling)
        except Exception as e:
            print(f" β Unhandled exception: {e}")
            results.append(False)
    passed = sum(results)
    total = len(results)
    print("\nπ Error Handling Results:")
    print(f" π― Tests Passed: {passed}/{total} ({passed/total*100:.1f}%)")
    return passed >= total * 0.8
def test_workflow_state_management():
    """Check that a processed question leaves a fully populated workflow state.

    Runs one question through ``SimpleGAIAWorkflow`` and verifies that the
    returned state has a task id, question, routing decision, at least one
    processing step, a final answer, a truthy ``is_complete`` flag and
    non-negative cost and processing time. The state summary and the last
    five processing steps are printed for inspection.

    Returns:
        bool: ``True`` only if every check holds; ``False`` if any check
        fails, initialization fails, or an exception is raised.
    """
    print("\nπ§ͺ Testing Workflow State Management")
    print("=" * 50)
    try:
        workflow = SimpleGAIAWorkflow(QwenClient())
    except Exception as init_error:
        print(f"β Failed to initialize workflow: {init_error}")
        return False

    try:
        state = workflow.process_question(
            question="What is the square root of 144?",
            task_id="state_test",
        )

        # Verify state completeness (evaluated in this order)
        checks = {}
        checks["has_task_id"] = bool(state.task_id)
        checks["has_question"] = bool(state.question)
        checks["has_routing_decision"] = bool(state.routing_decision)
        checks["has_processing_steps"] = len(state.processing_steps) > 0
        checks["has_final_answer"] = bool(state.final_answer)
        checks["is_complete"] = state.is_complete
        checks["has_cost_tracking"] = state.total_cost >= 0
        checks["has_timing"] = state.total_processing_time >= 0

        print(" π State Management Checks:")
        for check_name, outcome in checks.items():
            if outcome:
                marker = "β "
            else:
                marker = "β"
            print(f" {marker} {check_name}: {outcome}")

        # Check state summary
        print("\n π State Summary:")
        for field, field_value in state.get_summary().items():
            print(f" {field}: {field_value}")

        # Verify processing steps (only the last five are shown)
        step_count = len(state.processing_steps)
        print(f"\n π Processing Steps ({step_count}):")
        recent_steps = state.processing_steps[-5:]
        for position, step in enumerate(recent_steps, 1):
            print(f" {position}. {step}")

        verdict = all(checks.values())
        label = "β PASS" if verdict else "β FAIL"
        print(f"\n π― State Management: {label}")
        return verdict
    except Exception as run_error:
        print(f" β State test failed: {run_error}")
        return False
def main():
    """Run the four workflow test suites and print a combined report.

    The suites run in a fixed order (simple workflow, file processing, error
    handling, state management). Afterwards the pass count, elapsed time and
    a per-suite breakdown are printed.

    Returns:
        bool: ``True`` when all suites pass or at least 80% of them do;
        ``False`` otherwise.
    """
    print("π GAIA Workflow Integration Tests")
    print("=" * 60)

    start_time = time.time()

    # Run all tests (list elements are evaluated left to right)
    test_results = [
        test_simple_workflow(),
        test_complete_workflow_with_files(),
        test_workflow_error_handling(),
        test_workflow_state_management(),
    ]

    # Summary
    elapsed = time.time() - start_time
    total = len(test_results)
    passed = sum(test_results)
    percentage = passed / total * 100

    print("\n" + "=" * 60)
    print("π COMPLETE WORKFLOW TEST RESULTS")
    print("=" * 60)
    print(f"π― Test Suites Passed: {passed}/{total} ({percentage:.1f}%)")
    print(f"β±οΈ Total Time: {elapsed:.2f} seconds")

    # Test breakdown
    suite_names = (
        "Simple Workflow",
        "File Processing",
        "Error Handling",
        "State Management",
    )
    print("\nπ Test Breakdown:")
    for suite_name, suite_ok in zip(suite_names, test_results):
        marker = "β " if suite_ok else "β"
        print(f" {marker} {suite_name}")

    if passed == total:
        print("\nπ ALL WORKFLOW TESTS PASSED! System ready for production!")
        return True

    # NOTE(review): with exactly four suites, 3/4 = 75% is below the 80%
    # threshold, so this branch cannot currently be taken; confirm the
    # intended threshold.
    if passed >= total * 0.8:
        print("\nβ MOST TESTS PASSED! System functional with minor issues.")
        return True

    print("\nβ οΈ SIGNIFICANT ISSUES! Review failures above.")
    return False
if __name__ == "__main__":
    # Map the overall result onto the process exit status:
    # 0 when main() reports success, 1 otherwise.
    if main():
        sys.exit(0)
    sys.exit(1)