Chris
committed on
Commit
·
64e1704
1
Parent(s):
5a03810
Final 7.3.3
Browse files- src/agents/__pycache__/router.cpython-310.pyc +0 -0
- src/agents/__pycache__/state.cpython-310.pyc +0 -0
- src/agents/router.py +241 -1
- src/agents/state.py +47 -28
- src/agents/web_researcher.py +385 -38
- src/app.py +132 -2
- src/workflow/gaia_workflow.py +239 -1
src/agents/__pycache__/router.cpython-310.pyc
CHANGED
|
Binary files a/src/agents/__pycache__/router.cpython-310.pyc and b/src/agents/__pycache__/router.cpython-310.pyc differ
|
|
|
src/agents/__pycache__/state.cpython-310.pyc
CHANGED
|
Binary files a/src/agents/__pycache__/state.cpython-310.pyc and b/src/agents/__pycache__/state.cpython-310.pyc differ
|
|
|
src/agents/router.py
CHANGED
|
@@ -22,6 +22,53 @@ class RouterAgent:
|
|
| 22 |
def __init__(self, llm_client: QwenClient):
|
| 23 |
self.llm_client = llm_client
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
def route_question(self, state: GAIAAgentState) -> GAIAAgentState:
|
| 26 |
"""
|
| 27 |
Main routing function - analyzes question and determines processing strategy
|
|
@@ -586,4 +633,197 @@ REASONING: [brief explanation]
|
|
| 586 |
if AgentRole.SYNTHESIZER not in agents:
|
| 587 |
agents.append(AgentRole.SYNTHESIZER)
|
| 588 |
|
| 589 |
-
return agents
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
def __init__(self, llm_client: QwenClient):
|
| 23 |
self.llm_client = llm_client
|
| 24 |
|
| 25 |
+
def process(self, state: GAIAAgentState) -> GAIAAgentState:
    """
    Run the four-phase routing pipeline and record the chosen agents on the state.

    Phases: structural analysis -> information-needs analysis -> execution
    strategy -> agent sequencing. On any failure the state falls back to a
    default agent sequence so the workflow can still proceed.
    """
    logger.info("🧭 Router: Starting multi-phase question analysis")
    state.add_processing_step("Router: Multi-phase analysis initiated")

    try:
        # Phase 1: what kind of question is this?
        structure = self._analyze_question_structure(state.question)
        state.add_processing_step(f"Router: Structure = {structure['type']}")

        # Phase 2: what information does answering it require?
        needs = self._analyze_information_needs(state.question, structure)
        state.add_processing_step(f"Router: Needs = {needs['primary_need']}")

        # Phase 3: how should that information be gathered?
        plan = self._plan_execution_strategy(state.question, structure, needs)
        state.add_processing_step(f"Router: Strategy = {plan['approach']}")

        # Phase 4: which agents, in which order?
        sequence = self._select_agent_sequence(plan, needs)

        # Expose the full analysis so downstream agents can reuse it.
        state.router_analysis = {
            'structural': structure,
            'requirements': needs,
            'strategy': plan,
            'sequence': sequence
        }

        logger.info(f"✅ Routing complete: {structure['type']} -> {sequence}")
        state.add_processing_step(f"Router: Selected agents = {sequence}")

        # Hand the sequence to the workflow.
        state.agent_sequence = sequence
        return state

    except Exception as e:
        error_msg = f"Router analysis failed: {str(e)}"
        logger.error(error_msg)
        state.add_error(error_msg)

        # Fall back to a sensible default pipeline.
        state.agent_sequence = ['reasoning_agent', 'web_researcher', 'synthesizer']
        return state
|
| 71 |
+
|
| 72 |
def route_question(self, state: GAIAAgentState) -> GAIAAgentState:
|
| 73 |
"""
|
| 74 |
Main routing function - analyzes question and determines processing strategy
|
|
|
|
| 633 |
if AgentRole.SYNTHESIZER not in agents:
|
| 634 |
agents.append(AgentRole.SYNTHESIZER)
|
| 635 |
|
| 636 |
+
return agents
|
| 637 |
+
|
| 638 |
+
def _analyze_question_structure(self, question: str) -> Dict[str, Any]:
|
| 639 |
+
"""
|
| 640 |
+
Phase 1: Analyze the structural components of the question
|
| 641 |
+
"""
|
| 642 |
+
structure = {
|
| 643 |
+
'type': 'unknown',
|
| 644 |
+
'complexity': 'medium',
|
| 645 |
+
'components': [],
|
| 646 |
+
'data_sources': [],
|
| 647 |
+
'temporal_aspects': [],
|
| 648 |
+
'quantitative_aspects': []
|
| 649 |
+
}
|
| 650 |
+
|
| 651 |
+
question_lower = question.lower()
|
| 652 |
+
|
| 653 |
+
# Identify question type
|
| 654 |
+
if any(word in question_lower for word in ['how many', 'count', 'number of', 'quantity']):
|
| 655 |
+
structure['type'] = 'quantitative'
|
| 656 |
+
elif any(word in question_lower for word in ['who is', 'who was', 'who did', 'name of']):
|
| 657 |
+
structure['type'] = 'identification'
|
| 658 |
+
elif any(word in question_lower for word in ['where', 'location', 'place']):
|
| 659 |
+
structure['type'] = 'location'
|
| 660 |
+
elif any(word in question_lower for word in ['when', 'date', 'time', 'year']):
|
| 661 |
+
structure['type'] = 'temporal'
|
| 662 |
+
elif any(word in question_lower for word in ['what is', 'define', 'explain']):
|
| 663 |
+
structure['type'] = 'definition'
|
| 664 |
+
elif any(word in question_lower for word in ['calculate', 'compute', 'solve']):
|
| 665 |
+
structure['type'] = 'mathematical'
|
| 666 |
+
elif any(word in question_lower for word in ['compare', 'difference', 'versus']):
|
| 667 |
+
structure['type'] = 'comparison'
|
| 668 |
+
elif 'file' in question_lower or 'attached' in question_lower:
|
| 669 |
+
structure['type'] = 'file_analysis'
|
| 670 |
+
else:
|
| 671 |
+
structure['type'] = 'complex_reasoning'
|
| 672 |
+
|
| 673 |
+
# Identify data sources needed
|
| 674 |
+
if any(term in question_lower for term in ['wikipedia', 'article', 'page']):
|
| 675 |
+
structure['data_sources'].append('wikipedia')
|
| 676 |
+
if any(term in question_lower for term in ['video', 'youtube', 'watch']):
|
| 677 |
+
structure['data_sources'].append('video')
|
| 678 |
+
if any(term in question_lower for term in ['file', 'attached', 'document']):
|
| 679 |
+
structure['data_sources'].append('file')
|
| 680 |
+
if any(term in question_lower for term in ['recent', 'latest', 'current', '2024', '2025']):
|
| 681 |
+
structure['data_sources'].append('web_search')
|
| 682 |
+
|
| 683 |
+
# Identify temporal aspects
|
| 684 |
+
import re
|
| 685 |
+
years = re.findall(r'\b(?:19|20)\d{2}\b', question)
|
| 686 |
+
dates = re.findall(r'\b(?:january|february|march|april|may|june|july|august|september|october|november|december)\s+\d{1,2},?\s+\d{4}\b', question_lower)
|
| 687 |
+
structure['temporal_aspects'] = years + dates
|
| 688 |
+
|
| 689 |
+
# Identify quantitative aspects
|
| 690 |
+
quantities = re.findall(r'\b\d+(?:\.\d+)?\b', question)
|
| 691 |
+
structure['quantitative_aspects'] = quantities
|
| 692 |
+
|
| 693 |
+
# Assess complexity
|
| 694 |
+
complexity_factors = [
|
| 695 |
+
len(question.split()) > 25, # Long question
|
| 696 |
+
len(structure['data_sources']) > 1, # Multiple sources
|
| 697 |
+
len(structure['temporal_aspects']) > 1, # Multiple time periods
|
| 698 |
+
'and' in question_lower and 'or' in question_lower, # Multiple conditions
|
| 699 |
+
question.count('?') > 1, # Multiple questions
|
| 700 |
+
]
|
| 701 |
+
|
| 702 |
+
if sum(complexity_factors) >= 3:
|
| 703 |
+
structure['complexity'] = 'high'
|
| 704 |
+
elif sum(complexity_factors) >= 1:
|
| 705 |
+
structure['complexity'] = 'medium'
|
| 706 |
+
else:
|
| 707 |
+
structure['complexity'] = 'low'
|
| 708 |
+
|
| 709 |
+
return structure
|
| 710 |
+
|
| 711 |
+
def _analyze_information_needs(self, question: str, structural: Dict[str, Any]) -> Dict[str, Any]:
|
| 712 |
+
"""
|
| 713 |
+
Phase 2: Analyze what specific information is needed to answer the question
|
| 714 |
+
"""
|
| 715 |
+
needs = {
|
| 716 |
+
'primary_need': 'factual_lookup',
|
| 717 |
+
'information_types': [],
|
| 718 |
+
'precision_required': 'medium',
|
| 719 |
+
'verification_needed': False,
|
| 720 |
+
'synthesis_complexity': 'simple'
|
| 721 |
+
}
|
| 722 |
+
|
| 723 |
+
# Determine primary information need
|
| 724 |
+
if structural['type'] == 'quantitative':
|
| 725 |
+
needs['primary_need'] = 'numerical_data'
|
| 726 |
+
needs['precision_required'] = 'high'
|
| 727 |
+
elif structural['type'] == 'identification':
|
| 728 |
+
needs['primary_need'] = 'entity_identification'
|
| 729 |
+
elif structural['type'] == 'mathematical':
|
| 730 |
+
needs['primary_need'] = 'computation'
|
| 731 |
+
needs['precision_required'] = 'high'
|
| 732 |
+
elif structural['type'] == 'file_analysis':
|
| 733 |
+
needs['primary_need'] = 'file_processing'
|
| 734 |
+
elif structural['type'] == 'comparison':
|
| 735 |
+
needs['primary_need'] = 'comparative_analysis'
|
| 736 |
+
needs['verification_needed'] = True
|
| 737 |
+
else:
|
| 738 |
+
needs['primary_need'] = 'factual_lookup'
|
| 739 |
+
|
| 740 |
+
# Determine information types needed
|
| 741 |
+
if 'wikipedia' in structural['data_sources']:
|
| 742 |
+
needs['information_types'].append('encyclopedic')
|
| 743 |
+
if 'video' in structural['data_sources']:
|
| 744 |
+
needs['information_types'].append('multimedia_content')
|
| 745 |
+
if 'web_search' in structural['data_sources']:
|
| 746 |
+
needs['information_types'].append('current_information')
|
| 747 |
+
if 'file' in structural['data_sources']:
|
| 748 |
+
needs['information_types'].append('document_analysis')
|
| 749 |
+
|
| 750 |
+
# Assess synthesis complexity
|
| 751 |
+
if structural['complexity'] == 'high' or len(needs['information_types']) > 2:
|
| 752 |
+
needs['synthesis_complexity'] = 'complex'
|
| 753 |
+
elif len(needs['information_types']) > 1:
|
| 754 |
+
needs['synthesis_complexity'] = 'moderate'
|
| 755 |
+
|
| 756 |
+
return needs
|
| 757 |
+
|
| 758 |
+
def _plan_execution_strategy(self, question: str, structural: Dict[str, Any], requirements: Dict[str, Any]) -> Dict[str, Any]:
|
| 759 |
+
"""
|
| 760 |
+
Phase 3: Plan the execution strategy based on analysis
|
| 761 |
+
"""
|
| 762 |
+
strategy = {
|
| 763 |
+
'approach': 'sequential',
|
| 764 |
+
'parallel_possible': False,
|
| 765 |
+
'iterative_refinement': False,
|
| 766 |
+
'fallback_needed': True,
|
| 767 |
+
'verification_steps': []
|
| 768 |
+
}
|
| 769 |
+
|
| 770 |
+
# Determine approach
|
| 771 |
+
if requirements['primary_need'] == 'file_processing':
|
| 772 |
+
strategy['approach'] = 'file_first'
|
| 773 |
+
elif requirements['primary_need'] == 'computation':
|
| 774 |
+
strategy['approach'] = 'reasoning_first'
|
| 775 |
+
elif len(requirements['information_types']) > 2:
|
| 776 |
+
strategy['approach'] = 'multi_source'
|
| 777 |
+
strategy['parallel_possible'] = True
|
| 778 |
+
elif 'current_information' in requirements['information_types']:
|
| 779 |
+
strategy['approach'] = 'web_first'
|
| 780 |
+
else:
|
| 781 |
+
strategy['approach'] = 'knowledge_first'
|
| 782 |
+
|
| 783 |
+
# Determine if iterative refinement is needed
|
| 784 |
+
if (structural['complexity'] == 'high' or
|
| 785 |
+
requirements['precision_required'] == 'high' or
|
| 786 |
+
requirements['verification_needed']):
|
| 787 |
+
strategy['iterative_refinement'] = True
|
| 788 |
+
|
| 789 |
+
# Plan verification steps
|
| 790 |
+
if requirements['verification_needed']:
|
| 791 |
+
strategy['verification_steps'] = ['cross_reference', 'consistency_check']
|
| 792 |
+
if requirements['precision_required'] == 'high':
|
| 793 |
+
strategy['verification_steps'].append('precision_validation')
|
| 794 |
+
|
| 795 |
+
return strategy
|
| 796 |
+
|
| 797 |
+
def _select_agent_sequence(self, strategy: Dict[str, Any], requirements: Dict[str, Any]) -> List[str]:
|
| 798 |
+
"""
|
| 799 |
+
Phase 4: Select the optimal sequence of agents based on strategy
|
| 800 |
+
"""
|
| 801 |
+
sequence = []
|
| 802 |
+
|
| 803 |
+
# Base sequence based on approach
|
| 804 |
+
if strategy['approach'] == 'file_first':
|
| 805 |
+
sequence = ['file_processor', 'reasoning_agent', 'synthesizer']
|
| 806 |
+
elif strategy['approach'] == 'reasoning_first':
|
| 807 |
+
sequence = ['reasoning_agent', 'web_researcher', 'synthesizer']
|
| 808 |
+
elif strategy['approach'] == 'web_first':
|
| 809 |
+
sequence = ['web_researcher', 'reasoning_agent', 'synthesizer']
|
| 810 |
+
elif strategy['approach'] == 'knowledge_first':
|
| 811 |
+
sequence = ['web_researcher', 'reasoning_agent', 'synthesizer']
|
| 812 |
+
elif strategy['approach'] == 'multi_source':
|
| 813 |
+
sequence = ['web_researcher', 'file_processor', 'reasoning_agent', 'synthesizer']
|
| 814 |
+
else: # sequential
|
| 815 |
+
sequence = ['reasoning_agent', 'web_researcher', 'synthesizer']
|
| 816 |
+
|
| 817 |
+
# Add verification agents if needed
|
| 818 |
+
if strategy['iterative_refinement']:
|
| 819 |
+
# Insert reasoning agent before synthesizer for verification
|
| 820 |
+
if 'reasoning_agent' in sequence:
|
| 821 |
+
sequence.remove('reasoning_agent')
|
| 822 |
+
sequence.insert(-1, 'reasoning_agent') # Before synthesizer
|
| 823 |
+
|
| 824 |
+
# Ensure synthesizer is always last
|
| 825 |
+
if 'synthesizer' in sequence:
|
| 826 |
+
sequence.remove('synthesizer')
|
| 827 |
+
sequence.append('synthesizer')
|
| 828 |
+
|
| 829 |
+
return sequence
|
src/agents/state.py
CHANGED
|
@@ -8,6 +8,7 @@ from typing import Dict, Any, List, Optional, Literal
|
|
| 8 |
from dataclasses import dataclass, field
|
| 9 |
from enum import Enum
|
| 10 |
import time
|
|
|
|
| 11 |
|
| 12 |
class QuestionType(Enum):
|
| 13 |
"""Classification of GAIA question types"""
|
|
@@ -65,38 +66,54 @@ class GAIAAgentState:
|
|
| 65 |
This is passed between all agents in the LangGraph workflow
|
| 66 |
"""
|
| 67 |
|
| 68 |
-
def __init__(self):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
# Question information
|
| 70 |
-
self.task_id: str = ""
|
| 71 |
-
self.question: str = ""
|
| 72 |
-
self.question_type: QuestionType = QuestionType.UNKNOWN
|
| 73 |
self.difficulty_level: int = 1 # 1, 2, or 3
|
| 74 |
-
self.file_name: Optional[str] = None
|
| 75 |
self.file_path: Optional[str] = None
|
| 76 |
self.metadata: Dict[str, Any] = {}
|
| 77 |
|
| 78 |
# Routing decisions
|
| 79 |
-
self.routing_decision: Dict[str, Any] = {}
|
| 80 |
-
self.selected_agents: List[AgentRole] = []
|
| 81 |
-
self.complexity_assessment: str = "medium"
|
| 82 |
self.estimated_cost: float = 0.0
|
| 83 |
|
| 84 |
# Agent results
|
| 85 |
-
self.agent_results: Dict[AgentRole, AgentResult] = {}
|
| 86 |
self.tool_results: List[ToolResult] = []
|
| 87 |
|
| 88 |
# Final answer
|
| 89 |
-
self.final_answer: str = ""
|
| 90 |
-
self.final_confidence: float = 0.0
|
| 91 |
-
self.final_reasoning: str = ""
|
| 92 |
self.answer_source: str = "" # Which agent provided the final answer
|
| 93 |
|
| 94 |
# System tracking
|
| 95 |
-
self.start_time: float = time.time()
|
| 96 |
-
self.processing_steps: List[str] = []
|
| 97 |
-
self.total_cost: float = 0.0
|
| 98 |
self.total_processing_time: float = 0.0
|
| 99 |
-
self.error_messages: List[str] = []
|
| 100 |
|
| 101 |
# Status flags
|
| 102 |
self.is_complete: bool = False
|
|
@@ -109,7 +126,7 @@ class GAIAAgentState:
|
|
| 109 |
|
| 110 |
def add_agent_result(self, result: AgentResult):
|
| 111 |
"""Add result from an agent"""
|
| 112 |
-
self.agent_results
|
| 113 |
self.total_cost += result.cost_estimate
|
| 114 |
self.total_processing_time += result.processing_time
|
| 115 |
self.add_processing_step(f"{result.agent_role.value}: {result.result[:50]}...")
|
|
@@ -121,14 +138,14 @@ class GAIAAgentState:
|
|
| 121 |
|
| 122 |
def add_error(self, error_message: str):
|
| 123 |
"""Add an error message"""
|
| 124 |
-
self.
|
| 125 |
self.add_processing_step(f"ERROR: {error_message}")
|
| 126 |
|
| 127 |
def get_best_result(self) -> Optional[AgentResult]:
|
| 128 |
"""Get the agent result with highest confidence"""
|
| 129 |
if not self.agent_results:
|
| 130 |
return None
|
| 131 |
-
return max(self.agent_results
|
| 132 |
|
| 133 |
def should_use_complex_model(self) -> bool:
|
| 134 |
"""Determine if complex model should be used based on state"""
|
|
@@ -145,9 +162,9 @@ class GAIAAgentState:
|
|
| 145 |
def get_summary(self) -> Dict[str, Any]:
|
| 146 |
"""Get a summary of the current state"""
|
| 147 |
return {
|
| 148 |
-
"task_id": self.
|
| 149 |
-
"question_type": self.question_type.value,
|
| 150 |
-
"agents_used": [role.value for role in self.
|
| 151 |
"tools_used": [tool.tool_name for tool in self.tool_results],
|
| 152 |
"final_answer": self.final_answer,
|
| 153 |
"confidence": self.final_confidence,
|
|
@@ -155,15 +172,15 @@ class GAIAAgentState:
|
|
| 155 |
"total_cost": self.total_cost,
|
| 156 |
"steps_count": len(self.processing_steps),
|
| 157 |
"is_complete": self.is_complete,
|
| 158 |
-
"error_count": len(self.
|
| 159 |
}
|
| 160 |
|
| 161 |
def to_dict(self) -> Dict[str, Any]:
|
| 162 |
"""Convert state to dictionary for serialization"""
|
| 163 |
return {
|
| 164 |
-
"task_id": self.
|
| 165 |
"question": self.question,
|
| 166 |
-
"question_type": self.question_type.value,
|
| 167 |
"difficulty_level": self.difficulty_level,
|
| 168 |
"file_name": self.file_name,
|
| 169 |
"file_path": self.file_path,
|
|
@@ -172,14 +189,16 @@ class GAIAAgentState:
|
|
| 172 |
"complexity_assessment": self.complexity_assessment,
|
| 173 |
"final_answer": self.final_answer,
|
| 174 |
"final_confidence": self.final_confidence,
|
| 175 |
-
"final_reasoning": self.
|
| 176 |
"answer_source": self.answer_source,
|
| 177 |
"processing_steps": self.processing_steps,
|
| 178 |
"total_cost": self.total_cost,
|
| 179 |
"total_processing_time": self.total_processing_time,
|
| 180 |
-
"error_messages": self.
|
| 181 |
"is_complete": self.is_complete,
|
| 182 |
-
"summary": self.get_summary()
|
|
|
|
|
|
|
| 183 |
}
|
| 184 |
|
| 185 |
# Type alias for LangGraph
|
|
|
|
| 8 |
from dataclasses import dataclass, field
|
| 9 |
from enum import Enum
|
| 10 |
import time
|
| 11 |
+
import uuid
|
| 12 |
|
| 13 |
class QuestionType(Enum):
|
| 14 |
"""Classification of GAIA question types"""
|
|
|
|
| 66 |
This is passed between all agents in the LangGraph workflow
|
| 67 |
"""
|
| 68 |
|
| 69 |
+
def __init__(self, question: str, question_id: str = None, file_name: str = None, file_content: bytes = None):
|
| 70 |
+
self.question = question
|
| 71 |
+
self.question_id = question_id or str(uuid.uuid4())
|
| 72 |
+
self.file_name = file_name
|
| 73 |
+
self.file_content = file_content
|
| 74 |
+
|
| 75 |
+
# Analysis results
|
| 76 |
+
self.question_type: Optional[QuestionType] = None
|
| 77 |
+
self.question_types: List[QuestionType] = []
|
| 78 |
+
self.primary_question_type: Optional[QuestionType] = None
|
| 79 |
+
self.complexity_assessment: str = "medium"
|
| 80 |
+
self.selected_agents: List[AgentRole] = []
|
| 81 |
+
|
| 82 |
+
# Enhanced router analysis
|
| 83 |
+
self.router_analysis: Optional[Dict[str, Any]] = None
|
| 84 |
+
self.agent_sequence: List[str] = []
|
| 85 |
+
|
| 86 |
+
# Processing tracking
|
| 87 |
+
self.processing_steps: List[str] = []
|
| 88 |
+
self.agent_results: List[AgentResult] = []
|
| 89 |
+
self.errors: List[str] = []
|
| 90 |
+
self.start_time: float = time.time()
|
| 91 |
+
self.total_cost: float = 0.0
|
| 92 |
+
|
| 93 |
+
# Final results
|
| 94 |
+
self.final_answer: Optional[str] = None
|
| 95 |
+
self.final_confidence: float = 0.0
|
| 96 |
+
self.synthesis_reasoning: str = ""
|
| 97 |
+
|
| 98 |
+
# Routing decision tracking
|
| 99 |
+
self.routing_decision: Dict[str, Any] = {}
|
| 100 |
+
|
| 101 |
# Question information
|
|
|
|
|
|
|
|
|
|
| 102 |
self.difficulty_level: int = 1 # 1, 2, or 3
|
|
|
|
| 103 |
self.file_path: Optional[str] = None
|
| 104 |
self.metadata: Dict[str, Any] = {}
|
| 105 |
|
| 106 |
# Routing decisions
|
|
|
|
|
|
|
|
|
|
| 107 |
self.estimated_cost: float = 0.0
|
| 108 |
|
| 109 |
# Agent results
|
|
|
|
| 110 |
self.tool_results: List[ToolResult] = []
|
| 111 |
|
| 112 |
# Final answer
|
|
|
|
|
|
|
|
|
|
| 113 |
self.answer_source: str = "" # Which agent provided the final answer
|
| 114 |
|
| 115 |
# System tracking
|
|
|
|
|
|
|
|
|
|
| 116 |
self.total_processing_time: float = 0.0
|
|
|
|
| 117 |
|
| 118 |
# Status flags
|
| 119 |
self.is_complete: bool = False
|
|
|
|
| 126 |
|
| 127 |
def add_agent_result(self, result: AgentResult):
    """Record an agent's result and fold its cost and runtime into the totals."""
    self.agent_results.append(result)
    # Accumulate bookkeeping totals alongside the stored result.
    self.total_cost += result.cost_estimate
    self.total_processing_time += result.processing_time
    # Keep a short preview of the result in the processing trail.
    self.add_processing_step(f"{result.agent_role.value}: {result.result[:50]}...")
|
|
|
|
| 138 |
|
| 139 |
def add_error(self, error_message: str):
    """Record an error message and mirror it into the processing trail."""
    self.errors.append(error_message)
    self.add_processing_step(f"ERROR: {error_message}")
|
| 143 |
|
| 144 |
def get_best_result(self) -> Optional[AgentResult]:
    """Return the stored agent result with the highest confidence, or None if there are none.

    Ties keep the earliest-stored result, matching max() semantics.
    """
    best = None
    for candidate in self.agent_results:
        if best is None or candidate.confidence > best.confidence:
            best = candidate
    return best
|
| 149 |
|
| 150 |
def should_use_complex_model(self) -> bool:
|
| 151 |
"""Determine if complex model should be used based on state"""
|
|
|
|
| 162 |
def get_summary(self) -> Dict[str, Any]:
|
| 163 |
"""Get a summary of the current state"""
|
| 164 |
return {
|
| 165 |
+
"task_id": self.question_id,
|
| 166 |
+
"question_type": self.question_type.value if self.question_type else "unknown",
|
| 167 |
+
"agents_used": [role.value for role in self.selected_agents],
|
| 168 |
"tools_used": [tool.tool_name for tool in self.tool_results],
|
| 169 |
"final_answer": self.final_answer,
|
| 170 |
"confidence": self.final_confidence,
|
|
|
|
| 172 |
"total_cost": self.total_cost,
|
| 173 |
"steps_count": len(self.processing_steps),
|
| 174 |
"is_complete": self.is_complete,
|
| 175 |
+
"error_count": len(self.errors)
|
| 176 |
}
|
| 177 |
|
| 178 |
def to_dict(self) -> Dict[str, Any]:
|
| 179 |
"""Convert state to dictionary for serialization"""
|
| 180 |
return {
|
| 181 |
+
"task_id": self.question_id,
|
| 182 |
"question": self.question,
|
| 183 |
+
"question_type": self.question_type.value if self.question_type else "unknown",
|
| 184 |
"difficulty_level": self.difficulty_level,
|
| 185 |
"file_name": self.file_name,
|
| 186 |
"file_path": self.file_path,
|
|
|
|
| 189 |
"complexity_assessment": self.complexity_assessment,
|
| 190 |
"final_answer": self.final_answer,
|
| 191 |
"final_confidence": self.final_confidence,
|
| 192 |
+
"final_reasoning": self.synthesis_reasoning,
|
| 193 |
"answer_source": self.answer_source,
|
| 194 |
"processing_steps": self.processing_steps,
|
| 195 |
"total_cost": self.total_cost,
|
| 196 |
"total_processing_time": self.total_processing_time,
|
| 197 |
+
"error_messages": self.errors,
|
| 198 |
"is_complete": self.is_complete,
|
| 199 |
+
"summary": self.get_summary(),
|
| 200 |
+
"router_analysis": self.router_analysis,
|
| 201 |
+
"agent_sequence": self.agent_sequence
|
| 202 |
}
|
| 203 |
|
| 204 |
# Type alias for LangGraph
|
src/agents/web_researcher.py
CHANGED
|
@@ -29,54 +29,43 @@ class WebResearchAgent:
|
|
| 29 |
|
| 30 |
def process(self, state: GAIAAgentState) -> GAIAAgentState:
|
| 31 |
"""
|
| 32 |
-
|
| 33 |
"""
|
| 34 |
logger.info(f"Web researcher processing: {state.question[:100]}...")
|
| 35 |
-
state.add_processing_step("Web Researcher: Starting research")
|
| 36 |
|
| 37 |
try:
|
| 38 |
-
#
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
-
# Execute research with
|
| 43 |
-
|
| 44 |
-
try:
|
| 45 |
-
# Execute research based on strategy
|
| 46 |
-
if strategy == "wikipedia_direct":
|
| 47 |
-
result = self._research_wikipedia_direct(state)
|
| 48 |
-
elif strategy == "wikipedia_search":
|
| 49 |
-
result = self._research_wikipedia_search(state)
|
| 50 |
-
elif strategy == "youtube_analysis":
|
| 51 |
-
result = self._research_youtube(state)
|
| 52 |
-
elif strategy == "web_search":
|
| 53 |
-
result = self._research_web_general(state)
|
| 54 |
-
elif strategy == "url_extraction":
|
| 55 |
-
result = self._research_url_content(state)
|
| 56 |
-
else:
|
| 57 |
-
result = self._research_multi_source(state)
|
| 58 |
-
|
| 59 |
-
except Exception as strategy_error:
|
| 60 |
-
logger.warning(f"Strategy {strategy} failed: {strategy_error}, trying fallback")
|
| 61 |
-
# Try fallback strategy
|
| 62 |
-
try:
|
| 63 |
-
result = self._research_fallback_strategy(state, str(strategy_error))
|
| 64 |
-
except Exception as fallback_error:
|
| 65 |
-
logger.error(f"Fallback strategy also failed: {fallback_error}")
|
| 66 |
-
result = self._create_basic_response(state, f"Research failed: {fallback_error}")
|
| 67 |
|
| 68 |
-
#
|
| 69 |
-
if not
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
# Add result to state
|
| 73 |
-
state.add_agent_result(
|
| 74 |
-
state.add_processing_step(f"Web Researcher: Completed with confidence {
|
| 75 |
|
| 76 |
return state
|
| 77 |
|
| 78 |
except Exception as e:
|
| 79 |
-
error_msg = f"
|
| 80 |
state.add_error(error_msg)
|
| 81 |
logger.error(error_msg)
|
| 82 |
|
|
@@ -85,8 +74,8 @@ class WebResearchAgent:
|
|
| 85 |
agent_role=AgentRole.WEB_RESEARCHER,
|
| 86 |
success=False,
|
| 87 |
result=f"Research encountered difficulties: {str(e)}",
|
| 88 |
-
confidence=0.1,
|
| 89 |
-
reasoning=f"Exception during web research: {str(e)}",
|
| 90 |
tools_used=[],
|
| 91 |
model_used="error",
|
| 92 |
processing_time=0.0,
|
|
@@ -95,6 +84,364 @@ class WebResearchAgent:
|
|
| 95 |
state.add_agent_result(failure_result)
|
| 96 |
return state
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
def _determine_research_strategy(self, question: str, file_name: Optional[str] = None) -> str:
|
| 99 |
"""Determine the best research strategy for the question"""
|
| 100 |
|
|
|
|
| 29 |
|
| 30 |
def process(self, state: GAIAAgentState) -> GAIAAgentState:
|
| 31 |
"""
|
| 32 |
+
Enhanced multi-step research processing with systematic problem decomposition
|
| 33 |
"""
|
| 34 |
logger.info(f"Web researcher processing: {state.question[:100]}...")
|
| 35 |
+
state.add_processing_step("Web Researcher: Starting enhanced multi-step research")
|
| 36 |
|
| 37 |
try:
|
| 38 |
+
# Step 1: Analyze router's decomposition if available
|
| 39 |
+
router_analysis = getattr(state, 'router_analysis', None)
|
| 40 |
+
if router_analysis:
|
| 41 |
+
state.add_processing_step("Web Researcher: Using router analysis")
|
| 42 |
+
research_plan = self._build_research_plan_from_router(state.question, router_analysis)
|
| 43 |
+
else:
|
| 44 |
+
state.add_processing_step("Web Researcher: Creating independent research plan")
|
| 45 |
+
research_plan = self._create_independent_research_plan(state.question)
|
| 46 |
|
| 47 |
+
# Step 2: Execute research plan with iterative refinement
|
| 48 |
+
results = self._execute_research_plan(state, research_plan)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
+
# Step 3: Evaluate results and refine if needed
|
| 51 |
+
if not results or results.confidence < 0.4:
|
| 52 |
+
logger.info("Initial research insufficient, attempting refinement")
|
| 53 |
+
state.add_processing_step("Web Researcher: Refining research approach")
|
| 54 |
+
refined_plan = self._refine_research_plan(state.question, research_plan, results)
|
| 55 |
+
results = self._execute_research_plan(state, refined_plan)
|
| 56 |
+
|
| 57 |
+
# Step 4: Finalize results
|
| 58 |
+
if not results or not isinstance(results, AgentResult):
|
| 59 |
+
results = self._create_basic_response(state, "Multi-step research completed with limited results")
|
| 60 |
|
| 61 |
# Add result to state
|
| 62 |
+
state.add_agent_result(results)
|
| 63 |
+
state.add_processing_step(f"Web Researcher: Completed with confidence {results.confidence:.2f}")
|
| 64 |
|
| 65 |
return state
|
| 66 |
|
| 67 |
except Exception as e:
|
| 68 |
+
error_msg = f"Enhanced web research failed: {str(e)}"
|
| 69 |
state.add_error(error_msg)
|
| 70 |
logger.error(error_msg)
|
| 71 |
|
|
|
|
| 74 |
agent_role=AgentRole.WEB_RESEARCHER,
|
| 75 |
success=False,
|
| 76 |
result=f"Research encountered difficulties: {str(e)}",
|
| 77 |
+
confidence=0.1,
|
| 78 |
+
reasoning=f"Exception during enhanced web research: {str(e)}",
|
| 79 |
tools_used=[],
|
| 80 |
model_used="error",
|
| 81 |
processing_time=0.0,
|
|
|
|
| 84 |
state.add_agent_result(failure_result)
|
| 85 |
return state
|
| 86 |
|
| 87 |
+
def _build_research_plan_from_router(self, question: str, router_analysis: Dict[str, Any]) -> Dict[str, Any]:
|
| 88 |
+
"""Build research plan using router's structural analysis"""
|
| 89 |
+
|
| 90 |
+
structural = router_analysis.get('structural', {})
|
| 91 |
+
requirements = router_analysis.get('requirements', {})
|
| 92 |
+
strategy = router_analysis.get('strategy', {})
|
| 93 |
+
|
| 94 |
+
plan = {
|
| 95 |
+
'question_type': structural.get('type', 'unknown'),
|
| 96 |
+
'primary_need': requirements.get('primary_need', 'factual_lookup'),
|
| 97 |
+
'data_sources': structural.get('data_sources', []),
|
| 98 |
+
'approach': strategy.get('approach', 'sequential'),
|
| 99 |
+
'steps': [],
|
| 100 |
+
'fallback_strategies': []
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
# Build step-by-step research plan
|
| 104 |
+
if plan['question_type'] == 'quantitative':
|
| 105 |
+
plan['steps'] = [
|
| 106 |
+
{'action': 'identify_entity', 'details': 'Extract the main subject/entity'},
|
| 107 |
+
{'action': 'gather_data', 'details': 'Find relevant numerical data'},
|
| 108 |
+
{'action': 'verify_timeframe', 'details': 'Ensure data matches time constraints'},
|
| 109 |
+
{'action': 'extract_count', 'details': 'Extract specific count/quantity'}
|
| 110 |
+
]
|
| 111 |
+
elif plan['question_type'] == 'identification':
|
| 112 |
+
plan['steps'] = [
|
| 113 |
+
{'action': 'parse_subject', 'details': 'Identify what/who to find'},
|
| 114 |
+
{'action': 'context_search', 'details': 'Search for relevant context'},
|
| 115 |
+
{'action': 'verify_identity', 'details': 'Confirm identity from sources'}
|
| 116 |
+
]
|
| 117 |
+
else:
|
| 118 |
+
plan['steps'] = [
|
| 119 |
+
{'action': 'decompose_query', 'details': 'Break down complex question'},
|
| 120 |
+
{'action': 'research_components', 'details': 'Research each component'},
|
| 121 |
+
{'action': 'synthesize_findings', 'details': 'Combine results'}
|
| 122 |
+
]
|
| 123 |
+
|
| 124 |
+
# Add fallback strategies
|
| 125 |
+
plan['fallback_strategies'] = [
|
| 126 |
+
'broaden_search_terms',
|
| 127 |
+
'try_alternative_sources',
|
| 128 |
+
'use_partial_information'
|
| 129 |
+
]
|
| 130 |
+
|
| 131 |
+
return plan
|
| 132 |
+
|
| 133 |
+
def _create_independent_research_plan(self, question: str) -> Dict[str, Any]:
|
| 134 |
+
"""Create research plan when router analysis isn't available"""
|
| 135 |
+
|
| 136 |
+
# Analyze question independently
|
| 137 |
+
plan = {
|
| 138 |
+
'question_type': 'general_research',
|
| 139 |
+
'primary_need': 'factual_lookup',
|
| 140 |
+
'data_sources': [],
|
| 141 |
+
'approach': 'sequential',
|
| 142 |
+
'steps': [],
|
| 143 |
+
'fallback_strategies': []
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
question_lower = question.lower()
|
| 147 |
+
|
| 148 |
+
# Determine research approach based on question patterns
|
| 149 |
+
if any(term in question_lower for term in ['how many', 'count', 'number']):
|
| 150 |
+
plan['question_type'] = 'quantitative'
|
| 151 |
+
plan['steps'] = [
|
| 152 |
+
{'action': 'extract_entity', 'details': 'Find the main subject'},
|
| 153 |
+
{'action': 'search_entity_data', 'details': 'Search for subject information'},
|
| 154 |
+
{'action': 'extract_quantities', 'details': 'Find numerical data'},
|
| 155 |
+
{'action': 'apply_constraints', 'details': 'Apply time/condition filters'}
|
| 156 |
+
]
|
| 157 |
+
elif any(term in question_lower for term in ['who', 'name', 'identity']):
|
| 158 |
+
plan['question_type'] = 'identification'
|
| 159 |
+
plan['steps'] = [
|
| 160 |
+
{'action': 'parse_context', 'details': 'Understand context clues'},
|
| 161 |
+
{'action': 'search_individuals', 'details': 'Search for people/entities'},
|
| 162 |
+
{'action': 'verify_match', 'details': 'Confirm identity match'}
|
| 163 |
+
]
|
| 164 |
+
elif any(term in question_lower for term in ['wikipedia', 'article']):
|
| 165 |
+
plan['question_type'] = 'wikipedia_specific'
|
| 166 |
+
plan['data_sources'] = ['wikipedia']
|
| 167 |
+
plan['steps'] = [
|
| 168 |
+
{'action': 'extract_topic', 'details': 'Identify Wikipedia topic'},
|
| 169 |
+
{'action': 'search_wikipedia', 'details': 'Search Wikipedia directly'},
|
| 170 |
+
{'action': 'extract_metadata', 'details': 'Get article details'}
|
| 171 |
+
]
|
| 172 |
+
else:
|
| 173 |
+
plan['steps'] = [
|
| 174 |
+
{'action': 'analyze_question', 'details': 'Break down question components'},
|
| 175 |
+
{'action': 'multi_source_search', 'details': 'Search multiple sources'},
|
| 176 |
+
{'action': 'consolidate_results', 'details': 'Combine findings'}
|
| 177 |
+
]
|
| 178 |
+
|
| 179 |
+
# Standard fallback strategies
|
| 180 |
+
plan['fallback_strategies'] = [
|
| 181 |
+
'simplify_search_terms',
|
| 182 |
+
'try_broader_keywords',
|
| 183 |
+
'search_related_topics'
|
| 184 |
+
]
|
| 185 |
+
|
| 186 |
+
return plan
|
| 187 |
+
|
| 188 |
+
def _execute_research_plan(self, state: GAIAAgentState, plan: Dict[str, Any]) -> AgentResult:
|
| 189 |
+
"""Execute the research plan step by step"""
|
| 190 |
+
|
| 191 |
+
logger.info(f"Executing research plan: {plan['question_type']} with {len(plan['steps'])} steps")
|
| 192 |
+
|
| 193 |
+
accumulated_results = []
|
| 194 |
+
total_processing_time = 0.0
|
| 195 |
+
total_cost = 0.0
|
| 196 |
+
|
| 197 |
+
for i, step in enumerate(plan['steps'], 1):
|
| 198 |
+
logger.info(f"Step {i}/{len(plan['steps'])}: {step['action']} - {step['details']}")
|
| 199 |
+
state.add_processing_step(f"Web Research Step {i}: {step['action']}")
|
| 200 |
+
|
| 201 |
+
try:
|
| 202 |
+
step_result = self._execute_research_step(state, step, plan, accumulated_results)
|
| 203 |
+
if step_result:
|
| 204 |
+
accumulated_results.append(step_result)
|
| 205 |
+
total_processing_time += getattr(step_result, 'execution_time', 0.0)
|
| 206 |
+
total_cost += getattr(step_result, 'cost_estimate', 0.0)
|
| 207 |
+
|
| 208 |
+
except Exception as e:
|
| 209 |
+
logger.warning(f"Step {i} failed: {e}, continuing with next step")
|
| 210 |
+
state.add_processing_step(f"Web Research Step {i}: Failed - {str(e)}")
|
| 211 |
+
continue
|
| 212 |
+
|
| 213 |
+
# Synthesize accumulated results
|
| 214 |
+
if accumulated_results:
|
| 215 |
+
return self._synthesize_research_results(state, accumulated_results, plan, total_processing_time, total_cost)
|
| 216 |
+
else:
|
| 217 |
+
return self._create_failure_result("All research steps failed")
|
| 218 |
+
|
| 219 |
+
def _execute_research_step(self, state: GAIAAgentState, step: Dict[str, Any],
|
| 220 |
+
plan: Dict[str, Any], previous_results: List) -> Any:
|
| 221 |
+
"""Execute a single research step"""
|
| 222 |
+
|
| 223 |
+
action = step['action']
|
| 224 |
+
|
| 225 |
+
if action == 'extract_entity' or action == 'identify_entity':
|
| 226 |
+
return self._extract_main_entity(state.question)
|
| 227 |
+
|
| 228 |
+
elif action == 'search_entity_data' or action == 'gather_data':
|
| 229 |
+
entity = self._get_entity_from_results(previous_results)
|
| 230 |
+
return self._search_entity_information(entity, state.question)
|
| 231 |
+
|
| 232 |
+
elif action == 'extract_quantities' or action == 'extract_count':
|
| 233 |
+
return self._extract_numerical_data(previous_results, state.question)
|
| 234 |
+
|
| 235 |
+
elif action == 'search_wikipedia':
|
| 236 |
+
topic = self._extract_wikipedia_topic(state.question)
|
| 237 |
+
return self.wikipedia_tool.execute(topic)
|
| 238 |
+
|
| 239 |
+
elif action == 'multi_source_search':
|
| 240 |
+
search_terms = self._extract_search_terms(state.question)
|
| 241 |
+
return self._research_multi_source_enhanced(state, search_terms)
|
| 242 |
+
|
| 243 |
+
else:
|
| 244 |
+
# Default: general web search
|
| 245 |
+
search_terms = self._extract_search_terms(state.question)
|
| 246 |
+
return self.web_search_tool.execute(search_terms)
|
| 247 |
+
|
| 248 |
+
def _extract_main_entity(self, question: str) -> Dict[str, Any]:
|
| 249 |
+
"""Extract the main entity/subject from the question"""
|
| 250 |
+
|
| 251 |
+
# Use simple heuristics and patterns to extract main entity
|
| 252 |
+
import re
|
| 253 |
+
|
| 254 |
+
# Look for quoted entities
|
| 255 |
+
quoted = re.findall(r'"([^"]+)"', question)
|
| 256 |
+
if quoted:
|
| 257 |
+
return {'type': 'quoted_entity', 'entity': quoted[0], 'confidence': 0.9}
|
| 258 |
+
|
| 259 |
+
# Look for proper nouns (capitalized words)
|
| 260 |
+
words = question.split()
|
| 261 |
+
proper_nouns = []
|
| 262 |
+
for word in words:
|
| 263 |
+
clean_word = re.sub(r'[^\w]', '', word)
|
| 264 |
+
if clean_word and clean_word[0].isupper() and len(clean_word) > 1:
|
| 265 |
+
proper_nouns.append(clean_word)
|
| 266 |
+
|
| 267 |
+
if proper_nouns:
|
| 268 |
+
entity = ' '.join(proper_nouns[:3]) # Take first few proper nouns
|
| 269 |
+
return {'type': 'proper_noun', 'entity': entity, 'confidence': 0.7}
|
| 270 |
+
|
| 271 |
+
# Fallback: use question keywords
|
| 272 |
+
keywords = self._extract_search_terms(question, max_length=50)
|
| 273 |
+
return {'type': 'keywords', 'entity': keywords, 'confidence': 0.5}
|
| 274 |
+
|
| 275 |
+
def _search_entity_information(self, entity_data: Dict[str, Any], question: str) -> Any:
|
| 276 |
+
"""Search for information about the extracted entity"""
|
| 277 |
+
|
| 278 |
+
if not entity_data or 'entity' not in entity_data:
|
| 279 |
+
return None
|
| 280 |
+
|
| 281 |
+
entity = entity_data['entity']
|
| 282 |
+
|
| 283 |
+
# Try Wikipedia first for entities
|
| 284 |
+
wiki_result = self.wikipedia_tool.execute(entity)
|
| 285 |
+
if wiki_result.success and wiki_result.result.get('found'):
|
| 286 |
+
return wiki_result
|
| 287 |
+
|
| 288 |
+
# Fallback to web search
|
| 289 |
+
search_query = f"{entity} {self._extract_search_terms(question, max_length=30)}"
|
| 290 |
+
return self.web_search_tool.execute(search_query)
|
| 291 |
+
|
| 292 |
+
def _extract_numerical_data(self, previous_results: List, question: str) -> Dict[str, Any]:
|
| 293 |
+
"""Extract numerical data from previous search results"""
|
| 294 |
+
|
| 295 |
+
numerical_data = {
|
| 296 |
+
'numbers_found': [],
|
| 297 |
+
'context': [],
|
| 298 |
+
'confidence': 0.0
|
| 299 |
+
}
|
| 300 |
+
|
| 301 |
+
for result in previous_results:
|
| 302 |
+
if hasattr(result, 'result') and result.result:
|
| 303 |
+
text = str(result.result)
|
| 304 |
+
|
| 305 |
+
# Extract numbers with context
|
| 306 |
+
import re
|
| 307 |
+
number_patterns = [
|
| 308 |
+
r'\b(\d+)\s*(albums?|songs?|tracks?|releases?)\b',
|
| 309 |
+
r'\b(\d+)\s*(studio|live|compilation)\s*(albums?)\b',
|
| 310 |
+
r'\bbetween\s*(\d{4})\s*and\s*(\d{4})\b',
|
| 311 |
+
r'\b(\d+)\b' # Any number as fallback
|
| 312 |
+
]
|
| 313 |
+
|
| 314 |
+
for pattern in number_patterns:
|
| 315 |
+
matches = re.findall(pattern, text, re.IGNORECASE)
|
| 316 |
+
for match in matches:
|
| 317 |
+
if isinstance(match, tuple):
|
| 318 |
+
numerical_data['numbers_found'].extend(match)
|
| 319 |
+
else:
|
| 320 |
+
numerical_data['numbers_found'].append(match)
|
| 321 |
+
|
| 322 |
+
if numerical_data['numbers_found']:
|
| 323 |
+
numerical_data['confidence'] = 0.8
|
| 324 |
+
|
| 325 |
+
return numerical_data
|
| 326 |
+
|
| 327 |
+
def _get_entity_from_results(self, results: List) -> str:
|
| 328 |
+
"""Extract entity name from previous results"""
|
| 329 |
+
|
| 330 |
+
for result in results:
|
| 331 |
+
if isinstance(result, dict) and 'entity' in result:
|
| 332 |
+
return result['entity']
|
| 333 |
+
|
| 334 |
+
return ""
|
| 335 |
+
|
| 336 |
+
def _research_multi_source_enhanced(self, state: GAIAAgentState, search_terms: str) -> Any:
|
| 337 |
+
"""Enhanced multi-source research with systematic approach"""
|
| 338 |
+
|
| 339 |
+
sources_tried = []
|
| 340 |
+
|
| 341 |
+
# Try Wikipedia first for factual information
|
| 342 |
+
wiki_result = self.wikipedia_tool.execute(search_terms)
|
| 343 |
+
if wiki_result.success and wiki_result.result.get('found'):
|
| 344 |
+
sources_tried.append(('Wikipedia', wiki_result))
|
| 345 |
+
|
| 346 |
+
# Try web search for additional information
|
| 347 |
+
web_result = self.web_search_tool.execute({
|
| 348 |
+
"query": search_terms,
|
| 349 |
+
"action": "search",
|
| 350 |
+
"limit": 3
|
| 351 |
+
})
|
| 352 |
+
if web_result.success and web_result.result.get('found'):
|
| 353 |
+
sources_tried.append(('Web', web_result))
|
| 354 |
+
|
| 355 |
+
return {'sources': sources_tried, 'primary_terms': search_terms}
|
| 356 |
+
|
| 357 |
+
def _synthesize_research_results(self, state: GAIAAgentState, results: List, plan: Dict[str, Any],
|
| 358 |
+
total_time: float, total_cost: float) -> AgentResult:
|
| 359 |
+
"""Synthesize results from multi-step research"""
|
| 360 |
+
|
| 361 |
+
# Combine information from all steps
|
| 362 |
+
combined_info = []
|
| 363 |
+
confidence_scores = []
|
| 364 |
+
|
| 365 |
+
for result in results:
|
| 366 |
+
if hasattr(result, 'result'):
|
| 367 |
+
combined_info.append(str(result.result))
|
| 368 |
+
if hasattr(result, 'confidence'):
|
| 369 |
+
confidence_scores.append(result.confidence)
|
| 370 |
+
elif isinstance(result, dict):
|
| 371 |
+
combined_info.append(str(result))
|
| 372 |
+
confidence_scores.append(0.5) # Default confidence
|
| 373 |
+
|
| 374 |
+
# Create synthesis prompt
|
| 375 |
+
synthesis_prompt = f"""
|
| 376 |
+
Based on multi-step research for this question, provide a direct answer:
|
| 377 |
+
|
| 378 |
+
Question: {state.question}
|
| 379 |
+
|
| 380 |
+
Research Plan Type: {plan['question_type']}
|
| 381 |
+
|
| 382 |
+
Research Findings:
|
| 383 |
+
{chr(10).join(f"Step {i+1}: {info}" for i, info in enumerate(combined_info))}
|
| 384 |
+
|
| 385 |
+
Please provide a direct, precise answer based on the research findings.
|
| 386 |
+
"""
|
| 387 |
+
|
| 388 |
+
# Use appropriate model for synthesis
|
| 389 |
+
model_tier = ModelTier.COMPLEX if len(results) > 2 else ModelTier.MAIN
|
| 390 |
+
llm_result = self.llm_client.generate(synthesis_prompt, tier=model_tier, max_tokens=300)
|
| 391 |
+
|
| 392 |
+
avg_confidence = sum(confidence_scores) / len(confidence_scores) if confidence_scores else 0.5
|
| 393 |
+
|
| 394 |
+
if llm_result.success:
|
| 395 |
+
return AgentResult(
|
| 396 |
+
agent_role=AgentRole.WEB_RESEARCHER,
|
| 397 |
+
success=True,
|
| 398 |
+
result=llm_result.response,
|
| 399 |
+
confidence=min(0.85, avg_confidence + 0.1), # Boost for multi-step research
|
| 400 |
+
reasoning=f"Multi-step research completed with {len(results)} steps: {plan['question_type']}",
|
| 401 |
+
tools_used=[],
|
| 402 |
+
model_used=llm_result.model_used,
|
| 403 |
+
processing_time=total_time + llm_result.response_time,
|
| 404 |
+
cost_estimate=total_cost + llm_result.cost_estimate
|
| 405 |
+
)
|
| 406 |
+
else:
|
| 407 |
+
# Fallback to best single result
|
| 408 |
+
best_info = combined_info[0] if combined_info else "Multi-step research completed"
|
| 409 |
+
return AgentResult(
|
| 410 |
+
agent_role=AgentRole.WEB_RESEARCHER,
|
| 411 |
+
success=True,
|
| 412 |
+
result=best_info,
|
| 413 |
+
confidence=avg_confidence,
|
| 414 |
+
reasoning=f"Multi-step research completed, synthesis failed",
|
| 415 |
+
tools_used=[],
|
| 416 |
+
model_used="fallback",
|
| 417 |
+
processing_time=total_time,
|
| 418 |
+
cost_estimate=total_cost
|
| 419 |
+
)
|
| 420 |
+
|
| 421 |
+
def _refine_research_plan(self, question: str, original_plan: Dict[str, Any],
|
| 422 |
+
previous_result: AgentResult) -> Dict[str, Any]:
|
| 423 |
+
"""Refine research plan when initial attempt yields poor results"""
|
| 424 |
+
|
| 425 |
+
refined_plan = original_plan.copy()
|
| 426 |
+
|
| 427 |
+
# Add refinement strategies based on why previous attempt failed
|
| 428 |
+
if previous_result and previous_result.confidence < 0.3:
|
| 429 |
+
# Very low confidence - try different approach
|
| 430 |
+
refined_plan['steps'] = [
|
| 431 |
+
{'action': 'broaden_search', 'details': 'Use broader search terms'},
|
| 432 |
+
{'action': 'alternative_sources', 'details': 'Try different information sources'},
|
| 433 |
+
{'action': 'relaxed_matching', 'details': 'Accept partial matches'}
|
| 434 |
+
]
|
| 435 |
+
elif not previous_result or not previous_result.success:
|
| 436 |
+
# Complete failure - simplify approach
|
| 437 |
+
refined_plan['steps'] = [
|
| 438 |
+
{'action': 'simple_search', 'details': 'Basic web search with key terms'},
|
| 439 |
+
{'action': 'extract_any_info', 'details': 'Extract any relevant information'}
|
| 440 |
+
]
|
| 441 |
+
|
| 442 |
+
refined_plan['refinement_attempt'] = True
|
| 443 |
+
return refined_plan
|
| 444 |
+
|
| 445 |
def _determine_research_strategy(self, question: str, file_name: Optional[str] = None) -> str:
|
| 446 |
"""Determine the best research strategy for the question"""
|
| 447 |
|
src/app.py
CHANGED
|
@@ -10,7 +10,7 @@ import logging
|
|
| 10 |
import time
|
| 11 |
import requests
|
| 12 |
import pandas as pd
|
| 13 |
-
from typing import Optional, Tuple, Dict
|
| 14 |
import tempfile
|
| 15 |
from pathlib import Path
|
| 16 |
import json
|
|
@@ -22,8 +22,9 @@ logging.basicConfig(level=logging.INFO)
|
|
| 22 |
logger = logging.getLogger(__name__)
|
| 23 |
|
| 24 |
# Import our workflow
|
| 25 |
-
from workflow.gaia_workflow import SimpleGAIAWorkflow
|
| 26 |
from models.qwen_client import QwenClient
|
|
|
|
| 27 |
|
| 28 |
# Constants for Unit 4 API
|
| 29 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
@@ -419,6 +420,84 @@ class GAIAAgentApp:
|
|
| 419 |
"Compare the GDP of Japan and Germany in 2023 and tell me the difference",
|
| 420 |
]
|
| 421 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
def check_oauth_scopes(oauth_token: str) -> Dict[str, any]:
|
| 423 |
"""
|
| 424 |
Check what scopes are available with the OAuth token
|
|
@@ -1864,5 +1943,56 @@ def main():
|
|
| 1864 |
|
| 1865 |
interface.launch(**launch_kwargs)
|
| 1866 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1867 |
if __name__ == "__main__":
|
| 1868 |
main()
|
|
|
|
| 10 |
import time
|
| 11 |
import requests
|
| 12 |
import pandas as pd
|
| 13 |
+
from typing import Optional, Tuple, Dict, Any
|
| 14 |
import tempfile
|
| 15 |
from pathlib import Path
|
| 16 |
import json
|
|
|
|
| 22 |
logger = logging.getLogger(__name__)
|
| 23 |
|
| 24 |
# Import our workflow
|
| 25 |
+
from workflow.gaia_workflow import SimpleGAIAWorkflow, create_gaia_workflow
|
| 26 |
from models.qwen_client import QwenClient
|
| 27 |
+
from models.gaia_state import GAIAAgentState
|
| 28 |
|
| 29 |
# Constants for Unit 4 API
|
| 30 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
|
| 420 |
"Compare the GDP of Japan and Germany in 2023 and tell me the difference",
|
| 421 |
]
|
| 422 |
|
| 423 |
+
def process_with_langgraph(self, question: str, question_id: str = None) -> Dict[str, Any]:
|
| 424 |
+
"""
|
| 425 |
+
Process question using enhanced LangGraph workflow with multi-phase planning
|
| 426 |
+
"""
|
| 427 |
+
try:
|
| 428 |
+
logger.info(f"📝 Processing question with enhanced LangGraph workflow: {question[:100]}...")
|
| 429 |
+
|
| 430 |
+
# Create enhanced state with proper initialization
|
| 431 |
+
state = GAIAAgentState(
|
| 432 |
+
question=question,
|
| 433 |
+
question_id=question_id,
|
| 434 |
+
file_name=None, # File handling would be added here if needed
|
| 435 |
+
file_content=None
|
| 436 |
+
)
|
| 437 |
+
|
| 438 |
+
# Create enhanced workflow with multi-step planning
|
| 439 |
+
workflow = create_gaia_workflow(self.llm_client, self.tools)
|
| 440 |
+
|
| 441 |
+
logger.info("🚀 Starting enhanced multi-phase workflow execution")
|
| 442 |
+
|
| 443 |
+
# Execute workflow with enhanced planning and refinement
|
| 444 |
+
result_state = workflow.invoke(state)
|
| 445 |
+
|
| 446 |
+
# Extract enhanced results
|
| 447 |
+
processing_details = {
|
| 448 |
+
"steps": result_state.processing_steps,
|
| 449 |
+
"agents_used": [r.agent_role.value for r in result_state.agent_results],
|
| 450 |
+
"router_analysis": getattr(result_state, 'router_analysis', {}),
|
| 451 |
+
"agent_sequence": getattr(result_state, 'agent_sequence', []),
|
| 452 |
+
"total_steps": len(result_state.processing_steps),
|
| 453 |
+
"refinement_attempted": getattr(result_state, 'refinement_attempted', False)
|
| 454 |
+
}
|
| 455 |
+
|
| 456 |
+
# Calculate enhanced confidence based on multi-agent results
|
| 457 |
+
if result_state.agent_results:
|
| 458 |
+
confidences = [r.confidence for r in result_state.agent_results]
|
| 459 |
+
avg_confidence = sum(confidences) / len(confidences)
|
| 460 |
+
max_confidence = max(confidences)
|
| 461 |
+
# Boost confidence for multi-agent consensus
|
| 462 |
+
enhanced_confidence = min(0.95, (avg_confidence + max_confidence) / 2)
|
| 463 |
+
else:
|
| 464 |
+
enhanced_confidence = 0.1
|
| 465 |
+
|
| 466 |
+
return {
|
| 467 |
+
"answer": result_state.final_answer or "Unable to determine answer",
|
| 468 |
+
"confidence": enhanced_confidence,
|
| 469 |
+
"reasoning": result_state.synthesis_reasoning or "Multi-phase processing completed",
|
| 470 |
+
"cost": result_state.total_cost,
|
| 471 |
+
"processing_time": time.time() - result_state.start_time,
|
| 472 |
+
"processing_details": processing_details,
|
| 473 |
+
"agent_results": [
|
| 474 |
+
{
|
| 475 |
+
"agent": r.agent_role.value,
|
| 476 |
+
"success": r.success,
|
| 477 |
+
"confidence": r.confidence,
|
| 478 |
+
"reasoning": r.reasoning[:200] + "..." if len(r.reasoning) > 200 else r.reasoning,
|
| 479 |
+
"processing_time": r.processing_time,
|
| 480 |
+
"cost": r.cost_estimate
|
| 481 |
+
}
|
| 482 |
+
for r in result_state.agent_results
|
| 483 |
+
],
|
| 484 |
+
"errors": result_state.errors
|
| 485 |
+
}
|
| 486 |
+
|
| 487 |
+
except Exception as e:
|
| 488 |
+
error_msg = f"Enhanced LangGraph processing failed: {str(e)}"
|
| 489 |
+
logger.error(error_msg)
|
| 490 |
+
return {
|
| 491 |
+
"answer": "Processing failed with enhanced workflow",
|
| 492 |
+
"confidence": 0.0,
|
| 493 |
+
"reasoning": error_msg,
|
| 494 |
+
"cost": 0.0,
|
| 495 |
+
"processing_time": 0.0,
|
| 496 |
+
"processing_details": {"error": error_msg},
|
| 497 |
+
"agent_results": [],
|
| 498 |
+
"errors": [error_msg]
|
| 499 |
+
}
|
| 500 |
+
|
| 501 |
def check_oauth_scopes(oauth_token: str) -> Dict[str, any]:
|
| 502 |
"""
|
| 503 |
Check what scopes are available with the OAuth token
|
|
|
|
| 1943 |
|
| 1944 |
interface.launch(**launch_kwargs)
|
| 1945 |
|
| 1946 |
+
def process_question_with_gaia_agent(question_text: str, question_id: str = None,
|
| 1947 |
+
file_name: str = None, file_content: bytes = None) -> Dict[str, Any]:
|
| 1948 |
+
"""
|
| 1949 |
+
Process a GAIA question using enhanced multi-phase planning workflow
|
| 1950 |
+
"""
|
| 1951 |
+
try:
|
| 1952 |
+
logger.info(f"📝 Processing GAIA question with enhanced workflow: {question_text[:100]}...")
|
| 1953 |
+
|
| 1954 |
+
# Create GAIA agent with enhanced capabilities
|
| 1955 |
+
llm_client = QwenClient()
|
| 1956 |
+
gaia_agent = GAIAAgentApp.create_with_qwen_client(llm_client)
|
| 1957 |
+
|
| 1958 |
+
# Use enhanced LangGraph workflow with multi-step planning
|
| 1959 |
+
result = gaia_agent.process_with_langgraph(question_text, question_id)
|
| 1960 |
+
|
| 1961 |
+
# Enhanced result formatting for GAIA compliance
|
| 1962 |
+
enhanced_result = {
|
| 1963 |
+
"question_id": question_id or "unknown",
|
| 1964 |
+
"question": question_text,
|
| 1965 |
+
"answer": result["answer"],
|
| 1966 |
+
"confidence": result["confidence"],
|
| 1967 |
+
"reasoning": result["reasoning"],
|
| 1968 |
+
"cost_estimate": result["cost"],
|
| 1969 |
+
"processing_time": result["processing_time"],
|
| 1970 |
+
"workflow_type": "enhanced_multi_phase",
|
| 1971 |
+
"processing_details": result["processing_details"],
|
| 1972 |
+
"agent_results": result["agent_results"],
|
| 1973 |
+
"success": len(result["errors"]) == 0,
|
| 1974 |
+
"error_messages": result["errors"]
|
| 1975 |
+
}
|
| 1976 |
+
|
| 1977 |
+
return enhanced_result
|
| 1978 |
+
|
| 1979 |
+
except Exception as e:
|
| 1980 |
+
error_msg = f"Enhanced GAIA processing failed: {str(e)}"
|
| 1981 |
+
logger.error(error_msg)
|
| 1982 |
+
return {
|
| 1983 |
+
"question_id": question_id or "unknown",
|
| 1984 |
+
"question": question_text,
|
| 1985 |
+
"answer": "Enhanced processing failed",
|
| 1986 |
+
"confidence": 0.0,
|
| 1987 |
+
"reasoning": error_msg,
|
| 1988 |
+
"cost_estimate": 0.0,
|
| 1989 |
+
"processing_time": 0.0,
|
| 1990 |
+
"workflow_type": "enhanced_multi_phase_failed",
|
| 1991 |
+
"processing_details": {"error": error_msg},
|
| 1992 |
+
"agent_results": [],
|
| 1993 |
+
"success": False,
|
| 1994 |
+
"error_messages": [error_msg]
|
| 1995 |
+
}
|
| 1996 |
+
|
| 1997 |
if __name__ == "__main__":
|
| 1998 |
main()
|
src/workflow/gaia_workflow.py
CHANGED
|
@@ -301,4 +301,242 @@ class SimpleGAIAWorkflow:
|
|
| 301 |
state.final_confidence = 0.0
|
| 302 |
state.final_reasoning = error_msg
|
| 303 |
state.is_complete = True
|
| 304 |
-
return state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
state.final_confidence = 0.0
|
| 302 |
state.final_reasoning = error_msg
|
| 303 |
state.is_complete = True
|
| 304 |
+
return state
|
| 305 |
+
|
| 306 |
+
def create_gaia_workflow(llm_client, tools_dict):
|
| 307 |
+
"""
|
| 308 |
+
Create an enhanced GAIA workflow with multi-phase planning and iterative refinement
|
| 309 |
+
"""
|
| 310 |
+
|
| 311 |
+
# Initialize agents with enhanced capabilities
|
| 312 |
+
router = RouterAgent(llm_client)
|
| 313 |
+
web_researcher = WebResearchAgent(llm_client)
|
| 314 |
+
file_processor = FileProcessorAgent(llm_client)
|
| 315 |
+
reasoning_agent = ReasoningAgent(llm_client)
|
| 316 |
+
synthesizer = SynthesizerAgent(llm_client)
|
| 317 |
+
|
| 318 |
+
# Enhanced workflow nodes with multi-step processing
|
| 319 |
+
def router_node(state: GAIAAgentState) -> GAIAAgentState:
|
| 320 |
+
"""Enhanced router with multi-phase analysis"""
|
| 321 |
+
logger.info("🧭 Router: Starting multi-phase analysis")
|
| 322 |
+
return router.process(state)
|
| 323 |
+
|
| 324 |
+
def web_researcher_node(state: GAIAAgentState) -> GAIAAgentState:
|
| 325 |
+
"""Web researcher with multi-step planning"""
|
| 326 |
+
logger.info("🌐 Web Researcher: Starting enhanced research")
|
| 327 |
+
return web_researcher.process(state)
|
| 328 |
+
|
| 329 |
+
def file_processor_node(state: GAIAAgentState) -> GAIAAgentState:
|
| 330 |
+
"""File processor with step-by-step analysis"""
|
| 331 |
+
logger.info("📁 File Processor: Starting file analysis")
|
| 332 |
+
return file_processor.process(state)
|
| 333 |
+
|
| 334 |
+
def reasoning_agent_node(state: GAIAAgentState) -> GAIAAgentState:
|
| 335 |
+
"""Reasoning agent with systematic approach"""
|
| 336 |
+
logger.info("🧠 Reasoning Agent: Starting analysis")
|
| 337 |
+
return reasoning_agent.process(state)
|
| 338 |
+
|
| 339 |
+
def synthesizer_node(state: GAIAAgentState) -> GAIAAgentState:
|
| 340 |
+
"""Enhanced synthesizer with verification"""
|
| 341 |
+
logger.info("🎯 Synthesizer: Starting GAIA-compliant synthesis")
|
| 342 |
+
return synthesizer.process(state)
|
| 343 |
+
|
| 344 |
+
def should_continue_to_next_agent(state: GAIAAgentState) -> str:
|
| 345 |
+
"""
|
| 346 |
+
Enhanced routing logic that follows the planned agent sequence
|
| 347 |
+
"""
|
| 348 |
+
# Get the planned sequence from router
|
| 349 |
+
agent_sequence = getattr(state, 'agent_sequence', [])
|
| 350 |
+
|
| 351 |
+
if not agent_sequence:
|
| 352 |
+
logger.warning("No agent sequence found, using fallback routing")
|
| 353 |
+
# Fallback to basic routing
|
| 354 |
+
if not state.agent_results:
|
| 355 |
+
return "web_researcher"
|
| 356 |
+
return "synthesizer"
|
| 357 |
+
|
| 358 |
+
# Count how many agents have been executed
|
| 359 |
+
executed_count = len(state.agent_results)
|
| 360 |
+
|
| 361 |
+
# Check if we've executed all planned agents
|
| 362 |
+
if executed_count >= len(agent_sequence):
|
| 363 |
+
return "synthesizer"
|
| 364 |
+
|
| 365 |
+
# Get next agent in sequence
|
| 366 |
+
next_agent = agent_sequence[executed_count]
|
| 367 |
+
|
| 368 |
+
# Map string names to node names
|
| 369 |
+
agent_mapping = {
|
| 370 |
+
'web_researcher': 'web_researcher',
|
| 371 |
+
'file_processor': 'file_processor',
|
| 372 |
+
'reasoning_agent': 'reasoning_agent',
|
| 373 |
+
'synthesizer': 'synthesizer'
|
| 374 |
+
}
|
| 375 |
+
|
| 376 |
+
return agent_mapping.get(next_agent, 'synthesizer')
|
| 377 |
+
|
| 378 |
+
def check_quality_and_refinement(state: GAIAAgentState) -> str:
|
| 379 |
+
"""
|
| 380 |
+
Check if results need refinement before synthesis
|
| 381 |
+
"""
|
| 382 |
+
if not state.agent_results:
|
| 383 |
+
return "synthesizer"
|
| 384 |
+
|
| 385 |
+
# Check overall quality of results
|
| 386 |
+
avg_confidence = sum(r.confidence for r in state.agent_results) / len(state.agent_results)
|
| 387 |
+
|
| 388 |
+
# If confidence is very low and we haven't tried refinement yet
|
| 389 |
+
if avg_confidence < 0.3 and not getattr(state, 'refinement_attempted', False):
|
| 390 |
+
logger.info(f"Low confidence ({avg_confidence:.2f}), attempting refinement")
|
| 391 |
+
state.refinement_attempted = True
|
| 392 |
+
return "refine_approach"
|
| 393 |
+
|
| 394 |
+
return "synthesizer"
|
| 395 |
+
|
| 396 |
+
def refinement_node(state: GAIAAgentState) -> GAIAAgentState:
    """
    Retry with an alternative agent when the first pass produced weak results.

    Consults the router's stored analysis (when present) and re-runs whichever
    of the web researcher / reasoning agent has not yet contributed a result;
    otherwise falls back to one more reasoning pass over the existing results.
    """
    logger.info("🔄 Attempting result refinement")
    state.add_processing_step("Workflow: Attempting refinement due to low confidence")

    # Inspect what the router decided earlier to pick an alternative approach.
    analysis = getattr(state, 'router_analysis', {})
    if analysis:
        # fallback_needed defaults to True so refinement still proceeds when
        # the router did not record an explicit strategy decision.
        wants_fallback = analysis.get('strategy', {}).get('fallback_needed', True)
        if wants_fallback:
            # Prefer the agent that has not run yet: web research first,
            # then reasoning.
            if not any(r.agent_role == AgentRole.WEB_RESEARCHER for r in state.agent_results):
                return web_researcher.process(state)
            if not any(r.agent_role == AgentRole.REASONING_AGENT for r in state.agent_results):
                return reasoning_agent.process(state)

    # Default: one more reasoning pass for additional analysis.
    return reasoning_agent.process(state)
|
| 422 |
+
# Create workflow graph with enhanced routing
|
| 423 |
+
workflow = StateGraph(GAIAAgentState)
|
| 424 |
+
|
| 425 |
+
# Add nodes
|
| 426 |
+
workflow.add_node("router", router_node)
|
| 427 |
+
workflow.add_node("web_researcher", web_researcher_node)
|
| 428 |
+
workflow.add_node("file_processor", file_processor_node)
|
| 429 |
+
workflow.add_node("reasoning_agent", reasoning_agent_node)
|
| 430 |
+
workflow.add_node("refine_approach", refinement_node)
|
| 431 |
+
workflow.add_node("synthesizer", synthesizer_node)
|
| 432 |
+
|
| 433 |
+
# Set entry point
|
| 434 |
+
workflow.set_entry_point("router")
|
| 435 |
+
|
| 436 |
+
# Enhanced routing edges
|
| 437 |
+
workflow.add_conditional_edges(
|
| 438 |
+
"router",
|
| 439 |
+
should_continue_to_next_agent,
|
| 440 |
+
{
|
| 441 |
+
"web_researcher": "web_researcher",
|
| 442 |
+
"file_processor": "file_processor",
|
| 443 |
+
"reasoning_agent": "reasoning_agent",
|
| 444 |
+
"synthesizer": "synthesizer"
|
| 445 |
+
}
|
| 446 |
+
)
|
| 447 |
+
|
| 448 |
+
# Progressive routing with quality checks
|
| 449 |
+
workflow.add_conditional_edges(
|
| 450 |
+
"web_researcher",
|
| 451 |
+
should_continue_to_next_agent,
|
| 452 |
+
{
|
| 453 |
+
"file_processor": "file_processor",
|
| 454 |
+
"reasoning_agent": "reasoning_agent",
|
| 455 |
+
"synthesizer": "synthesizer",
|
| 456 |
+
"refine_approach": "refine_approach"
|
| 457 |
+
}
|
| 458 |
+
)
|
| 459 |
+
|
| 460 |
+
workflow.add_conditional_edges(
|
| 461 |
+
"file_processor",
|
| 462 |
+
should_continue_to_next_agent,
|
| 463 |
+
{
|
| 464 |
+
"web_researcher": "web_researcher",
|
| 465 |
+
"reasoning_agent": "reasoning_agent",
|
| 466 |
+
"synthesizer": "synthesizer",
|
| 467 |
+
"refine_approach": "refine_approach"
|
| 468 |
+
}
|
| 469 |
+
)
|
| 470 |
+
|
| 471 |
+
workflow.add_conditional_edges(
|
| 472 |
+
"reasoning_agent",
|
| 473 |
+
should_continue_to_next_agent,
|
| 474 |
+
{
|
| 475 |
+
"web_researcher": "web_researcher",
|
| 476 |
+
"file_processor": "file_processor",
|
| 477 |
+
"synthesizer": "synthesizer",
|
| 478 |
+
"refine_approach": "refine_approach"
|
| 479 |
+
}
|
| 480 |
+
)
|
| 481 |
+
|
| 482 |
+
# Quality check before synthesis
|
| 483 |
+
workflow.add_conditional_edges(
|
| 484 |
+
"refine_approach",
|
| 485 |
+
check_quality_and_refinement,
|
| 486 |
+
{
|
| 487 |
+
"synthesizer": "synthesizer",
|
| 488 |
+
"refine_approach": "refine_approach" # Allow multiple refinement attempts
|
| 489 |
+
}
|
| 490 |
+
)
|
| 491 |
+
|
| 492 |
+
# Synthesizer is the final step
|
| 493 |
+
workflow.add_edge("synthesizer", END)
|
| 494 |
+
|
| 495 |
+
return workflow.compile()
|
| 496 |
+
|
| 497 |
+
def create_simple_workflow(llm_client, tools_dict):
    """
    Build a single-node LangGraph workflow that runs the planned agent
    sequence linearly instead of through conditional graph edges.

    Args:
        llm_client: Shared LLM client handed to every agent.
        tools_dict: Tool registry; currently unused here but kept for
            signature compatibility with the complex workflow factory.

    Returns:
        A compiled LangGraph workflow containing one "process" node.
    """
    # Use the same agents as the complex workflow for consistency.
    router = RouterAgent(llm_client)
    web_researcher = WebResearchAgent(llm_client)
    reasoning_agent = ReasoningAgent(llm_client)
    synthesizer = SynthesizerAgent(llm_client)

    def process_with_planning(state: GAIAAgentState) -> GAIAAgentState:
        """Plan with the router, run the planned agents in order, then synthesize."""
        logger.info("🚀 Starting simple workflow with enhanced planning")

        # Step 1: Analyze the question and produce an agent sequence.
        state = router.process(state)

        # Step 2: Execute the planned agents in order.
        agent_sequence = getattr(state, 'agent_sequence', ['web_researcher', 'reasoning_agent'])

        for agent_name in agent_sequence:
            if agent_name == 'web_researcher':
                state = web_researcher.process(state)
            elif agent_name == 'reasoning_agent':
                state = reasoning_agent.process(state)
            elif agent_name == 'synthesizer':
                break  # Synthesizer is handled separately below
            else:
                # Fix: previously unknown names (e.g. 'file_processor') were
                # silently dropped and the confidence check below then
                # re-evaluated a stale result. Warn and skip the stale check.
                logger.warning(f"Simple workflow has no agent '{agent_name}', skipping")
                continue

            # Early exit once an executed agent produced a high-confidence result.
            if state.agent_results and state.agent_results[-1].confidence > 0.8:
                logger.info("High confidence result achieved, proceeding to synthesis")
                break

        # Step 3: Synthesize whatever results were gathered.
        state = synthesizer.process(state)
        return state

    # Wrap the linear procedure in a trivial one-node graph.
    workflow = StateGraph(GAIAAgentState)
    workflow.add_node("process", process_with_planning)
    workflow.set_entry_point("process")
    workflow.add_edge("process", END)

    return workflow.compile()