Chris
committed on
Commit
·
64e1704
1
Parent(s):
5a03810
Final 7.3.3
Browse files- src/agents/__pycache__/router.cpython-310.pyc +0 -0
- src/agents/__pycache__/state.cpython-310.pyc +0 -0
- src/agents/router.py +241 -1
- src/agents/state.py +47 -28
- src/agents/web_researcher.py +385 -38
- src/app.py +132 -2
- src/workflow/gaia_workflow.py +239 -1
src/agents/__pycache__/router.cpython-310.pyc
CHANGED
|
Binary files a/src/agents/__pycache__/router.cpython-310.pyc and b/src/agents/__pycache__/router.cpython-310.pyc differ
|
|
|
src/agents/__pycache__/state.cpython-310.pyc
CHANGED
|
Binary files a/src/agents/__pycache__/state.cpython-310.pyc and b/src/agents/__pycache__/state.cpython-310.pyc differ
|
|
|
src/agents/router.py
CHANGED
|
@@ -22,6 +22,53 @@ class RouterAgent:
|
|
| 22 |
def __init__(self, llm_client: QwenClient):
|
| 23 |
self.llm_client = llm_client
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
def route_question(self, state: GAIAAgentState) -> GAIAAgentState:
|
| 26 |
"""
|
| 27 |
Main routing function - analyzes question and determines processing strategy
|
|
@@ -586,4 +633,197 @@ REASONING: [brief explanation]
|
|
| 586 |
if AgentRole.SYNTHESIZER not in agents:
|
| 587 |
agents.append(AgentRole.SYNTHESIZER)
|
| 588 |
|
| 589 |
-
return agents
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
def __init__(self, llm_client: QwenClient):
|
| 23 |
self.llm_client = llm_client
|
| 24 |
|
| 25 |
+
def process(self, state: GAIAAgentState) -> GAIAAgentState:
    """
    Run the four-phase routing pipeline and record the chosen agents on the state.

    Phases: structural analysis -> information-needs analysis -> execution
    strategy -> agent sequencing. On any failure the state falls back to a
    default agent sequence so the workflow can still proceed.
    """
    logger.info("🧭 Router: Starting multi-phase question analysis")
    state.add_processing_step("Router: Multi-phase analysis initiated")

    try:
        # Phase 1: what kind of question is this?
        structure = self._analyze_question_structure(state.question)
        state.add_processing_step(f"Router: Structure = {structure['type']}")

        # Phase 2: what information does answering it require?
        needs = self._analyze_information_needs(state.question, structure)
        state.add_processing_step(f"Router: Needs = {needs['primary_need']}")

        # Phase 3: how should that information be gathered?
        plan = self._plan_execution_strategy(state.question, structure, needs)
        state.add_processing_step(f"Router: Strategy = {plan['approach']}")

        # Phase 4: which agents, in which order?
        sequence = self._select_agent_sequence(plan, needs)

        # Expose the full analysis so downstream agents can reuse it.
        state.router_analysis = {
            'structural': structure,
            'requirements': needs,
            'strategy': plan,
            'sequence': sequence
        }

        logger.info(f"✅ Routing complete: {structure['type']} -> {sequence}")
        state.add_processing_step(f"Router: Selected agents = {sequence}")

        # Hand the sequence to the workflow.
        state.agent_sequence = sequence
        return state

    except Exception as e:
        error_msg = f"Router analysis failed: {str(e)}"
        logger.error(error_msg)
        state.add_error(error_msg)

        # Fall back to a sensible default pipeline.
        state.agent_sequence = ['reasoning_agent', 'web_researcher', 'synthesizer']
        return state
|
| 71 |
+
|
| 72 |
def route_question(self, state: GAIAAgentState) -> GAIAAgentState:
|
| 73 |
"""
|
| 74 |
Main routing function - analyzes question and determines processing strategy
|
|
|
|
| 633 |
if AgentRole.SYNTHESIZER not in agents:
|
| 634 |
agents.append(AgentRole.SYNTHESIZER)
|
| 635 |
|
| 636 |
+
return agents
|
| 637 |
+
|
| 638 |
+
def _analyze_question_structure(self, question: str) -> Dict[str, Any]:
|
| 639 |
+
"""
|
| 640 |
+
Phase 1: Analyze the structural components of the question
|
| 641 |
+
"""
|
| 642 |
+
structure = {
|
| 643 |
+
'type': 'unknown',
|
| 644 |
+
'complexity': 'medium',
|
| 645 |
+
'components': [],
|
| 646 |
+
'data_sources': [],
|
| 647 |
+
'temporal_aspects': [],
|
| 648 |
+
'quantitative_aspects': []
|
| 649 |
+
}
|
| 650 |
+
|
| 651 |
+
question_lower = question.lower()
|
| 652 |
+
|
| 653 |
+
# Identify question type
|
| 654 |
+
if any(word in question_lower for word in ['how many', 'count', 'number of', 'quantity']):
|
| 655 |
+
structure['type'] = 'quantitative'
|
| 656 |
+
elif any(word in question_lower for word in ['who is', 'who was', 'who did', 'name of']):
|
| 657 |
+
structure['type'] = 'identification'
|
| 658 |
+
elif any(word in question_lower for word in ['where', 'location', 'place']):
|
| 659 |
+
structure['type'] = 'location'
|
| 660 |
+
elif any(word in question_lower for word in ['when', 'date', 'time', 'year']):
|
| 661 |
+
structure['type'] = 'temporal'
|
| 662 |
+
elif any(word in question_lower for word in ['what is', 'define', 'explain']):
|
| 663 |
+
structure['type'] = 'definition'
|
| 664 |
+
elif any(word in question_lower for word in ['calculate', 'compute', 'solve']):
|
| 665 |
+
structure['type'] = 'mathematical'
|
| 666 |
+
elif any(word in question_lower for word in ['compare', 'difference', 'versus']):
|
| 667 |
+
structure['type'] = 'comparison'
|
| 668 |
+
elif 'file' in question_lower or 'attached' in question_lower:
|
| 669 |
+
structure['type'] = 'file_analysis'
|
| 670 |
+
else:
|
| 671 |
+
structure['type'] = 'complex_reasoning'
|
| 672 |
+
|
| 673 |
+
# Identify data sources needed
|
| 674 |
+
if any(term in question_lower for term in ['wikipedia', 'article', 'page']):
|
| 675 |
+
structure['data_sources'].append('wikipedia')
|
| 676 |
+
if any(term in question_lower for term in ['video', 'youtube', 'watch']):
|
| 677 |
+
structure['data_sources'].append('video')
|
| 678 |
+
if any(term in question_lower for term in ['file', 'attached', 'document']):
|
| 679 |
+
structure['data_sources'].append('file')
|
| 680 |
+
if any(term in question_lower for term in ['recent', 'latest', 'current', '2024', '2025']):
|
| 681 |
+
structure['data_sources'].append('web_search')
|
| 682 |
+
|
| 683 |
+
# Identify temporal aspects
|
| 684 |
+
import re
|
| 685 |
+
years = re.findall(r'\b(?:19|20)\d{2}\b', question)
|
| 686 |
+
dates = re.findall(r'\b(?:january|february|march|april|may|june|july|august|september|october|november|december)\s+\d{1,2},?\s+\d{4}\b', question_lower)
|
| 687 |
+
structure['temporal_aspects'] = years + dates
|
| 688 |
+
|
| 689 |
+
# Identify quantitative aspects
|
| 690 |
+
quantities = re.findall(r'\b\d+(?:\.\d+)?\b', question)
|
| 691 |
+
structure['quantitative_aspects'] = quantities
|
| 692 |
+
|
| 693 |
+
# Assess complexity
|
| 694 |
+
complexity_factors = [
|
| 695 |
+
len(question.split()) > 25, # Long question
|
| 696 |
+
len(structure['data_sources']) > 1, # Multiple sources
|
| 697 |
+
len(structure['temporal_aspects']) > 1, # Multiple time periods
|
| 698 |
+
'and' in question_lower and 'or' in question_lower, # Multiple conditions
|
| 699 |
+
question.count('?') > 1, # Multiple questions
|
| 700 |
+
]
|
| 701 |
+
|
| 702 |
+
if sum(complexity_factors) >= 3:
|
| 703 |
+
structure['complexity'] = 'high'
|
| 704 |
+
elif sum(complexity_factors) >= 1:
|
| 705 |
+
structure['complexity'] = 'medium'
|
| 706 |
+
else:
|
| 707 |
+
structure['complexity'] = 'low'
|
| 708 |
+
|
| 709 |
+
return structure
|
| 710 |
+
|
| 711 |
+
def _analyze_information_needs(self, question: str, structural: Dict[str, Any]) -> Dict[str, Any]:
|
| 712 |
+
"""
|
| 713 |
+
Phase 2: Analyze what specific information is needed to answer the question
|
| 714 |
+
"""
|
| 715 |
+
needs = {
|
| 716 |
+
'primary_need': 'factual_lookup',
|
| 717 |
+
'information_types': [],
|
| 718 |
+
'precision_required': 'medium',
|
| 719 |
+
'verification_needed': False,
|
| 720 |
+
'synthesis_complexity': 'simple'
|
| 721 |
+
}
|
| 722 |
+
|
| 723 |
+
# Determine primary information need
|
| 724 |
+
if structural['type'] == 'quantitative':
|
| 725 |
+
needs['primary_need'] = 'numerical_data'
|
| 726 |
+
needs['precision_required'] = 'high'
|
| 727 |
+
elif structural['type'] == 'identification':
|
| 728 |
+
needs['primary_need'] = 'entity_identification'
|
| 729 |
+
elif structural['type'] == 'mathematical':
|
| 730 |
+
needs['primary_need'] = 'computation'
|
| 731 |
+
needs['precision_required'] = 'high'
|
| 732 |
+
elif structural['type'] == 'file_analysis':
|
| 733 |
+
needs['primary_need'] = 'file_processing'
|
| 734 |
+
elif structural['type'] == 'comparison':
|
| 735 |
+
needs['primary_need'] = 'comparative_analysis'
|
| 736 |
+
needs['verification_needed'] = True
|
| 737 |
+
else:
|
| 738 |
+
needs['primary_need'] = 'factual_lookup'
|
| 739 |
+
|
| 740 |
+
# Determine information types needed
|
| 741 |
+
if 'wikipedia' in structural['data_sources']:
|
| 742 |
+
needs['information_types'].append('encyclopedic')
|
| 743 |
+
if 'video' in structural['data_sources']:
|
| 744 |
+
needs['information_types'].append('multimedia_content')
|
| 745 |
+
if 'web_search' in structural['data_sources']:
|
| 746 |
+
needs['information_types'].append('current_information')
|
| 747 |
+
if 'file' in structural['data_sources']:
|
| 748 |
+
needs['information_types'].append('document_analysis')
|
| 749 |
+
|
| 750 |
+
# Assess synthesis complexity
|
| 751 |
+
if structural['complexity'] == 'high' or len(needs['information_types']) > 2:
|
| 752 |
+
needs['synthesis_complexity'] = 'complex'
|
| 753 |
+
elif len(needs['information_types']) > 1:
|
| 754 |
+
needs['synthesis_complexity'] = 'moderate'
|
| 755 |
+
|
| 756 |
+
return needs
|
| 757 |
+
|
| 758 |
+
def _plan_execution_strategy(self, question: str, structural: Dict[str, Any], requirements: Dict[str, Any]) -> Dict[str, Any]:
|
| 759 |
+
"""
|
| 760 |
+
Phase 3: Plan the execution strategy based on analysis
|
| 761 |
+
"""
|
| 762 |
+
strategy = {
|
| 763 |
+
'approach': 'sequential',
|
| 764 |
+
'parallel_possible': False,
|
| 765 |
+
'iterative_refinement': False,
|
| 766 |
+
'fallback_needed': True,
|
| 767 |
+
'verification_steps': []
|
| 768 |
+
}
|
| 769 |
+
|
| 770 |
+
# Determine approach
|
| 771 |
+
if requirements['primary_need'] == 'file_processing':
|
| 772 |
+
strategy['approach'] = 'file_first'
|
| 773 |
+
elif requirements['primary_need'] == 'computation':
|
| 774 |
+
strategy['approach'] = 'reasoning_first'
|
| 775 |
+
elif len(requirements['information_types']) > 2:
|
| 776 |
+
strategy['approach'] = 'multi_source'
|
| 777 |
+
strategy['parallel_possible'] = True
|
| 778 |
+
elif 'current_information' in requirements['information_types']:
|
| 779 |
+
strategy['approach'] = 'web_first'
|
| 780 |
+
else:
|
| 781 |
+
strategy['approach'] = 'knowledge_first'
|
| 782 |
+
|
| 783 |
+
# Determine if iterative refinement is needed
|
| 784 |
+
if (structural['complexity'] == 'high' or
|
| 785 |
+
requirements['precision_required'] == 'high' or
|
| 786 |
+
requirements['verification_needed']):
|
| 787 |
+
strategy['iterative_refinement'] = True
|
| 788 |
+
|
| 789 |
+
# Plan verification steps
|
| 790 |
+
if requirements['verification_needed']:
|
| 791 |
+
strategy['verification_steps'] = ['cross_reference', 'consistency_check']
|
| 792 |
+
if requirements['precision_required'] == 'high':
|
| 793 |
+
strategy['verification_steps'].append('precision_validation')
|
| 794 |
+
|
| 795 |
+
return strategy
|
| 796 |
+
|
| 797 |
+
def _select_agent_sequence(self, strategy: Dict[str, Any], requirements: Dict[str, Any]) -> List[str]:
|
| 798 |
+
"""
|
| 799 |
+
Phase 4: Select the optimal sequence of agents based on strategy
|
| 800 |
+
"""
|
| 801 |
+
sequence = []
|
| 802 |
+
|
| 803 |
+
# Base sequence based on approach
|
| 804 |
+
if strategy['approach'] == 'file_first':
|
| 805 |
+
sequence = ['file_processor', 'reasoning_agent', 'synthesizer']
|
| 806 |
+
elif strategy['approach'] == 'reasoning_first':
|
| 807 |
+
sequence = ['reasoning_agent', 'web_researcher', 'synthesizer']
|
| 808 |
+
elif strategy['approach'] == 'web_first':
|
| 809 |
+
sequence = ['web_researcher', 'reasoning_agent', 'synthesizer']
|
| 810 |
+
elif strategy['approach'] == 'knowledge_first':
|
| 811 |
+
sequence = ['web_researcher', 'reasoning_agent', 'synthesizer']
|
| 812 |
+
elif strategy['approach'] == 'multi_source':
|
| 813 |
+
sequence = ['web_researcher', 'file_processor', 'reasoning_agent', 'synthesizer']
|
| 814 |
+
else: # sequential
|
| 815 |
+
sequence = ['reasoning_agent', 'web_researcher', 'synthesizer']
|
| 816 |
+
|
| 817 |
+
# Add verification agents if needed
|
| 818 |
+
if strategy['iterative_refinement']:
|
| 819 |
+
# Insert reasoning agent before synthesizer for verification
|
| 820 |
+
if 'reasoning_agent' in sequence:
|
| 821 |
+
sequence.remove('reasoning_agent')
|
| 822 |
+
sequence.insert(-1, 'reasoning_agent') # Before synthesizer
|
| 823 |
+
|
| 824 |
+
# Ensure synthesizer is always last
|
| 825 |
+
if 'synthesizer' in sequence:
|
| 826 |
+
sequence.remove('synthesizer')
|
| 827 |
+
sequence.append('synthesizer')
|
| 828 |
+
|
| 829 |
+
return sequence
|
src/agents/state.py
CHANGED
|
@@ -8,6 +8,7 @@ from typing import Dict, Any, List, Optional, Literal
|
|
| 8 |
from dataclasses import dataclass, field
|
| 9 |
from enum import Enum
|
| 10 |
import time
|
|
|
|
| 11 |
|
| 12 |
class QuestionType(Enum):
|
| 13 |
"""Classification of GAIA question types"""
|
|
@@ -65,38 +66,54 @@ class GAIAAgentState:
|
|
| 65 |
This is passed between all agents in the LangGraph workflow
|
| 66 |
"""
|
| 67 |
|
| 68 |
-
def __init__(self):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
# Question information
|
| 70 |
-
self.task_id: str = ""
|
| 71 |
-
self.question: str = ""
|
| 72 |
-
self.question_type: QuestionType = QuestionType.UNKNOWN
|
| 73 |
self.difficulty_level: int = 1 # 1, 2, or 3
|
| 74 |
-
self.file_name: Optional[str] = None
|
| 75 |
self.file_path: Optional[str] = None
|
| 76 |
self.metadata: Dict[str, Any] = {}
|
| 77 |
|
| 78 |
# Routing decisions
|
| 79 |
-
self.routing_decision: Dict[str, Any] = {}
|
| 80 |
-
self.selected_agents: List[AgentRole] = []
|
| 81 |
-
self.complexity_assessment: str = "medium"
|
| 82 |
self.estimated_cost: float = 0.0
|
| 83 |
|
| 84 |
# Agent results
|
| 85 |
-
self.agent_results: Dict[AgentRole, AgentResult] = {}
|
| 86 |
self.tool_results: List[ToolResult] = []
|
| 87 |
|
| 88 |
# Final answer
|
| 89 |
-
self.final_answer: str = ""
|
| 90 |
-
self.final_confidence: float = 0.0
|
| 91 |
-
self.final_reasoning: str = ""
|
| 92 |
self.answer_source: str = "" # Which agent provided the final answer
|
| 93 |
|
| 94 |
# System tracking
|
| 95 |
-
self.start_time: float = time.time()
|
| 96 |
-
self.processing_steps: List[str] = []
|
| 97 |
-
self.total_cost: float = 0.0
|
| 98 |
self.total_processing_time: float = 0.0
|
| 99 |
-
self.error_messages: List[str] = []
|
| 100 |
|
| 101 |
# Status flags
|
| 102 |
self.is_complete: bool = False
|
|
@@ -109,7 +126,7 @@ class GAIAAgentState:
|
|
| 109 |
|
| 110 |
def add_agent_result(self, result: AgentResult):
|
| 111 |
"""Add result from an agent"""
|
| 112 |
-
self.agent_results
|
| 113 |
self.total_cost += result.cost_estimate
|
| 114 |
self.total_processing_time += result.processing_time
|
| 115 |
self.add_processing_step(f"{result.agent_role.value}: {result.result[:50]}...")
|
|
@@ -121,14 +138,14 @@ class GAIAAgentState:
|
|
| 121 |
|
| 122 |
def add_error(self, error_message: str):
|
| 123 |
"""Add an error message"""
|
| 124 |
-
self.
|
| 125 |
self.add_processing_step(f"ERROR: {error_message}")
|
| 126 |
|
| 127 |
def get_best_result(self) -> Optional[AgentResult]:
|
| 128 |
"""Get the agent result with highest confidence"""
|
| 129 |
if not self.agent_results:
|
| 130 |
return None
|
| 131 |
-
return max(self.agent_results
|
| 132 |
|
| 133 |
def should_use_complex_model(self) -> bool:
|
| 134 |
"""Determine if complex model should be used based on state"""
|
|
@@ -145,9 +162,9 @@ class GAIAAgentState:
|
|
| 145 |
def get_summary(self) -> Dict[str, Any]:
|
| 146 |
"""Get a summary of the current state"""
|
| 147 |
return {
|
| 148 |
-
"task_id": self.
|
| 149 |
-
"question_type": self.question_type.value,
|
| 150 |
-
"agents_used": [role.value for role in self.
|
| 151 |
"tools_used": [tool.tool_name for tool in self.tool_results],
|
| 152 |
"final_answer": self.final_answer,
|
| 153 |
"confidence": self.final_confidence,
|
|
@@ -155,15 +172,15 @@ class GAIAAgentState:
|
|
| 155 |
"total_cost": self.total_cost,
|
| 156 |
"steps_count": len(self.processing_steps),
|
| 157 |
"is_complete": self.is_complete,
|
| 158 |
-
"error_count": len(self.
|
| 159 |
}
|
| 160 |
|
| 161 |
def to_dict(self) -> Dict[str, Any]:
|
| 162 |
"""Convert state to dictionary for serialization"""
|
| 163 |
return {
|
| 164 |
-
"task_id": self.
|
| 165 |
"question": self.question,
|
| 166 |
-
"question_type": self.question_type.value,
|
| 167 |
"difficulty_level": self.difficulty_level,
|
| 168 |
"file_name": self.file_name,
|
| 169 |
"file_path": self.file_path,
|
|
@@ -172,14 +189,16 @@ class GAIAAgentState:
|
|
| 172 |
"complexity_assessment": self.complexity_assessment,
|
| 173 |
"final_answer": self.final_answer,
|
| 174 |
"final_confidence": self.final_confidence,
|
| 175 |
-
"final_reasoning": self.
|
| 176 |
"answer_source": self.answer_source,
|
| 177 |
"processing_steps": self.processing_steps,
|
| 178 |
"total_cost": self.total_cost,
|
| 179 |
"total_processing_time": self.total_processing_time,
|
| 180 |
-
"error_messages": self.
|
| 181 |
"is_complete": self.is_complete,
|
| 182 |
-
"summary": self.get_summary()
|
|
|
|
|
|
|
| 183 |
}
|
| 184 |
|
| 185 |
# Type alias for LangGraph
|
|
|
|
| 8 |
from dataclasses import dataclass, field
|
| 9 |
from enum import Enum
|
| 10 |
import time
|
| 11 |
+
import uuid
|
| 12 |
|
| 13 |
class QuestionType(Enum):
|
| 14 |
"""Classification of GAIA question types"""
|
|
|
|
| 66 |
This is passed between all agents in the LangGraph workflow
|
| 67 |
"""
|
| 68 |
|
| 69 |
+
def __init__(self, question: str, question_id: str = None, file_name: str = None, file_content: bytes = None):
|
| 70 |
+
self.question = question
|
| 71 |
+
self.question_id = question_id or str(uuid.uuid4())
|
| 72 |
+
self.file_name = file_name
|
| 73 |
+
self.file_content = file_content
|
| 74 |
+
|
| 75 |
+
# Analysis results
|
| 76 |
+
self.question_type: Optional[QuestionType] = None
|
| 77 |
+
self.question_types: List[QuestionType] = []
|
| 78 |
+
self.primary_question_type: Optional[QuestionType] = None
|
| 79 |
+
self.complexity_assessment: str = "medium"
|
| 80 |
+
self.selected_agents: List[AgentRole] = []
|
| 81 |
+
|
| 82 |
+
# Enhanced router analysis
|
| 83 |
+
self.router_analysis: Optional[Dict[str, Any]] = None
|
| 84 |
+
self.agent_sequence: List[str] = []
|
| 85 |
+
|
| 86 |
+
# Processing tracking
|
| 87 |
+
self.processing_steps: List[str] = []
|
| 88 |
+
self.agent_results: List[AgentResult] = []
|
| 89 |
+
self.errors: List[str] = []
|
| 90 |
+
self.start_time: float = time.time()
|
| 91 |
+
self.total_cost: float = 0.0
|
| 92 |
+
|
| 93 |
+
# Final results
|
| 94 |
+
self.final_answer: Optional[str] = None
|
| 95 |
+
self.final_confidence: float = 0.0
|
| 96 |
+
self.synthesis_reasoning: str = ""
|
| 97 |
+
|
| 98 |
+
# Routing decision tracking
|
| 99 |
+
self.routing_decision: Dict[str, Any] = {}
|
| 100 |
+
|
| 101 |
# Question information
|
|
|
|
|
|
|
|
|
|
| 102 |
self.difficulty_level: int = 1 # 1, 2, or 3
|
|
|
|
| 103 |
self.file_path: Optional[str] = None
|
| 104 |
self.metadata: Dict[str, Any] = {}
|
| 105 |
|
| 106 |
# Routing decisions
|
|
|
|
|
|
|
|
|
|
| 107 |
self.estimated_cost: float = 0.0
|
| 108 |
|
| 109 |
# Agent results
|
|
|
|
| 110 |
self.tool_results: List[ToolResult] = []
|
| 111 |
|
| 112 |
# Final answer
|
|
|
|
|
|
|
|
|
|
| 113 |
self.answer_source: str = "" # Which agent provided the final answer
|
| 114 |
|
| 115 |
# System tracking
|
|
|
|
|
|
|
|
|
|
| 116 |
self.total_processing_time: float = 0.0
|
|
|
|
| 117 |
|
| 118 |
# Status flags
|
| 119 |
self.is_complete: bool = False
|
|
|
|
| 126 |
|
| 127 |
def add_agent_result(self, result: AgentResult):
    """Record an agent's result and fold its cost and runtime into the totals."""
    self.agent_results.append(result)
    # Accumulate bookkeeping totals alongside the stored result.
    self.total_cost += result.cost_estimate
    self.total_processing_time += result.processing_time
    # Keep a short preview of the result in the processing trail.
    self.add_processing_step(f"{result.agent_role.value}: {result.result[:50]}...")
|
|
|
|
| 138 |
|
| 139 |
def add_error(self, error_message: str):
    """Record an error message and mirror it into the processing trail."""
    self.errors.append(error_message)
    self.add_processing_step(f"ERROR: {error_message}")
|
| 143 |
|
| 144 |
def get_best_result(self) -> Optional[AgentResult]:
    """Return the stored agent result with the highest confidence, or None if there are none.

    Ties keep the earliest-stored result, matching max() semantics.
    """
    best = None
    for candidate in self.agent_results:
        if best is None or candidate.confidence > best.confidence:
            best = candidate
    return best
|
| 149 |
|
| 150 |
def should_use_complex_model(self) -> bool:
|
| 151 |
"""Determine if complex model should be used based on state"""
|
|
|
|
| 162 |
def get_summary(self) -> Dict[str, Any]:
|
| 163 |
"""Get a summary of the current state"""
|
| 164 |
return {
|
| 165 |
+
"task_id": self.question_id,
|
| 166 |
+
"question_type": self.question_type.value if self.question_type else "unknown",
|
| 167 |
+
"agents_used": [role.value for role in self.selected_agents],
|
| 168 |
"tools_used": [tool.tool_name for tool in self.tool_results],
|
| 169 |
"final_answer": self.final_answer,
|
| 170 |
"confidence": self.final_confidence,
|
|
|
|
| 172 |
"total_cost": self.total_cost,
|
| 173 |
"steps_count": len(self.processing_steps),
|
| 174 |
"is_complete": self.is_complete,
|
| 175 |
+
"error_count": len(self.errors)
|
| 176 |
}
|
| 177 |
|
| 178 |
def to_dict(self) -> Dict[str, Any]:
|
| 179 |
"""Convert state to dictionary for serialization"""
|
| 180 |
return {
|
| 181 |
+
"task_id": self.question_id,
|
| 182 |
"question": self.question,
|
| 183 |
+
"question_type": self.question_type.value if self.question_type else "unknown",
|
| 184 |
"difficulty_level": self.difficulty_level,
|
| 185 |
"file_name": self.file_name,
|
| 186 |
"file_path": self.file_path,
|
|
|
|
| 189 |
"complexity_assessment": self.complexity_assessment,
|
| 190 |
"final_answer": self.final_answer,
|
| 191 |
"final_confidence": self.final_confidence,
|
| 192 |
+
"final_reasoning": self.synthesis_reasoning,
|
| 193 |
"answer_source": self.answer_source,
|
| 194 |
"processing_steps": self.processing_steps,
|
| 195 |
"total_cost": self.total_cost,
|
| 196 |
"total_processing_time": self.total_processing_time,
|
| 197 |
+
"error_messages": self.errors,
|
| 198 |
"is_complete": self.is_complete,
|
| 199 |
+
"summary": self.get_summary(),
|
| 200 |
+
"router_analysis": self.router_analysis,
|
| 201 |
+
"agent_sequence": self.agent_sequence
|
| 202 |
}
|
| 203 |
|
| 204 |
# Type alias for LangGraph
|
src/agents/web_researcher.py
CHANGED
|
@@ -29,54 +29,43 @@ class WebResearchAgent:
|
|
| 29 |
|
| 30 |
def process(self, state: GAIAAgentState) -> GAIAAgentState:
|
| 31 |
"""
|
| 32 |
-
|
| 33 |
"""
|
| 34 |
logger.info(f"Web researcher processing: {state.question[:100]}...")
|
| 35 |
-
state.add_processing_step("Web Researcher: Starting research")
|
| 36 |
|
| 37 |
try:
|
| 38 |
-
#
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
-
# Execute research with
|
| 43 |
-
|
| 44 |
-
try:
|
| 45 |
-
# Execute research based on strategy
|
| 46 |
-
if strategy == "wikipedia_direct":
|
| 47 |
-
result = self._research_wikipedia_direct(state)
|
| 48 |
-
elif strategy == "wikipedia_search":
|
| 49 |
-
result = self._research_wikipedia_search(state)
|
| 50 |
-
elif strategy == "youtube_analysis":
|
| 51 |
-
result = self._research_youtube(state)
|
| 52 |
-
elif strategy == "web_search":
|
| 53 |
-
result = self._research_web_general(state)
|
| 54 |
-
elif strategy == "url_extraction":
|
| 55 |
-
result = self._research_url_content(state)
|
| 56 |
-
else:
|
| 57 |
-
result = self._research_multi_source(state)
|
| 58 |
-
|
| 59 |
-
except Exception as strategy_error:
|
| 60 |
-
logger.warning(f"Strategy {strategy} failed: {strategy_error}, trying fallback")
|
| 61 |
-
# Try fallback strategy
|
| 62 |
-
try:
|
| 63 |
-
result = self._research_fallback_strategy(state, str(strategy_error))
|
| 64 |
-
except Exception as fallback_error:
|
| 65 |
-
logger.error(f"Fallback strategy also failed: {fallback_error}")
|
| 66 |
-
result = self._create_basic_response(state, f"Research failed: {fallback_error}")
|
| 67 |
|
| 68 |
-
#
|
| 69 |
-
if not
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
# Add result to state
|
| 73 |
-
state.add_agent_result(
|
| 74 |
-
state.add_processing_step(f"Web Researcher: Completed with confidence {
|
| 75 |
|
| 76 |
return state
|
| 77 |
|
| 78 |
except Exception as e:
|
| 79 |
-
error_msg = f"
|
| 80 |
state.add_error(error_msg)
|
| 81 |
logger.error(error_msg)
|
| 82 |
|
|
@@ -85,8 +74,8 @@ class WebResearchAgent:
|
|
| 85 |
agent_role=AgentRole.WEB_RESEARCHER,
|
| 86 |
success=False,
|
| 87 |
result=f"Research encountered difficulties: {str(e)}",
|
| 88 |
-
confidence=0.1,
|
| 89 |
-
reasoning=f"Exception during web research: {str(e)}",
|
| 90 |
tools_used=[],
|
| 91 |
model_used="error",
|
| 92 |
processing_time=0.0,
|
|
@@ -95,6 +84,364 @@ class WebResearchAgent:
|
|
| 95 |
state.add_agent_result(failure_result)
|
| 96 |
return state
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
def _determine_research_strategy(self, question: str, file_name: Optional[str] = None) -> str:
|
| 99 |
"""Determine the best research strategy for the question"""
|
| 100 |
|
|
|
|
| 29 |
|
| 30 |
def process(self, state: GAIAAgentState) -> GAIAAgentState:
|
| 31 |
"""
|
| 32 |
+
Enhanced multi-step research processing with systematic problem decomposition
|
| 33 |
"""
|
| 34 |
logger.info(f"Web researcher processing: {state.question[:100]}...")
|
| 35 |
+
state.add_processing_step("Web Researcher: Starting enhanced multi-step research")
|
| 36 |
|
| 37 |
try:
|
| 38 |
+
# Step 1: Analyze router's decomposition if available
|
| 39 |
+
router_analysis = getattr(state, 'router_analysis', None)
|
| 40 |
+
if router_analysis:
|
| 41 |
+
state.add_processing_step("Web Researcher: Using router analysis")
|
| 42 |
+
research_plan = self._build_research_plan_from_router(state.question, router_analysis)
|
| 43 |
+
else:
|
| 44 |
+
state.add_processing_step("Web Researcher: Creating independent research plan")
|
| 45 |
+
research_plan = self._create_independent_research_plan(state.question)
|
| 46 |
|
| 47 |
+
# Step 2: Execute research plan with iterative refinement
|
| 48 |
+
results = self._execute_research_plan(state, research_plan)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
+
# Step 3: Evaluate results and refine if needed
|
| 51 |
+
if not results or results.confidence < 0.4:
|
| 52 |
+
logger.info("Initial research insufficient, attempting refinement")
|
| 53 |
+
state.add_processing_step("Web Researcher: Refining research approach")
|
| 54 |
+
refined_plan = self._refine_research_plan(state.question, research_plan, results)
|
| 55 |
+
results = self._execute_research_plan(state, refined_plan)
|
| 56 |
+
|
| 57 |
+
# Step 4: Finalize results
|
| 58 |
+
if not results or not isinstance(results, AgentResult):
|
| 59 |
+
results = self._create_basic_response(state, "Multi-step research completed with limited results")
|
| 60 |
|
| 61 |
# Add result to state
|
| 62 |
+
state.add_agent_result(results)
|
| 63 |
+
state.add_processing_step(f"Web Researcher: Completed with confidence {results.confidence:.2f}")
|
| 64 |
|
| 65 |
return state
|
| 66 |
|
| 67 |
except Exception as e:
|
| 68 |
+
error_msg = f"Enhanced web research failed: {str(e)}"
|
| 69 |
state.add_error(error_msg)
|
| 70 |
logger.error(error_msg)
|
| 71 |
|
|
|
|
| 74 |
agent_role=AgentRole.WEB_RESEARCHER,
|
| 75 |
success=False,
|
| 76 |
result=f"Research encountered difficulties: {str(e)}",
|
| 77 |
+
confidence=0.1,
|
| 78 |
+
reasoning=f"Exception during enhanced web research: {str(e)}",
|
| 79 |
tools_used=[],
|
| 80 |
model_used="error",
|
| 81 |
processing_time=0.0,
|
|
|
|
| 84 |
state.add_agent_result(failure_result)
|
| 85 |
return state
|
| 86 |
|
| 87 |
+
def _build_research_plan_from_router(self, question: str, router_analysis: Dict[str, Any]) -> Dict[str, Any]:
|
| 88 |
+
"""Build research plan using router's structural analysis"""
|
| 89 |
+
|
| 90 |
+
structural = router_analysis.get('structural', {})
|
| 91 |
+
requirements = router_analysis.get('requirements', {})
|
| 92 |
+
strategy = router_analysis.get('strategy', {})
|
| 93 |
+
|
| 94 |
+
plan = {
|
| 95 |
+
'question_type': structural.get('type', 'unknown'),
|
| 96 |
+
'primary_need': requirements.get('primary_need', 'factual_lookup'),
|
| 97 |
+
'data_sources': structural.get('data_sources', []),
|
| 98 |
+
'approach': strategy.get('approach', 'sequential'),
|
| 99 |
+
'steps': [],
|
| 100 |
+
'fallback_strategies': []
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
# Build step-by-step research plan
|
| 104 |
+
if plan['question_type'] == 'quantitative':
|
| 105 |
+
plan['steps'] = [
|
| 106 |
+
{'action': 'identify_entity', 'details': 'Extract the main subject/entity'},
|
| 107 |
+
{'action': 'gather_data', 'details': 'Find relevant numerical data'},
|
| 108 |
+
{'action': 'verify_timeframe', 'details': 'Ensure data matches time constraints'},
|
| 109 |
+
{'action': 'extract_count', 'details': 'Extract specific count/quantity'}
|
| 110 |
+
]
|
| 111 |
+
elif plan['question_type'] == 'identification':
|
| 112 |
+
plan['steps'] = [
|
| 113 |
+
{'action': 'parse_subject', 'details': 'Identify what/who to find'},
|
| 114 |
+
{'action': 'context_search', 'details': 'Search for relevant context'},
|
| 115 |
+
{'action': 'verify_identity', 'details': 'Confirm identity from sources'}
|
| 116 |
+
]
|
| 117 |
+
else:
|
| 118 |
+
plan['steps'] = [
|
| 119 |
+
{'action': 'decompose_query', 'details': 'Break down complex question'},
|
| 120 |
+
{'action': 'research_components', 'details': 'Research each component'},
|
| 121 |
+
{'action': 'synthesize_findings', 'details': 'Combine results'}
|
| 122 |
+
]
|
| 123 |
+
|
| 124 |
+
# Add fallback strategies
|
| 125 |
+
plan['fallback_strategies'] = [
|
| 126 |
+
'broaden_search_terms',
|
| 127 |
+
'try_alternative_sources',
|
| 128 |
+
'use_partial_information'
|
| 129 |
+
]
|
| 130 |
+
|
| 131 |
+
return plan
|
| 132 |
+
|
| 133 |
+
def _create_independent_research_plan(self, question: str) -> Dict[str, Any]:
|
| 134 |
+
"""Create research plan when router analysis isn't available"""
|
| 135 |
+
|
| 136 |
+
# Analyze question independently
|
| 137 |
+
plan = {
|
| 138 |
+
'question_type': 'general_research',
|
| 139 |
+
'primary_need': 'factual_lookup',
|
| 140 |
+
'data_sources': [],
|
| 141 |
+
'approach': 'sequential',
|
| 142 |
+
'steps': [],
|
| 143 |
+
'fallback_strategies': []
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
question_lower = question.lower()
|
| 147 |
+
|
| 148 |
+
# Determine research approach based on question patterns
|
| 149 |
+
if any(term in question_lower for term in ['how many', 'count', 'number']):
|
| 150 |
+
plan['question_type'] = 'quantitative'
|
| 151 |
+
plan['steps'] = [
|
| 152 |
+
{'action': 'extract_entity', 'details': 'Find the main subject'},
|
| 153 |
+
{'action': 'search_entity_data', 'details': 'Search for subject information'},
|
| 154 |
+
{'action': 'extract_quantities', 'details': 'Find numerical data'},
|
| 155 |
+
{'action': 'apply_constraints', 'details': 'Apply time/condition filters'}
|
| 156 |
+
]
|
| 157 |
+
elif any(term in question_lower for term in ['who', 'name', 'identity']):
|
| 158 |
+
plan['question_type'] = 'identification'
|
| 159 |
+
plan['steps'] = [
|
| 160 |
+
{'action': 'parse_context', 'details': 'Understand context clues'},
|
| 161 |
+
{'action': 'search_individuals', 'details': 'Search for people/entities'},
|
| 162 |
+
{'action': 'verify_match', 'details': 'Confirm identity match'}
|
| 163 |
+
]
|
| 164 |
+
elif any(term in question_lower for term in ['wikipedia', 'article']):
|
| 165 |
+
plan['question_type'] = 'wikipedia_specific'
|
| 166 |
+
plan['data_sources'] = ['wikipedia']
|
| 167 |
+
plan['steps'] = [
|
| 168 |
+
{'action': 'extract_topic', 'details': 'Identify Wikipedia topic'},
|
| 169 |
+
{'action': 'search_wikipedia', 'details': 'Search Wikipedia directly'},
|
| 170 |
+
{'action': 'extract_metadata', 'details': 'Get article details'}
|
| 171 |
+
]
|
| 172 |
+
else:
|
| 173 |
+
plan['steps'] = [
|
| 174 |
+
{'action': 'analyze_question', 'details': 'Break down question components'},
|
| 175 |
+
{'action': 'multi_source_search', 'details': 'Search multiple sources'},
|
| 176 |
+
{'action': 'consolidate_results', 'details': 'Combine findings'}
|
| 177 |
+
]
|
| 178 |
+
|
| 179 |
+
# Standard fallback strategies
|
| 180 |
+
plan['fallback_strategies'] = [
|
| 181 |
+
'simplify_search_terms',
|
| 182 |
+
'try_broader_keywords',
|
| 183 |
+
'search_related_topics'
|
| 184 |
+
]
|
| 185 |
+
|
| 186 |
+
return plan
|
| 187 |
+
|
| 188 |
+
def _execute_research_plan(self, state: GAIAAgentState, plan: Dict[str, Any]) -> AgentResult:
|
| 189 |
+
"""Execute the research plan step by step"""
|
| 190 |
+
|
| 191 |
+
logger.info(f"Executing research plan: {plan['question_type']} with {len(plan['steps'])} steps")
|
| 192 |
+
|
| 193 |
+
accumulated_results = []
|
| 194 |
+
total_processing_time = 0.0
|
| 195 |
+
total_cost = 0.0
|
| 196 |
+
|
| 197 |
+
for i, step in enumerate(plan['steps'], 1):
|
| 198 |
+
logger.info(f"Step {i}/{len(plan['steps'])}: {step['action']} - {step['details']}")
|
| 199 |
+
state.add_processing_step(f"Web Research Step {i}: {step['action']}")
|
| 200 |
+
|
| 201 |
+
try:
|
| 202 |
+
step_result = self._execute_research_step(state, step, plan, accumulated_results)
|
| 203 |
+
if step_result:
|
| 204 |
+
accumulated_results.append(step_result)
|
| 205 |
+
total_processing_time += getattr(step_result, 'execution_time', 0.0)
|
| 206 |
+
total_cost += getattr(step_result, 'cost_estimate', 0.0)
|
| 207 |
+
|
| 208 |
+
except Exception as e:
|
| 209 |
+
logger.warning(f"Step {i} failed: {e}, continuing with next step")
|
| 210 |
+
state.add_processing_step(f"Web Research Step {i}: Failed - {str(e)}")
|
| 211 |
+
continue
|
| 212 |
+
|
| 213 |
+
# Synthesize accumulated results
|
| 214 |
+
if accumulated_results:
|
| 215 |
+
return self._synthesize_research_results(state, accumulated_results, plan, total_processing_time, total_cost)
|
| 216 |
+
else:
|
| 217 |
+
return self._create_failure_result("All research steps failed")
|
| 218 |
+
|
| 219 |
+
def _execute_research_step(self, state: GAIAAgentState, step: Dict[str, Any],
|
| 220 |
+
plan: Dict[str, Any], previous_results: List) -> Any:
|
| 221 |
+
"""Execute a single research step"""
|
| 222 |
+
|
| 223 |
+
action = step['action']
|
| 224 |
+
|
| 225 |
+
if action == 'extract_entity' or action == 'identify_entity':
|
| 226 |
+
return self._extract_main_entity(state.question)
|
| 227 |
+
|
| 228 |
+
elif action == 'search_entity_data' or action == 'gather_data':
|
| 229 |
+
entity = self._get_entity_from_results(previous_results)
|
| 230 |
+
return self._search_entity_information(entity, state.question)
|
| 231 |
+
|
| 232 |
+
elif action == 'extract_quantities' or action == 'extract_count':
|
| 233 |
+
return self._extract_numerical_data(previous_results, state.question)
|
| 234 |
+
|
| 235 |
+
elif action == 'search_wikipedia':
|
| 236 |
+
topic = self._extract_wikipedia_topic(state.question)
|
| 237 |
+
return self.wikipedia_tool.execute(topic)
|
| 238 |
+
|
| 239 |
+
elif action == 'multi_source_search':
|
| 240 |
+
search_terms = self._extract_search_terms(state.question)
|
| 241 |
+
return self._research_multi_source_enhanced(state, search_terms)
|
| 242 |
+
|
| 243 |
+
else:
|
| 244 |
+
# Default: general web search
|
| 245 |
+
search_terms = self._extract_search_terms(state.question)
|
| 246 |
+
return self.web_search_tool.execute(search_terms)
|
| 247 |
+
|
| 248 |
+
def _extract_main_entity(self, question: str) -> Dict[str, Any]:
|
| 249 |
+
"""Extract the main entity/subject from the question"""
|
| 250 |
+
|
| 251 |
+
# Use simple heuristics and patterns to extract main entity
|
| 252 |
+
import re
|
| 253 |
+
|
| 254 |
+
# Look for quoted entities
|
| 255 |
+
quoted = re.findall(r'"([^"]+)"', question)
|
| 256 |
+
if quoted:
|
| 257 |
+
return {'type': 'quoted_entity', 'entity': quoted[0], 'confidence': 0.9}
|
| 258 |
+
|
| 259 |
+
# Look for proper nouns (capitalized words)
|
| 260 |
+
words = question.split()
|
| 261 |
+
proper_nouns = []
|
| 262 |
+
for word in words:
|
| 263 |
+
clean_word = re.sub(r'[^\w]', '', word)
|
| 264 |
+
if clean_word and clean_word[0].isupper() and len(clean_word) > 1:
|
| 265 |
+
proper_nouns.append(clean_word)
|
| 266 |
+
|
| 267 |
+
if proper_nouns:
|
| 268 |
+
entity = ' '.join(proper_nouns[:3]) # Take first few proper nouns
|
| 269 |
+
return {'type': 'proper_noun', 'entity': entity, 'confidence': 0.7}
|
| 270 |
+
|
| 271 |
+
# Fallback: use question keywords
|
| 272 |
+
keywords = self._extract_search_terms(question, max_length=50)
|
| 273 |
+
return {'type': 'keywords', 'entity': keywords, 'confidence': 0.5}
|
| 274 |
+
|
| 275 |
+
def _search_entity_information(self, entity_data: Dict[str, Any], question: str) -> Any:
|
| 276 |
+
"""Search for information about the extracted entity"""
|
| 277 |
+
|
| 278 |
+
if not entity_data or 'entity' not in entity_data:
|
| 279 |
+
return None
|
| 280 |
+
|
| 281 |
+
entity = entity_data['entity']
|
| 282 |
+
|
| 283 |
+
# Try Wikipedia first for entities
|
| 284 |
+
wiki_result = self.wikipedia_tool.execute(entity)
|
| 285 |
+
if wiki_result.success and wiki_result.result.get('found'):
|
| 286 |
+
return wiki_result
|
| 287 |
+
|
| 288 |
+
# Fallback to web search
|
| 289 |
+
search_query = f"{entity} {self._extract_search_terms(question, max_length=30)}"
|
| 290 |
+
return self.web_search_tool.execute(search_query)
|
| 291 |
+
|
| 292 |
+
def _extract_numerical_data(self, previous_results: List, question: str) -> Dict[str, Any]:
|
| 293 |
+
"""Extract numerical data from previous search results"""
|
| 294 |
+
|
| 295 |
+
numerical_data = {
|
| 296 |
+
'numbers_found': [],
|
| 297 |
+
'context': [],
|
| 298 |
+
'confidence': 0.0
|
| 299 |
+
}
|
| 300 |
+
|
| 301 |
+
for result in previous_results:
|
| 302 |
+
if hasattr(result, 'result') and result.result:
|
| 303 |
+
text = str(result.result)
|
| 304 |
+
|
| 305 |
+
# Extract numbers with context
|
| 306 |
+
import re
|
| 307 |
+
number_patterns = [
|
| 308 |
+
r'\b(\d+)\s*(albums?|songs?|tracks?|releases?)\b',
|
| 309 |
+
r'\b(\d+)\s*(studio|live|compilation)\s*(albums?)\b',
|
| 310 |
+
r'\bbetween\s*(\d{4})\s*and\s*(\d{4})\b',
|
| 311 |
+
r'\b(\d+)\b' # Any number as fallback
|
| 312 |
+
]
|
| 313 |
+
|
| 314 |
+
for pattern in number_patterns:
|
| 315 |
+
matches = re.findall(pattern, text, re.IGNORECASE)
|
| 316 |
+
for match in matches:
|
| 317 |
+
if isinstance(match, tuple):
|
| 318 |
+
numerical_data['numbers_found'].extend(match)
|
| 319 |
+
else:
|
| 320 |
+
numerical_data['numbers_found'].append(match)
|
| 321 |
+
|
| 322 |
+
if numerical_data['numbers_found']:
|
| 323 |
+
numerical_data['confidence'] = 0.8
|
| 324 |
+
|
| 325 |
+
return numerical_data
|
| 326 |
+
|
| 327 |
+
def _get_entity_from_results(self, results: List) -> str:
|
| 328 |
+
"""Extract entity name from previous results"""
|
| 329 |
+
|
| 330 |
+
for result in results:
|
| 331 |
+
if isinstance(result, dict) and 'entity' in result:
|
| 332 |
+
return result['entity']
|
| 333 |
+
|
| 334 |
+
return ""
|
| 335 |
+
|
| 336 |
+
def _research_multi_source_enhanced(self, state: GAIAAgentState, search_terms: str) -> Any:
|
| 337 |
+
"""Enhanced multi-source research with systematic approach"""
|
| 338 |
+
|
| 339 |
+
sources_tried = []
|
| 340 |
+
|
| 341 |
+
# Try Wikipedia first for factual information
|
| 342 |
+
wiki_result = self.wikipedia_tool.execute(search_terms)
|
| 343 |
+
if wiki_result.success and wiki_result.result.get('found'):
|
| 344 |
+
sources_tried.append(('Wikipedia', wiki_result))
|
| 345 |
+
|
| 346 |
+
# Try web search for additional information
|
| 347 |
+
web_result = self.web_search_tool.execute({
|
| 348 |
+
"query": search_terms,
|
| 349 |
+
"action": "search",
|
| 350 |
+
"limit": 3
|
| 351 |
+
})
|
| 352 |
+
if web_result.success and web_result.result.get('found'):
|
| 353 |
+
sources_tried.append(('Web', web_result))
|
| 354 |
+
|
| 355 |
+
return {'sources': sources_tried, 'primary_terms': search_terms}
|
| 356 |
+
|
| 357 |
+
def _synthesize_research_results(self, state: GAIAAgentState, results: List, plan: Dict[str, Any],
|
| 358 |
+
total_time: float, total_cost: float) -> AgentResult:
|
| 359 |
+
"""Synthesize results from multi-step research"""
|
| 360 |
+
|
| 361 |
+
# Combine information from all steps
|
| 362 |
+
combined_info = []
|
| 363 |
+
confidence_scores = []
|
| 364 |
+
|
| 365 |
+
for result in results:
|
| 366 |
+
if hasattr(result, 'result'):
|
| 367 |
+
combined_info.append(str(result.result))
|
| 368 |
+
if hasattr(result, 'confidence'):
|
| 369 |
+
confidence_scores.append(result.confidence)
|
| 370 |
+
elif isinstance(result, dict):
|
| 371 |
+
combined_info.append(str(result))
|
| 372 |
+
confidence_scores.append(0.5) # Default confidence
|
| 373 |
+
|
| 374 |
+
# Create synthesis prompt
|
| 375 |
+
synthesis_prompt = f"""
|
| 376 |
+
Based on multi-step research for this question, provide a direct answer:
|
| 377 |
+
|
| 378 |
+
Question: {state.question}
|
| 379 |
+
|
| 380 |
+
Research Plan Type: {plan['question_type']}
|
| 381 |
+
|
| 382 |
+
Research Findings:
|
| 383 |
+
{chr(10).join(f"Step {i+1}: {info}" for i, info in enumerate(combined_info))}
|
| 384 |
+
|
| 385 |
+
Please provide a direct, precise answer based on the research findings.
|
| 386 |
+
"""
|
| 387 |
+
|
| 388 |
+
# Use appropriate model for synthesis
|
| 389 |
+
model_tier = ModelTier.COMPLEX if len(results) > 2 else ModelTier.MAIN
|
| 390 |
+
llm_result = self.llm_client.generate(synthesis_prompt, tier=model_tier, max_tokens=300)
|
| 391 |
+
|
| 392 |
+
avg_confidence = sum(confidence_scores) / len(confidence_scores) if confidence_scores else 0.5
|
| 393 |
+
|
| 394 |
+
if llm_result.success:
|
| 395 |
+
return AgentResult(
|
| 396 |
+
agent_role=AgentRole.WEB_RESEARCHER,
|
| 397 |
+
success=True,
|
| 398 |
+
result=llm_result.response,
|
| 399 |
+
confidence=min(0.85, avg_confidence + 0.1), # Boost for multi-step research
|
| 400 |
+
reasoning=f"Multi-step research completed with {len(results)} steps: {plan['question_type']}",
|
| 401 |
+
tools_used=[],
|
| 402 |
+
model_used=llm_result.model_used,
|
| 403 |
+
processing_time=total_time + llm_result.response_time,
|
| 404 |
+
cost_estimate=total_cost + llm_result.cost_estimate
|
| 405 |
+
)
|
| 406 |
+
else:
|
| 407 |
+
# Fallback to best single result
|
| 408 |
+
best_info = combined_info[0] if combined_info else "Multi-step research completed"
|
| 409 |
+
return AgentResult(
|
| 410 |
+
agent_role=AgentRole.WEB_RESEARCHER,
|
| 411 |
+
success=True,
|
| 412 |
+
result=best_info,
|
| 413 |
+
confidence=avg_confidence,
|
| 414 |
+
reasoning=f"Multi-step research completed, synthesis failed",
|
| 415 |
+
tools_used=[],
|
| 416 |
+
model_used="fallback",
|
| 417 |
+
processing_time=total_time,
|
| 418 |
+
cost_estimate=total_cost
|
| 419 |
+
)
|
| 420 |
+
|
| 421 |
+
def _refine_research_plan(self, question: str, original_plan: Dict[str, Any],
|
| 422 |
+
previous_result: AgentResult) -> Dict[str, Any]:
|
| 423 |
+
"""Refine research plan when initial attempt yields poor results"""
|
| 424 |
+
|
| 425 |
+
refined_plan = original_plan.copy()
|
| 426 |
+
|
| 427 |
+
# Add refinement strategies based on why previous attempt failed
|
| 428 |
+
if previous_result and previous_result.confidence < 0.3:
|
| 429 |
+
# Very low confidence - try different approach
|
| 430 |
+
refined_plan['steps'] = [
|
| 431 |
+
{'action': 'broaden_search', 'details': 'Use broader search terms'},
|
| 432 |
+
{'action': 'alternative_sources', 'details': 'Try different information sources'},
|
| 433 |
+
{'action': 'relaxed_matching', 'details': 'Accept partial matches'}
|
| 434 |
+
]
|
| 435 |
+
elif not previous_result or not previous_result.success:
|
| 436 |
+
# Complete failure - simplify approach
|
| 437 |
+
refined_plan['steps'] = [
|
| 438 |
+
{'action': 'simple_search', 'details': 'Basic web search with key terms'},
|
| 439 |
+
{'action': 'extract_any_info', 'details': 'Extract any relevant information'}
|
| 440 |
+
]
|
| 441 |
+
|
| 442 |
+
refined_plan['refinement_attempt'] = True
|
| 443 |
+
return refined_plan
|
| 444 |
+
|
| 445 |
def _determine_research_strategy(self, question: str, file_name: Optional[str] = None) -> str:
|
| 446 |
"""Determine the best research strategy for the question"""
|
| 447 |
|
src/app.py
CHANGED
|
@@ -10,7 +10,7 @@ import logging
|
|
| 10 |
import time
|
| 11 |
import requests
|
| 12 |
import pandas as pd
|
| 13 |
-
from typing import Optional, Tuple, Dict
|
| 14 |
import tempfile
|
| 15 |
from pathlib import Path
|
| 16 |
import json
|
|
@@ -22,8 +22,9 @@ logging.basicConfig(level=logging.INFO)
|
|
| 22 |
logger = logging.getLogger(__name__)
|
| 23 |
|
| 24 |
# Import our workflow
|
| 25 |
-
from workflow.gaia_workflow import SimpleGAIAWorkflow
|
| 26 |
from models.qwen_client import QwenClient
|
|
|
|
| 27 |
|
| 28 |
# Constants for Unit 4 API
|
| 29 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
@@ -419,6 +420,84 @@ class GAIAAgentApp:
|
|
| 419 |
"Compare the GDP of Japan and Germany in 2023 and tell me the difference",
|
| 420 |
]
|
| 421 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
def check_oauth_scopes(oauth_token: str) -> Dict[str, any]:
|
| 423 |
"""
|
| 424 |
Check what scopes are available with the OAuth token
|
|
@@ -1864,5 +1943,56 @@ def main():
|
|
| 1864 |
|
| 1865 |
interface.launch(**launch_kwargs)
|
| 1866 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1867 |
if __name__ == "__main__":
|
| 1868 |
main()
|
|
|
|
| 10 |
import time
|
| 11 |
import requests
|
| 12 |
import pandas as pd
|
| 13 |
+
from typing import Optional, Tuple, Dict, Any
|
| 14 |
import tempfile
|
| 15 |
from pathlib import Path
|
| 16 |
import json
|
|
|
|
| 22 |
logger = logging.getLogger(__name__)
|
| 23 |
|
| 24 |
# Import our workflow
|
| 25 |
+
from workflow.gaia_workflow import SimpleGAIAWorkflow, create_gaia_workflow
|
| 26 |
from models.qwen_client import QwenClient
|
| 27 |
+
from models.gaia_state import GAIAAgentState
|
| 28 |
|
| 29 |
# Constants for Unit 4 API
|
| 30 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
|
| 420 |
"Compare the GDP of Japan and Germany in 2023 and tell me the difference",
|
| 421 |
]
|
| 422 |
|
| 423 |
+
def process_with_langgraph(self, question: str, question_id: str = None) -> Dict[str, Any]:
|
| 424 |
+
"""
|
| 425 |
+
Process question using enhanced LangGraph workflow with multi-phase planning
|
| 426 |
+
"""
|
| 427 |
+
try:
|
| 428 |
+
logger.info(f"📝 Processing question with enhanced LangGraph workflow: {question[:100]}...")
|
| 429 |
+
|
| 430 |
+
# Create enhanced state with proper initialization
|
| 431 |
+
state = GAIAAgentState(
|
| 432 |
+
question=question,
|
| 433 |
+
question_id=question_id,
|
| 434 |
+
file_name=None, # File handling would be added here if needed
|
| 435 |
+
file_content=None
|
| 436 |
+
)
|
| 437 |
+
|
| 438 |
+
# Create enhanced workflow with multi-step planning
|
| 439 |
+
workflow = create_gaia_workflow(self.llm_client, self.tools)
|
| 440 |
+
|
| 441 |
+
logger.info("🚀 Starting enhanced multi-phase workflow execution")
|
| 442 |
+
|
| 443 |
+
# Execute workflow with enhanced planning and refinement
|
| 444 |
+
result_state = workflow.invoke(state)
|
| 445 |
+
|
| 446 |
+
# Extract enhanced results
|
| 447 |
+
processing_details = {
|
| 448 |
+
"steps": result_state.processing_steps,
|
| 449 |
+
"agents_used": [r.agent_role.value for r in result_state.agent_results],
|
| 450 |
+
"router_analysis": getattr(result_state, 'router_analysis', {}),
|
| 451 |
+
"agent_sequence": getattr(result_state, 'agent_sequence', []),
|
| 452 |
+
"total_steps": len(result_state.processing_steps),
|
| 453 |
+
"refinement_attempted": getattr(result_state, 'refinement_attempted', False)
|
| 454 |
+
}
|
| 455 |
+
|
| 456 |
+
# Calculate enhanced confidence based on multi-agent results
|
| 457 |
+
if result_state.agent_results:
|
| 458 |
+
confidences = [r.confidence for r in result_state.agent_results]
|
| 459 |
+
avg_confidence = sum(confidences) / len(confidences)
|
| 460 |
+
max_confidence = max(confidences)
|
| 461 |
+
# Boost confidence for multi-agent consensus
|
| 462 |
+
enhanced_confidence = min(0.95, (avg_confidence + max_confidence) / 2)
|
| 463 |
+
else:
|
| 464 |
+
enhanced_confidence = 0.1
|
| 465 |
+
|
| 466 |
+
return {
|
| 467 |
+
"answer": result_state.final_answer or "Unable to determine answer",
|
| 468 |
+
"confidence": enhanced_confidence,
|
| 469 |
+
"reasoning": result_state.synthesis_reasoning or "Multi-phase processing completed",
|
| 470 |
+
"cost": result_state.total_cost,
|
| 471 |
+
"processing_time": time.time() - result_state.start_time,
|
| 472 |
+
"processing_details": processing_details,
|
| 473 |
+
"agent_results": [
|
| 474 |
+
{
|
| 475 |
+
"agent": r.agent_role.value,
|
| 476 |
+
"success": r.success,
|
| 477 |
+
"confidence": r.confidence,
|
| 478 |
+
"reasoning": r.reasoning[:200] + "..." if len(r.reasoning) > 200 else r.reasoning,
|
| 479 |
+
"processing_time": r.processing_time,
|
| 480 |
+
"cost": r.cost_estimate
|
| 481 |
+
}
|
| 482 |
+
for r in result_state.agent_results
|
| 483 |
+
],
|
| 484 |
+
"errors": result_state.errors
|
| 485 |
+
}
|
| 486 |
+
|
| 487 |
+
except Exception as e:
|
| 488 |
+
error_msg = f"Enhanced LangGraph processing failed: {str(e)}"
|
| 489 |
+
logger.error(error_msg)
|
| 490 |
+
return {
|
| 491 |
+
"answer": "Processing failed with enhanced workflow",
|
| 492 |
+
"confidence": 0.0,
|
| 493 |
+
"reasoning": error_msg,
|
| 494 |
+
"cost": 0.0,
|
| 495 |
+
"processing_time": 0.0,
|
| 496 |
+
"processing_details": {"error": error_msg},
|
| 497 |
+
"agent_results": [],
|
| 498 |
+
"errors": [error_msg]
|
| 499 |
+
}
|
| 500 |
+
|
| 501 |
def check_oauth_scopes(oauth_token: str) -> Dict[str, any]:
|
| 502 |
"""
|
| 503 |
Check what scopes are available with the OAuth token
|
|
|
|
| 1943 |
|
| 1944 |
interface.launch(**launch_kwargs)
|
| 1945 |
|
| 1946 |
+
def process_question_with_gaia_agent(question_text: str, question_id: str = None,
|
| 1947 |
+
file_name: str = None, file_content: bytes = None) -> Dict[str, Any]:
|
| 1948 |
+
"""
|
| 1949 |
+
Process a GAIA question using enhanced multi-phase planning workflow
|
| 1950 |
+
"""
|
| 1951 |
+
try:
|
| 1952 |
+
logger.info(f"📝 Processing GAIA question with enhanced workflow: {question_text[:100]}...")
|
| 1953 |
+
|
| 1954 |
+
# Create GAIA agent with enhanced capabilities
|
| 1955 |
+
llm_client = QwenClient()
|
| 1956 |
+
gaia_agent = GAIAAgentApp.create_with_qwen_client(llm_client)
|
| 1957 |
+
|
| 1958 |
+
# Use enhanced LangGraph workflow with multi-step planning
|
| 1959 |
+
result = gaia_agent.process_with_langgraph(question_text, question_id)
|
| 1960 |
+
|
| 1961 |
+
# Enhanced result formatting for GAIA compliance
|
| 1962 |
+
enhanced_result = {
|
| 1963 |
+
"question_id": question_id or "unknown",
|
| 1964 |
+
"question": question_text,
|
| 1965 |
+
"answer": result["answer"],
|
| 1966 |
+
"confidence": result["confidence"],
|
| 1967 |
+
"reasoning": result["reasoning"],
|
| 1968 |
+
"cost_estimate": result["cost"],
|
| 1969 |
+
"processing_time": result["processing_time"],
|
| 1970 |
+
"workflow_type": "enhanced_multi_phase",
|
| 1971 |
+
"processing_details": result["processing_details"],
|
| 1972 |
+
"agent_results": result["agent_results"],
|
| 1973 |
+
"success": len(result["errors"]) == 0,
|
| 1974 |
+
"error_messages": result["errors"]
|
| 1975 |
+
}
|
| 1976 |
+
|
| 1977 |
+
return enhanced_result
|
| 1978 |
+
|
| 1979 |
+
except Exception as e:
|
| 1980 |
+
error_msg = f"Enhanced GAIA processing failed: {str(e)}"
|
| 1981 |
+
logger.error(error_msg)
|
| 1982 |
+
return {
|
| 1983 |
+
"question_id": question_id or "unknown",
|
| 1984 |
+
"question": question_text,
|
| 1985 |
+
"answer": "Enhanced processing failed",
|
| 1986 |
+
"confidence": 0.0,
|
| 1987 |
+
"reasoning": error_msg,
|
| 1988 |
+
"cost_estimate": 0.0,
|
| 1989 |
+
"processing_time": 0.0,
|
| 1990 |
+
"workflow_type": "enhanced_multi_phase_failed",
|
| 1991 |
+
"processing_details": {"error": error_msg},
|
| 1992 |
+
"agent_results": [],
|
| 1993 |
+
"success": False,
|
| 1994 |
+
"error_messages": [error_msg]
|
| 1995 |
+
}
|
| 1996 |
+
|
| 1997 |
if __name__ == "__main__":
|
| 1998 |
main()
|
src/workflow/gaia_workflow.py
CHANGED
|
@@ -301,4 +301,242 @@ class SimpleGAIAWorkflow:
|
|
| 301 |
state.final_confidence = 0.0
|
| 302 |
state.final_reasoning = error_msg
|
| 303 |
state.is_complete = True
|
| 304 |
-
return state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
state.final_confidence = 0.0
|
| 302 |
state.final_reasoning = error_msg
|
| 303 |
state.is_complete = True
|
| 304 |
+
return state
|
| 305 |
+
|
| 306 |
+
def create_gaia_workflow(llm_client, tools_dict):
|
| 307 |
+
"""
|
| 308 |
+
Create an enhanced GAIA workflow with multi-phase planning and iterative refinement
|
| 309 |
+
"""
|
| 310 |
+
|
| 311 |
+
# Initialize agents with enhanced capabilities
|
| 312 |
+
router = RouterAgent(llm_client)
|
| 313 |
+
web_researcher = WebResearchAgent(llm_client)
|
| 314 |
+
file_processor = FileProcessorAgent(llm_client)
|
| 315 |
+
reasoning_agent = ReasoningAgent(llm_client)
|
| 316 |
+
synthesizer = SynthesizerAgent(llm_client)
|
| 317 |
+
|
| 318 |
+
# Enhanced workflow nodes with multi-step processing
|
| 319 |
+
def router_node(state: GAIAAgentState) -> GAIAAgentState:
|
| 320 |
+
"""Enhanced router with multi-phase analysis"""
|
| 321 |
+
logger.info("🧭 Router: Starting multi-phase analysis")
|
| 322 |
+
return router.process(state)
|
| 323 |
+
|
| 324 |
+
def web_researcher_node(state: GAIAAgentState) -> GAIAAgentState:
|
| 325 |
+
"""Web researcher with multi-step planning"""
|
| 326 |
+
logger.info("🌐 Web Researcher: Starting enhanced research")
|
| 327 |
+
return web_researcher.process(state)
|
| 328 |
+
|
| 329 |
+
def file_processor_node(state: GAIAAgentState) -> GAIAAgentState:
|
| 330 |
+
"""File processor with step-by-step analysis"""
|
| 331 |
+
logger.info("📁 File Processor: Starting file analysis")
|
| 332 |
+
return file_processor.process(state)
|
| 333 |
+
|
| 334 |
+
def reasoning_agent_node(state: GAIAAgentState) -> GAIAAgentState:
|
| 335 |
+
"""Reasoning agent with systematic approach"""
|
| 336 |
+
logger.info("🧠 Reasoning Agent: Starting analysis")
|
| 337 |
+
return reasoning_agent.process(state)
|
| 338 |
+
|
| 339 |
+
def synthesizer_node(state: GAIAAgentState) -> GAIAAgentState:
|
| 340 |
+
"""Enhanced synthesizer with verification"""
|
| 341 |
+
logger.info("🎯 Synthesizer: Starting GAIA-compliant synthesis")
|
| 342 |
+
return synthesizer.process(state)
|
| 343 |
+
|
| 344 |
+
def should_continue_to_next_agent(state: GAIAAgentState) -> str:
|
| 345 |
+
"""
|
| 346 |
+
Enhanced routing logic that follows the planned agent sequence
|
| 347 |
+
"""
|
| 348 |
+
# Get the planned sequence from router
|
| 349 |
+
agent_sequence = getattr(state, 'agent_sequence', [])
|
| 350 |
+
|
| 351 |
+
if not agent_sequence:
|
| 352 |
+
logger.warning("No agent sequence found, using fallback routing")
|
| 353 |
+
# Fallback to basic routing
|
| 354 |
+
if not state.agent_results:
|
| 355 |
+
return "web_researcher"
|
| 356 |
+
return "synthesizer"
|
| 357 |
+
|
| 358 |
+
# Count how many agents have been executed
|
| 359 |
+
executed_count = len(state.agent_results)
|
| 360 |
+
|
| 361 |
+
# Check if we've executed all planned agents
|
| 362 |
+
if executed_count >= len(agent_sequence):
|
| 363 |
+
return "synthesizer"
|
| 364 |
+
|
| 365 |
+
# Get next agent in sequence
|
| 366 |
+
next_agent = agent_sequence[executed_count]
|
| 367 |
+
|
| 368 |
+
# Map string names to node names
|
| 369 |
+
agent_mapping = {
|
| 370 |
+
'web_researcher': 'web_researcher',
|
| 371 |
+
'file_processor': 'file_processor',
|
| 372 |
+
'reasoning_agent': 'reasoning_agent',
|
| 373 |
+
'synthesizer': 'synthesizer'
|
| 374 |
+
}
|
| 375 |
+
|
| 376 |
+
return agent_mapping.get(next_agent, 'synthesizer')
|
| 377 |
+
|
| 378 |
+
def check_quality_and_refinement(state: GAIAAgentState) -> str:
|
| 379 |
+
"""
|
| 380 |
+
Check if results need refinement before synthesis
|
| 381 |
+
"""
|
| 382 |
+
if not state.agent_results:
|
| 383 |
+
return "synthesizer"
|
| 384 |
+
|
| 385 |
+
# Check overall quality of results
|
| 386 |
+
avg_confidence = sum(r.confidence for r in state.agent_results) / len(state.agent_results)
|
| 387 |
+
|
| 388 |
+
# If confidence is very low and we haven't tried refinement yet
|
| 389 |
+
if avg_confidence < 0.3 and not getattr(state, 'refinement_attempted', False):
|
| 390 |
+
logger.info(f"Low confidence ({avg_confidence:.2f}), attempting refinement")
|
| 391 |
+
state.refinement_attempted = True
|
| 392 |
+
return "refine_approach"
|
| 393 |
+
|
| 394 |
+
return "synthesizer"
|
| 395 |
+
|
| 396 |
+
def refinement_node(state: GAIAAgentState) -> GAIAAgentState:
    """
    Retry with an alternative agent when the first pass produced weak results.

    Consults the router's stored analysis (when present) and re-runs whichever
    of the web researcher / reasoning agent has not yet contributed a result;
    otherwise falls back to one more reasoning pass over the existing results.
    """
    logger.info("🔄 Attempting result refinement")
    state.add_processing_step("Workflow: Attempting refinement due to low confidence")

    # Inspect what the router decided earlier to pick an alternative approach.
    analysis = getattr(state, 'router_analysis', {})
    if analysis:
        # fallback_needed defaults to True so refinement still proceeds when
        # the router did not record an explicit strategy decision.
        wants_fallback = analysis.get('strategy', {}).get('fallback_needed', True)
        if wants_fallback:
            # Prefer the agent that has not run yet: web research first,
            # then reasoning.
            if not any(r.agent_role == AgentRole.WEB_RESEARCHER for r in state.agent_results):
                return web_researcher.process(state)
            if not any(r.agent_role == AgentRole.REASONING_AGENT for r in state.agent_results):
                return reasoning_agent.process(state)

    # Default: one more reasoning pass for additional analysis.
    return reasoning_agent.process(state)
|
| 422 |
+
# Create workflow graph with enhanced routing
|
| 423 |
+
workflow = StateGraph(GAIAAgentState)
|
| 424 |
+
|
| 425 |
+
# Add nodes
|
| 426 |
+
workflow.add_node("router", router_node)
|
| 427 |
+
workflow.add_node("web_researcher", web_researcher_node)
|
| 428 |
+
workflow.add_node("file_processor", file_processor_node)
|
| 429 |
+
workflow.add_node("reasoning_agent", reasoning_agent_node)
|
| 430 |
+
workflow.add_node("refine_approach", refinement_node)
|
| 431 |
+
workflow.add_node("synthesizer", synthesizer_node)
|
| 432 |
+
|
| 433 |
+
# Set entry point
|
| 434 |
+
workflow.set_entry_point("router")
|
| 435 |
+
|
| 436 |
+
# Enhanced routing edges
|
| 437 |
+
workflow.add_conditional_edges(
|
| 438 |
+
"router",
|
| 439 |
+
should_continue_to_next_agent,
|
| 440 |
+
{
|
| 441 |
+
"web_researcher": "web_researcher",
|
| 442 |
+
"file_processor": "file_processor",
|
| 443 |
+
"reasoning_agent": "reasoning_agent",
|
| 444 |
+
"synthesizer": "synthesizer"
|
| 445 |
+
}
|
| 446 |
+
)
|
| 447 |
+
|
| 448 |
+
# Progressive routing with quality checks
|
| 449 |
+
workflow.add_conditional_edges(
|
| 450 |
+
"web_researcher",
|
| 451 |
+
should_continue_to_next_agent,
|
| 452 |
+
{
|
| 453 |
+
"file_processor": "file_processor",
|
| 454 |
+
"reasoning_agent": "reasoning_agent",
|
| 455 |
+
"synthesizer": "synthesizer",
|
| 456 |
+
"refine_approach": "refine_approach"
|
| 457 |
+
}
|
| 458 |
+
)
|
| 459 |
+
|
| 460 |
+
workflow.add_conditional_edges(
|
| 461 |
+
"file_processor",
|
| 462 |
+
should_continue_to_next_agent,
|
| 463 |
+
{
|
| 464 |
+
"web_researcher": "web_researcher",
|
| 465 |
+
"reasoning_agent": "reasoning_agent",
|
| 466 |
+
"synthesizer": "synthesizer",
|
| 467 |
+
"refine_approach": "refine_approach"
|
| 468 |
+
}
|
| 469 |
+
)
|
| 470 |
+
|
| 471 |
+
workflow.add_conditional_edges(
|
| 472 |
+
"reasoning_agent",
|
| 473 |
+
should_continue_to_next_agent,
|
| 474 |
+
{
|
| 475 |
+
"web_researcher": "web_researcher",
|
| 476 |
+
"file_processor": "file_processor",
|
| 477 |
+
"synthesizer": "synthesizer",
|
| 478 |
+
"refine_approach": "refine_approach"
|
| 479 |
+
}
|
| 480 |
+
)
|
| 481 |
+
|
| 482 |
+
# Quality check before synthesis
|
| 483 |
+
workflow.add_conditional_edges(
|
| 484 |
+
"refine_approach",
|
| 485 |
+
check_quality_and_refinement,
|
| 486 |
+
{
|
| 487 |
+
"synthesizer": "synthesizer",
|
| 488 |
+
"refine_approach": "refine_approach" # Allow multiple refinement attempts
|
| 489 |
+
}
|
| 490 |
+
)
|
| 491 |
+
|
| 492 |
+
# Synthesizer is the final step
|
| 493 |
+
workflow.add_edge("synthesizer", END)
|
| 494 |
+
|
| 495 |
+
return workflow.compile()
|
| 496 |
+
|
| 497 |
+
def create_simple_workflow(llm_client, tools_dict):
    """
    Build a single-node LangGraph workflow that runs the planned agent
    sequence linearly instead of through conditional graph edges.

    Args:
        llm_client: Shared LLM client handed to every agent.
        tools_dict: Tool registry; currently unused here but kept for
            signature compatibility with the complex workflow factory.

    Returns:
        A compiled LangGraph workflow containing one "process" node.
    """
    # Use the same agents as the complex workflow for consistency.
    router = RouterAgent(llm_client)
    web_researcher = WebResearchAgent(llm_client)
    reasoning_agent = ReasoningAgent(llm_client)
    synthesizer = SynthesizerAgent(llm_client)

    def process_with_planning(state: GAIAAgentState) -> GAIAAgentState:
        """Plan with the router, run the planned agents in order, then synthesize."""
        logger.info("🚀 Starting simple workflow with enhanced planning")

        # Step 1: Analyze the question and produce an agent sequence.
        state = router.process(state)

        # Step 2: Execute the planned agents in order.
        agent_sequence = getattr(state, 'agent_sequence', ['web_researcher', 'reasoning_agent'])

        for agent_name in agent_sequence:
            if agent_name == 'web_researcher':
                state = web_researcher.process(state)
            elif agent_name == 'reasoning_agent':
                state = reasoning_agent.process(state)
            elif agent_name == 'synthesizer':
                break  # Synthesizer is handled separately below
            else:
                # Fix: previously unknown names (e.g. 'file_processor') were
                # silently dropped and the confidence check below then
                # re-evaluated a stale result. Warn and skip the stale check.
                logger.warning(f"Simple workflow has no agent '{agent_name}', skipping")
                continue

            # Early exit once an executed agent produced a high-confidence result.
            if state.agent_results and state.agent_results[-1].confidence > 0.8:
                logger.info("High confidence result achieved, proceeding to synthesis")
                break

        # Step 3: Synthesize whatever results were gathered.
        state = synthesizer.process(state)
        return state

    # Wrap the linear procedure in a trivial one-node graph.
    workflow = StateGraph(GAIAAgentState)
    workflow.add_node("process", process_with_planning)
    workflow.set_entry_point("process")
    workflow.add_edge("process", END)

    return workflow.compile()