Upload 8 files
Browse files- app.py +375 -0
- configuration.py +166 -0
- graph.py +574 -0
- prompts.py +180 -0
- state.py +72 -0
- supervisor_node.py +406 -0
- tools.py +58 -0
- utils.py +76 -0
app.py
ADDED
|
@@ -0,0 +1,375 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Web application for the Agent Supervisor with GAIA benchmark integration.
|
| 2 |
+
|
| 3 |
+
This module provides a Gradio web interface for interacting with the Agent Supervisor
|
| 4 |
+
and evaluating it against the GAIA benchmark.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import json
|
| 9 |
+
import uuid
|
| 10 |
+
import asyncio
|
| 11 |
+
import requests
|
| 12 |
+
import pandas as pd
|
| 13 |
+
import gradio as gr
|
| 14 |
+
|
| 15 |
+
from typing import Dict, List, Optional
|
| 16 |
+
from langchain_core.messages import HumanMessage
|
| 17 |
+
from langgraph.checkpoint.memory import MemorySaver
|
| 18 |
+
|
| 19 |
+
from react_agent.graph import create_agent_supervisor_graph, get_compiled_graph
|
| 20 |
+
|
| 21 |
+
# --- Constants ---
# Base URL of the HF Agents Course Unit 4 scoring service; exposes the
# /questions, /random-question, and /submit endpoints used below.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 23 |
+
|
| 24 |
+
class GaiaAgent:
    """Agent implementation for the GAIA benchmark using the LangGraph supervisor."""

    def __init__(self, model_name=None, checkpointer=None):
        """Initialize the GAIA agent with LangGraph architecture.

        Args:
            model_name: Optional model name to override the configured default.
            checkpointer: Optional checkpointer for persistence. Defaults to an
                in-memory MemorySaver to avoid SQLite thread-safety issues.
        """
        print("Initializing GaiaAgent...")

        # Imported inside the method to defer the react_agent import until use.
        from react_agent.configuration import Configuration

        # Pull the default model and per-role models from the configuration.
        config = Configuration.from_context()
        default_model = config.model

        # MemorySaver keeps checkpoints in-process, sidestepping SQLite
        # cross-thread access errors seen with file-backed checkpointers.
        # (MemorySaver is imported at module level.)
        if checkpointer is None:
            checkpointer = MemorySaver()
            print("Using in-memory checkpointer to avoid thread safety issues")

        # Create and compile the supervisor graph once per agent instance.
        self.graph = get_compiled_graph(checkpointer=checkpointer)

        # Runtime configuration passed to every graph invocation.
        self.config = {
            "configurable": {
                # Caller-supplied model wins over the configured default.
                "model": model_name if model_name else default_model,
                # Role-specific models from Configuration.
                "researcher_model": config.researcher_model,
                "coder_model": config.coder_model,
                "planner_model": config.planner_model,
                "supervisor_model": config.supervisor_model,
                "critic_model": config.critic_model,
                "final_answer_model": config.final_answer_model,
                # Other execution settings from Configuration.
                "max_search_results": config.max_search_results,
                "recursion_limit": config.recursion_limit,
                "max_iterations": config.max_iterations,
                "allow_agent_to_extract_answers": config.allow_agent_to_extract_answers
            }
        }

        print(f"GaiaAgent initialized successfully with model: {self.config['configurable']['model']}")

    def __call__(self, question: str) -> str:
        """Process a question and return an answer formatted for GAIA benchmark.

        Args:
            question: The GAIA benchmark question text.

        Returns:
            The answer string with any "FINAL ANSWER:" prefix stripped, or an
            error message string if graph invocation fails twice.
        """
        print(f"Agent received question: {question[:100]}...")

        # Fresh thread_id so each question gets its own checkpoint history.
        thread_id = str(uuid.uuid4())
        self.config["configurable"]["thread_id"] = thread_id

        from react_agent.configuration import Configuration
        config = Configuration.from_context()

        # System prompt enforcing the terse GAIA answer format.
        system_prompt = """You are a general AI assistant. Answer the question concisely.
YOUR ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
If asked for a number, don't use commas or units like $ or % unless specified.
If asked for a string, don't use articles or abbreviations (e.g. for cities), and write digits as plain text unless specified otherwise.
Focus on brevity and correctness."""

        # Input state: the question plus per-run configurable overrides.
        input_state = {
            "messages": [HumanMessage(content=question)],
            "configurable": {
                "thread_id": thread_id,
                "system_prompt": system_prompt,
                "model": config.model  # Ensure model is also set in the state
            }
        }

        try:
            # Use invoke instead of stream to limit operations.
            try:
                final_state = self.graph.invoke(input_state, config=self.config)
            except Exception as e:
                # Most commonly a recursion-limit error: retry once with a
                # doubled limit before giving up.
                print(f"Initial invocation failed: {str(e)}")
                self.config["configurable"]["recursion_limit"] = config.recursion_limit * 2
                final_state = self.graph.invoke(input_state, config=self.config)

            # Prefer a non-empty explicit gaia_answer; otherwise fall back to
            # the last message. (Membership test alone would accept an empty
            # or None gaia_answer and discard a usable message answer.)
            if final_state.get("gaia_answer"):
                answer = final_state["gaia_answer"]
            else:
                messages = final_state.get("messages", [])
                answer = messages[-1].content if messages else "No answer generated."

            # Normalize to str before substring checks so non-string answers
            # (e.g. numbers) cannot raise a TypeError.
            answer = str(answer)
            if "FINAL ANSWER:" in answer:
                answer = answer.split("FINAL ANSWER:")[1].strip()

            print(f"Agent returning answer: {answer[:100]}...")
            return answer

        except Exception as e:
            error_msg = f"Error processing question: {str(e)}"
            print(error_msg)
            return error_msg
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, run the GaiaAgent on them, submit answers, and display the results.

    Args:
        profile: The logged-in Hugging Face OAuth profile, or None when not logged in.

    Returns:
        Tuple of (status message string, results DataFrame or None).
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")  # Used to link back to the codebase.

    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        agent = GaiaAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # In the case of an app running as a Hugging Face space, this link points toward your codebase.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except requests.exceptions.JSONDecodeError as e:
        # raise_for_status passed but the body was not valid JSON.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run the Agent on every question, logging answers and failures.
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            answer = agent(question_text)
            # The scoring API requires the "submitted_answer" key.
            answers_payload.append({
                "task_id": task_id,
                "submitted_answer": answer
            })
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Answer": answer
            })
        except Exception as e:
            # A per-question failure is recorded but does not abort the run.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Answer": f"AGENT ERROR: {e}"
            })

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # Build the results table once; every outcome branch below reuses it
    # (previously recreated identically in each of the four except branches).
    results_df = pd.DataFrame(results_log)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        return status_message, results_df
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        return status_message, results_df
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        return status_message, results_df
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
# Function to test a single random question
def test_random_question():
    """Fetch a random question from the API and run the agent on it."""
    random_question_url = f"{DEFAULT_API_URL}/random-question"

    try:
        # Pull one random question from the scoring API.
        resp = requests.get(random_question_url, timeout=15)
        resp.raise_for_status()
        payload = resp.json()

        if not payload:
            return "Error: Received empty response from random question endpoint.", None

        task_id = payload.get("task_id")
        question_text = payload.get("question")
        if not task_id or not question_text:
            return "Error: Invalid question format received.", None

        # Build a fresh agent and answer the single question.
        answer = GaiaAgent()(question_text)

        return "Test completed successfully.", {
            "Task ID": task_id,
            "Question": question_text,
            "Answer": answer,
        }

    except Exception as e:
        return f"Error testing random question: {str(e)}", None
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
# --- Build Gradio Interface using Blocks ---
# Two tabs: a full benchmark run with submission, and a single-question smoke test.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Benchmark Agent Evaluation")
    gr.Markdown(
        """
        **Instructions:**

        1. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, submit answers, and see the score.
        3. Alternatively, click 'Test on Random Question' to test the agent on a single random question.

        ---
        **Note:** Running the agent on all questions may take some time. Please be patient while the agent processes all the questions.
        """
    )

    # Login supplies the OAuth session that run_and_submit_all reads its profile from.
    gr.LoginButton()

    with gr.Tabs():
        with gr.TabItem("Full Evaluation"):
            run_button = gr.Button("Run Evaluation & Submit All Answers")
            status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
            results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

            # NOTE(review): fn takes a gr.OAuthProfile parameter but no inputs
            # are declared here — presumably Gradio injects the profile from
            # the login session; confirm against the Gradio OAuth docs.
            run_button.click(
                fn=run_and_submit_all,
                outputs=[status_output, results_table]
            )

        with gr.TabItem("Test Single Question"):
            test_button = gr.Button("Test on Random Question")
            test_status = gr.Textbox(label="Test Status", lines=2, interactive=False)
            test_result = gr.JSON(label="Question and Answer")

            test_button.click(
                fn=test_random_question,
                outputs=[test_status, test_result]
            )
|
| 351 |
+
|
| 352 |
+
|
| 353 |
+
if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)

    # Surface Space-related environment variables at startup for debugging.
    host_env = os.getenv("SPACE_HOST")
    repo_env = os.getenv("SPACE_ID")

    if host_env:
        print(f"✅ SPACE_HOST found: {host_env}")
        print(f" Runtime URL should be: https://{host_env}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if repo_env:
        # With a SPACE_ID we can print direct links to the hosted repo.
        print(f"✅ SPACE_ID found: {repo_env}")
        print(f" Repo URL: https://huggingface.co/spaces/{repo_env}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{repo_env}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for GAIA Agent Evaluation...")
    demo.launch(debug=True, share=False)
|
configuration.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Define the configurable parameters for the agent supervisor system."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from dataclasses import dataclass, field, fields
|
| 6 |
+
from typing import Annotated
|
| 7 |
+
|
| 8 |
+
from langchain_core.runnables import ensure_config
|
| 9 |
+
from langgraph.config import get_config
|
| 10 |
+
|
| 11 |
+
from react_agent import prompts
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@dataclass(kw_only=True)
class Configuration:
    """The configuration for the agent supervisor system.

    Every field can be overridden at runtime through the "configurable"
    mapping of the LangGraph run config; see ``from_context``.
    """

    # Supervisor configuration
    supervisor_prompt: str = field(
        default=prompts.SUPERVISOR_PROMPT,
        metadata={
            "description": "The system prompt for the supervisor agent. "
            "This prompt guides how the supervisor delegates tasks to worker agents."
        },
    )

    # Planner configuration
    planner_prompt: str = field(
        default=prompts.PLANNER_PROMPT,
        metadata={
            "description": "The system prompt for the planner agent. "
            "This prompt guides how the planner creates structured plans."
        },
    )

    # Critic configuration
    critic_prompt: str = field(
        default=prompts.CRITIC_PROMPT,
        metadata={
            "description": "The system prompt for the critic agent. "
            "This prompt guides how the critic evaluates answers."
        },
    )

    # Worker agents configuration
    researcher_prompt: str = field(
        default=prompts.RESEARCHER_PROMPT,
        metadata={
            "description": "The system prompt for the researcher agent. "
            "This prompt defines the researcher's capabilities and limitations."
        },
    )

    coder_prompt: str = field(
        default=prompts.CODER_PROMPT,
        metadata={
            "description": "The system prompt for the coder agent. "
            "This prompt defines the coder's capabilities and approach to programming tasks."
        },
    )

    # Shared configuration
    system_prompt: str = field(
        default=prompts.SYSTEM_PROMPT,
        metadata={
            "description": "Legacy system prompt for backward compatibility. "
            "This prompt is used when running the agent in non-supervisor mode."
        },
    )

    # LLM Configuration - Default model for backward compatibility
    model: Annotated[str, {"__template_metadata__": {"kind": "llm"}}] = field(
        default="openai/gpt-4o-mini",
        metadata={
            "description": "The default large language model used by the agents (provider/model_name)."
        },
    )

    # Model for the researcher (information gathering) - use powerful model
    researcher_model: Annotated[str, {"__template_metadata__": {"kind": "llm"}}] = field(
        default="openai/gpt-4o-mini",
        metadata={
            "description": "The model used by the researcher agent for gathering information (provider/model_name)."
        },
    )

    # Model for the coder (code execution) - use Claude Sonnet
    coder_model: Annotated[str, {"__template_metadata__": {"kind": "llm"}}] = field(
        default="anthropic/claude-3-5-sonnet-20240620",
        metadata={
            "description": "The model used by the coder agent for programming tasks (provider/model_name)."
        },
    )

    # Model for lightweight reasoning tasks (planner, supervisor, critic)
    planner_model: Annotated[str, {"__template_metadata__": {"kind": "llm"}}] = field(
        default="google_genai/gemini-1.5-flash",
        metadata={
            "description": "The lightweight reasoning model used by the planner, supervisor, and critic (provider/model_name)."
        },
    )

    # Same model used for supervisor and critic (points to planner_model)
    supervisor_model: Annotated[str, {"__template_metadata__": {"kind": "llm"}}] = field(
        default="google_genai/gemini-1.5-flash",
        metadata={
            "description": "The model used by the supervisor for routing (provider/model_name)."
        },
    )

    critic_model: Annotated[str, {"__template_metadata__": {"kind": "llm"}}] = field(
        default="openai/gpt-4o-mini",
        metadata={
            "description": "The model used by the critic for evaluation (provider/model_name)."
        },
    )

    # Model for final answer generation - using Claude for precise formatting
    final_answer_model: Annotated[str, {"__template_metadata__": {"kind": "llm"}}] = field(
        default="anthropic/claude-3-5-sonnet-20240620",
        metadata={
            "description": "The model used for generating the final answers in GAIA benchmark format (provider/model_name)."
        },
    )

    # Tool Configuration
    max_search_results: int = field(
        default=5,
        metadata={
            "description": "The maximum number of search results to return."
        },
    )

    # Execution Configuration
    recursion_limit: int = field(
        default=50,
        metadata={
            "description": "Maximum number of recursion steps allowed in the LangGraph execution."
        },
    )

    max_iterations: int = field(
        default=12,
        metadata={
            "description": "Maximum number of iterations allowed to prevent infinite loops."
        },
    )

    allow_agent_to_extract_answers: bool = field(
        default=True,
        metadata={
            "description": "Whether to allow the agent to extract answers from context when formatting fails."
        },
    )

    @classmethod
    def from_context(cls) -> Configuration:
        """Create a Configuration instance from a RunnableConfig object.

        Reads the active LangGraph run config when called inside a graph
        invocation; when no run context is active (get_config raises
        RuntimeError), falls back to an empty config so every field keeps
        its declared default.
        """
        try:
            config = get_config()
        except RuntimeError:
            # Not inside a LangGraph invocation; use field defaults.
            config = None
        config = ensure_config(config)
        configurable = config.get("configurable") or {}
        # Only pass through keys matching declared, init-able dataclass fields;
        # unknown keys in "configurable" are silently ignored.
        _fields = {f.name for f in fields(cls) if f.init}
        return cls(**{k: v for k, v in configurable.items() if k in _fields})
|
graph.py
ADDED
|
@@ -0,0 +1,574 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Define an Agent Supervisor graph with specialized worker agents.
|
| 2 |
+
|
| 3 |
+
The supervisor routes tasks to specialized agents based on the query type.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from typing import Dict, List, Literal, Optional, Union, Type, cast
|
| 7 |
+
|
| 8 |
+
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
|
| 9 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 10 |
+
from langgraph.graph import StateGraph, START, END
|
| 11 |
+
# Import adjusted for compatibility
|
| 12 |
+
from langgraph.prebuilt import create_react_agent # Try original import path first
|
| 13 |
+
from langgraph.types import Command
|
| 14 |
+
|
| 15 |
+
from react_agent.configuration import Configuration
|
| 16 |
+
from react_agent.state import WORKERS, MEMBERS, ROUTING, VERDICTS, State, Router, Plan, PlanStep, CriticVerdict
|
| 17 |
+
from react_agent.tools import TOOLS, tavily_tool, python_repl_tool
|
| 18 |
+
from react_agent.utils import load_chat_model, format_system_prompt, get_message_text
|
| 19 |
+
from react_agent import prompts
|
| 20 |
+
from react_agent.supervisor_node import supervisor_node
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# Compile-time type definitions
|
| 24 |
+
SupervisorDestinations = Literal["planner", "critic", "researcher", "coder", "final_answer", "__end__"]
|
| 25 |
+
WorkerDestination = Literal["supervisor"]
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# Helper function to check if a message is from a user
|
| 29 |
+
def is_user_message(message):
    """Return True when *message* originates from a user.

    Accepts either a plain dict (OpenAI-style ``{"role": ..., "content": ...}``)
    or a LangChain ``HumanMessage`` object; anything else is not a user message.
    """
    if isinstance(message, dict):
        return message.get("role") == "user"
    # Non-dict messages count as user input only when they are HumanMessages.
    return isinstance(message, HumanMessage)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# Helper function to get message content
|
| 39 |
+
def get_message_content(message):
    """Return the content carried by *message*, or "" when none is present.

    Handles dict-style messages (``{"content": ...}``) as well as LangChain
    message objects exposing a ``.content`` attribute.
    """
    if isinstance(message, dict):
        return message.get("content", "")
    # Object-style messages: fall back to "" when no content attribute exists.
    return getattr(message, "content", "")
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
# --- Planner node ---------------------------------------------------------
|
| 49 |
+
|
| 50 |
+
def planner_node(state: State) -> Command[WorkerDestination]:
    """Planning LLM that creates a step-by-step execution plan.

    Asks the configured planner model for a structured ``Plan`` (an ordered
    list of steps, each naming a worker and an instruction), then hands
    control back to the supervisor with the plan and a reset step index.

    Args:
        state: The current state with messages

    Returns:
        Command to update the state with a plan
    """
    configuration = Configuration.from_context()
    # Use the specific planner model
    planner_llm = load_chat_model(configuration.planner_model)

    # Track steps (loop-protection counter carried through the state)
    steps_taken = state.get("steps_taken", 0)
    steps_taken += 1

    # Get the original user question (the latest user message)
    user_messages = [m for m in state["messages"] if is_user_message(m)]
    original_question = get_message_content(user_messages[-1]) if user_messages else "Help me"

    # Create a chat prompt template with proper formatting
    planner_prompt_template = ChatPromptTemplate.from_messages([
        ("system", prompts.PLANNER_PROMPT),
        ("user", "{question}")
    ])

    # Format the prompt with the necessary variables.
    # The extra keys (workers, worker_options, example_worker_*) fill the
    # matching placeholders inside prompts.PLANNER_PROMPT.
    formatted_messages = planner_prompt_template.format_messages(
        question=original_question,
        system_time=format_system_prompt("{system_time}"),
        workers=", ".join(WORKERS),
        worker_options=", ".join([f'"{w}"' for w in WORKERS]),
        example_worker_1=WORKERS[0] if WORKERS else "researcher",
        example_worker_2=WORKERS[1] if len(WORKERS) > 1 else "coder"
    )

    # Get structured output from the planner model
    plan = planner_llm.with_structured_output(Plan).invoke(formatted_messages)

    # Return with updated state
    return Command(
        goto="supervisor",
        update={
            "plan": plan,
            "current_step_index": 0,  # restart execution at the first plan step
            # Add a message to show the plan was created
            "messages": [
                HumanMessage(
                    content=f"Created plan with {len(plan['steps'])} steps",
                    name="planner"
                )
            ],
            "steps_taken": steps_taken
        }
    )
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# --- Final Answer node -----------------------------------------------------
|
| 109 |
+
|
| 110 |
+
def final_answer_node(state: State) -> Command[Literal["__end__"]]:
    """Generate the final GAIA-formatted answer and terminate the graph.

    Reuses the supervisor's draft answer when it is already in the required
    ``FINAL ANSWER: <concise response>`` shape; otherwise composes an answer
    from the accumulated worker context via the final-answer model.  The
    concise response is then extracted and stored for the benchmark harness.

    Args:
        state: The current state with messages and context

    Returns:
        Command routed to END carrying ``gaia_answer``/``submitted_answer``
        and the ``final_answer_generated`` status.
    """
    import re

    def _extract_gaia(text):
        """Pull the concise response out of a 'FINAL ANSWER: ...' string.

        Returns None when the marker is absent (caller picks the fallback).
        """
        match = re.search(r"FINAL ANSWER:\s*(.*?)(?:\n|$)", text, re.IGNORECASE)
        return match.group(1).strip() if match else None

    configuration = Configuration.from_context()

    # Loop-protection counter carried through the state.
    steps_taken = state.get("steps_taken", 0)
    steps_taken += 1

    # When retries are exhausted the supervisor may already have produced a
    # formatted answer; in that case we must not call the model again.
    retry_exhausted = state.get("retry_exhausted", False)
    draft_answer = state.get("draft_answer")

    gaia_answer = ""

    if retry_exhausted and draft_answer and draft_answer.startswith("FINAL ANSWER:"):
        extracted = _extract_gaia(draft_answer)
        gaia_answer = extracted if extracted is not None else "unknown"
    else:
        # Use the specific final answer model
        final_llm = load_chat_model(configuration.final_answer_model)

        # The latest user message is treated as the original question.
        user_messages = [m for m in state["messages"] if is_user_message(m)]
        original_question = get_message_content(user_messages[-1]) if user_messages else "Help me"

        if draft_answer and draft_answer.startswith("FINAL ANSWER:"):
            # Supervisor already provided a properly formatted answer.
            raw_answer = draft_answer
        else:
            # Compose a prompt for the final answer from the shared context.
            context = state.get("context", {})

            final_prompt = ChatPromptTemplate.from_messages([
                ("system", prompts.FINAL_ANSWER_PROMPT),
                ("user", prompts.FINAL_ANSWER_USER_PROMPT)
            ])

            # Order context so research background precedes calculations,
            # followed by any other workers.
            context_list = []
            if "researcher" in context:
                context_list.append(f"Research information: {context['researcher']}")
            if "coder" in context:
                context_list.append(f"Calculation results: {context['coder']}")
            for worker, content in context.items():
                if worker not in ["researcher", "coder"]:
                    context_list.append(f"{worker.capitalize()}: {content}")

            formatted_messages = final_prompt.format_messages(
                question=original_question,
                context="\n\n".join(context_list)
            )

            raw_answer = final_llm.invoke(formatted_messages).content

        # Normalize to the GAIA format; fall back to the raw model output
        # when no "FINAL ANSWER:" marker is found.
        extracted = _extract_gaia(raw_answer)
        gaia_answer = extracted if extracted is not None else raw_answer

    # Optional last-resort extraction from worker context when the model
    # produced no usable answer (feature-gated by configuration).
    if configuration.allow_agent_to_extract_answers and (not gaia_answer or gaia_answer.lower() in ["unknown", "insufficient information"]):
        context = state.get("context", {})
        from react_agent.supervisor_node import extract_best_answer_from_context
        extracted_answer = extract_best_answer_from_context(context)
        if extracted_answer != "unknown":
            gaia_answer = extracted_answer

    # Set status to "final_answer_generated" to indicate we're done.
    return Command(
        goto=END,
        update={
            "messages": [
                AIMessage(
                    content=f"FINAL ANSWER: {gaia_answer}",
                    name="supervisor"
                )
            ],
            "next": "FINISH",  # Update next to indicate we're done
            "gaia_answer": gaia_answer,  # Store answer in GAIA-compatible format
            "submitted_answer": gaia_answer,  # Store as submitted_answer for GAIA benchmark
            "status": "final_answer_generated",  # Add status to indicate we're complete
            "steps_taken": steps_taken
        }
    )
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
# --- Critic node ----------------------------------------------------------
|
| 223 |
+
|
| 224 |
+
def critic_node(state: State) -> Command[Union[WorkerDestination, SupervisorDestinations]]:
    """Evaluate whether the draft answer fully satisfies the request.

    Runs the critic model with structured output; a CORRECT verdict routes
    to the final-answer node, anything else sends control back to the
    supervisor for another attempt.

    Args:
        state: The current state with messages and draft answer

    Returns:
        Command carrying the critic's verdict and a status message
    """
    configuration = Configuration.from_context()
    # The critic has its own dedicated model.
    critic_llm = load_chat_model(configuration.critic_model)

    # Loop-protection counter carried through the state.
    steps_taken = state.get("steps_taken", 0) + 1

    # The most recent user message is the question under review.
    user_messages = [m for m in state["messages"] if is_user_message(m)]
    if user_messages:
        original_question = get_message_content(user_messages[-1])
    else:
        original_question = "Help me"

    draft_answer = state.get("draft_answer", "No answer provided.")

    # Build and fill the critic prompt.
    prompt = ChatPromptTemplate.from_messages([
        ("system", prompts.CRITIC_PROMPT),
        ("user", prompts.CRITIC_USER_PROMPT),
    ])
    formatted_messages = prompt.format_messages(
        question=original_question,
        answer=draft_answer,
        system_time=format_system_prompt("{system_time}"),
        correct_verdict=VERDICTS[0] if VERDICTS else "CORRECT",
        retry_verdict=VERDICTS[1] if len(VERDICTS) > 1 else "RETRY",
    )

    # Structured verdict from the critic model.
    verdict = critic_llm.with_structured_output(CriticVerdict).invoke(formatted_messages)

    if verdict["verdict"] == VERDICTS[0]:  # CORRECT
        verdict_message = "Answer is complete, accurate, and properly formatted for GAIA."
        goto = "final_answer"
    else:
        verdict_message = f"Answer needs improvement. Reason: {verdict.get('reason', 'Unknown')}"
        goto = "supervisor"

    return Command(
        goto=goto,
        update={
            "critic_verdict": verdict,
            "messages": [
                HumanMessage(content=verdict_message, name="critic")
            ],
            "steps_taken": steps_taken,
        },
    )
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
# --- Worker agent factory -------------------------------------------------
|
| 291 |
+
|
| 292 |
+
def create_worker_node(worker_type: str):
    """Factory function to create a worker node of the specified type.

    Builds a ReAct agent for the worker (model, prompt, and tools chosen by
    ``worker_type``) and returns a closure that runs it inside the graph.

    Args:
        worker_type: The type of worker to create (must be in WORKERS)

    Returns:
        A function that processes requests for the specified worker type

    Raises:
        ValueError: If ``worker_type`` is not listed in WORKERS.
    """
    if worker_type not in WORKERS:
        raise ValueError(f"Unknown worker type: {worker_type}")

    configuration = Configuration.from_context()

    # Select the appropriate model for each worker type
    if worker_type == "researcher":
        llm = load_chat_model(configuration.researcher_model)
        worker_prompt = prompts.RESEARCHER_PROMPT
        worker_tools = [tavily_tool]
    elif worker_type == "coder":
        llm = load_chat_model(configuration.coder_model)
        worker_prompt = prompts.CODER_PROMPT
        worker_tools = [python_repl_tool]
    else:
        # Default case: look up a "<TYPE>_PROMPT" constant, fall back to the
        # generic system prompt, and expose every tool.
        llm = load_chat_model(configuration.model)
        worker_prompt = getattr(prompts, f"{worker_type.upper()}_PROMPT", prompts.SYSTEM_PROMPT)
        worker_tools = TOOLS

    # Create the agent (captured once per factory call and reused by the node)
    worker_agent = create_react_agent(
        llm,
        tools=worker_tools,
        prompt=format_system_prompt(worker_prompt)
    )

    # Define node function
    def worker_node(state: State) -> Command[WorkerDestination]:
        """Process requests using the specified worker.

        Args:
            state: The current conversation state

        Returns:
            Command to return to supervisor with results
        """
        # Track steps (loop-protection counter carried through the state)
        steps_taken = state.get("steps_taken", 0)
        steps_taken += 1

        # Get the last message from the supervisor, which contains our task
        task_message = None
        if state.get("messages"):
            for msg in reversed(state["messages"]):
                if hasattr(msg, "name") and msg.name == "supervisor":
                    task_message = msg
                    break

        # No supervisor task found: report the problem and hand control back.
        if not task_message:
            return Command(
                goto="supervisor",
                update={
                    "messages": [
                        HumanMessage(
                            content=f"Error: No task message found for {worker_type}",
                            name=worker_type
                        )
                    ],
                    "steps_taken": steps_taken
                }
            )

        # Create a new state with just the relevant messages for this worker
        # This prevents confusion from unrelated parts of the conversation
        agent_input = {
            "messages": [
                # Include the first user message for context
                state["messages"][0] if state["messages"] else HumanMessage(content="Help me"),
                # Include the task message
                task_message
            ]
        }

        # Invoke the agent with the clean input
        result = worker_agent.invoke(agent_input)

        # Extract the result from the agent response
        result_content = extract_worker_result(worker_type, result, state)

        # Store the worker's result in shared context (copy to avoid mutating
        # the prior state in place)
        context_update = state.get("context", {}).copy()
        context_update[worker_type] = result_content

        # Store in worker_results history
        worker_results = state.get("worker_results", {}).copy()
        if worker_type not in worker_results:
            worker_results[worker_type] = []
        worker_results[worker_type].append(result_content)

        # Increment the step index after worker completes
        current_step_index = state.get("current_step_index", 0)

        return Command(
            update={
                "messages": [
                    HumanMessage(content=result_content, name=worker_type)
                ],
                "current_step_index": current_step_index + 1,
                "context": context_update,
                "worker_results": worker_results,
                "steps_taken": steps_taken
            },
            goto="supervisor",
        )

    return worker_node
|
| 408 |
+
|
| 409 |
+
|
| 410 |
+
def extract_worker_result(worker_type: str, result: dict, state: State) -> str:
    """Extract a clean, useful result string from a worker agent's output.

    Handles the differing response shapes of the worker agents: coder output
    is mined for the actual execution result (stdout / "Result:"-style
    markers) rather than the code itself, while long researcher output is
    reduced to its summary section when one exists.

    Args:
        worker_type: The type of worker (researcher or coder)
        result: The raw result from the worker agent (expects a "messages"
            list whose last entry carries the output)
        state: The current state for context (currently unused; kept so the
            caller interface stays stable)

    Returns:
        A cleaned string with the relevant result information
    """
    import re  # single import shared by all branches below

    # Handle empty results
    if not result or "messages" not in result or not result["messages"]:
        return f"No output from {worker_type}"

    # Get the last message from the agent
    last_message = result["messages"][-1]

    # Default to extracting content directly
    if hasattr(last_message, "content") and last_message.content:
        result_content = last_message.content
    else:
        result_content = f"No content from {worker_type}"

    # Special handling based on worker type
    if worker_type == "coder":
        # For coder outputs, extract the actual result values from code execution
        if "```" in result_content:
            # Try to extract stdout from code execution
            stdout_match = re.search(r"Stdout:\s*(.*?)(?:\n\n|$)", result_content, re.DOTALL)
            if stdout_match:
                # Extract the actual execution output, not just the code
                execution_result = stdout_match.group(1).strip()
                if execution_result:
                    # A bare numeric result is returned verbatim
                    if re.match(r"^\d+(\.\d+)?$", execution_result):
                        return execution_result
                    else:
                        return f"Code executed with result: {execution_result}"

            # If we couldn't find stdout, look for "Result:" or similar indicators
            result_match = re.search(r"(?:Result|Output|Answer):\s*(.*?)(?:\n\n|$)", result_content, re.DOTALL)
            if result_match:
                return result_match.group(1).strip()

    elif worker_type == "researcher":
        # For researcher outputs, keep the full detailed response but, when it
        # is very long, prefer an explicit summary/conclusion section
        if len(result_content) > 800:
            summary_match = re.search(r"(?:Summary|Conclusion|To summarize|In summary):(.*?)(?:\n\n|$)",
                                      result_content, re.IGNORECASE | re.DOTALL)
            if summary_match:
                return summary_match.group(1).strip()

    # If no special handling was triggered, return the content as is
    return result_content
|
| 473 |
+
|
| 474 |
+
|
| 475 |
+
# --- Graph assembly -------------------------------------------------------
|
| 476 |
+
|
| 477 |
+
def create_agent_supervisor_graph() -> StateGraph:
    """Create the agent supervisor graph with all nodes and edges.

    Builds (but does not compile) the StateGraph: control nodes
    (planner, supervisor, critic, final_answer) plus one node per entry
    in WORKERS, wired so every worker path funnels back through the
    supervisor.

    Returns:
        The StateGraph builder (uncompiled), so the caller can attach a
        checkpointer at compile time.
    """
    # Initialize the graph with our State type
    builder = StateGraph(State)

    # Add control nodes
    builder.add_node("planner", planner_node)
    builder.add_node("supervisor", supervisor_node)
    builder.add_node("critic", critic_node)
    builder.add_node("final_answer", final_answer_node)

    # Add worker nodes dynamically based on WORKERS list
    for worker_type in WORKERS:
        builder.add_node(worker_type, create_worker_node(worker_type))

    # Define the workflow
    # NOTE(review): several nodes also route dynamically via Command(goto=...)
    # (e.g. critic returns goto="supervisor" or "final_answer"); the static
    # edges below duplicate that routing — confirm the intended LangGraph
    # semantics when both mechanisms are present.
    builder.add_edge(START, "supervisor")
    builder.add_edge("planner", "supervisor")
    builder.add_edge("critic", "supervisor")
    builder.add_edge("critic", "final_answer")  # Add edge from critic to final_answer
    builder.add_edge("final_answer", END)  # Final answer node goes to END
    builder.add_edge("supervisor", END)  # Allow the supervisor to end the workflow

    # Connect all workers to supervisor
    for worker_type in WORKERS:
        builder.add_edge(worker_type, "supervisor")

    # Return the builder, not a compiled graph
    # This allows the caller to compile with a checkpointer
    return builder
|
| 511 |
+
|
| 512 |
+
|
| 513 |
+
# --- Graph instantiation (with flexible checkpointing) -----------------------------
|
| 514 |
+
|
| 515 |
+
def get_compiled_graph(checkpointer=None):
    """Get a compiled graph with optional checkpointer.

    Compiles the supervisor graph and applies the recursion/iteration
    limits from the runtime Configuration.

    Args:
        checkpointer: Optional checkpointer for persistence

    Returns:
        Compiled StateGraph ready for execution
    """
    # Get configuration (supplies recursion/iteration limits below).
    configuration = Configuration.from_context()

    builder = create_agent_supervisor_graph()

    # Compile the graph. The two branches avoid passing an explicit
    # checkpointer=None, which not all langgraph versions accept.
    # (The previously defined should_end/count_steps helpers were dead code —
    # never attached to the graph — and have been removed.)
    if checkpointer:
        graph = builder.compile(
            checkpointer=checkpointer,
            name="Structured Reasoning Loop"
        )
    else:
        graph = builder.compile(
            name="Structured Reasoning Loop"
        )

    # Configure the graph with recursion limit and max iterations to bound
    # runaway supervisor/worker loops.
    graph = graph.with_config({
        "recursion_limit": configuration.recursion_limit,
        "max_iterations": configuration.max_iterations
    })

    return graph
|
| 571 |
+
|
| 572 |
+
|
| 573 |
+
# Initialize a default non-checkpointed graph (for backward compatibility).
# NOTE(review): this runs at import time and calls Configuration.from_context()
# inside get_compiled_graph() — presumably the configuration context is always
# available at import; verify if module import starts failing.
graph = get_compiled_graph()
|
prompts.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""System prompts used by the agent supervisor and worker agents."""
|
| 2 |
+
|
| 3 |
+
from react_agent.state import WORKERS, VERDICTS
|
| 4 |
+
|
| 5 |
+
# --- Supervisor prompt -----------------------------------------------------
|
| 6 |
+
|
| 7 |
+
SUPERVISOR_PROMPT = """You are a supervisor tasked with managing a conversation between the \
|
| 8 |
+
following workers: {workers}. Given the following user request, \
|
| 9 |
+
respond with the worker to act next. Each worker will perform a \
|
| 10 |
+
task and respond with their results and status. When finished, \
|
| 11 |
+
respond with FINISH.
|
| 12 |
+
|
| 13 |
+
System time: {system_time}"""
|
| 14 |
+
|
| 15 |
+
# --- Planner prompt -------------------------------------------------------
|
| 16 |
+
|
| 17 |
+
PLANNER_PROMPT = """**Role**: You are a Planner node in a LangGraph supervisor workflow
|
| 18 |
+
**Goal**: Given the user's original request, create a concise, focused plan that directly answers the question.
|
| 19 |
+
|
| 20 |
+
Requirements:
|
| 21 |
+
1. Output only a JSON object with one key `steps`, whose value is an **ordered list** of at least 1 and at most 3 objects.
|
| 22 |
+
Each object has:
|
| 23 |
+
• `worker` – one of: {worker_options}
|
| 24 |
+
• `instruction` – ≤ 20 words telling that worker what to do
|
| 25 |
+
|
| 26 |
+
2. Your plan MUST:
|
| 27 |
+
• Directly address the user's specific question
|
| 28 |
+
• Include at least one step (never return empty steps)
|
| 29 |
+
• Be focused on finding the exact answer requested, not the process of answering
|
| 30 |
+
• Use researcher for information gathering
|
| 31 |
+
• Use coder for calculations or data analysis if needed
|
| 32 |
+
|
| 33 |
+
3. Common tasks:
|
| 34 |
+
• For factual questions: use researcher to find the specific fact
|
| 35 |
+
• For calculations: use researcher to find data, then coder to calculate
|
| 36 |
+
• For multiple-part questions: break into steps with the right workers
|
| 37 |
+
• Ensure your last step gets the exact answer in the format requested
|
| 38 |
+
|
| 39 |
+
Example:
|
| 40 |
+
```
|
| 41 |
+
{{
|
| 42 |
+
"steps": [
|
| 43 |
+
{{"worker": "{example_worker_1}", "instruction": "Find inflation rate in 2023"}},
|
| 44 |
+
{{"worker": "{example_worker_2}", "instruction": "Compute average of 2019–2023 rates"}}
|
| 45 |
+
]
|
| 46 |
+
}}
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
System time: {system_time}"""
|
| 50 |
+
|
| 51 |
+
# --- Critic prompt --------------------------------------------------------
|
| 52 |
+
|
| 53 |
+
CRITIC_PROMPT = """**Role**: You are a Critic node specializing in GAIA benchmark format validation
|
| 54 |
+
**Goal**: Strictly check if the answer follows GAIA format requirements
|
| 55 |
+
|
| 56 |
+
Requirements:
|
| 57 |
+
1. You will check if the answer:
|
| 58 |
+
• Addresses all parts of the user's question correctly
|
| 59 |
+
• Follows the EXACT required GAIA format: "FINAL ANSWER: [concise response]"
|
| 60 |
+
• Contains ONLY the essential information in the [concise response]:
|
| 61 |
+
- A single number (no commas, no units like $ or % unless specified)
|
| 62 |
+
- A single word or very short phrase
|
| 63 |
+
- A comma-separated list of numbers or strings
|
| 64 |
+
• Has NO explanations, reasoning, or extra text
|
| 65 |
+
• For strings: no articles or abbreviations
|
| 66 |
+
• For numbers: digits only without commas
|
| 67 |
+
|
| 68 |
+
2. If the answer is CORRECT, respond ONLY with this exact JSON:
|
| 69 |
+
• `{{"verdict":"{correct_verdict}"}}`
|
| 70 |
+
|
| 71 |
+
3. If ANY requirement is NOT MET, respond with this JSON including a SPECIFIC reason:
|
| 72 |
+
• `{{"verdict":"{retry_verdict}","reason":"<specific format issue>"}}`
|
| 73 |
+
• IMPORTANT: You MUST provide a substantive reason that clearly explains what's wrong
|
| 74 |
+
• NEVER leave the reason empty or only containing quotes
|
| 75 |
+
|
| 76 |
+
4. Common reason examples:
|
| 77 |
+
• "Answer not formatted as 'FINAL ANSWER: [response]'"
|
| 78 |
+
• "Answer contains explanations instead of just the concise response"
|
| 79 |
+
• "Answer does not address the question about [specific topic]"
|
| 80 |
+
• "Answer contains units when it should just be a number"
|
| 81 |
+
|
| 82 |
+
DO NOT include any text before or after the JSON. Your complete response must be valid JSON that can be parsed.
|
| 83 |
+
|
| 84 |
+
System time: {system_time}"""
|
| 85 |
+
|
| 86 |
+
# --- Critic user prompt ---------------------------------------------------
|
| 87 |
+
|
| 88 |
+
CRITIC_USER_PROMPT = """Original question: {question}
|
| 89 |
+
|
| 90 |
+
Draft answer: {answer}
|
| 91 |
+
|
| 92 |
+
Check if the draft answer follows GAIA format requirements:
|
| 93 |
+
1. Format must be exactly "FINAL ANSWER: [concise response]"
|
| 94 |
+
2. [concise response] must ONLY be:
|
| 95 |
+
- A single number (no commas or units unless specified)
|
| 96 |
+
- A single word or very short phrase
|
| 97 |
+
- A comma-separated list of numbers or strings
|
| 98 |
+
3. NO explanations or additional text is allowed
|
| 99 |
+
4. Strings should not have articles or abbreviations
|
| 100 |
+
5. Numbers should be in digits without commas
|
| 101 |
+
|
| 102 |
+
Does the answer meet these requirements and correctly answer the question?"""
|
| 103 |
+
|
| 104 |
+
# --- Final Answer format for GAIA benchmark -------------------------------
|
| 105 |
+
|
| 106 |
+
FINAL_ANSWER_PROMPT = """You are a response formatter for a GAIA benchmark question.
|
| 107 |
+
|
| 108 |
+
Your only job is to format the final answer in the exact format required: "FINAL ANSWER: [concise response]"
|
| 109 |
+
|
| 110 |
+
Requirements for [concise response]:
|
| 111 |
+
1. Response must ONLY be one of these formats:
|
| 112 |
+
- A single number (no commas, no units like $ or % unless specified)
|
| 113 |
+
- A single word or very short phrase
|
| 114 |
+
- A comma-separated list of numbers or strings
|
| 115 |
+
2. DO NOT include any explanations, reasoning, or extra text
|
| 116 |
+
3. For strings, don't use articles or abbreviations unless specified
|
| 117 |
+
4. For numbers, write digits (not spelled out) without commas
|
| 118 |
+
5. The response should be as concise as possible while being correct
|
| 119 |
+
|
| 120 |
+
Original question: {question}
|
| 121 |
+
|
| 122 |
+
Information available:
|
| 123 |
+
{context}
|
| 124 |
+
|
| 125 |
+
After reviewing the information, extract just the essential answer and output ONLY:
|
| 126 |
+
FINAL ANSWER: [your concise response]
|
| 127 |
+
"""
|
| 128 |
+
|
| 129 |
+
# --- Final Answer user prompt ---------------------------------------------
|
| 130 |
+
|
| 131 |
+
FINAL_ANSWER_USER_PROMPT = """Original question: {question}
|
| 132 |
+
|
| 133 |
+
Information available:
|
| 134 |
+
{context}
|
| 135 |
+
|
| 136 |
+
Remember to output ONLY 'FINAL ANSWER: [your concise response]' with no explanations."""
|
| 137 |
+
|
| 138 |
+
# --- Worker agent prompts -------------------------------------------------
|
| 139 |
+
|
| 140 |
+
RESEARCHER_PROMPT = """You are a research specialist focused on finding information and providing context.
|
| 141 |
+
|
| 142 |
+
Your key responsibilities:
|
| 143 |
+
1. Search for accurate, up-to-date information on any topic
|
| 144 |
+
2. Provide factual knowledge about products, concepts, and terminology
|
| 145 |
+
3. Explain real-world contexts and background information
|
| 146 |
+
4. Identify relevant parameters and variables needed for calculations
|
| 147 |
+
5. Present information clearly with proper citations
|
| 148 |
+
|
| 149 |
+
DO NOT perform complex calculations or coding tasks - these will be handled by the coder agent.
|
| 150 |
+
You MAY provide simple arithmetic or basic formulas to illustrate concepts.
|
| 151 |
+
|
| 152 |
+
Always return information in a structured, organized format that will be useful for the next steps.
|
| 153 |
+
|
| 154 |
+
System time: {system_time}
|
| 155 |
+
"""
|
| 156 |
+
|
| 157 |
+
CODER_PROMPT = """You are a computational specialist focused on calculations, coding, and data analysis.
|
| 158 |
+
|
| 159 |
+
Your key responsibilities:
|
| 160 |
+
1. Write and execute Python code for calculations and data manipulation
|
| 161 |
+
2. Perform precise numerical analyses based on inputs from the researcher
|
| 162 |
+
3. Format results clearly with appropriate units and precision
|
| 163 |
+
4. Use markdown to structure your response with headings and bullet points
|
| 164 |
+
5. Verify calculations through multiple methods when possible
|
| 165 |
+
|
| 166 |
+
Important:
|
| 167 |
+
1. Always include both your calculation process AND final result values
|
| 168 |
+
2. Always clearly state your assumptions when making calculations
|
| 169 |
+
3. Format numerical results with appropriate precision and units
|
| 170 |
+
4. When receiving data from the researcher, acknowledge and build upon it directly
|
| 171 |
+
5. If calculation involves multiple steps or cases, organize them with headings
|
| 172 |
+
|
| 173 |
+
System time: {system_time}
|
| 174 |
+
"""
|
| 175 |
+
|
| 176 |
+
# --- Legacy system prompt (kept for backward compatibility) ---------------
|
| 177 |
+
|
| 178 |
+
SYSTEM_PROMPT = """You are a helpful AI assistant.
|
| 179 |
+
|
| 180 |
+
System time: {system_time}"""
|
state.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Define the state structures for the agent supervisor."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from typing import Dict, List, Literal, Optional, Sequence, Any
|
| 6 |
+
|
| 7 |
+
from langchain_core.messages import AnyMessage
|
| 8 |
+
from langgraph.graph import MessagesState, add_messages
|
| 9 |
+
from typing_extensions import TypedDict, Annotated
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# --- Constants and shared definitions ---------------------------------------

# Define worker types (specialized agents that perform tasks)
WORKERS = ["researcher", "coder"]

# Define all member types (including control nodes)
MEMBERS = WORKERS + ["planner", "critic", "supervisor"]

# Define status/routing options.
# NOTE: other modules index these lists positionally (VERDICTS[0] == CORRECT,
# VERDICTS[1] == RETRY in supervisor_node.py), so the order is part of the
# contract -- do not reorder.
VERDICTS = ["CORRECT", "RETRY"]
ROUTING = ["FINISH"] + WORKERS
OPTIONS = ROUTING + VERDICTS
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# --- Router for supervisor decisions ---------------------------------------

class Router(TypedDict):
    """Determines which worker to route to next or if the task is complete.

    The supervisor returns this structure to navigate the workflow.
    Valid values are defined in the ROUTING list.
    """
    # NOTE(review): unpacking a runtime list into Literal requires Python
    # 3.11+ and is opaque to most static type checkers -- confirm the
    # project's minimum Python version supports this.
    next: Literal[*ROUTING]
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# --- Plan structure for the Planner node -----------------------------------

class PlanStep(TypedDict):
    """A single step in the plan created by the Planner."""
    # Which specialized agent executes this step (one of WORKERS).
    worker: Literal[*WORKERS]
    # Natural-language task description handed to that worker.
    instruction: str
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class Plan(TypedDict):
    """The complete plan produced by the Planner node."""
    # Ordered list of steps; the supervisor walks them via
    # State.current_step_index.
    steps: List[PlanStep]
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
# --- Critic verdict structure ----------------------------------------------

class CriticVerdict(TypedDict):
    """The verdict from the Critic on whether the answer is satisfactory."""
    # One of VERDICTS: "CORRECT" approves the draft, "RETRY" triggers
    # replanning in the supervisor.
    verdict: Literal[*VERDICTS]
    # Human-readable explanation, mainly meaningful for RETRY verdicts.
    reason: Optional[str]
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# --- State for the agent supervisor ----------------------------------------

class State(MessagesState):
    """State for the agent supervisor workflow.

    Extends MessagesState which provides message history tracking.
    Adds fields to track routing information, plan, and critic verdict.
    """
    # Name of the next node to route to (kept for backward compatibility).
    next: str
    # NOTE(review): MessagesState is TypedDict-based, so these class-level
    # "= ..." values are NOT per-instance defaults at runtime; readers in
    # supervisor_node.py consistently use state.get(key, fallback) instead.
    # The mutable {} literals below are therefore never shared between
    # states, but confirm before relying on any default declared here.
    plan: Optional[Plan] = None
    current_step_index: Optional[int] = None
    draft_answer: Optional[str] = None
    critic_verdict: Optional[CriticVerdict] = None
    context: Dict[str, Any] = {}  # Shared context accessible to all agents
    worker_results: Dict[str, List[str]] = {}  # Store results from each worker
|
supervisor_node.py
ADDED
|
@@ -0,0 +1,406 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Supervisor node implementation for the agent supervisor system."""
|
| 2 |
+
|
| 3 |
+
from typing import Dict, List, Literal, Optional, Union, Type, cast
|
| 4 |
+
|
| 5 |
+
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
|
| 6 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 7 |
+
from langgraph.graph import StateGraph, START, END
|
| 8 |
+
from langgraph.types import Command
|
| 9 |
+
|
| 10 |
+
from react_agent.configuration import Configuration
|
| 11 |
+
from react_agent.state import WORKERS, MEMBERS, ROUTING, VERDICTS, State, Router
|
| 12 |
+
from react_agent.utils import load_chat_model, format_system_prompt, get_message_text
|
| 13 |
+
from react_agent import prompts
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# Compile-time type definitions

# Every node the supervisor may hand control to, including the special
# "__end__" terminal node used by LangGraph.
SupervisorDestinations = Literal["planner", "critic", "researcher", "coder", "final_answer", "__end__"]
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def _best_answer_command(
    state: State, notice: str, steps_taken: Optional[int] = None
) -> Command[SupervisorDestinations]:
    """Route to final_answer with the best answer salvageable from context.

    Used when the step budget or retry budget is exhausted.

    Args:
        state: Current workflow state (its "context" is mined for an answer).
        notice: Supervisor message explaining why we are terminating.
        steps_taken: When given, written back into the state update.

    Returns:
        A Command targeting the final_answer node.
    """
    answer = extract_best_answer_from_context(state.get("context", {}))
    update = {
        "messages": [HumanMessage(content=notice, name="supervisor")],
        "draft_answer": f"FINAL ANSWER: {answer}",
        "retry_exhausted": True,  # Flag to indicate we've exhausted retries
    }
    if steps_taken is not None:
        update["steps_taken"] = steps_taken
    return Command(goto="final_answer", update=update)


def _relevant_context_note(state: State, worker: str) -> str:
    """Build a short instruction suffix describing context relevant to `worker`.

    Returns "" when nothing relevant exists; otherwise a
    "\\n\\nRelevant context: ..." suffix summarizing prior worker output.
    """
    if not state.get("context"):
        return ""

    relevant_context = {}

    # For the coder, researcher findings carry parameters and factual data.
    if worker == "coder" and "researcher" in state["context"]:
        relevant_context["researcher"] = state["context"]["researcher"]

    # For the researcher, only short coder results (likely bare numbers) are
    # worth forwarding -- not full code snippets.
    if worker == "researcher" and "coder" in state["context"]:
        coder_content = state["context"]["coder"]
        if len(coder_content) < 100:
            relevant_context["coder"] = coder_content

    context_items = []
    for key, value in relevant_context.items():
        if len(value) > 200:
            # Trim to the first sentence within the first 200 characters.
            summary = value[:200]
            if '.' in summary:
                summary = summary.split('.')[0] + '.'
            context_items.append(f"Previous {key} found: {summary}...")
        else:
            context_items.append(f"Previous {key} found: {value}")

    if not context_items:
        return ""
    return "\n\nRelevant context: " + "\n".join(context_items)


def supervisor_node(state: State) -> Command[SupervisorDestinations]:
    """Supervising LLM that decides which specialized agent should act next.

    Routing logic, in order:
      1. Force a final answer once the step budget or retry budget is spent.
      2. Ask the planner for a plan when none exists (or the plan is empty).
      3. Act on a critic verdict: finish on CORRECT, replan (or fast-path to
         final_answer for pure formatting issues) on RETRY.
      4. Otherwise dispatch the current plan step to its worker, or send the
         compiled draft to the critic when all steps are done.

    Args:
        state: The current state with messages

    Returns:
        Command with routing information
    """
    # Get configuration to use supervisor_model
    configuration = Configuration.from_context()

    # Track steps to prevent infinite loops
    steps_taken = state.get("steps_taken", 0) + 1
    state_updates = {"steps_taken": steps_taken}

    max_retries = 2  # Maximum number of allowed retries

    # Check if we've hit our step limit (buffer of 5 below the hard limit).
    if steps_taken >= configuration.recursion_limit - 5:
        return _best_answer_command(
            state,
            f"Maximum steps ({steps_taken}) reached. Extracting best answer from available information.",
            steps_taken=steps_taken,
        )

    # Safety check - prevent infinite loops after too many retry cycles.
    if state.get("retry_count", 0) > max_retries:
        return _best_answer_command(
            state,
            f"Maximum retries ({max_retries}) reached. Extracting best answer from available information.",
            steps_taken=steps_taken,
        )

    # Check if we need a plan
    if not state.get("plan"):
        return Command(goto="planner", update={**state_updates})

    # Validate that the plan has at least one step
    plan = state.get("plan")
    if not plan.get("steps"):
        # Plan has no steps, go back to planner with explicit instructions
        return Command(
            goto="planner",
            update={
                "messages": [
                    HumanMessage(
                        content="Previous plan had 0 steps. Please create a plan with at least 1 step to solve the user's question.",
                        name="supervisor",
                    )
                ],
                "plan": None,
                **state_updates,
            },
        )

    # Check if we have a critic verdict that requires action
    critic_verdict = state.get("critic_verdict")
    if critic_verdict:
        if critic_verdict.get("verdict") == VERDICTS[0]:  # CORRECT
            # Final answer is approved; generate a polished response before
            # ending.
            return Command(
                goto="final_answer",
                update={
                    "messages": [
                        HumanMessage(
                            content="Answer approved by critic. Generating final response.",
                            name="supervisor",
                        )
                    ]
                },
            )
        elif critic_verdict.get("verdict") == VERDICTS[1]:  # RETRY
            # Read the retry count BEFORE incrementing it.
            current_retry_count = state.get("retry_count", 0)

            # At the retry ceiling: proceed with the best available answer.
            if current_retry_count >= max_retries:
                return _best_answer_command(
                    state,
                    f"Maximum retries ({max_retries}) reached. Proceeding with best available answer.",
                )

            # Keep context and worker results across the replanning cycle.
            context = state.get("context", {})
            worker_results = state.get("worker_results", {})

            # Get the critic's reason for rejection, if any.
            # BUGFIX: the old check compared against a literal double quote
            # ("\"") instead of detecting an effectively-empty reason; treat
            # missing, whitespace-only, or bare-quote reasons as absent.
            reason = critic_verdict.get("reason", "")
            if not reason or reason.strip() in ("", '"'):
                reason = "Answer did not meet format requirements"

            # Check if this is purely a formatting issue.
            # BUGFIX: compare case-insensitively on both sides; previously
            # only `reason` was lowercased, so "FINAL ANSWER" never matched.
            format_issues = [
                "format", "concise", "explanation", "not formatted",
                "instead of just", "contains explanations", "FINAL ANSWER",
            ]
            reason_lower = reason.lower()
            is_format_issue = any(issue.lower() in reason_lower for issue in format_issues)

            # If we already have enough information but the format is wrong,
            # skip replanning and go straight to the final answer.
            # (The old `current_retry_count >= 0` guard was a tautology and
            # has been dropped -- behavior is unchanged.)
            if is_format_issue and has_sufficient_information(state):
                return Command(
                    goto="final_answer",
                    update={
                        "messages": [
                            HumanMessage(
                                content="We have sufficient information but formatting issues. Generating properly formatted answer.",
                                name="supervisor",
                            )
                        ],
                        # Still increment retry count.
                        "retry_count": current_retry_count + 1,
                    },
                )

            # Replan: reset plan-related fields but keep accumulated context.
            next_retry_count = current_retry_count + 1
            return Command(
                goto="planner",
                update={
                    "plan": None,
                    "current_step_index": None,
                    "draft_answer": None,
                    "critic_verdict": None,
                    # Keep the context and worker_results across the retry.
                    "context": context,
                    "worker_results": worker_results,
                    # Store the INCREMENTED retry count.
                    "retry_count": next_retry_count,
                    "messages": [
                        HumanMessage(
                            content=f"Retrying with new plan (retry #{next_retry_count}). Reason: {reason}",
                            name="supervisor",
                        )
                    ],
                },
            )

    # Get the current step from the plan
    plan = state["plan"]
    current_step_index = state.get("current_step_index", 0)

    # Check if we've completed all steps
    if current_step_index >= len(plan["steps"]):
        context = state.get("context", {})

        # Combine the most recent worker outputs as the draft answer.
        worker_sections = [
            f"**{worker.title()}**: {context[worker]}"
            for worker in WORKERS
            if worker in context
        ]
        draft_content = "\n\n".join(worker_sections)

        # Send to the critic for evaluation
        return Command(
            goto="critic",
            update={
                "draft_answer": draft_content,
                "messages": [
                    HumanMessage(
                        content="All steps completed. Evaluating the answer.",
                        name="supervisor",
                    )
                ],
            },
        )

    # Dispatch the current step to its worker.
    current_step = plan["steps"][current_step_index]
    worker = current_step["worker"]
    instruction = current_step["instruction"]

    # Enhance the instruction with relevant prior context, if any.
    enhanced_instruction = instruction + _relevant_context_note(state, worker)

    # Add guidance based on worker type.
    if worker == "coder":
        enhanced_instruction += "\nProvide both your calculation method AND the final result value."
    elif worker == "researcher":
        enhanced_instruction += "\nFocus on gathering factual information related to the task."

    messages_update = [
        HumanMessage(
            content=f"Step {current_step_index + 1}: {enhanced_instruction}",
            name="supervisor",
        )
    ]

    # Cast worker to the destination literal type to satisfy type checking.
    worker_destination = cast(SupervisorDestinations, worker)

    # Move to the appropriate worker.
    return Command(
        goto=worker_destination,
        update={
            "messages": messages_update,
            "next": worker,  # For backward compatibility
            **state_updates,
        },
    )
|
| 309 |
+
|
| 310 |
+
def extract_best_answer_from_context(context):
|
| 311 |
+
"""Extract the best available answer from context.
|
| 312 |
+
|
| 313 |
+
This is a generic function to extract answers from any type of question context.
|
| 314 |
+
It progressively tries different strategies to find a suitable answer.
|
| 315 |
+
|
| 316 |
+
Args:
|
| 317 |
+
context: The state context containing worker outputs
|
| 318 |
+
|
| 319 |
+
Returns:
|
| 320 |
+
Best answer found or "unknown" if nothing suitable is found
|
| 321 |
+
"""
|
| 322 |
+
answer = "unknown"
|
| 323 |
+
|
| 324 |
+
# First check if the coder already provided a properly formatted answer
|
| 325 |
+
if "coder" in context:
|
| 326 |
+
coder_content = context["coder"]
|
| 327 |
+
|
| 328 |
+
# Look for "FINAL ANSWER: X" pattern in the coder output
|
| 329 |
+
import re
|
| 330 |
+
answer_match = re.search(r"FINAL ANSWER:\s*(.*?)(?:\n|$)", coder_content, re.IGNORECASE)
|
| 331 |
+
if answer_match:
|
| 332 |
+
return answer_match.group(1).strip()
|
| 333 |
+
|
| 334 |
+
# If no answer in coder output, check researcher content
|
| 335 |
+
if "researcher" in context:
|
| 336 |
+
researcher_content = context["researcher"]
|
| 337 |
+
|
| 338 |
+
# Look for lists in the researcher content (common pattern)
|
| 339 |
+
import re
|
| 340 |
+
|
| 341 |
+
# Look for bulleted list items
|
| 342 |
+
list_items = re.findall(r"[-•*]\s+([^:\n]+)", researcher_content)
|
| 343 |
+
if list_items:
|
| 344 |
+
# Format as comma-separated list
|
| 345 |
+
answer = ",".join(item.strip() for item in list_items)
|
| 346 |
+
return answer
|
| 347 |
+
|
| 348 |
+
# Look for emphasized/bold items which might be key information
|
| 349 |
+
bold_items = re.findall(r"\*\*([^*]+)\*\*", researcher_content)
|
| 350 |
+
if bold_items:
|
| 351 |
+
# Join the important items as a comma-separated list
|
| 352 |
+
processed_items = []
|
| 353 |
+
for item in bold_items:
|
| 354 |
+
# Remove common filler words and clean up the item
|
| 355 |
+
clean_item = re.sub(r'(^|\s)(a|an|the|is|are|was|were|be|been)(\s|$)', ' ', item)
|
| 356 |
+
clean_item = clean_item.strip()
|
| 357 |
+
if clean_item and len(clean_item) < 30: # Only include reasonably short items
|
| 358 |
+
processed_items.append(clean_item)
|
| 359 |
+
|
| 360 |
+
if processed_items:
|
| 361 |
+
answer = ",".join(processed_items)
|
| 362 |
+
return answer
|
| 363 |
+
|
| 364 |
+
# If we still don't have an answer, try to extract common entities
|
| 365 |
+
combined_content = ""
|
| 366 |
+
for worker_type, content in context.items():
|
| 367 |
+
combined_content += " " + content
|
| 368 |
+
|
| 369 |
+
# Look for numbers in the content
|
| 370 |
+
import re
|
| 371 |
+
numbers = re.findall(r'\b(\d+(?:\.\d+)?)\b', combined_content)
|
| 372 |
+
if numbers:
|
| 373 |
+
answer = numbers[0] # Use the first number found
|
| 374 |
+
|
| 375 |
+
return answer
|
| 376 |
+
|
| 377 |
+
def has_sufficient_information(state):
    """Heuristically decide whether enough worker output exists to answer.

    Args:
        state: The current conversation state (read-only).

    Returns:
        Boolean indicating if we have sufficient information.
    """
    context = state.get("context", {})

    # Both specialists have reported -> almost certainly enough material.
    if "researcher" in context and "coder" in context:
        return True

    # A substantial researcher write-up alone may also suffice.
    if "researcher" in context and len(context["researcher"]) > 150:
        return True

    # Any output containing list/definition markers suggests structured data:
    # "- " / "•" (bullets), "*" (emphasis or bullet), ":" (definition).
    structure_markers = ("- ", "•", "*", ":")
    for content in context.values():
        if content and any(marker in content for marker in structure_markers):
            return True

    return False
|
tools.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""This module provides tools for the agent supervisor.
|
| 2 |
+
|
| 3 |
+
It includes:
|
| 4 |
+
- Web Search: For general web results using Tavily.
|
| 5 |
+
- Python REPL: For executing Python code (Use with caution!).
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from typing import Annotated, List, Any, Callable, Optional, cast
|
| 9 |
+
|
| 10 |
+
# Core Tools & Utilities
|
| 11 |
+
from langchain_core.tools import tool
|
| 12 |
+
|
| 13 |
+
# Experimental Tools (Use with caution)
|
| 14 |
+
from langchain_experimental.utilities import PythonREPL
|
| 15 |
+
|
| 16 |
+
# Use TavilySearchResults from langchain_community like in the notebook
|
| 17 |
+
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 18 |
+
from react_agent.configuration import Configuration
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# Create Tavily tool using configuration from context (more consistent approach)
def create_tavily_tool():
    """Create the Tavily search tool with configuration from context.

    Reads ``max_search_results`` from the active Configuration.

    Returns:
        Configured TavilySearchResults tool
    """
    configuration = Configuration.from_context()
    return TavilySearchResults(max_results=configuration.max_search_results)

# Initialize the tool
# NOTE(review): instantiated at import time, so Configuration.from_context()
# must be usable when this module is first imported -- confirm.
tavily_tool = create_tavily_tool()
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# --- Python REPL Tool ---
# WARNING: Executes arbitrary Python code locally. Be extremely careful
# about exposing this tool, especially in production environments.
# Shared REPL instance used by python_repl_tool below.
repl = PythonREPL()
|
| 39 |
+
|
| 40 |
+
@tool
def python_repl_tool(
    code: Annotated[str, "The python code to execute. Use print(...) to see output."],
):
    """Execute Python code in the shared REPL and report the result.

    If you want to see the output of a value, you should print it out with
    `print(...)`. This is visible to the user.

    Args:
        code: The Python source to run.

    Returns:
        A markdown-formatted summary of the executed code and its stdout,
        or an error description if execution failed.
    """
    try:
        result = repl.run(code)
    except BaseException as e:  # surface ANY failure back to the agent
        return f"Failed to execute. Error: {repr(e)}"
    # Filter out potentially sensitive REPL implementation details.
    # BUGFIX: the previous f-string used "\`" -- an invalid escape sequence
    # that leaked literal backslashes into the markdown code fence; plain
    # ``` is the intended fence.
    result_str = f"Successfully executed:\n```python\n{code}\n```\nStdout: {result}"
    return result_str
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
# --- Tool List ---

# The list of tools available to the agent supervisor.
# Order is not significant; both tools are bound to worker agents elsewhere.
TOOLS: List[Callable[..., Any]] = [tavily_tool, python_repl_tool]
|
utils.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Utility & helper functions."""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
from langchain.chat_models import init_chat_model
|
| 6 |
+
from langchain_core.language_models import BaseChatModel
|
| 7 |
+
from langchain_core.messages import BaseMessage
|
| 8 |
+
import asyncio
|
| 9 |
+
from datetime import UTC, datetime
|
| 10 |
+
from react_agent.state import WORKERS, MEMBERS, ROUTING, VERDICTS
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# Load environment variables from .env file
|
| 14 |
+
load_dotenv()
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def get_message_text(msg: BaseMessage) -> str:
    """Get the text content of a message.

    Handles plain string content, dict content with a ``"text"`` key, and
    list content mixing strings and dicts (multi-part message format).
    """
    content = msg.content
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        return content.get("text", "")
    # Multi-part content: concatenate the textual pieces.
    parts = []
    for piece in content:
        if isinstance(piece, str):
            parts.append(piece)
        else:
            parts.append(piece.get("text") or "")
    return "".join(parts).strip()
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def format_system_prompt(prompt_template: str) -> str:
    """Format a system prompt template with current system time and available agents.

    Supported placeholders: ``system_time``, ``workers``, ``members``,
    ``worker_options``, ``example_worker_1``, ``example_worker_2``,
    ``correct_verdict``, ``retry_verdict``.

    Args:
        prompt_template: The prompt template to format

    Returns:
        The formatted prompt with system time and agent information
    """
    substitutions = {
        "system_time": datetime.now(tz=UTC).isoformat(),
        "workers": ", ".join(WORKERS),
        "members": ", ".join(MEMBERS),
        "worker_options": ", ".join(f'"{w}"' for w in WORKERS),
        # Example workers/verdicts fall back to sane names if the shared
        # lists are ever shortened.
        "example_worker_1": WORKERS[0] if WORKERS else "researcher",
        "example_worker_2": WORKERS[1] if len(WORKERS) > 1 else "coder",
        "correct_verdict": VERDICTS[0] if VERDICTS else "CORRECT",
        "retry_verdict": VERDICTS[1] if len(VERDICTS) > 1 else "RETRY",
    }
    return prompt_template.format(**substitutions)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def load_chat_model(fully_specified_name: str) -> BaseChatModel:
    """Load a chat model from a fully specified name.

    Args:
        fully_specified_name (str): String in the format 'provider/model'.

    Returns:
        An initialized chat model instance.

    Raises:
        ValueError: If the provider is 'google_genai' but GOOGLE_API_KEY
            is not set in the environment.
    """
    # Split only on the first '/': model names may themselves contain '/'.
    provider, model = fully_specified_name.split("/", maxsplit=1)

    # Special handling for Google Genai models to ensure they're configured for async
    if provider == "google_genai":
        # Imported lazily so the dependency is only needed for this provider.
        from langchain_google_genai import ChatGoogleGenerativeAI

        # Make sure we have the API key
        if not os.environ.get("GOOGLE_API_KEY"):
            raise ValueError("GOOGLE_API_KEY environment variable is required for google_genai models")

        return ChatGoogleGenerativeAI(model=model)
    else:
        return init_chat_model(model, model_provider=provider)
|