| """ |
| GAIA Benchmark Agent |
| |
| Uses smolagents CodeAgent with Meta-Llama-3.3-70B-Instruct model. |
| Tools: DuckDuckGo web search, Wikipedia search, URL content fetcher. |
| """ |
|
|
| import yaml |
| import os |
| from dotenv import load_dotenv |
| from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool |
| from tools import custom_tools |
|
|
| load_dotenv() |
|
|
|
|
# Text appended verbatim to the stock CodeAgent system prompt. It is runtime
# data sent to the model, so its wording and layout must not be reformatted.
_GAIA_INSTRUCTIONS = """

CRITICAL INSTRUCTIONS FOR GAIA BENCHMARK - YOUR FINAL ANSWER FORMAT:

You MUST call final_answer() with ONLY the exact answer requested. This is non-negotiable.

RULES:
1. final_answer() must contain ONLY the answer - no explanations, no context, no preamble
2. NEVER include "The answer is", "Based on my research", "According to", etc.
3. NEVER include the question or restate what was asked
4. NEVER include units unless the question specifically asks for them
5. NEVER include citations, links, or references in your final answer
6. If asked for a code/abbreviation, give ONLY the code (e.g., "CUB" not "Cuba (CUB)")
7. If asked for a number, give ONLY the number (e.g., "519" not "519 at bats")
8. If asked for a name, give ONLY the name (e.g., "Paris" not "The answer is Paris")
9. For lists: comma-separated, no extra words (e.g., "apple, banana, cherry")
10. Work efficiently - don't loop endlessly. If you have enough information, provide the answer.

EXAMPLES OF CORRECT final_answer() USAGE:
- Question: "What is the IOC code for Cuba?" → final_answer("CUB")
- Question: "How many at bats?" → final_answer("519")
- Question: "What NASA award number?" → final_answer("80GSFC21M0002")
- Question: "What is the capital of France?" → final_answer("Paris")

WRONG (DO NOT DO THIS):
- final_answer("The answer is Paris") ❌
- final_answer("Based on my research, the code is CUB") ❌
- final_answer("519 at bats") ❌
- final_answer("The NASA award number is 80GSFC21MXX00") ❌
"""


def get_gaia_prompt_templates():
    """Load default prompts and add GAIA-specific answer formatting instructions.

    Reads ``prompts/code_agent.yaml`` from the installed ``smolagents``
    package directory and appends the GAIA answer-format rules to its
    ``system_prompt`` entry.

    Returns:
        The parsed template mapping with the extended ``system_prompt``.

    Raises:
        OSError: If the YAML file cannot be opened.
        KeyError: If the loaded templates have no ``system_prompt`` entry.
    """
    import smolagents

    pkg_path = os.path.dirname(smolagents.__file__)
    yaml_path = os.path.join(pkg_path, "prompts", "code_agent.yaml")

    # Decode explicitly as UTF-8: without it, open() falls back to the locale
    # encoding, which can fail or garble non-ASCII text on some platforms.
    with open(yaml_path, "r", encoding="utf-8") as f:
        templates = yaml.safe_load(f)

    templates["system_prompt"] = templates["system_prompt"] + _GAIA_INSTRUCTIONS
    return templates
|
|
|
|
def create_agent(verbosity: int = 1, max_steps: int = 10):
    """Build and return a CodeAgent set up for GAIA benchmark questions.

    Progress is reported on stdout after each stage (model, tools, agent).

    Args:
        verbosity: Passed to ``CodeAgent`` as ``verbosity_level``.
        max_steps: Passed to ``CodeAgent`` as ``max_steps``.

    Returns:
        The constructed ``CodeAgent`` instance.
    """
    model_id = "meta-llama/Llama-3.3-70B-Instruct"

    # Stage 1: the inference-backed language model.
    print(f"🔧 Initializing model: {model_id}")
    model_settings = {
        "model_id": model_id,
        "max_tokens": 2096,
        "temperature": 0.3,
        "timeout": 120,
    }
    llm = InferenceClientModel(**model_settings)
    print("✅ Model initialized")

    # Stage 2: web search first, followed by the project's custom tools.
    print("🛠️ Loading tools...")
    toolbox = [DuckDuckGoSearchTool()] + custom_tools

    print(f"✅ Loaded {len(toolbox)} tools:")
    for entry in toolbox:
        print(f" - {entry.name}")

    # Stage 3: the agent itself, using the GAIA-extended prompt templates.
    print("\n🤖 Creating CodeAgent...")
    gaia_templates = get_gaia_prompt_templates()
    allowed_imports = ["re", "json", "math", "datetime", "collections"]

    agent = CodeAgent(
        model=llm,
        tools=toolbox,
        max_steps=max_steps,
        verbosity_level=verbosity,
        prompt_templates=gaia_templates,
        additional_authorized_imports=allowed_imports,
    )

    print("✅ Agent created")
    print(f" Max steps: {max_steps}")
    print(f" Verbosity: {verbosity}")

    return agent
|
|
|
|
def run_agent_on_question(question: str, agent=None, verbosity: int = 1):
    """Run an agent on a single question and return its answer.

    Args:
        question: The question text handed to ``agent.run``.
        agent: Agent to use. When ``None``, a new one is built with
            ``create_agent(verbosity=verbosity)``.
        verbosity: Only used when a new agent has to be created.

    Returns:
        Whatever ``agent.run`` returns, or the string
        ``"Error running agent: <message>"`` if an exception was raised.
    """
    if agent is None:
        agent = create_agent(verbosity=verbosity)

    rule = "=" * 60

    print(f"\n{rule}")
    print(f"Question: {question}")
    print(f"{rule}\n")

    # Best-effort by design: any failure is reported and returned as text
    # instead of propagating to the caller.
    try:
        answer = agent.run(question)
        print(f"\n{rule}")
        print(f"Final Answer: {answer}")
        print(f"{rule}\n")
        return answer
    except Exception as exc:
        failure = f"Error running agent: {exc!s}"
        print(f"\n❌ {failure}")
        return failure
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: build a verbose agent and ask it one arithmetic question.
    header_rule = "=" * 70
    print("\n" + header_rule)
    print("GAIA Benchmark Agent - Test")
    print(header_rule + "\n")

    agent = create_agent(verbosity=2)
    test_question = "What is 15 multiplied by 23?"
    answer = run_agent_on_question(test_question, agent=agent)
    print("\n✅ Test complete!")
|
|