| """ | |
| BaseResearchAgent - The standard interface for DR-Bench agents. | |
| All agents submitted to the benchmark must implement this interface. | |
| The benchmark framework provides standardized LLM and WebSearch clients | |
| to ensure fair comparison (same model, same search provider for all agents). | |
| The agent's job is purely ORCHESTRATION: | |
| - Deciding what to search for | |
| - How to decompose the research topic | |
| - How to synthesize search results into a final report | |
| - How many iterations/turns to use | |
| Example minimal agent: | |
| class MyAgent(BaseResearchAgent): | |
| @property | |
| def name(self) -> str: | |
| return "my-simple-agent" | |
| @property | |
| def description(self) -> str: | |
| return "Simple single-pass research agent" | |
| @property | |
| def author(self) -> str: | |
| return "Jane Doe" | |
| async def research(self, topic, llm, websearch, **kwargs): | |
| # 1. Generate search queries | |
| queries_response = await llm.chat.completions.create( | |
| model=self.model_name, | |
| messages=[{"role": "user", "content": f"Generate 3 search queries for: {topic}"}], | |
| ) | |
| queries = queries_response.choices[0].message.content.split("\\n") | |
| # 2. Search | |
| results = await websearch.search(queries[:3], **kwargs) | |
| # 3. Synthesize | |
| synthesis = await llm.chat.completions.create( | |
| model=self.model_name, | |
| messages=[ | |
| {"role": "system", "content": "Synthesize search results into a report."}, | |
| {"role": "user", "content": f"Topic: {topic}\\n\\nSearch Results:\\n{results}"}, | |
| ], | |
| ) | |
| return ResearchOutput( | |
| report=synthesis.choices[0].message.content, | |
| searches_made=[{"queries": queries[:3]}], | |
| ) | |
| """ | |
from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Optional, TYPE_CHECKING

if TYPE_CHECKING:
    from openai import AsyncOpenAI
    from benchmark.websearch import BenchmarkWebSearchClient
@dataclass
class ResearchOutput:
    """Output from a research agent.

    Attributes:
        report: The final research report text.
        searches_made: Record of searches performed, one dict per search
            call (e.g. ``{"queries": [...]}`` per the module example).
        metadata: Free-form extra information about the run.
    """

    report: str
    # field(default_factory=...) requires @dataclass; without it the class had
    # no generated __init__ and these were raw Field objects, so the documented
    # ResearchOutput(report=..., searches_made=...) call would fail.
    searches_made: list[dict[str, Any]] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)
class BaseResearchAgent(ABC):
    """
    Abstract base class for all DR-Bench research agents.

    Agents receive:
    - An AsyncOpenAI client (connected to the benchmark's standardized LLM)
    - A BenchmarkWebSearchClient (connected to the benchmark's Brightdata-based search)
    - The model name to use with the LLM client

    Agents must implement:
    - name: Agent identifier (alphanumeric + hyphens)
    - description: Short description of the methodology
    - author: Author name / team
    - research(): The core research method

    Agents may optionally import any Python libraries they need for their
    orchestration logic (LangGraph, DSPy, custom code, etc.), as long as
    all LLM calls go through the provided client and all web searches go
    through the provided websearch client.
    """

    def __init__(self, model_name: str = "gpt-4o"):
        """
        Initialize the agent.

        Args:
            model_name: The LLM model name to use (provided by the benchmark).
                All agents use the same model for fairness.
        """
        self.model_name = model_name

    # The three metadata accessors and research() are the required interface
    # (see class docstring); they carry @abstractmethod so that incomplete
    # subclasses — and this base class itself — cannot be instantiated.
    # name/description/author are properties because get_info() reads them
    # as plain attributes (self.name, not self.name()).
    @property
    @abstractmethod
    def name(self) -> str:
        """Unique identifier for this agent (alphanumeric + hyphens, e.g. 'my-agent-v1')."""
        ...

    @property
    @abstractmethod
    def description(self) -> str:
        """Short description of the agent's methodology."""
        ...

    @property
    @abstractmethod
    def author(self) -> str:
        """Author name or team."""
        ...

    @abstractmethod
    async def research(
        self,
        topic: str,
        llm: AsyncOpenAI,
        websearch: BenchmarkWebSearchClient,
        *,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        **kwargs: Any,
    ) -> ResearchOutput:
        """
        Run research on a given topic.

        Args:
            topic: The research prompt / topic to investigate.
            llm: AsyncOpenAI client - use for all LLM calls.
                Call with: await llm.chat.completions.create(model=self.model_name, ...)
            websearch: WebSearch client - use for all web searches.
                Call with: await websearch.search(queries, start_date=..., end_date=...)
            start_date: Optional search date filter start (YYYY-MM-DD).
            end_date: Optional search date filter end (YYYY-MM-DD).
            **kwargs: Additional benchmark-provided parameters.

        Returns:
            ResearchOutput with the research report and metadata.
        """
        ...

    def get_info(self) -> dict[str, str]:
        """Return agent metadata: name, description, author, and model name."""
        return {
            "name": self.name,
            "description": self.description,
            "author": self.author,
            "model_name": self.model_name,
        }