File size: 5,231 Bytes
f9e2361
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
"""
BaseResearchAgent - The standard interface for DR-Bench agents.

All agents submitted to the benchmark must implement this interface.
The benchmark framework provides standardized LLM and WebSearch clients
to ensure fair comparison (same model, same search provider for all agents).

The agent's job is purely ORCHESTRATION:
- Deciding what to search for
- How to decompose the research topic
- How to synthesize search results into a final report
- How many iterations/turns to use

Example minimal agent:

    class MyAgent(BaseResearchAgent):
        @property
        def name(self) -> str:
            return "my-simple-agent"

        @property
        def description(self) -> str:
            return "Simple single-pass research agent"

        @property
        def author(self) -> str:
            return "Jane Doe"

        async def research(self, topic, llm, websearch, **kwargs):
            # 1. Generate search queries
            queries_response = await llm.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": f"Generate 3 search queries for: {topic}"}],
            )
            queries = queries_response.choices[0].message.content.split("\\n")

            # 2. Search
            results = await websearch.search(queries[:3], **kwargs)

            # 3. Synthesize
            synthesis = await llm.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": "Synthesize search results into a report."},
                    {"role": "user", "content": f"Topic: {topic}\\n\\nSearch Results:\\n{results}"},
                ],
            )
            return ResearchOutput(
                report=synthesis.choices[0].message.content,
                searches_made=[{"queries": queries[:3]}],
            )
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Optional, TYPE_CHECKING

if TYPE_CHECKING:
    from openai import AsyncOpenAI
    from benchmark.websearch import BenchmarkWebSearchClient


@dataclass
class ResearchOutput:
    """Output from a research agent.

    Returned by BaseResearchAgent.research(); bundles the final report text
    with a record of the searches that were made and any extra metadata the
    agent wants to surface.
    """
    # The final research report text produced by the agent.
    report: str
    # One entry per search round; the dict shape is agent-defined
    # (e.g. {"queries": [...]} as in the module-docstring example).
    searches_made: list[dict[str, Any]] = field(default_factory=list)
    # Free-form agent-specific metadata (e.g. timings, iteration counts).
    metadata: dict[str, Any] = field(default_factory=dict)


class BaseResearchAgent(ABC):
    """
    Abstract base class that every DR-Bench research agent must subclass.

    The benchmark hands each agent:
    - an AsyncOpenAI client wired to the benchmark's standardized LLM,
    - a BenchmarkWebSearchClient wired to the benchmark's Brightdata-based
      search,
    - the model name to pass on every LLM call.

    Required overrides:
    - name: unique identifier (alphanumeric + hyphens)
    - description: short summary of the methodology
    - author: author or team name
    - research(): the core research method

    Agents are free to pull in any Python libraries for their orchestration
    logic (LangGraph, DSPy, hand-rolled code, etc.), provided every LLM call
    goes through the supplied client and every web search goes through the
    supplied websearch client.
    """

    def __init__(self, model_name: str = "gpt-4o"):
        """
        Store the benchmark-assigned model name.

        Args:
            model_name: LLM model identifier supplied by the benchmark.
                        Identical for every agent so comparisons stay fair.
        """
        self.model_name = model_name

    @property
    @abstractmethod
    def name(self) -> str:
        """Agent identifier: alphanumeric plus hyphens (e.g. 'my-agent-v1')."""
        ...

    @property
    @abstractmethod
    def description(self) -> str:
        """One-line summary of the agent's methodology."""
        ...

    @property
    @abstractmethod
    def author(self) -> str:
        """Author name or team."""
        ...

    @abstractmethod
    async def research(
        self,
        topic: str,
        llm: AsyncOpenAI,
        websearch: BenchmarkWebSearchClient,
        *,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        **kwargs: Any,
    ) -> ResearchOutput:
        """
        Investigate *topic* and produce a ResearchOutput.

        Args:
            topic: The research prompt / topic to investigate.
            llm: AsyncOpenAI client — route every LLM call through it:
                 await llm.chat.completions.create(model=self.model_name, ...)
            websearch: WebSearch client — route every web search through it:
                       await websearch.search(queries, start_date=..., end_date=...)
            start_date: Optional search date filter start (YYYY-MM-DD).
            end_date: Optional search date filter end (YYYY-MM-DD).
            **kwargs: Additional benchmark-provided parameters.

        Returns:
            ResearchOutput with the research report and metadata.
        """
        ...

    def get_info(self) -> dict[str, str]:
        """Collect the agent's identifying metadata into a plain dict."""
        info = dict(
            name=self.name,
            description=self.description,
            author=self.author,
            model_name=self.model_name,
        )
        return info