# SDR-Arena / agents/baseline_agent.py
# Source: behavior-in-the-wild — deployed via the SDR-Arena leaderboard
# (commit f9e2361, verified).
"""
Baseline Agent - Simple single-pass research agent.
Strategy:
1. Ask the LLM to generate search queries from the topic
2. Execute all queries in one batch
3. Feed search results back to the LLM for synthesis
4. Return the synthesized report
"""
from __future__ import annotations

import json
import re
from typing import Any, Optional

from benchmark.interface import BaseResearchAgent, ResearchOutput
from benchmark.websearch import BenchmarkWebSearchClient
class BaselineAgent(BaseResearchAgent):
    """Single-pass research agent: generate queries -> search -> synthesize.

    Makes exactly two LLM calls and one batched web search: one call to
    turn the topic into search queries, one batched search over all
    queries, and one call to synthesize the results into a report.
    """

    @property
    def name(self) -> str:
        return "baseline-single-pass"

    @property
    def description(self) -> str:
        return (
            "Simple single-pass agent: generates search queries from the topic, "
            "executes one batch search, and synthesizes results in a single LLM call."
        )

    @property
    def author(self) -> str:
        return "DR-Bench Team"

    @staticmethod
    def _parse_queries(queries_text: str, topic: str) -> list:
        """Parse the LLM's query-generation response into query strings.

        Handles three response shapes:
        1. A JSON array, possibly wrapped in a markdown ```json fence
           (LLMs frequently add the fence despite instructions not to).
        2. Any other valid JSON value -> the raw text becomes one query.
        3. Non-JSON text -> each non-empty line becomes a query, with
           bullet / numbering / quote decorations removed.

        Returns at most 5 queries; falls back to ``[topic]`` when nothing
        usable was parsed so the batch search is never called empty.
        """
        # Strip a surrounding markdown code fence before JSON parsing.
        cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", queries_text.strip())
        try:
            parsed = json.loads(cleaned)
            if isinstance(parsed, list):
                # Coerce non-string items (the model may emit numbers/objects).
                queries = [str(q).strip() for q in parsed]
            else:
                # Valid JSON but not an array: treat the raw text as one query.
                queries = [queries_text]
        except json.JSONDecodeError:
            # Line-split fallback. An anchored regex removes a leading
            # "-", "*", or "1." / "1)" marker only when it is a marker,
            # so genuine leading digits (e.g. "2024 SaaS trends") survive —
            # unlike lstrip over a digit character set. Quotes are stripped
            # AFTER the marker so bulleted quoted lines come out clean.
            queries = [
                re.sub(r"^\s*(?:[-*]\s+|\d+[.)]\s+)?", "", line).strip().strip('"').strip("'")
                for line in queries_text.strip().split("\n")
                if line.strip()
            ]
        queries = [q for q in queries if q][:5]
        # Robustness: never return an empty query list.
        return queries or [topic]

    async def research(
        self,
        topic: str,
        llm: Any,
        websearch: BenchmarkWebSearchClient,
        *,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        **kwargs,
    ) -> ResearchOutput:
        """Run the single-pass pipeline and return the synthesized report.

        Args:
            topic: The research request text.
            llm: Async OpenAI-compatible client (``chat.completions.create``).
            websearch: Batched web-search client.
            start_date: Optional lower date bound forwarded to the search.
            end_date: Optional upper date bound forwarded to the search.

        Returns:
            ResearchOutput containing the synthesized report and a log of
            the single batch search performed.
        """
        searches_made: list = []
        # Step 1: Generate search queries (low temperature keeps the
        # query list focused and mostly reproducible).
        query_response = await llm.chat.completions.create(
            model=self.model_name,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a research assistant. Given a research topic, "
                        "generate 3-5 specific web search queries that would help "
                        "gather relevant information. Return ONLY a JSON array of "
                        "query strings, nothing else."
                    ),
                },
                {"role": "user", "content": topic},
            ],
            temperature=0.3,
        )
        queries_text = query_response.choices[0].message.content or "[]"
        queries = self._parse_queries(queries_text, topic)
        # Step 2: One batched search over all queries.
        results = await websearch.search(
            queries=queries, start_date=start_date, end_date=end_date,
        )
        searches_made.append({
            "queries": queries,
            "urls": results.get_all_urls(),
            "num_results": results.total_results,
        })
        # Step 3: Synthesize. Cap the search context so the prompt stays
        # within a bounded size for the model.
        search_context = results.get_all_content()
        if len(search_context) > 50000:
            search_context = search_context[:50000] + "\n\n... [truncated]"
        synthesis_response = await llm.chat.completions.create(
            model=self.model_name,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a business development research assistant. "
                        "Using the provided web search results, respond to the "
                        "user's research request. Be specific, use real facts "
                        "from the search results, and cite sources where possible."
                    ),
                },
                {
                    "role": "user",
                    "content": (
                        f"Research Request:\n{topic}\n\n"
                        f"Web Search Results:\n{search_context}"
                    ),
                },
            ],
            temperature=0.4,
        )
        report = synthesis_response.choices[0].message.content or ""
        return ResearchOutput(report=report, searches_made=searches_made)