# SDR-Arena / agents/baseline_agent.py
# Source: behavior-in-the-wild — deployed via the SDR-Arena leaderboard
# (commit f9e2361, verified).
"""
Baseline Agent - Simple single-pass research agent.
Strategy:
1. Ask the LLM to generate search queries from the topic
2. Execute all queries in one batch
3. Feed search results back to the LLM for synthesis
4. Return the synthesized report
"""
from __future__ import annotations

import json
import re
from typing import Any, Optional

from benchmark.interface import BaseResearchAgent, ResearchOutput
from benchmark.websearch import BenchmarkWebSearchClient
class BaselineAgent(BaseResearchAgent):
    """Single-pass research agent: generate queries -> search -> synthesize.

    Makes exactly two LLM calls and one batched web search: one call to
    turn the topic into search queries, one batched search over all
    queries, and one call to synthesize the results into a report.
    """

    @property
    def name(self) -> str:
        return "baseline-single-pass"

    @property
    def description(self) -> str:
        return (
            "Simple single-pass agent: generates search queries from the topic, "
            "executes one batch search, and synthesizes results in a single LLM call."
        )

    @property
    def author(self) -> str:
        return "DR-Bench Team"

    @staticmethod
    def _parse_queries(queries_text: str, topic: str) -> list:
        """Parse the LLM's query-generation response into query strings.

        Handles three response shapes:
        1. A JSON array, possibly wrapped in a markdown ```json fence
           (LLMs frequently add the fence despite instructions not to).
        2. Any other valid JSON value -> the raw text becomes one query.
        3. Non-JSON text -> each non-empty line becomes a query, with
           bullet / numbering / quote decorations removed.

        Returns at most 5 queries; falls back to ``[topic]`` when nothing
        usable was parsed so the batch search is never called empty.
        """
        # Strip a surrounding markdown code fence before JSON parsing.
        cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", queries_text.strip())
        try:
            parsed = json.loads(cleaned)
            if isinstance(parsed, list):
                # Coerce non-string items (the model may emit numbers/objects).
                queries = [str(q).strip() for q in parsed]
            else:
                # Valid JSON but not an array: treat the raw text as one query.
                queries = [queries_text]
        except json.JSONDecodeError:
            # Line-split fallback. An anchored regex removes a leading
            # "-", "*", or "1." / "1)" marker only when it is a marker,
            # so genuine leading digits (e.g. "2024 SaaS trends") survive —
            # unlike lstrip over a digit character set. Quotes are stripped
            # AFTER the marker so bulleted quoted lines come out clean.
            queries = [
                re.sub(r"^\s*(?:[-*]\s+|\d+[.)]\s+)?", "", line).strip().strip('"').strip("'")
                for line in queries_text.strip().split("\n")
                if line.strip()
            ]
        queries = [q for q in queries if q][:5]
        # Robustness: never return an empty query list.
        return queries or [topic]

    async def research(
        self,
        topic: str,
        llm: Any,
        websearch: BenchmarkWebSearchClient,
        *,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        **kwargs,
    ) -> ResearchOutput:
        """Run the single-pass pipeline and return the synthesized report.

        Args:
            topic: The research request text.
            llm: Async OpenAI-compatible client (``chat.completions.create``).
            websearch: Batched web-search client.
            start_date: Optional lower date bound forwarded to the search.
            end_date: Optional upper date bound forwarded to the search.

        Returns:
            ResearchOutput containing the synthesized report and a log of
            the single batch search performed.
        """
        searches_made: list = []
        # Step 1: Generate search queries (low temperature keeps the
        # query list focused and mostly reproducible).
        query_response = await llm.chat.completions.create(
            model=self.model_name,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a research assistant. Given a research topic, "
                        "generate 3-5 specific web search queries that would help "
                        "gather relevant information. Return ONLY a JSON array of "
                        "query strings, nothing else."
                    ),
                },
                {"role": "user", "content": topic},
            ],
            temperature=0.3,
        )
        queries_text = query_response.choices[0].message.content or "[]"
        queries = self._parse_queries(queries_text, topic)
        # Step 2: One batched search over all queries.
        results = await websearch.search(
            queries=queries, start_date=start_date, end_date=end_date,
        )
        searches_made.append({
            "queries": queries,
            "urls": results.get_all_urls(),
            "num_results": results.total_results,
        })
        # Step 3: Synthesize. Cap the search context so the prompt stays
        # within a bounded size for the model.
        search_context = results.get_all_content()
        if len(search_context) > 50000:
            search_context = search_context[:50000] + "\n\n... [truncated]"
        synthesis_response = await llm.chat.completions.create(
            model=self.model_name,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a business development research assistant. "
                        "Using the provided web search results, respond to the "
                        "user's research request. Be specific, use real facts "
                        "from the search results, and cite sources where possible."
                    ),
                },
                {
                    "role": "user",
                    "content": (
                        f"Research Request:\n{topic}\n\n"
                        f"Web Search Results:\n{search_context}"
                    ),
                },
            ],
            temperature=0.4,
        )
        report = synthesis_response.choices[0].message.content or ""
        return ResearchOutput(report=report, searches_made=searches_made)