DeepBoner / src /agents /judge_agent_llm.py
VibecoderMcSwaggins's picture
fix(arch): End-to-end BYOK support for unified architecture
915b009
raw
history blame
1.51 kB
"""LLM Judge for sub-iterations."""
from typing import Any
import structlog
from pydantic_ai import Agent
from src.agent_factory.judges import get_model
from src.utils.models import JudgeAssessment
# Module-level structlog logger; LLMSubIterationJudge emits its events through it.
logger = structlog.get_logger()
class LLMSubIterationJudge:
    """Judge that uses an LLM to assess sub-iteration results.

    Wraps a ``pydantic_ai`` ``Agent`` whose output type is ``JudgeAssessment``.
    The tunable values below are class attributes so a subclass can override
    them without re-implementing ``__init__`` or ``assess``.

    Attributes:
        model: Model object returned by ``get_model`` for the supplied key.
        agent: The ``Agent`` instance used to run every assessment.
    """

    # System prompt handed to the agent at construction time.
    SYSTEM_PROMPT = """You are a strict judge evaluating a research task.
Evaluate if the result is sufficient to answer the task.
Provide scores and detailed reasoning.
If not sufficient, suggest next steps."""

    # Value forwarded to the agent's ``retries`` argument.
    RETRIES = 3

    # Maximum number of characters of ``str(result)`` placed in the prompt.
    MAX_RESULT_CHARS = 4000

    # Maximum number of characters of the task text written to the log.
    MAX_LOGGED_TASK_CHARS = 100

    def __init__(self, api_key: str | None = None) -> None:
        """Initialize the judge with optional BYOK key.

        Args:
            api_key: Optional BYOK key, forwarded unchanged to ``get_model``
                (which is expected to auto-detect the provider from the key
                prefix). ``None`` lets ``get_model`` pick its default.
        """
        self.model = get_model(api_key=api_key)
        self.agent = Agent(
            model=self.model,
            output_type=JudgeAssessment,
            system_prompt=self.SYSTEM_PROMPT,
            retries=self.RETRIES,
        )

    async def assess(self, task: str, result: Any, history: list[Any]) -> JudgeAssessment:
        """Assess the result using LLM.

        Args:
            task: Description of the research task being judged. It is sent
                to the model in full; only the log entry is shortened.
            result: Current result to evaluate. It is converted with ``str``
                and cut to ``MAX_RESULT_CHARS`` characters before being
                embedded in the prompt.
            history: Previous attempts. Only the number of entries is
                reported to the model; their contents are not included.

        Returns:
            The ``JudgeAssessment`` produced by the agent run.
        """
        attempts = len(history)
        logger.info(
            "LLM judge assessing result",
            task=task[: self.MAX_LOGGED_TASK_CHARS],
            history_len=attempts,
        )

        # Cap the stringified result so an oversized payload cannot dominate
        # the prompt.
        result_text = str(result)[: self.MAX_RESULT_CHARS]
        prompt = f"""Task: {task}
Current Result:
{result_text}
History of previous attempts: {attempts}
Evaluate validity and sufficiency."""

        run_result = await self.agent.run(prompt)
        assessment = run_result.output
        logger.info("LLM judge assessment complete", sufficient=assessment.sufficient)
        return assessment