File size: 1,514 Bytes
20f762e 915b009 20f762e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
"""LLM Judge for sub-iterations."""
from typing import Any
import structlog
from pydantic_ai import Agent
from src.agent_factory.judges import get_model
from src.utils.models import JudgeAssessment
# Module-level structlog logger; the judge in this file emits its events through it.
logger = structlog.get_logger()
class LLMSubIterationJudge:
    """LLM-backed judge for the outcome of a single sub-iteration.

    Wraps a ``pydantic_ai`` ``Agent`` whose structured output type is
    ``JudgeAssessment``; the agent is built once per judge instance and
    reused for every call to :meth:`assess`.
    """

    def __init__(self, api_key: str | None = None) -> None:
        """Create the underlying model and the judging agent.

        Args:
            api_key: Optional BYOK key, forwarded unchanged to ``get_model``.
                Provider auto-detection from the key prefix is reportedly
                done there; that logic is not visible in this file.
        """
        # Instructions given to the agent as its system prompt.
        judge_instructions = """You are a strict judge evaluating a research task.
Evaluate if the result is sufficient to answer the task.
Provide scores and detailed reasoning.
If not sufficient, suggest next steps."""

        self.model = get_model(api_key=api_key)
        self.agent = Agent(
            model=self.model,
            output_type=JudgeAssessment,
            system_prompt=judge_instructions,
            retries=3,
        )

    async def assess(self, task: str, result: Any, history: list[Any]) -> JudgeAssessment:
        """Ask the LLM whether ``result`` is a sufficient answer to ``task``.

        Args:
            task: Description of the research task; sent to the model in full,
                but clipped to 100 characters in the log event.
            result: Result under evaluation. It is stringified and only its
                first 4000 characters are placed in the prompt.
            history: Previous attempts. Only the number of entries is used,
                both in the log event and in the prompt.

        Returns:
            The agent's structured ``JudgeAssessment`` output.

        Raises:
            Anything raised by ``self.agent.run`` propagates to the caller;
            no exception is handled here.
        """
        attempt_count = len(history)
        logger.info("LLM judge assessing result", task=task[:100], history_len=attempt_count)

        prompt = f"""Task: {task}
Current Result:
{str(result)[:4000]}
History of previous attempts: {len(history)}
Evaluate validity and sufficiency."""

        verdict = (await self.agent.run(prompt)).output
        logger.info("LLM judge assessment complete", sufficient=verdict.sufficient)
        return verdict
|