|
|
"""Critic Agent - Evaluates and reviews responses for quality and accuracy""" |
|
|
from typing import Dict, Any |
|
|
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage |
|
|
from langchain_groq import ChatGroq |
|
|
from src.tracing import get_langfuse_callback_handler |
|
|
|
|
|
|
|
|
def load_critic_prompt(path: str = "./prompts/critic_prompt.txt") -> str:
    """Load the critic system prompt from a text file.

    Args:
        path: Location of the prompt file. Defaults to the project's
            standard critic prompt location, so existing callers are
            unaffected.

    Returns:
        The stripped file contents, or a built-in fallback prompt when
        the file does not exist (keeps the agent functional if the
        prompts/ directory is missing).
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read().strip()
    except FileNotFoundError:
        return """You are a specialized critic agent. Evaluate responses for accuracy, completeness, and quality."""
|
|
|
|
|
|
|
|
def _score_evaluation(evaluation_text: str) -> tuple:
    """Derive ``(quality_pass, quality_score)`` from keyword cues in the critic's text.

    Heuristic keyword matching, checked in priority order (failure cues win
    over praise cues). Default when no cue matches is a passing score of 7.
    """
    text = evaluation_text.lower()
    if "fail" in text or "poor" in text:
        return False, 3
    if "excellent" in text or "outstanding" in text:
        return True, 9
    if "good" in text:
        return True, 7
    if "issues" in text or "problems" in text:
        return True, 5
    return True, 7


def critic_agent(state: Dict[str, Any]) -> Dict[str, Any]:
    """Critic agent node: evaluate the latest agent response for quality.

    Reads from ``state``:
        messages: conversation history (langchain message objects).
        agent_response: optional message to evaluate; when absent, falls
            back to the most recent AI message in the history.

    Returns a new state dict (original state merged with):
        critic_assessment: the critic LLM's evaluation text, or an error note.
        quality_pass: whether the response passed review.
        quality_score: heuristic 0-9 score derived from the evaluation text.
        current_step: always advanced to "verification".
    """
    print("Critic Agent: Evaluating response quality")

    try:
        critic_prompt = load_critic_prompt()
        llm = ChatGroq(model="qwen-qwq-32b", temperature=0.2)

        # Attach Langfuse tracing only when a handler is configured;
        # otherwise invoke untraced.
        callback_handler = get_langfuse_callback_handler()
        callbacks = [callback_handler] if callback_handler else []

        messages = state.get("messages", [])

        # Prefer an explicitly supplied response; otherwise use the most
        # recent AI message in the conversation history.
        agent_response = state.get("agent_response")
        if not agent_response:
            for msg in reversed(messages):
                if msg.type == "ai":
                    agent_response = msg
                    break

        if not agent_response:
            print("Critic Agent: No response to evaluate")
            return {
                **state,
                "critic_assessment": "No response found to evaluate",
                # BUGFIX: this branch previously omitted quality_pass,
                # leaving downstream consumers with a stale/missing value.
                "quality_pass": False,
                "quality_score": 0,
                "current_step": "verification",
            }

        # The most recent human message supplies context for the evaluation.
        user_query = None
        for msg in reversed(messages):
            if msg.type == "human":
                user_query = msg.content
                break

        evaluation_request = f"""
Please evaluate the following response:

Original Query: {user_query or "Unknown query"}

Response to Evaluate:
{agent_response.content}

Provide your evaluation following the format specified in your instructions.
"""
        critic_messages = [
            SystemMessage(content=critic_prompt),
            HumanMessage(content=evaluation_request),
        ]

        evaluation = llm.invoke(critic_messages, config={"callbacks": callbacks})

        quality_pass, quality_score = _score_evaluation(evaluation.content)

        return {
            **state,
            "messages": messages + [evaluation],
            "critic_assessment": evaluation.content,
            "quality_pass": quality_pass,
            "quality_score": quality_score,
            "current_step": "verification",
        }

    except Exception as e:
        # Boundary handler: never crash the graph — record the failure and
        # advance so downstream nodes can react to quality_pass=False.
        print(f"Critic Agent Error: {e}")
        return {
            **state,
            "critic_assessment": f"Error during evaluation: {e}",
            "quality_pass": False,
            "quality_score": 0,
            "current_step": "verification",
        }