"""Critic Agent - Evaluates and reviews responses for quality and accuracy"""
from typing import Dict, Any
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from langchain_groq import ChatGroq
from src.tracing import get_langfuse_callback_handler


def load_critic_prompt() -> str:
    """Load the critic prompt from file"""
    try:
        with open("./prompts/critic_prompt.txt", "r", encoding="utf-8") as f:
            return f.read().strip()
    except FileNotFoundError:
        return """You are a specialized critic agent. Evaluate responses for accuracy, completeness, and quality."""


def critic_agent(state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Critic agent that evaluates responses for quality and accuracy
    """
    print("Critic Agent: Evaluating response quality")
    try:
        # Get critic prompt
        critic_prompt = load_critic_prompt()

        # Initialize LLM for criticism
        llm = ChatGroq(model="qwen-qwq-32b", temperature=0.2)

        # Get callback handler for tracing
        callback_handler = get_langfuse_callback_handler()
        callbacks = [callback_handler] if callback_handler else []

        # Build messages
        messages = state.get("messages", [])

        # Get the agent response to evaluate
        agent_response = state.get("agent_response")
        if not agent_response:
            # Find the last AI message
            for msg in reversed(messages):
                if msg.type == "ai":
                    agent_response = msg
                    break

        if not agent_response:
            print("Critic Agent: No response to evaluate")
            # Mark the check as failed so downstream routing sees the same
            # keys as the success and error paths
            return {
                **state,
                "critic_assessment": "No response found to evaluate",
                "quality_pass": False,
                "quality_score": 0,
                "current_step": "verification"
            }

        # Get user query for context
        user_query = None
        for msg in reversed(messages):
            if msg.type == "human":
                user_query = msg.content
                break

        # Build critic messages
        critic_messages = [SystemMessage(content=critic_prompt)]

        # Add evaluation request
        evaluation_request = f"""
Please evaluate the following response:

Original Query: {user_query or "Unknown query"}

Response to Evaluate:
{agent_response.content}

Provide your evaluation following the format specified in your instructions.
"""
        critic_messages.append(HumanMessage(content=evaluation_request))

        # Get critic evaluation
        evaluation = llm.invoke(critic_messages, config={"callbacks": callbacks})

        # Parse evaluation to determine if it passes
        evaluation_text = evaluation.content.lower()
        quality_pass = True
        quality_score = 7  # Default moderate score

        # Simple heuristics for quality assessment
        if "fail" in evaluation_text or "poor" in evaluation_text:
            quality_pass = False
            quality_score = 3
        elif "excellent" in evaluation_text or "outstanding" in evaluation_text:
            quality_score = 9
        elif "good" in evaluation_text:
            quality_score = 7
        elif "issues" in evaluation_text or "problems" in evaluation_text:
            quality_score = 5

        # Add critic evaluation to messages
        updated_messages = messages + [evaluation]

        return {
            **state,
            "messages": updated_messages,
            "critic_assessment": evaluation.content,
            "quality_pass": quality_pass,
            "quality_score": quality_score,
            "current_step": "verification"
        }
    except Exception as e:
        print(f"Critic Agent Error: {e}")
        return {
            **state,
            "critic_assessment": f"Error during evaluation: {e}",
            "quality_pass": False,
            "quality_score": 0,
            "current_step": "verification"
        }
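

# ---------------------------------------------------------------------------
# Illustrative usage sketch, not part of the workflow wiring. It assumes a
# LangGraph-style state dict with a "messages" list (as read above) and that
# GROQ_API_KEY plus Langfuse credentials are configured in the environment so
# ChatGroq and the tracing callback can be created.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    sample_state = {
        "messages": [
            HumanMessage(content="What is the capital of France?"),
            AIMessage(content="The capital of France is Paris."),
        ],
    }
    result = critic_agent(sample_state)
    print("Quality pass:", result.get("quality_pass"))
    print("Quality score:", result.get("quality_score"))
    print("Assessment:", result.get("critic_assessment"))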