Spaces:
Sleeping
Sleeping
| """ | |
| app/nodes/evaluation.py β CHECKPOINT 7: Evaluation | |
| Clean, simple quality scoring with no hardcoded safety logic. | |
| Safety is handled entirely by safety_node (before the graph runs) | |
| and guardrails_node (after the response is generated). | |
| Auto-pass cases: | |
| - tool route: tool outputs are always valid, not prose | |
| - general route: subjective responses (jokes, opinions) can't be scored fairly | |
| """ | |
| from langchain_core.messages import HumanMessage | |
| from app.state import AgentState | |
| from app.utils.llm import llm | |
| from app.config import settings | |
def _extract_score(text: str) -> float:
    """Parse a float score out of an LLM judge reply.

    Tries the whole string first, then individual whitespace/colon-separated
    tokens, so replies like "Score: 0.85" or "0.9." still parse instead of
    falling through to the caller's fallback. Raises ValueError when no
    numeric token is present.
    """
    try:
        return float(text)
    except ValueError:
        pass
    for token in text.replace(":", " ").split():
        try:
            # strip trailing punctuation such as "0.85." or "0.9,"
            return float(token.strip(".,"))
        except ValueError:
            continue
    raise ValueError(f"no numeric score in {text!r}")


def evaluation_node(state: AgentState) -> AgentState:
    """Score the generated response for relevance and quality.

    Only RAG responses are actually scored (via an LLM judge). Tool and
    general routes auto-pass: tool output is structured data, not prose,
    and casual/general replies are too subjective to grade fairly.

    Returns the state updated with:
      - evaluation_score: float clamped to [0.0, 1.0]
      - retry_count: incremented only when the score is below threshold
      - node_log: appended with an evaluation entry
    """
    log = state.get("node_log", [])
    response = state.get("response", "")
    route = state.get("route", "")

    # Tool outputs are structured data, not prose — always pass.
    if route == "tool" or state.get("tool_results"):
        print("[EVAL] Tool response → auto-passed.")
        return {**state, "evaluation_score": 1.0,
                "node_log": log + ["evaluation (tool auto-pass ✓)"]}

    # General/casual — subjective, not scoreable fairly.
    if route == "general":
        print("[EVAL] General route → auto-passed.")
        return {**state, "evaluation_score": 1.0,
                "node_log": log + ["evaluation (general auto-pass ✓)"]}

    # RAG responses — ask the LLM judge for a 0.0–1.0 relevance/quality score.
    eval_prompt = f"""Rate this AI response for relevance and quality on a scale of 0.0 to 1.0.
Return ONLY a float between 0.0 and 1.0 — nothing else.
Query: {state['query']}
Response: {response}
Score:"""

    try:
        raw = llm.invoke([HumanMessage(content=eval_prompt)]).content.strip()
        # Models often ignore "ONLY a float" and wrap the number
        # ("Score: 0.85"); _extract_score salvages those replies instead of
        # silently collapsing every such case into the 0.8 fallback below.
        score = max(0.0, min(1.0, _extract_score(raw)))
    except Exception:
        score = 0.8  # safe fallback: a judge failure shouldn't block the pipeline

    current_retries = state.get("retry_count", 0)
    below = score < settings.EVAL_THRESHOLD
    # Bump the retry counter only when this attempt actually failed.
    new_retries = (current_retries + 1) if below else current_retries

    print(f"[EVAL] Score: {score:.2f} (threshold: {settings.EVAL_THRESHOLD}, retries: {current_retries})")
    return {
        **state,
        "evaluation_score": score,
        "retry_count": new_retries,
        "node_log": log + [f"evaluation (score={score:.2f}, retry={new_retries})"],
    }
def eval_route(state: AgentState) -> str:
    """Conditional-edge router executed after evaluation_node.

    Returns "retry" when the score is below threshold and retries remain,
    or "guardrails" once retries are exhausted.

    NOTE(review): no return is visible here for the passing case
    (score >= EVAL_THRESHOLD) — presumably a final `return "guardrails"`
    follows beyond this excerpt; confirm, otherwise this falls through
    and returns None.
    """
    score = state["evaluation_score"]
    retry_count = state.get("retry_count", 0)
    # Below threshold with retries left — loop back for another attempt.
    if score < settings.EVAL_THRESHOLD and retry_count <= settings.MAX_RETRIES:
        print(f"[EVAL] Score {score:.2f} below threshold β retry {retry_count}/{settings.MAX_RETRIES}")
        return "retry"
    # Below threshold but retries exhausted — give up and continue downstream.
    if score < settings.EVAL_THRESHOLD:
        print(f"[EVAL] Max retries reached β proceeding.")
        return "guardrails"