# app/graph/nodes/evaluator.py
from app.core.llm_engine import eval_llm
from app.core.prompts.evaluator_prompt import evaluator_prompt
from langchain_core.output_parsers import StrOutputParser
import json, re
# Evaluation pipeline, composed once when this module is imported: the
# evaluator prompt is piped into the evaluation LLM, and the model output is
# piped through StrOutputParser. evaluator_node calls chain.invoke(...) with
# "query", "answer" and "context" keys and strips the returned text.
chain = evaluator_prompt | eval_llm | StrOutputParser()
def _extract_json(text: str) -> dict:
"""Robustly extract JSON from LLM response, handling thinking blocks."""
# ✅ Strip Gemini thinking/reasoning blocks
text = re.sub(r".*?", "", text, flags=re.DOTALL)
text = re.sub(r".*?", "", text, flags=re.DOTALL)
# ✅ Strip markdown code fences
text = re.sub(r"```(?:json)?", "", text)
text = text.strip()
# ✅ Greedy match — finds outermost { ... } correctly
# [^{}]* fails on any nested structure, use .* with DOTALL instead
match = re.search(r"\{.*\}", text, re.DOTALL)
if not match:
raise ValueError(f"No JSON found. Raw: {text[:300]}")
raw_json = match.group(0).strip()
return json.loads(raw_json)
def _fallback_evaluation():
"""Explicit fallback — always returns a valid dict."""
return {
"relevance_score": 0.5,
"context_usage": 0.5,
"hallucination": True,
"route": "rag"
}
# Upper bound on how much of the context is sent to the evaluator prompt.
_EVAL_CONTEXT_LIMIT = 600

# Textual values that must be read as False; bool() alone would treat any
# non-empty string (including "false") as True.
_FALSE_STRINGS = frozenset({"", "0", "false", "f", "no", "n", "none", "null"})


def _clamp_score(value) -> float:
    """Convert *value* to a float in [0, 1], rounded to three decimals.

    Raises TypeError/ValueError when the value is not numeric or is NaN.
    """
    number = float(value)
    if number != number:  # NaN is the only float that is unequal to itself
        raise ValueError("score is NaN")
    return round(min(max(number, 0.0), 1.0), 3)


def _coerce_flag(value) -> bool:
    """Interpret a parsed JSON value as a boolean, honouring textual "false"."""
    if isinstance(value, str):
        return value.strip().lower() not in _FALSE_STRINGS
    return bool(value)


def evaluator_node(state):
    """Attach an "evaluation" dict to the graph state.

    Reads "query", "final_answer", "context" (default "") and "route"
    (default "general") from *state*.

    * When the route is "general" or the context is empty, no model call is
      made: a fixed evaluation (relevance 1.0, context usage None,
      hallucination False, route "general") is attached.
    * Otherwise the evaluator chain is invoked, its reply is parsed as JSON,
      and the scores are clamped to [0, 1]. Any failure along the way is
      printed and replaced by the fallback evaluation.

    Args:
        state: Mapping holding the keys listed above.

    Returns:
        A new dict containing every entry of *state* plus "evaluation".
        The input mapping is not modified.
    """
    query = state.get("query")
    answer = state.get("final_answer")
    context = state.get("context", "")
    route = state.get("route", "general")

    # General answers are not scored against RAG context (there is nothing
    # meaningful to compare them with), so skip the model call entirely.
    if route == "general" or not context:
        return {
            **state,
            "evaluation": {
                "relevance_score": 1.0,
                "context_usage": None,  # not applicable for general answers
                "hallucination": False,
                "route": "general",
            },
        }

    try:
        raw_response = chain.invoke({
            "query": query,
            "answer": answer,
            "context": context[:_EVAL_CONTEXT_LIMIT],
        }).strip()
        # Debug aid: show the first 300 characters of the raw reply.
        print(f"EVALUATOR RAW → {raw_response[:300]}")

        parsed = _extract_json(raw_response)
        evaluation = {
            "relevance_score": _clamp_score(parsed.get("relevance_score", 0)),
            "context_usage": _clamp_score(parsed.get("context_usage", 0)),
            # A missing flag is treated as a hallucination (conservative).
            "hallucination": _coerce_flag(parsed.get("hallucination", True)),
            "route": "rag",
        }
        print(f"EVALUATOR SUCCESS → {evaluation}")
        return {**state, "evaluation": evaluation}
    except Exception as e:
        # Deliberate catch-all at the node boundary: a failed evaluation must
        # not abort the graph run, so report it and use the fallback.
        print("EVALUATOR ERROR →", e)
        return {**state, "evaluation": _fallback_evaluation()}