pluto90 committed on
Commit
7a24d7f
·
verified ·
1 Parent(s): 082f3f8

Update app/graph/nodes/evaluator.py

Browse files
Files changed (1) hide show
  1. app/graph/nodes/evaluator.py +92 -92
app/graph/nodes/evaluator.py CHANGED
@@ -1,92 +1,92 @@
1
- # app/graph/nodes/evaluator.py
2
-
3
- from app.core.llm_engine import eval_llm
4
- from app.core.prompts.evaluator_prompt import evaluator_prompt
5
- from langchain_core.output_parsers import StrOutputParser
6
- import json, re
7
-
8
- chain = evaluator_prompt | eval_llm | StrOutputParser()
9
-
10
-
11
- def _extract_json(text: str) -> dict:
12
- """Robustly extract JSON from LLM response, handling thinking blocks."""
13
-
14
- # ✅ Strip Gemini thinking/reasoning blocks
15
- text = re.sub(r"<thinking>.*?</thinking>", "", text, flags=re.DOTALL)
16
- text = re.sub(r"<thought>.*?</thought>", "", text, flags=re.DOTALL)
17
-
18
- # ✅ Strip markdown code fences
19
- text = re.sub(r"```(?:json)?", "", text)
20
- text = text.strip()
21
-
22
- # ✅ Greedy match — finds outermost { ... } correctly
23
- # [^{}]* fails on any nested structure, use .* with DOTALL instead
24
- match = re.search(r"\{.*\}", text, re.DOTALL)
25
- if not match:
26
- raise ValueError(f"No JSON found. Raw: {text[:300]}")
27
-
28
- raw_json = match.group(0).strip()
29
- return json.loads(raw_json)
30
-
31
-
32
- def _fallback_evaluation():
33
- """Explicit fallback — always returns a valid dict."""
34
- return {
35
- "relevance_score": 0.5,
36
- "context_usage": 0.5,
37
- "hallucination": True,
38
- "route": "rag"
39
- }
40
-
41
-
42
-
43
- def evaluator_node(state):
44
- query = state.get("query")
45
- answer = state.get("final_answer")
46
- context = state.get("context", "")
47
- route = state.get("route", "general")
48
-
49
- # ✅ Don't evaluate general answers against RAG context — they'll always score 0
50
- if route == "general" or not context:
51
- return {
52
- **state,
53
- "evaluation": {
54
- "relevance_score": 1.0,
55
- "context_usage": None, # N/A for general
56
- "hallucination": False,
57
- "route": "general"
58
- }
59
- }
60
-
61
- try:
62
- raw_response = chain.invoke({
63
- "query": query,
64
- "answer": answer,
65
- "context": context[:600]
66
- }).strip()
67
-
68
- print(f"EVALUATOR RAW → {raw_response[:300]}") # ✅ log first 200 chars to debug
69
-
70
- parsed= _extract_json(raw_response)
71
-
72
- evaluation = {
73
- "relevance_score": round(min(max(float(parsed.get("relevance_score", 0)), 0), 1), 3),
74
- "context_usage": round(min(max(float(parsed.get("context_usage", 0)), 0), 1), 3),
75
- "hallucination": bool(parsed.get("hallucination", True)),
76
- "route": "rag"
77
- }
78
-
79
- print(f"EVALUATOR SUCCESS → {evaluation}")
80
-
81
- # ✅ return is INSIDE try — only reached if no exception above
82
- return {**state, "evaluation": evaluation}
83
-
84
-
85
-
86
- except Exception as e:
87
- print("EVALUATOR ERROR →", e)
88
-
89
- # ✅ return is INSIDE except — evaluation variable always defined
90
- return {**state, "evaluation": _fallback_evaluation()}
91
-
92
-
 
1
+ # app/graph/nodes/evaluator.py
2
+
3
+ from app.core.llm_engine import eval_llm
4
+ from app.core.prompts.evaluator_prompt import evaluator_prompt
5
+ from langchain_core.output_parsers import StrOutputParser
6
+ import json, re
7
+
8
+ chain = evaluator_prompt | eval_llm | StrOutputParser()
9
+
10
+
11
+ def _extract_json(text: str) -> dict:
12
+ """Robustly extract JSON from LLM response, handling thinking blocks."""
13
+
14
+ # ✅ Strip Gemini thinking/reasoning blocks
15
+ text = re.sub(r"<thinking>.*?</thinking>", "", text, flags=re.DOTALL)
16
+ text = re.sub(r"<thought>.*?</thought>", "", text, flags=re.DOTALL)
17
+
18
+ # ✅ Strip markdown code fences
19
+ text = re.sub(r"```(?:json)?", "", text)
20
+ text = text.strip()
21
+
22
+ # ✅ Greedy match — finds outermost { ... } correctly
23
+ # [^{}]* fails on any nested structure, use .* with DOTALL instead
24
+ match = re.search(r"\{.*\}", text, re.DOTALL)
25
+ if not match:
26
+ raise ValueError(f"No JSON found. Raw: {text[:300]}")
27
+
28
+ # raw_json = match.group(0).strip()
29
+ return json.loads(raw_json)
30
+
31
+
32
+ def _fallback_evaluation(route="rag"):
33
+ """Explicit fallback — always returns a valid dict."""
34
+ return {
35
+ "relevance_score": 0.5,
36
+ "context_usage": 0.5,
37
+ "hallucination": True,
38
+ "route": route
39
+ }
40
+
41
+
42
+
43
+ def evaluator_node(state):
44
+ query = state.get("query")
45
+ answer = state.get("final_answer")
46
+ context = state.get("context", "")
47
+ route = state.get("route", "general")
48
+
49
+ # ✅ Don't evaluate general answers against RAG context — they'll always score 0
50
+ if route == "general" or not context:
51
+ return {
52
+ **state,
53
+ "evaluation": {
54
+ "relevance_score": 1.0,
55
+ "context_usage": None, # N/A for general
56
+ "hallucination": False,
57
+ "route": "general"
58
+ }
59
+ }
60
+
61
+ try:
62
+ raw_response = chain.invoke({
63
+ "query": query,
64
+ "answer": answer,
65
+ "context": context[:600]
66
+ }).strip()
67
+
68
+ print(f"EVALUATOR RAW → {raw_response[:200]}") # ✅ log first 200 chars to debug
69
+
70
+ parsed= _extract_json(raw_response)
71
+
72
+ evaluation = {
73
+ "relevance_score": round(min(max(float(parsed.get("relevance_score", 0)), 0), 1), 3),
74
+ "context_usage": round(min(max(float(parsed.get("context_usage", 0)), 0), 1), 3),
75
+ "hallucination": bool(parsed.get("hallucination", True)),
76
+ "route": route
77
+ }
78
+
79
+ print(f"EVALUATOR SUCCESS → {evaluation}")
80
+
81
+ # ✅ return is INSIDE try — only reached if no exception above
82
+ return {**state, "evaluation": evaluation}
83
+
84
+
85
+
86
+ except Exception as e:
87
+ print("EVALUATOR ERROR →", e)
88
+
89
+ # ✅ return is INSIDE except — evaluation variable always defined
90
+ return {**state, "evaluation": _fallback_evaluation()}
91
+
92
+