Inframat-x committed on
Commit
4a81d08
·
verified ·
1 Parent(s): 5310e9a

Create rag_eval_metrics.py

Browse files
Files changed (1) hide show
  1. rag_eval_metrics.py +50 -0
rag_eval_metrics.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # rag_eval_metrics.py
2
+ import json
3
+ import pandas as pd
4
+ from pathlib import Path
5
+ from sklearn.metrics import recall_score
6
+
7
+ LOGS_PATH = Path("rag_artifacts/logs.jsonl")
8
+ GOLD_PATH = Path("gold.csv")
9
+
10
def load_logs(path=None):
    """Load the JSONL query log into a DataFrame.

    Args:
        path: Optional override for the log location; defaults to the
            module-level ``LOGS_PATH`` (one JSON object per line).

    Returns:
        A DataFrame with one row per log record, or an empty DataFrame
        when the file does not exist.
    """
    if path is None:
        path = LOGS_PATH
    if not path.exists():
        print("❌ logs.jsonl not found in rag_artifacts/")
        return pd.DataFrame()
    records = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            # Skip blank lines: JSONL files commonly end with a trailing
            # newline, and json.loads("") would raise JSONDecodeError.
            if line.strip():
                records.append(json.loads(line))
    return pd.DataFrame(records)
19
+
20
+ def load_gold():
21
+ if not GOLD_PATH.exists():
22
+ print("❌ gold.csv not found in repo")
23
+ return pd.DataFrame()
24
+ return pd.read_csv(GOLD_PATH)
25
+
26
def evaluate(logs=None, gold=None):
    """Compare logged RAG answers against the gold set and print metrics.

    Args:
        logs: Optional pre-loaded logs DataFrame (needs 'question' and
            'answer' columns); loaded via ``load_logs()`` when omitted.
        gold: Optional pre-loaded gold DataFrame (needs 'question' and
            'type' columns); loaded via ``load_gold()`` when omitted.

    Prints the number of questions evaluated, recall on null-type
    questions (how often the system correctly declined to answer),
    and the count of gold questions with no logged answer.
    """
    if logs is None:
        logs = load_logs()
    if gold is None:
        gold = load_gold()

    if logs.empty or gold.empty:
        print("⚠️ Missing logs or gold — evaluation aborted.")
        return

    # Left join keeps every gold question; unanswered ones get NaN answers.
    # NOTE(review): duplicate questions in the logs would fan out rows here —
    # confirm upstream writes at most one log entry per question.
    df = gold.merge(logs, on="question", how="left")

    # Ground truth: gold rows typed "null" should have been refused.
    df['is_null'] = df['type'].eq("null").astype(int)
    # Prediction: any refusal phrasing in the answer counts as a "null" call.
    df['pred_null'] = df['answer'].fillna("").str.contains(
        "not found|insufficient evidence|No indexed PDFs",
        case=False
    ).astype(int)

    # Recall = TP / (TP + FN) over the true-null rows, computed directly so
    # sklearn is not required; 0.0 when the gold set has no null questions
    # (same result as sklearn's zero-division default, without the warning).
    true_null = df['is_null'] == 1
    null_recall = float(df.loc[true_null, 'pred_null'].mean()) if true_null.any() else 0.0

    print("=== ✅ Evaluation Results ===")
    print(f"Questions Evaluated: {len(df)}")
    print(f"Null-Question Recall (hallucination control): {null_recall:.2f}")
    print(f"Missing answers in logs: {(df['answer'].isna()).sum()}")
48
+
49
# Script entry point: run the evaluation only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    evaluate()