SharvNey commited on
Commit
be5a976
·
verified ·
1 Parent(s): 1dd3a75

Create evaluator.py

Browse files
Files changed (1) hide show
  1. evaluator.py +42 -0
evaluator.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from backend import run_llm

# Shared sentence-embedding model used by similarity_score.
# all-MiniLM-L6-v2 is a small, fast general-purpose embedder; loaded once
# at import time so repeated scoring calls reuse the same weights.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
6
+
7
def similarity_score(output: str, reference: str) -> float:
    """Return the cosine similarity between *output* and *reference* embeddings.

    A blank (empty or whitespace-only) reference yields 0.0 rather than
    embedding an empty string.
    """
    if not reference.strip():
        return 0.0
    out_vec = embed_model.encode([output])
    ref_vec = embed_model.encode([reference])
    similarity = cosine_similarity(out_vec, ref_vec)[0][0]
    return float(similarity)
13
+
14
def conciseness_score(text: str) -> float:
    """Score brevity on a 0.0-1.0 scale.

    Empty text scores 1.0; the score falls off linearly with word count and
    bottoms out at 0.0 once the text reaches 300 words or more.
    """
    word_count = len(text.split())
    penalty = word_count / 300
    return max(0.0, 1 - penalty)
17
+
18
def llm_judge_score(output: str, goal: str = "overall quality") -> float:
    """Ask the backend LLM to rate *output* from 1 to 10 against *goal*.

    Returns the numeric score as a float, clamped to the requested 1-10
    range. Falls back to a neutral 5.0 when the LLM call fails or its reply
    contains no parseable number.
    """
    import re  # local: only needed for parsing the judge's reply

    judge_prompt = f"""
Score the following answer from 1 to 10 based on {goal}.
Only return a single number.

Answer:
{output}
"""
    try:
        reply = run_llm(judge_prompt)
        # The original float(reply.strip()) crashed on any surrounding text
        # ("Score: 8", "8/10", ...); pull out the first number instead.
        match = re.search(r"\d+(?:\.\d+)?", str(reply))
        if match is None:
            return 5.0  # judge returned no number at all
        # Keep the result inside the 1-10 scale the prompt asked for.
        return min(10.0, max(1.0, float(match.group())))
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; any backend/parsing failure yields the fallback.
        return 5.0  # safe fallback
31
+
32
def final_score(output: str, reference: str = "") -> float:
    """Blend judge, similarity, and conciseness scores into one 0-1 value.

    With a non-blank *reference*: 0.5 * judge + 0.3 * similarity +
    0.2 * conciseness. Without one: 0.7 * judge + 0.3 * conciseness.
    The result is rounded to 3 decimal places.
    """
    judged = llm_judge_score(output) / 10
    brevity = conciseness_score(output)

    if not reference.strip():
        # No reference available: lean more heavily on the LLM judge.
        blended = 0.7 * judged + 0.3 * brevity
    else:
        sim = similarity_score(output, reference)
        blended = 0.5 * judged + 0.3 * sim + 0.2 * brevity

    return round(blended, 3)