elly99 committed on
Commit 4a1962b · verified · 1 Parent(s): 9397062

Create sematic_evaluation.py

src/evaluation/sematic_evaluation.py ADDED
@@ -0,0 +1,60 @@
+ # © 2025 Elena Marziali — Code released under Apache 2.0 license.
+ # See LICENSE in the repository for details.
+ # Removal of this copyright is prohibited.
+
+ import math
+
+ import numpy as np
+ from sentence_transformers import CrossEncoder, SentenceTransformer
+
+ # Load the models only once, at import time
+ cross_encoder = CrossEncoder("cross-encoder/nli-deberta-base")
+ embedding_model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedder; swap in the project's actual model
+
+ def evaluate_coherence(question, answer):
+     score = cross_encoder.predict([(question, answer)])
+     try:
+         logit = float(score[0]) if isinstance(score[0], (int, float, np.floating)) else float(score[0][0])
+         probability = 1 / (1 + math.exp(-logit))  # sigmoid maps the logit into [0, 1]
+         return round(probability, 3)
+     except Exception:
+         return 0.0
+
+ # === Scientific reliability score calculation ===
+ def calculate_impact_score(citations, h_index, peer_review, publication_year):
+     # Weighted bibliometric signals, penalized by publication age (relative to 2025)
+     score = (citations * 0.4) + (h_index * 0.3) + (peer_review * 0.2) - (2025 - publication_year) * 0.1
+     return max(0, score)  # ensure non-negative
+
+ def check_topic_relevance(user_question, extracted_text, threshold=0.7):
+     """Checks whether the topic of the question is consistent with the uploaded file content."""
+     emb_question = embedding_model.encode([user_question])
+     emb_text = embedding_model.encode([extracted_text])
+
+     # Cosine similarity, reduced to a plain float so round() works
+     similarity = float(np.dot(emb_question, emb_text.T) / (np.linalg.norm(emb_question) * np.linalg.norm(emb_text)))
+     return round(similarity, 3), similarity >= threshold
+
+ def calculate_response_score(question, answer):
+     score = cross_encoder.predict([(question, answer)])
+     first = score[0]
+     # The NLI cross-encoder emits one logit per label; fall back to the first logit
+     return float(first) if isinstance(first, (int, float, np.floating)) else float(first[0])
+
+ def regenerate_if_low_score(question, answer, level, threshold=0.7, iterations=2):
+     # evaluate_responses_with_ai, reformulate_question and generate_response
+     # are presumably defined elsewhere in this repository
+     evaluation = evaluate_responses_with_ai(question, answer, level)
+     if evaluation["semantic_score"] < threshold:
+         new_question = reformulate_question(question)
+         for _ in range(iterations):
+             new_answer = generate_response(new_question, temperature=0.7)
+             new_evaluation = evaluate_responses_with_ai(new_question, new_answer, level)
+             if new_evaluation["semantic_score"] >= threshold:
+                 return new_answer
+     return answer
+
+ def select_best_version(question, answers):
+     scored = [(r, calculate_response_score(question, r)) for r in answers]
+     scored.sort(key=lambda x: x[1], reverse=True)
+     return scored[0]  # (answer, score)