MissSqui committed
Commit
8dfc466
·
verified ·
1 Parent(s): 2d6c260

Update abc

Files changed (1)
  1. abc +95 -1
abc CHANGED
@@ -29,4 +29,98 @@ print("Cosine Similarity Matrix (rows: retrieved, columns: relevant):\n")
 for i, retrieved in enumerate(retrieved_chunks):
     for j, relevant in enumerate(relevant_chunks):
         score = cosine_sim_matrix[i][j].item()
-        print(f"Similarity between:\n Retrieved: \"{retrieved}\"\n Relevant : \"{relevant}\"\n Score : {score:.4f}\n")
+        print(f"Similarity between:\n Retrieved: \"{retrieved}\"\n Relevant : \"{relevant}\"\n Score : {score:.4f}\n")
+# -----------------------------------------
+
+
+import numpy as np
+from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
+from rouge_score import rouge_scorer
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+from transformers import GPT2Tokenizer, GPT2LMHeadModel
+import torch
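+# (Setup note) All of these are third-party packages; a typical install would be:
+#   pip install numpy nltk rouge-score sentence-transformers scikit-learn transformers torch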
+
+#client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
+
+# Load models: a sentence embedder for semantic similarity and GPT-2 for perplexity
+embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+perplexity_model = GPT2LMHeadModel.from_pretrained("gpt2")
+perplexity_model.eval()
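+# Note: both models download from the Hugging Face Hub on first use;
+# all-MiniLM-L6-v2 produces 384-dimensional sentence embeddings.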
+
+# Evaluation Metrics
+def bleu_rouge_score(reference, generated):
+    # Smoothing keeps BLEU from collapsing to ~0 on short sentences that
+    # lack higher-order n-gram overlap
+    bleu = sentence_bleu([reference.split()], generated.split(),
+                         smoothing_function=SmoothingFunction().method1)
+    rouge = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
+    rougeL = rouge.score(reference, generated)['rougeL'].fmeasure
+    return {'bleu': bleu, 'rougeL': rougeL}
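+# BLEU here is NLTK's default (up to 4-gram precision with a brevity penalty);
+# ROUGE-L is the F-measure over the longest common subsequence of the two texts.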
+
+def cosine_sim(reference, generated):
+    emb_ref = embedding_model.encode([reference])[0]
+    emb_gen = embedding_model.encode([generated])[0]
+    sim = cosine_similarity([emb_ref], [emb_gen])[0][0]
+    # Cast to a plain Python float so the float formatting in the sample run applies
+    return float(sim)
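+# cosine(u, v) = u·v / (|u| |v|): identical texts score exactly 1.0, while
+# unrelated texts score near 0 under this embedding model.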
+
+def perplexity_score(text):
+    inputs = tokenizer(text, return_tensors="pt")
+    with torch.no_grad():
+        outputs = perplexity_model(**inputs, labels=inputs["input_ids"])
+        loss = outputs.loss
+    return torch.exp(loss).item()
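+# Passing input_ids as labels makes the model return the mean token
+# cross-entropy, so exp(loss) is GPT-2's perplexity for the text; lower reads
+# as more fluent.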
+
+def precision_at_k(retrieved, relevant, k):
+    top_k = retrieved[:k]
+    correct = sum(1 for item in top_k if item in relevant)
+    return correct / k
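+# For the sample run below, 2 of the 5 retrieved chunks are relevant,
+# so precision@5 = 2 / 5 = 0.4.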
+
+def recall_at_k(retrieved, relevant, k):
+    correct = sum(1 for item in retrieved[:k] if item in relevant)
+    return correct / len(relevant)
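+# For the sample run below, both relevant chunks appear in the top 5,
+# so recall@5 = 2 / 2 = 1.0.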
+
+def ndcg_at_k(retrieved, relevant, k):
+    def dcg(items):
+        # Binary relevance: a hit at 0-based rank i contributes 1 / log2(i + 2)
+        return sum(1 / np.log2(i + 2) if items[i] in relevant else 0 for i in range(len(items)))
+    ideal = dcg(relevant[:k])
+    actual = dcg(retrieved[:k])
+    return actual / ideal if ideal != 0 else 0
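+# Worked example for the sample data below: hits at ranks 0 and 3 give
+# DCG = 1/log2(2) + 1/log2(5) ≈ 1.431, the ideal ordering gives
+# 1/log2(2) + 1/log2(3) ≈ 1.631, so ndcg@5 ≈ 1.431 / 1.631 ≈ 0.877.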
+
+def hit_at_k(retrieved, relevant, k):
+    top_k = retrieved[:k]
+    return int(any(item in relevant for item in top_k))
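+# hit@k is binary: 1 if any relevant chunk appears in the top k, else 0.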
+
+# Main Evaluation
+def full_evaluation(reference, generated, retrieved, relevant_chunks):
+    return {
+        **bleu_rouge_score(reference, generated),
+        "cosine_similarity": cosine_sim(reference, generated),
+        "perplexity": perplexity_score(generated),
+        "precision@5": precision_at_k(retrieved, relevant_chunks, 5),
+        "recall@5": recall_at_k(retrieved, relevant_chunks, 5),
+        "ndcg@5": ndcg_at_k(retrieved, relevant_chunks, 5),
+        "hit@5": hit_at_k(retrieved, relevant_chunks, 5)
+    }
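+# Caveat: the retrieval metrics test exact string membership, so retrieved
+# and relevant_chunks must contain identical chunk texts to count as hits.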
+
+# Sample Run
+if __name__ == "__main__":
+    reference_answer = "The Eiffel Tower is located in Paris."
+    generated_response = "Eiffel Tower stands in Paris."
+
+    retrieved_chunks = [
+        "The Eiffel Tower is a landmark in Paris.",
+        "Paris is the capital of France.",
+        "The Louvre is also in Paris.",
+        "Eiffel Tower was built in 1889.",
+        "It is a famous tourist spot."
+    ]
+
+    relevant_chunks = [
+        "The Eiffel Tower is a landmark in Paris.",
+        "Eiffel Tower was built in 1889."
+    ]
+
+    scores = full_evaluation(reference_answer, generated_response, retrieved_chunks, relevant_chunks)
+
+    for metric, score in scores.items():
+        print(f"{metric}: {score:.4f}" if isinstance(score, float) else f"{metric}: {score}")
+
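+# For this sample the retrieval metrics are deterministic: precision@5 = 0.4,
+# recall@5 = 1.0, ndcg@5 ≈ 0.877, hit@5 = 1; the text-quality scores depend
+# on the loaded models' outputs.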