Rodrigo Ferreira Rodrigues committed on
Commit
440ceb6
·
1 Parent(s): 6cd9340

adding bert score

Browse files
Files changed (2) hide show
  1. mcq_eval.py +6 -8
  2. requirements.txt +6 -1
mcq_eval.py CHANGED
@@ -15,9 +15,7 @@
15
 
16
  import evaluate
17
  import datasets
18
-
19
- bleu = evaluate.load('bleu')
20
- #bert_score = evaluate.load('bertscore')
21
 
22
 
23
  # TODO: Add BibTeX citation
@@ -50,7 +48,7 @@ Examples:
50
  Here is an exemple on how to use the metric:
51
 
52
  >>> metric = evaluate.load("rfr2003/MQC_eval")
53
- >>> results = metric.compute(references=["A", "B"], predictions=["A", "D"])
54
  >>> print(results)
55
  {'accuracy': 0.5, 'bleu-1': 0.5}
56
  """
@@ -82,8 +80,8 @@ class MCQ_eval(evaluate.Metric):
82
 
83
  def _download_and_prepare(self, dl_manager):
84
  """Optional: download external resources useful to compute the scores"""
85
- # TODO: Download external resources if needed
86
- pass
87
 
88
  def _compute(self, generations, golds):
89
  """Returns the scores"""
@@ -107,10 +105,10 @@ class MCQ_eval(evaluate.Metric):
107
  references.append(gold)
108
 
109
  metrics = {}
110
- #metrics = {f"bert_score_{k}":np.mean(v).item() for k,v in bert_score.compute(predictions=predictions, references=references, lang="en").items() if k in ['recall', 'precision', 'f1']}
111
  metrics.update({
112
  'accuracy': correct/total,
113
- 'bleu-1': bleu.compute(predictions=predictions, references=references, max_order=1)['bleu']
114
  })
115
 
116
  return metrics
 
15
 
16
  import evaluate
17
  import datasets
18
+ import numpy as np
 
 
19
 
20
 
21
  # TODO: Add BibTeX citation
 
48
  Here is an exemple on how to use the metric:
49
 
50
  >>> metric = evaluate.load("rfr2003/MQC_eval")
51
+ >>> results = metric.compute(generations=["A", "B"], golds=["A", "D"])
52
  >>> print(results)
53
  {'accuracy': 0.5, 'bleu-1': 0.5}
54
  """
 
80
 
81
  def _download_and_prepare(self, dl_manager):
82
  """Optional: download external resources useful to compute the scores"""
83
+ self.bleu = evaluate.load('bleu')
84
+ self.bert_score = evaluate.load('bertscore')
85
 
86
  def _compute(self, generations, golds):
87
  """Returns the scores"""
 
105
  references.append(gold)
106
 
107
  metrics = {}
108
+ metrics = {f"bert_score_{k}": np.mean(v).item() for k,v in self.bert_score.compute(predictions=predictions, references=references, lang="en").items() if k in ['recall', 'precision', 'f1']}
109
  metrics.update({
110
  'accuracy': correct/total,
111
+ 'bleu-1': self.bleu.compute(predictions=predictions, references=references, max_order=1)['bleu']
112
  })
113
 
114
  return metrics
requirements.txt CHANGED
@@ -1 +1,6 @@
1
- git+https://github.com/huggingface/evaluate@main
 
 
 
 
 
 
1
+ git+https://github.com/huggingface/evaluate@main
2
+ transformers
3
+ torch
4
+ datasets
5
+ numpy
6
+ bert_score