Upload metrics.py with huggingface_hub
Browse files- metrics.py +26 -10
metrics.py
CHANGED
|
@@ -4,6 +4,7 @@ import uuid
|
|
| 4 |
from abc import ABC, abstractmethod
|
| 5 |
from collections import Counter
|
| 6 |
from dataclasses import field
|
|
|
|
| 7 |
from typing import Any, Dict, Generator, List, Optional, Tuple
|
| 8 |
|
| 9 |
import evaluate
|
|
@@ -1329,14 +1330,13 @@ class Perplexity(BulkInstanceMetric):
|
|
| 1329 |
|
| 1330 |
:return: the likelihood of generating text Y_i after text X_i = P(Y_i|X_i) for every i.
|
| 1331 |
"""
|
| 1332 |
-
|
| 1333 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1334 |
|
| 1335 |
-
# add the instruction as prefix
|
| 1336 |
-
predictions = [f"{self.perplexity_prompt} {x}" for x in predictions]
|
| 1337 |
-
references = [y[0] for y in references]
|
| 1338 |
-
|
| 1339 |
-
# check if the model is enc-dec or dec-only to use the right perplexity computation
|
| 1340 |
from transformers import AutoConfig
|
| 1341 |
|
| 1342 |
config = AutoConfig.from_pretrained(self.model_name, trust_remote_code=True)
|
|
@@ -1348,10 +1348,24 @@ class Perplexity(BulkInstanceMetric):
|
|
| 1348 |
|
| 1349 |
# compute P(Q|P) and store in queue
|
| 1350 |
scores = lm.compute_lm(
|
| 1351 |
-
source=
|
| 1352 |
)
|
| 1353 |
|
| 1354 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1355 |
|
| 1356 |
class AbstractLM(ABC):
|
| 1357 |
def __init__(self, model_name):
|
|
@@ -1363,7 +1377,9 @@ class Perplexity(BulkInstanceMetric):
|
|
| 1363 |
self.model = self.model_class().from_pretrained(self.model_name)
|
| 1364 |
self.is_cuda = torch.cuda.is_available()
|
| 1365 |
|
| 1366 |
-
def compute_lm(
|
|
|
|
|
|
|
| 1367 |
import torch
|
| 1368 |
|
| 1369 |
scores = []
|
|
|
|
| 4 |
from abc import ABC, abstractmethod
|
| 5 |
from collections import Counter
|
| 6 |
from dataclasses import field
|
| 7 |
+
from statistics import mean
|
| 8 |
from typing import Any, Dict, Generator, List, Optional, Tuple
|
| 9 |
|
| 10 |
import evaluate
|
|
|
|
| 1330 |
|
| 1331 |
:return: the likelihood of generating text Y_i after text X_i = P(Y_i|X_i) for every i.
|
| 1332 |
"""
|
| 1333 |
+
sources = []
|
| 1334 |
+
targets = []
|
| 1335 |
+
for prediction, instance_references in zip(predictions, references):
|
| 1336 |
+
for instance_reference in instance_references:
|
| 1337 |
+
sources.append(f"{self.perplexity_prompt} {prediction}")
|
| 1338 |
+
targets.append(instance_reference)
|
| 1339 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1340 |
from transformers import AutoConfig
|
| 1341 |
|
| 1342 |
config = AutoConfig.from_pretrained(self.model_name, trust_remote_code=True)
|
|
|
|
| 1348 |
|
| 1349 |
# compute P(Q|P) and store in queue
|
| 1350 |
scores = lm.compute_lm(
|
| 1351 |
+
source=sources, target=targets, batch_size=self.batch_size
|
| 1352 |
)
|
| 1353 |
|
| 1354 |
+
index = 0
|
| 1355 |
+
all_instances_scores = []
|
| 1356 |
+
for instance_references in references:
|
| 1357 |
+
instance_scores = {}
|
| 1358 |
+
instance_scores_list = []
|
| 1359 |
+
for _ in range(len(instance_references)):
|
| 1360 |
+
instance_scores_list.append(scores[index])
|
| 1361 |
+
index += 1
|
| 1362 |
+
instance_scores["reference_scores"] = instance_scores_list
|
| 1363 |
+
instance_scores[self.main_score] = mean(instance_scores_list)
|
| 1364 |
+
|
| 1365 |
+
instance_scores[self.main_score] = mean(instance_scores_list)
|
| 1366 |
+
all_instances_scores.append(instance_scores)
|
| 1367 |
+
|
| 1368 |
+
return all_instances_scores
|
| 1369 |
|
| 1370 |
class AbstractLM(ABC):
|
| 1371 |
def __init__(self, model_name):
|
|
|
|
| 1377 |
self.model = self.model_class().from_pretrained(self.model_name)
|
| 1378 |
self.is_cuda = torch.cuda.is_available()
|
| 1379 |
|
| 1380 |
+
def compute_lm(
|
| 1381 |
+
self, source: List[str], target: List[str], batch_size: int
|
| 1382 |
+
) -> List[float]:
|
| 1383 |
import torch
|
| 1384 |
|
| 1385 |
scores = []
|