# ppl.py -- added by aloobun in commit 4db3c47 ("Create ppl.py", verified).
from datasets import load_dataset
import evaluate
# Script: report the mean perplexity of one Hub model over the WikiText-2
# (raw) test split, using the Hugging Face `evaluate` "perplexity" metric.

MODEL_ID = 'tinycompany/ShawtyIsBad-bgem3'

# Arguments for the metric other than the texts themselves; they are passed
# through to `perplexity.compute` unchanged.
# NOTE(review): device="cuda" presumably requires a CUDA-capable GPU on the
# host -- confirm before running on a CPU-only machine.
COMPUTE_OPTIONS = {
    "batch_size": 1,
    "add_start_token": False,
    "device": "cuda",
}

perplexity = evaluate.load("perplexity", module_type="metric")

dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")

# Keep only rows whose text has something besides whitespace.
# (assumes every entry of the "text" column is a str -- TODO confirm)
input_texts = [line for line in dataset["text"] if line.strip()]

results = perplexity.compute(
    predictions=input_texts,
    model_id=MODEL_ID,
    **COMPUTE_OPTIONS,
)

# The metric's result is indexed by "mean_perplexity" for the aggregate score.
mean_ppl = results["mean_perplexity"]
print("Mean Perplexity:", mean_ppl)