"""Report the mean perplexity of a causal LM on the WikiText-2 (raw) test split.

Loads the ``perplexity`` metric from the ``evaluate`` library, feeds it every
non-blank line of the dataset's ``text`` column, and prints the resulting
``mean_perplexity`` value.
"""

from datasets import load_dataset
import evaluate

# Identifiers passed to the metric / dataset loaders.
MODEL_ID = 'tinycompany/ShawtyIsBad-bgem3'
DATASET_NAME = "wikitext"
DATASET_CONFIG = "wikitext-2-raw-v1"
DATASET_SPLIT = "test"

perplexity = evaluate.load("perplexity", module_type="metric")
dataset = load_dataset(DATASET_NAME, DATASET_CONFIG, split=DATASET_SPLIT)

# Keep only rows that contain something other than whitespace.
# NOTE(review): with add_start_token=False the metric presumably needs at
# least two tokens per input, so blank rows must not be passed in -- confirm
# against the metric card.
input_texts = [text for text in dataset["text"] if text.strip()]

results = perplexity.compute(
    model_id=MODEL_ID,
    predictions=input_texts,
    batch_size=1,
    add_start_token=False,
    device="cuda",  # GPU is requested explicitly; no CPU fallback here.
)

print("Mean Perplexity:", results["mean_perplexity"])