# Evaluation script for DalaT5 (Cyrillic -> Latin Kazakh transliteration): BLEU / chrF.
import json
import evaluate
from tqdm import tqdm
from transformers import pipeline
# Load the DalaT5 transliteration pipeline (model and tokenizer share one repo id).
pipe = pipeline(
    "text2text-generation",
    model="crossroderick/dalat5",
    tokenizer="crossroderick/dalat5",
)

# Corpus-level metrics: BLEU (n-gram precision) and chrF (character F-score).
bleu = evaluate.load("bleu")
chrf = evaluate.load("chrf")

# Load the JSONL evaluation corpus. Each line is expected to hold
# {"transliteration": {"src": <cyrillic>, "tgt": <latin>}}; malformed
# records are skipped rather than crashing the run.
dataset_path = "src/data/clean_corpus.jsonl"
examples = []
with open(dataset_path, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Skip blank lines so a trailing newline doesn't crash json.loads.
            continue
        obj = json.loads(line)
        pair = obj.get("transliteration", {})
        if "src" in pair and "tgt" in pair:
            examples.append((pair["src"], pair["tgt"]))

# Fail early with a clear message: metric.compute() on empty lists raises
# an opaque error deep inside `evaluate`.
if not examples:
    raise SystemExit(f"No usable examples found in {dataset_path}")

# Run predictions
predictions = []
references = []
print(f"Evaluating on {len(examples)} examples...\n")
for src, tgt in tqdm(examples):
    input_prompt = f"Cyrillic2Latin: {src}"
    # do_sample=False (greedy decoding) keeps the evaluation deterministic.
    output = pipe(input_prompt, max_length=128, do_sample=False)[0]["generated_text"]
    predictions.append(output.strip())
    references.append([tgt.strip()])  # wrap in list for BLEU (multi-reference format)

# Evaluate
bleu_result = bleu.compute(predictions=predictions, references=references)
chrf_result = chrf.compute(predictions=predictions, references=references)

# Print results
print("\nEvaluation results:")
print(f"BLEU Score: {bleu_result['bleu']:.2f}")
print(f"chrF Score: {chrf_result['score']:.2f}")