| |
|
| |
|
| | import json
|
| | from pathlib import Path
|
| | import sacrebleu
|
| | from rouge_score import rouge_scorer, scoring
|
| |
|
| |
|
# Compute corpus-level BLEU and ROUGE metrics over saved evaluation results.
# Expects RESULTS_FILE to be a JSON array of objects, each carrying string
# fields "reference" and "prediction".
RESULTS_FILE = "./output/eval_results.json"

# Validate with a real exception: `assert` is stripped under `python -O`,
# so it must never guard runtime input.
results_path = Path(RESULTS_FILE)
if not results_path.exists():
    raise FileNotFoundError(f"File not found: {RESULTS_FILE}")

with results_path.open("r", encoding="utf-8") as f:
    data = json.load(f)

references = [entry["reference"] for entry in data]
predictions = [entry["prediction"] for entry in data]

# --- BLEU ----------------------------------------------------------------
# sacrebleu takes a list of reference streams; there is one reference per
# prediction here, hence the single-element outer list.
bleu = sacrebleu.corpus_bleu(predictions, [references])
print("✅ BLEU Score:", bleu.score)

# --- ROUGE ---------------------------------------------------------------
scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
aggregator = scoring.BootstrapAggregator()

for pred, ref in zip(predictions, references):
    # RougeScorer.score takes (target, prediction) in that argument order.
    aggregator.add_scores(scorer.score(ref, pred))

rouge_result = aggregator.aggregate()
print("\n✅ ROUGE Scores:")
for k, v in rouge_result.items():
    # `mid` is the point estimate from the bootstrap aggregation.
    print(f"{k}: P={v.mid.precision:.4f}, R={v.mid.recall:.4f}, F1={v.mid.fmeasure:.4f}")
|
| |
|
| |
|