| |
|
| |
|
| | import json
|
| | from pathlib import Path
|
| | import sacrebleu
|
| | from rouge_score import rouge_scorer, scoring
|
| |
|
| |
|
# Compute corpus-level BLEU and ROUGE metrics over saved evaluation results.
# Expects RESULTS_FILE to be a JSON array of objects, each carrying string
# fields "reference" and "prediction".
RESULTS_FILE = "./output/eval_results.json"

# Validate with a real exception: `assert` is stripped under `python -O`,
# so it must never guard runtime input.
results_path = Path(RESULTS_FILE)
if not results_path.exists():
    raise FileNotFoundError(f"File not found: {RESULTS_FILE}")

with results_path.open("r", encoding="utf-8") as f:
    data = json.load(f)

references = [entry["reference"] for entry in data]
predictions = [entry["prediction"] for entry in data]

# --- BLEU ----------------------------------------------------------------
# sacrebleu takes a list of reference streams; there is one reference per
# prediction here, hence the single-element outer list.
bleu = sacrebleu.corpus_bleu(predictions, [references])
print("✅ BLEU Score:", bleu.score)

# --- ROUGE ---------------------------------------------------------------
scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
aggregator = scoring.BootstrapAggregator()

for pred, ref in zip(predictions, references):
    # RougeScorer.score takes (target, prediction) in that argument order.
    aggregator.add_scores(scorer.score(ref, pred))

rouge_result = aggregator.aggregate()
print("\n✅ ROUGE Scores:")
for k, v in rouge_result.items():
    # `mid` is the point estimate from the bootstrap aggregation.
    print(f"{k}: P={v.mid.precision:.4f}, R={v.mid.recall:.4f}, F1={v.mid.fmeasure:.4f}")
|
| |
|
| |
|