Spaces:
Running
Running
| import ast | |
| import statistics | |
| import sacrebleu | |
| from rouge_score import rouge_scorer | |
def sentence_bleu(references, predictions):
    """Return the mean sentence-level BLEU score over paired samples.

    Args:
        references: One entry per prediction. An entry that is not already
            a list is wrapped in a one-element list before scoring.
        predictions: Hypothesis texts, paired positionally with
            ``references`` (pairing stops at the shorter of the two).

    Returns:
        The arithmetic mean of the per-pair ``.score`` values produced by
        ``sacrebleu.sentence_bleu``.

    Raises:
        statistics.StatisticsError: If no pairs are available to average.
    """
    per_pair_scores = [
        sacrebleu.sentence_bleu(
            hypothesis,
            # sacrebleu is always handed a list of references here.
            refs if isinstance(refs, list) else [refs],
            smooth_method="exp",
            smooth_value=0.0,
            lowercase=True,
            tokenize="intl",
        ).score
        for refs, hypothesis in zip(references, predictions)
    ]
    return statistics.mean(per_pair_scores)
def corpus_bleu(references, predictions):
    """Return the corpus-level BLEU score of ``predictions``.

    Both arguments are forwarded to ``sacrebleu.corpus_bleu`` unchanged,
    with the predictions as the first positional argument and the
    references as the second.

    NOTE(review): unlike ``sentence_bleu``, no per-sample list wrapping is
    applied to ``references`` here, so callers must already supply them in
    the layout ``sacrebleu.corpus_bleu`` expects -- confirm against callers.

    Args:
        references: Reference texts, passed through as-is.
        predictions: Hypothesis texts, passed through as-is.

    Returns:
        The ``.score`` attribute of the sacreBLEU result.
    """
    bleu_options = {
        "smooth_method": "exp",
        "smooth_value": 0.0,
        "lowercase": True,
        "tokenize": "intl",
    }
    result = sacrebleu.corpus_bleu(predictions, references, **bleu_options)
    return result.score
def rouge(scope, references, predictions):
    """Return ROUGE precision, recall and F1 averaged over all samples.

    Args:
        scope: Suffix appended to ``"rouge"`` to build the metric name given
            to ``RougeScorer`` (e.g. ``"1"`` -> ``"rouge1"``).
        references: One entry per prediction. Each entry is either a single
            reference or a list of alternative references.
        predictions: Texts to score, paired positionally with
            ``references``.

    Returns:
        A dict with keys ``"precision"``, ``"recall"`` and ``"f1"``. Each
        value is the sum of per-sample scores divided by
        ``len(references)``. When a sample has several references, only the
        reference with the highest F1 contributes (the first one wins
        ties). All three values are ``0.0`` when ``references`` is empty.
    """
    totals = {"precision": 0.0, "recall": 0.0, "f1": 0.0}
    sample_count = len(references)
    if sample_count == 0:
        # Nothing to average; avoid dividing by zero below.
        return totals

    # Named ``metric`` so the local does not shadow this function's name.
    metric = f"rouge{scope}"
    scorer = rouge_scorer.RougeScorer([metric], use_stemmer=True)

    for refs, prediction in zip(references, predictions):
        if not isinstance(refs, list):
            refs = [refs]
        # Score against every alternative reference and keep exactly one
        # result per sample: the one with the best F1. Adding more than one
        # would inflate totals that are divided by the sample count.
        best = max(
            (scorer.score(ref, prediction)[metric] for ref in refs),
            key=lambda score: score.fmeasure,
            default=None,
        )
        if best is None:
            # A sample with an empty reference list contributes zero.
            continue
        totals["precision"] += best.precision
        totals["recall"] += best.recall
        totals["f1"] += best.fmeasure

    return {name: total / sample_count for name, total in totals.items()}
def parse_string(string):
    """Parse a Python literal from ``string`` and return it as a list.

    The text is evaluated with ``ast.literal_eval`` and the result is
    converted with ``list(...)``.

    Args:
        string: Text expected to hold a Python literal for an iterable,
            such as ``"[1, 2, 3]"`` or ``"('a', 'b')"``.

    Returns:
        The parsed value converted to a list, or ``None`` when the text is
        not a valid literal or the literal cannot be converted to a list.
        An error message is printed in the failure case.
    """
    try:
        return list(ast.literal_eval(string))
    # TypeError covers valid literals that are not iterable (e.g. "5" or
    # "None"); list() fails on those, and they are reported like any other
    # unparseable input rather than crashing the caller.
    except (ValueError, SyntaxError, TypeError) as e:
        print(f"Error parsing string: {e}")
        return None