| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| from collections import defaultdict |
| from pathlib import Path |
|
|
| import pandas as pd |
| from rouge_cli import calculate_rouge_path |
|
|
| from utils import calculate_rouge |
|
|
|
|
| PRED = [ |
| 'Prosecutor: "No videos were used in the crash investigation" German papers say they saw a cell phone video of the' |
| ' final seconds on board Flight 9525. The Germanwings co-pilot says he had a "previous episode of severe' |
| " depression\" German airline confirms it knew of Andreas Lubitz's depression years before he took control.", |
| "The Palestinian Authority officially becomes the 123rd member of the International Criminal Court. The formal" |
| " accession was marked with a ceremony at The Hague, in the Netherlands. The Palestinians signed the ICC's" |
| " founding Rome Statute in January. Israel and the United States opposed the Palestinians' efforts to join the" |
| " body.", |
| "Amnesty International releases its annual report on the death penalty. The report catalogs the use of" |
| " state-sanctioned killing as a punitive measure across the globe. At least 607 people were executed around the" |
| " world in 2014, compared to 778 in 2013. The U.S. remains one of the worst offenders for imposing capital" |
| " punishment.", |
| ] |
|
|
| TGT = [ |
| 'Marseille prosecutor says "so far no videos were used in the crash investigation" despite media reports .' |
| ' Journalists at Bild and Paris Match are "very confident" the video clip is real, an editor says . Andreas Lubitz' |
| " had informed his Lufthansa training school of an episode of severe depression, airline says .", |
| "Membership gives the ICC jurisdiction over alleged crimes committed in Palestinian territories since last June ." |
| " Israel and the United States opposed the move, which could open the door to war crimes investigations against" |
| " Israelis .", |
| "Amnesty's annual death penalty report catalogs encouraging signs, but setbacks in numbers of those sentenced to" |
| " death . Organization claims that governments around the world are using the threat of terrorism to advance" |
| " executions . The number of executions worldwide has gone down by almost 22% compared with 2013, but death" |
| " sentences up by 28% .", |
| ] |
|
|
|
|
| def test_disaggregated_scores_are_determinstic(): |
| no_aggregation = calculate_rouge(PRED, TGT, bootstrap_aggregation=False, rouge_keys=["rouge2", "rougeL"]) |
| assert isinstance(no_aggregation, defaultdict) |
| no_aggregation_just_r2 = calculate_rouge(PRED, TGT, bootstrap_aggregation=False, rouge_keys=["rouge2"]) |
| assert ( |
| pd.DataFrame(no_aggregation["rouge2"]).fmeasure.mean() |
| == pd.DataFrame(no_aggregation_just_r2["rouge2"]).fmeasure.mean() |
| ) |
|
|
|
|
| def test_newline_cnn_improvement(): |
| k = "rougeLsum" |
| score = calculate_rouge(PRED, TGT, newline_sep=True, rouge_keys=[k])[k] |
| score_no_sep = calculate_rouge(PRED, TGT, newline_sep=False, rouge_keys=[k])[k] |
| assert score > score_no_sep |
|
|
|
|
| def test_newline_irrelevant_for_other_metrics(): |
| k = ["rouge1", "rouge2", "rougeL"] |
| score_sep = calculate_rouge(PRED, TGT, newline_sep=True, rouge_keys=k) |
| score_no_sep = calculate_rouge(PRED, TGT, newline_sep=False, rouge_keys=k) |
| assert score_sep == score_no_sep |
|
|
|
|
| def test_single_sent_scores_dont_depend_on_newline_sep(): |
| pred = [ |
| "Her older sister, Margot Frank, died in 1945, a month earlier than previously thought.", |
| 'Marseille prosecutor says "so far no videos were used in the crash investigation" despite media reports .', |
| ] |
| tgt = [ |
| "Margot Frank, died in 1945, a month earlier than previously thought.", |
| 'Prosecutor: "No videos were used in the crash investigation" German papers say they saw a cell phone video of' |
| " the final seconds on board Flight 9525.", |
| ] |
| assert calculate_rouge(pred, tgt, newline_sep=True) == calculate_rouge(pred, tgt, newline_sep=False) |
|
|
|
|
| def test_pegasus_newline(): |
| pred = [ |
| """" "a person who has such a video needs to immediately give it to the investigators," prosecutor says .<n> "it is a very disturbing scene," editor-in-chief of bild online tells "erin burnett: outfront" """ |
| ] |
| tgt = [ |
| """ Marseille prosecutor says "so far no videos were used in the crash investigation" despite media reports . Journalists at Bild and Paris Match are "very confident" the video clip is real, an editor says . Andreas Lubitz had informed his Lufthansa training school of an episode of severe depression, airline says .""" |
| ] |
|
|
| prev_score = calculate_rouge(pred, tgt, rouge_keys=["rougeLsum"], newline_sep=False)["rougeLsum"] |
| new_score = calculate_rouge(pred, tgt, rouge_keys=["rougeLsum"])["rougeLsum"] |
| assert new_score > prev_score |
|
|
|
|
| def test_rouge_cli(): |
| data_dir = Path("examples/seq2seq/test_data/wmt_en_ro") |
| metrics = calculate_rouge_path(data_dir.joinpath("test.source"), data_dir.joinpath("test.target")) |
| assert isinstance(metrics, dict) |
| metrics_default_dict = calculate_rouge_path( |
| data_dir.joinpath("test.source"), data_dir.joinpath("test.target"), bootstrap_aggregation=False |
| ) |
| assert isinstance(metrics_default_dict, defaultdict) |
|
|