Spaces:
Running
Running
David Dale committed on
Commit ·
f7ac4d6
1
Parent(s): d970296
Add language difficulty comparison to the leaderboard
Browse files- leaderboard.py +42 -0
leaderboard.py
CHANGED
|
@@ -36,7 +36,12 @@ def leaderboard_tab():
|
|
| 36 |
stats = pd.read_csv("data/benchmark_stats.tsv", sep="\t", quoting=csv.QUOTE_NONE)
|
| 37 |
|
| 38 |
metrics = ['score_metricx_both', 'score_xcomet_both', 'score_CHRFpp', 'score_glotlid_ref']
|
|
|
|
| 39 |
ALL = "ALL"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
lang_src2tgt = defaultdict(set)
|
| 41 |
lang_tgt2src = defaultdict(set)
|
| 42 |
langs_src = set()
|
|
@@ -98,6 +103,43 @@ def leaderboard_tab():
|
|
| 98 |
gr_src_lang.input(fn=src2tgt, inputs=[gr_src_lang, gr_tgt_lang], outputs=gr_tgt_lang)
|
| 99 |
gr_tgt_lang.input(fn=tgt2src, inputs=[gr_src_lang, gr_tgt_lang], outputs=gr_src_lang)
|
| 100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
gr.Markdown(LANGS_EXPLANATION)
|
| 102 |
gr.Markdown(METRICS_EXPLANATION)
|
| 103 |
gr.Markdown(SYSTEMS_EXPLANATION)
|
|
|
|
| 36 |
stats = pd.read_csv("data/benchmark_stats.tsv", sep="\t", quoting=csv.QUOTE_NONE)
|
| 37 |
|
| 38 |
metrics = ['score_metricx_both', 'score_xcomet_both', 'score_CHRFpp', 'score_glotlid_ref']
|
| 39 |
+
systems = sorted(set(stats["system"]))
|
| 40 |
ALL = "ALL"
|
| 41 |
+
MEAN = "Average"
|
| 42 |
+
XX2EN = "Everything-into-English"
|
| 43 |
+
EN2XX = "English-into-Everything"
|
| 44 |
+
|
| 45 |
lang_src2tgt = defaultdict(set)
|
| 46 |
lang_tgt2src = defaultdict(set)
|
| 47 |
langs_src = set()
|
|
|
|
| 103 |
gr_src_lang.input(fn=src2tgt, inputs=[gr_src_lang, gr_tgt_lang], outputs=gr_tgt_lang)
|
| 104 |
gr_tgt_lang.input(fn=tgt2src, inputs=[gr_src_lang, gr_tgt_lang], outputs=gr_src_lang)
|
| 105 |
|
| 106 |
+
gr.Markdown("## Languages difficulty")
|
| 107 |
+
gr_system = gr.Dropdown([MEAN] + systems, value=MEAN, label="Translation system")
|
| 108 |
+
gr_direction = gr.Dropdown([XX2EN, EN2XX], value=XX2EN, label="Translation direction")
|
| 109 |
+
gr_metric = gr.Dropdown(metrics, label="Quality metric", value="score_metricx_both")
|
| 110 |
+
bar_controls = [gr_system, gr_direction, gr_metric]
|
| 111 |
+
|
| 112 |
+
def get_hist(system, direction, metric):
    """Build the bar-plot update with the mean of ``metric`` per language.

    Args:
        system: Either ``MEAN`` (rows are not restricted to one system) or a
            value matched against the ``system`` column of ``stats``.
        direction: ``EN2XX`` keeps rows whose ``src_lang`` is ``eng_Latn``
            and groups by ``tgt_lang``; any other value keeps rows whose
            ``tgt_lang`` is ``eng_Latn`` and groups by ``src_lang``.
        metric: Name of the ``stats`` column to average.

    Returns:
        The result of ``gr.update(...)`` carrying the aggregated frame and
        the plot settings.
    """
    # Choose which side is pinned to English and which column names the
    # language on the other side.
    if direction != EN2XX:
        lang_col = "src_lang"
        direction_filter = stats['tgt_lang'].eq('eng_Latn')
    else:
        lang_col = "tgt_lang"
        direction_filter = stats['src_lang'].eq('eng_Latn')

    # For MEAN the mask is the truthiness of the system name; otherwise
    # only rows of the requested system are kept.
    system_filter = (
        stats["system"].astype(bool)
        if system == MEAN
        else stats['system'].eq(system)
    )
    subset = stats[system_filter & direction_filter]

    # "score_metricx_both" is the only metric that is sorted ascending.
    sort_ascending = (metric == "score_metricx_both")
    per_language = subset.groupby(lang_col)[metric].mean()
    per_language = per_language.sort_values(ascending=sort_ascending)
    means = per_language.to_frame().reset_index()

    plot_settings = dict(
        value=means,
        x=lang_col,
        y=metric,
        caption=metric,
        x_label_angle=-90,
        height=300,
        sort="y",
    )
    return gr.update(**plot_settings)
|
| 136 |
+
|
| 137 |
+
default_bar = get_hist(gr_system.value, gr_direction.value, gr_metric.value)
|
| 138 |
+
gr_barplot = gr.BarPlot(**default_bar)
|
| 139 |
+
|
| 140 |
+
for inp in bar_controls:
|
| 141 |
+
inp.change(fn=get_hist, inputs=bar_controls, outputs=gr_barplot)
|
| 142 |
+
|
| 143 |
gr.Markdown(LANGS_EXPLANATION)
|
| 144 |
gr.Markdown(METRICS_EXPLANATION)
|
| 145 |
gr.Markdown(SYSTEMS_EXPLANATION)
|