Spaces:

facebook
/

bouquet

Running

App Files Files Community

David Dale committed on Nov 8, 2025

Commit

f7ac4d6

1 Parent(s): d970296

Add language difficulty comparison to the leaderboard

Browse files

Files changed (1) hide show

leaderboard.py +42 -0

leaderboard.py CHANGED Viewed

@@ -36,7 +36,12 @@ def leaderboard_tab():
     stats = pd.read_csv("data/benchmark_stats.tsv", sep="\t", quoting=csv.QUOTE_NONE)
     metrics = ['score_metricx_both', 'score_xcomet_both', 'score_CHRFpp', 'score_glotlid_ref']
     ALL = "ALL"
     lang_src2tgt = defaultdict(set)
     lang_tgt2src = defaultdict(set)
     langs_src = set()
@@ -98,6 +103,43 @@ def leaderboard_tab():
         gr_src_lang.input(fn=src2tgt, inputs=[gr_src_lang, gr_tgt_lang], outputs=gr_tgt_lang)
         gr_tgt_lang.input(fn=tgt2src, inputs=[gr_src_lang, gr_tgt_lang], outputs=gr_src_lang)
         gr.Markdown(LANGS_EXPLANATION)
         gr.Markdown(METRICS_EXPLANATION)
         gr.Markdown(SYSTEMS_EXPLANATION)

     stats = pd.read_csv("data/benchmark_stats.tsv", sep="\t", quoting=csv.QUOTE_NONE)
     metrics = ['score_metricx_both', 'score_xcomet_both', 'score_CHRFpp', 'score_glotlid_ref']
+    systems = sorted(set(stats["system"]))
     ALL = "ALL"
+    MEAN = "Average"
+    XX2EN = "Everything-into-English"
+    EN2XX = "English-into-Everything"
     lang_src2tgt = defaultdict(set)
     lang_tgt2src = defaultdict(set)
     langs_src = set()
         gr_src_lang.input(fn=src2tgt, inputs=[gr_src_lang, gr_tgt_lang], outputs=gr_tgt_lang)
         gr_tgt_lang.input(fn=tgt2src, inputs=[gr_src_lang, gr_tgt_lang], outputs=gr_src_lang)
+        gr.Markdown("## Languages difficulty")
+        gr_system = gr.Dropdown([MEAN] + systems, value=MEAN, label="Translation system")
+        gr_direction = gr.Dropdown([XX2EN, EN2XX], value=XX2EN, label="Translation direction")
+        gr_metric = gr.Dropdown(metrics, label="Quality metric", value="score_metricx_both")
+        bar_controls = [gr_system, gr_direction, gr_metric]
+        def get_hist(system, direction, metric):
+            # decide on the data to process
+            if direction == EN2XX:
+                direction_filter = stats['src_lang'].eq('eng_Latn')
+                lang_col = "tgt_lang"
+            else:
+                direction_filter = stats['tgt_lang'].eq('eng_Latn')
+                lang_col = "src_lang"
+            if system == MEAN:
+                system_filter = stats["system"].astype(bool)
+            else:
+                system_filter = stats['system'].eq(system)
+            subset = stats[system_filter & direction_filter]
+            # Compute the means and update the plot
+            means = subset.groupby(lang_col)[metric].mean().sort_values(
+                ascending=(metric=="score_metricx_both")
+            )
+            means = means.to_frame().reset_index()
+            return gr.update(
+                value=means, x=lang_col, y=metric, caption=metric, x_label_angle=-90,
+                height=300,
+                sort="y",
+            )
+        default_bar = get_hist(gr_system.value, gr_direction.value, gr_metric.value)
+        gr_barplot = gr.BarPlot(**default_bar)
+        for inp in bar_controls:
+            inp.change(fn=get_hist, inputs=bar_controls, outputs=gr_barplot)
         gr.Markdown(LANGS_EXPLANATION)
         gr.Markdown(METRICS_EXPLANATION)
         gr.Markdown(SYSTEMS_EXPLANATION)