David Dale committed on
Commit
f7ac4d6
·
1 Parent(s): d970296

Add language difficulty comparison to the leaderboard

Browse files
Files changed (1) hide show
  1. leaderboard.py +42 -0
leaderboard.py CHANGED
@@ -36,7 +36,12 @@ def leaderboard_tab():
36
  stats = pd.read_csv("data/benchmark_stats.tsv", sep="\t", quoting=csv.QUOTE_NONE)
37
 
38
  metrics = ['score_metricx_both', 'score_xcomet_both', 'score_CHRFpp', 'score_glotlid_ref']
 
39
  ALL = "ALL"
 
 
 
 
40
  lang_src2tgt = defaultdict(set)
41
  lang_tgt2src = defaultdict(set)
42
  langs_src = set()
@@ -98,6 +103,43 @@ def leaderboard_tab():
98
  gr_src_lang.input(fn=src2tgt, inputs=[gr_src_lang, gr_tgt_lang], outputs=gr_tgt_lang)
99
  gr_tgt_lang.input(fn=tgt2src, inputs=[gr_src_lang, gr_tgt_lang], outputs=gr_src_lang)
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  gr.Markdown(LANGS_EXPLANATION)
102
  gr.Markdown(METRICS_EXPLANATION)
103
  gr.Markdown(SYSTEMS_EXPLANATION)
 
36
  stats = pd.read_csv("data/benchmark_stats.tsv", sep="\t", quoting=csv.QUOTE_NONE)
37
 
38
  metrics = ['score_metricx_both', 'score_xcomet_both', 'score_CHRFpp', 'score_glotlid_ref']
39
+ systems = sorted(set(stats["system"]))
40
  ALL = "ALL"
41
+ MEAN = "Average"
42
+ XX2EN = "Everything-into-English"
43
+ EN2XX = "English-into-Everything"
44
+
45
  lang_src2tgt = defaultdict(set)
46
  lang_tgt2src = defaultdict(set)
47
  langs_src = set()
 
103
  gr_src_lang.input(fn=src2tgt, inputs=[gr_src_lang, gr_tgt_lang], outputs=gr_tgt_lang)
104
  gr_tgt_lang.input(fn=tgt2src, inputs=[gr_src_lang, gr_tgt_lang], outputs=gr_src_lang)
105
 
106
+ gr.Markdown("## Languages difficulty")
107
+ gr_system = gr.Dropdown([MEAN] + systems, value=MEAN, label="Translation system")
108
+ gr_direction = gr.Dropdown([XX2EN, EN2XX], value=XX2EN, label="Translation direction")
109
+ gr_metric = gr.Dropdown(metrics, label="Quality metric", value="score_metricx_both")
110
+ bar_controls = [gr_system, gr_direction, gr_metric]
111
+
112
def get_hist(system, direction, metric):
    """Build the bar-plot update showing the mean of `metric` per language.

    Args:
        system: either the `MEAN` option or a value of the "system" column
            of `stats`.
        direction: `EN2XX` selects rows whose source language is
            "eng_Latn" and groups them by target language; any other value
            selects rows whose target language is "eng_Latn" and groups
            them by source language.
        metric: name of the `stats` column to average.

    Returns:
        The result of `gr.update(...)` carrying the per-language means
        together with the bar-plot display settings.
    """
    # The English side is fixed; the opposite column holds the language
    # that the bars are grouped by.
    if direction == EN2XX:
        english_col, lang_col = "src_lang", "tgt_lang"
    else:
        english_col, lang_col = "tgt_lang", "src_lang"
    in_direction = stats[english_col].eq("eng_Latn")

    # For the MEAN option the mask is the truthiness of the system name
    # (presumably true for every row, i.e. no filtering by system).
    from_system = (
        stats["system"].astype(bool)
        if system == MEAN
        else stats["system"].eq(system)
    )
    selected = stats[from_system & in_direction]

    # Only "score_metricx_both" is ordered ascending; every other metric
    # is ordered descending.
    ascending_order = metric == "score_metricx_both"
    per_language = selected.groupby(lang_col)[metric].mean()
    per_language = per_language.sort_values(ascending=ascending_order)
    table = per_language.to_frame().reset_index()

    return gr.update(
        value=table,
        x=lang_col,
        y=metric,
        caption=metric,
        x_label_angle=-90,
        height=300,
        sort="y",
    )
136
+
137
+ default_bar = get_hist(gr_system.value, gr_direction.value, gr_metric.value)
138
+ gr_barplot = gr.BarPlot(**default_bar)
139
+
140
+ for inp in bar_controls:
141
+ inp.change(fn=get_hist, inputs=bar_controls, outputs=gr_barplot)
142
+
143
  gr.Markdown(LANGS_EXPLANATION)
144
  gr.Markdown(METRICS_EXPLANATION)
145
  gr.Markdown(SYSTEMS_EXPLANATION)