app.py CHANGED
```diff
@@ -2193,7 +2193,6 @@ MRPC_FIVE_SHOT = get_data_mrpc(eval_mode="five_shot")
 # ======================================================================
 # ======================================================================
 
-# block = gr.Blocks(theme=gr.themes.Soft())
 
 theme = gr.themes.Soft().set(
     background_fill_primary='*secondary_50'
```
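This hunk drops a stale commented-out `gr.Blocks(theme=gr.themes.Soft())` line while keeping the customized theme object; per the context line of the next hunk, the page is actually built with a theme loaded from the Hub. A minimal sketch of both theming paths, using only names that appear in the diff:

```python
import gradio as gr

# Option 1: customize a built-in theme. '*secondary_50' is a token
# reference into the theme's color palette, not a literal color value.
theme = gr.themes.Soft().set(
    background_fill_primary='*secondary_50'
)
block = gr.Blocks(theme=theme)

# Option 2: load a shared theme from the Hugging Face Hub by name,
# as app.py does with 'rottenlittlecreature/Moon_Goblin'.
block = gr.Blocks(theme='rottenlittlecreature/Moon_Goblin')
```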
```diff
@@ -2204,13 +2203,13 @@ block = gr.Blocks(theme='rottenlittlecreature/Moon_Goblin')
 
 with block:
     gr.Markdown(f"""
-
+    ### SeaEval Leaderboard. To submit, refer to the <a href="https://seaeval.github.io/" target="_blank" style="text-decoration: underline">SeaEval Website</a>. Refer to the [SeaEval paper](https://arxiv.org/abs/2309.04766) for details on metrics, tasks, and models.
     - **Number of Datasets**: > 30
     - **Number of Languages**: > 8
     - **Number of Models**: {NUM_MODELS}
     - **Mode of Evaluation**: Zero-Shot, Five-Shot
 
-
+    ### Known Issues:
     - For base models, the output is not truncated when no EOS token is detected, which can affect evaluation, especially for length-aware metrics.
 
     ### The following table shows the performance of the models on the SeaEval benchmark.
```
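Note that the header block is a triple-quoted f-string, so `{NUM_MODELS}` is substituted when the page is built. A minimal sketch with a hypothetical value (app.py computes `NUM_MODELS` elsewhere):

```python
import gradio as gr

NUM_MODELS = 42  # hypothetical value for illustration only

with gr.Blocks() as demo:
    # The f-string is evaluated once at build time, so the rendered
    # Markdown shows the concrete count, not the placeholder.
    gr.Markdown(f"""
    - **Number of Models**: {NUM_MODELS}
    """)
```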
```diff
@@ -2220,11 +2219,7 @@ with block:
 
     """)
 
-
-
     with gr.Tabs():
-
-
         with gr.TabItem("Cross-Lingual Consistency"):
 
             # dataset 1: cross-mmlu
```
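For context, the tab layout relies on Gradio's nested context managers; a stripped-down sketch of the structure this hunk tidies, with component contents elided:

```python
with block:
    with gr.Tabs():  # container for the tab bar
        with gr.TabItem("Cross-Lingual Consistency"):  # one tab page
            # dataset 1: cross-mmlu (tables and controls would go here)
            pass
```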
````diff
@@ -3225,8 +3220,8 @@ with block:
     """)
 
 
-    gr.Markdown(
-
+    gr.Markdown(r"""
+    ### If our datasets and leaderboard are useful, please consider citing:
     ```bibtex
     @article{SeaEval,
       title={SeaEval for Multilingual Foundation Models: From Cross-Lingual Alignment to Cultural Reasoning},
````
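The switch to a raw, non-f triple-quoted string is what makes the citation block safe to render: inside an f-string like the header block above, the braces in `@article{SeaEval,` would be parsed as replacement fields, and the `r` prefix additionally keeps any backslashes literal. A minimal sketch (an indented code block stands in for the bibtex fence to avoid nesting):

```python
import gradio as gr

with gr.Blocks() as demo:
    # Raw plain string: braces pass through untouched and backslashes
    # stay literal, so the BibTeX entry renders verbatim.
    gr.Markdown(r"""
    ### If our datasets and leaderboard are useful, please consider citing:

        @article{SeaEval,
          title={SeaEval for Multilingual Foundation Models: From Cross-Lingual Alignment to Cultural Reasoning},
        }
    """)
```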
```diff
@@ -3237,25 +3232,6 @@ with block:
     """)
 
 
-# Running the functions on page load in addition to when the button is clicked
-# This is optional - if deactivated, the data loaded at build time is shown, as for the Overall tab
-"""
-block.load(get_mteb_data, inputs=[task_bitext_mining], outputs=data_bitext_mining)
-"""
-
-
-
-
-
 block.queue(max_size=10)
-block.launch(server_name="0.0.0.0", share=False)
-
-
-# Possible changes:
-# Could add graphs / other visual content
-# Could add verification marks
-
-# Sources:
-# https://huggingface.co/spaces/gradio/leaderboard
-# https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard
-# https://getemoji.com/
+# block.launch(server_name="0.0.0.0", share=False)
+block.launch(server_name="0.0.0.0", share=True)
```
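What survives the cleanup is the standard Gradio serving tail. A minimal annotated sketch, including the optional page-load refresh that the deleted comment described (the function and component names come from the deleted snippet and are assumed to exist in app.py):

```python
# Optional: re-run a loader on every page load instead of showing data
# computed at build time; names are from the deleted snippet.
# block.load(get_mteb_data, inputs=[task_bitext_mining], outputs=data_bitext_mining)

block.queue(max_size=10)    # stop accepting new events once 10 are queued
block.launch(
    server_name="0.0.0.0",  # bind all interfaces, as a Space container requires
    share=True,             # additionally request a temporary public share URL
)
```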