app.py CHANGED
```diff
@@ -2193,7 +2193,6 @@ MRPC_FIVE_SHOT = get_data_mrpc(eval_mode="five_shot")
 # ======================================================================
 # ======================================================================
 
-# block = gr.Blocks(theme=gr.themes.Soft())
 
 theme = gr.themes.Soft().set(
     background_fill_primary='*secondary_50'
```
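This hunk drops a stale commented-out `gr.Blocks(theme=gr.themes.Soft())` line while keeping the customized theme object; per the context line of the next hunk, the page is actually built with a theme loaded from the Hub. A minimal sketch of both theming paths, using only names that appear in the diff:

```python
import gradio as gr

# Option 1: customize a built-in theme. '*secondary_50' is a token
# reference into the theme's color palette, not a literal color value.
theme = gr.themes.Soft().set(
    background_fill_primary='*secondary_50'
)
block = gr.Blocks(theme=theme)

# Option 2: load a shared theme from the Hugging Face Hub by name,
# as app.py does with 'rottenlittlecreature/Moon_Goblin'.
block = gr.Blocks(theme='rottenlittlecreature/Moon_Goblin')
```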
```diff
@@ -2204,13 +2203,13 @@ block = gr.Blocks(theme='rottenlittlecreature/Moon_Goblin')
 
 with block:
     gr.Markdown(f"""
-
+    ### SeaEval Leaderboard. To submit, refer to the <a href="https://seaeval.github.io/" target="_blank" style="text-decoration: underline">SeaEval Website</a>. Refer to the [SeaEval paper](https://arxiv.org/abs/2309.04766) for details on metrics, tasks, and models.
     - **Number of Datasets**: > 30
     - **Number of Languages**: > 8
     - **Number of Models**: {NUM_MODELS}
     - **Mode of Evaluation**: Zero-Shot, Five-Shot
 
-
+    ### Known Issues:
     - For base models, the output is not truncated when no EOS token is detected, which can affect evaluation, especially for length-aware metrics.
 
     ### The following table shows the performance of the models on the SeaEval benchmark.
```
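Note that the header block is a triple-quoted f-string, so `{NUM_MODELS}` is substituted when the page is built. A minimal sketch with a hypothetical value (app.py computes `NUM_MODELS` elsewhere):

```python
import gradio as gr

NUM_MODELS = 42  # hypothetical value for illustration only

with gr.Blocks() as demo:
    # The f-string is evaluated once at build time, so the rendered
    # Markdown shows the concrete count, not the placeholder.
    gr.Markdown(f"""
    - **Number of Models**: {NUM_MODELS}
    """)
```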
```diff
@@ -2220,11 +2219,7 @@ with block:
 
     """)
 
-
-
     with gr.Tabs():
-
-
         with gr.TabItem("Cross-Lingual Consistency"):
 
             # dataset 1: cross-mmlu
```
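For context, the tab layout relies on Gradio's nested context managers; a stripped-down sketch of the structure this hunk tidies, with component contents elided:

```python
with block:
    with gr.Tabs():  # container for the tab bar
        with gr.TabItem("Cross-Lingual Consistency"):  # one tab page
            # dataset 1: cross-mmlu (tables and controls would go here)
            pass
```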
````diff
@@ -3225,8 +3220,8 @@ with block:
     """)
 
 
-    gr.Markdown(
-
+    gr.Markdown(r"""
+    ### If our datasets and leaderboard are useful, please consider citing:
     ```bibtex
     @article{SeaEval,
       title={SeaEval for Multilingual Foundation Models: From Cross-Lingual Alignment to Cultural Reasoning},
````
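The switch to a raw, non-f triple-quoted string is what makes the citation block safe to render: inside an f-string like the header block above, the braces in `@article{SeaEval,` would be parsed as replacement fields, and the `r` prefix additionally keeps any backslashes literal. A minimal sketch (an indented code block stands in for the bibtex fence to avoid nesting):

```python
import gradio as gr

with gr.Blocks() as demo:
    # Raw plain string: braces pass through untouched and backslashes
    # stay literal, so the BibTeX entry renders verbatim.
    gr.Markdown(r"""
    ### If our datasets and leaderboard are useful, please consider citing:

        @article{SeaEval,
          title={SeaEval for Multilingual Foundation Models: From Cross-Lingual Alignment to Cultural Reasoning},
        }
    """)
```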
```diff
@@ -3237,25 +3232,6 @@ with block:
     """)
 
 
-# Running the functions on page load in addition to when the button is clicked
-# This is optional - if deactivated, the data loaded at build time is shown, as for the Overall tab
-"""
-block.load(get_mteb_data, inputs=[task_bitext_mining], outputs=data_bitext_mining)
-"""
-
-
-
-
-
 block.queue(max_size=10)
-block.launch(server_name="0.0.0.0", share=False)
-
-
-# Possible changes:
-# Could add graphs / other visual content
-# Could add verification marks
-
-# Sources:
-# https://huggingface.co/spaces/gradio/leaderboard
-# https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard
-# https://getemoji.com/
+# block.launch(server_name="0.0.0.0", share=False)
+block.launch(server_name="0.0.0.0", share=True)
```
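What survives the cleanup is the standard Gradio serving tail. A minimal annotated sketch, including the optional page-load refresh that the deleted comment described (the function and component names come from the deleted snippet and are assumed to exist in app.py):

```python
# Optional: re-run a loader on every page load instead of showing data
# computed at build time; names are from the deleted snippet.
# block.load(get_mteb_data, inputs=[task_bitext_mining], outputs=data_bitext_mining)

block.queue(max_size=10)    # stop accepting new events once 10 are queued
block.launch(
    server_name="0.0.0.0",  # bind all interfaces, as a Space container requires
    share=True,             # additionally request a temporary public share URL
)
```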