Spaces:
Running
Running
new
Browse files
app.py
CHANGED
|
@@ -2204,12 +2204,9 @@ block = gr.Blocks(theme='rottenlittlecreature/Moon_Goblin')
|
|
| 2204 |
with block:
|
| 2205 |
gr.Markdown(f"""
|
| 2206 |
### SeaEval Leaderboard. To submit, refer to the <a href="https://seaeval.github.io/" target="_blank" style="text-decoration: underline">SeaEval Website</a>. Refer to the [SeaEval paper](https://arxiv.org/abs/2309.04766) for details on metrics, tasks and models.
|
| 2207 |
-
- **Number of Datasets**: > 30
|
| 2208 |
-
- **Number of Languages**: > 8
|
| 2209 |
-
- **Number of Models**: {NUM_MODELS}
|
| 2210 |
-
- **Mode of Evaluation**: Zero-Shot, Five-Shot
|
| 2211 |
|
| 2212 |
-
###
|
| 2213 |
- For base models, the output of base model is not truncated as no EOS detected. Evaluation could be affected, especially with length-aware metrics.
|
| 2214 |
|
| 2215 |
### The following table shows the performance of the models on the SeaEval benchmark.
|
|
@@ -2261,7 +2258,6 @@ with block:
|
|
| 2261 |
with gr.Row():
|
| 2262 |
gr.Markdown("""
|
| 2263 |
**Cross-XQUAD Leaderboard** ๐ฎ
|
| 2264 |
-
|
| 2265 |
- **Metric:** Cross-Lingual Consistency, Accuracy, AC3
|
| 2266 |
- **Languages:** English, Chinese, Spanish, Vietnamese
|
| 2267 |
""")
|
|
@@ -2305,7 +2301,6 @@ with block:
|
|
| 2305 |
with gr.Row():
|
| 2306 |
gr.Markdown("""
|
| 2307 |
**Cross-MMLU Leaderboard** ๐ฎ
|
| 2308 |
-
|
| 2309 |
- **Metric:** Cross-Lingual Consistency, Accuracy, AC3
|
| 2310 |
- **Languages:** English, Chinese, Malay, Indonesian, Spanish, Vietnamese, Filipino
|
| 2311 |
""")
|
|
@@ -2347,7 +2342,6 @@ with block:
|
|
| 2347 |
with gr.Row():
|
| 2348 |
gr.Markdown("""
|
| 2349 |
**Cross-LogiQA Leaderboard** ๐ฎ
|
| 2350 |
-
|
| 2351 |
- **Metric:** Cross-Lingual Consistency, Accuracy, AC3
|
| 2352 |
- **Languages:** English, Chinese, Malay, Indonesian, Spanish, Vietnamese, Filipino
|
| 2353 |
""")
|
|
@@ -2377,7 +2371,6 @@ with block:
|
|
| 2377 |
with gr.Row():
|
| 2378 |
gr.Markdown("""
|
| 2379 |
**SG_EVAL Leaderboard** ๐ฎ
|
| 2380 |
-
|
| 2381 |
- **Metric:** Accuracy
|
| 2382 |
- **Languages:** English
|
| 2383 |
""")
|
|
@@ -2406,7 +2399,6 @@ with block:
|
|
| 2406 |
with gr.Row():
|
| 2407 |
gr.Markdown("""
|
| 2408 |
**US_EVAL Leaderboard** ๐ฎ
|
| 2409 |
-
|
| 2410 |
- **Metric:** Accuracy
|
| 2411 |
- **Languages:** English
|
| 2412 |
""")
|
|
@@ -2434,7 +2426,6 @@ with block:
|
|
| 2434 |
with gr.Row():
|
| 2435 |
gr.Markdown("""
|
| 2436 |
**CN_EVAL Leaderboard** ๐ฎ
|
| 2437 |
-
|
| 2438 |
- **Metric:** Accuracy
|
| 2439 |
- **Languages:** Chinese
|
| 2440 |
""")
|
|
@@ -2461,7 +2452,6 @@ with block:
|
|
| 2461 |
with gr.Row():
|
| 2462 |
gr.Markdown("""
|
| 2463 |
**PH_EVAL Leaderboard** ๐ฎ
|
| 2464 |
-
|
| 2465 |
- **Metric:** Accuracy
|
| 2466 |
- **Languages:** English
|
| 2467 |
""")
|
|
@@ -2488,7 +2478,6 @@ with block:
|
|
| 2488 |
with gr.Row():
|
| 2489 |
gr.Markdown("""
|
| 2490 |
**SING2ENG Leaderboard** ๐ฎ
|
| 2491 |
-
|
| 2492 |
- **Metric:** BLEU Avg.
|
| 2493 |
- **Languages:** English
|
| 2494 |
""")
|
|
@@ -2518,7 +2507,6 @@ with block:
|
|
| 2518 |
with gr.Row():
|
| 2519 |
gr.Markdown("""
|
| 2520 |
**MMLU Leaderboard** ๐ฎ
|
| 2521 |
-
|
| 2522 |
- **Metric:** Accuracy.
|
| 2523 |
- **Languages:** English
|
| 2524 |
""")
|
|
@@ -2546,7 +2534,6 @@ with block:
|
|
| 2546 |
with gr.Row():
|
| 2547 |
gr.Markdown("""
|
| 2548 |
**MMLU Full Leaderboard** ๐ฎ
|
| 2549 |
-
|
| 2550 |
- **Metric:** Accuracy.
|
| 2551 |
- **Languages:** English
|
| 2552 |
""")
|
|
@@ -2574,7 +2561,6 @@ with block:
|
|
| 2574 |
with gr.Row():
|
| 2575 |
gr.Markdown("""
|
| 2576 |
**C_EVAL Leaderboard** ๐ฎ
|
| 2577 |
-
|
| 2578 |
- **Metric:** Accuracy.
|
| 2579 |
- **Languages:** Chinese
|
| 2580 |
""")
|
|
@@ -2602,7 +2588,6 @@ with block:
|
|
| 2602 |
with gr.Row():
|
| 2603 |
gr.Markdown("""
|
| 2604 |
**C_EVAL Full Leaderboard** ๐ฎ
|
| 2605 |
-
|
| 2606 |
- **Metric:** Accuracy.
|
| 2607 |
- **Languages:** Chinese
|
| 2608 |
""")
|
|
@@ -2629,7 +2614,6 @@ with block:
|
|
| 2629 |
with gr.Row():
|
| 2630 |
gr.Markdown("""
|
| 2631 |
**CMMLU Leaderboard** ๐ฎ
|
| 2632 |
-
|
| 2633 |
- **Metric:** Accuracy.
|
| 2634 |
- **Languages:** Chinese
|
| 2635 |
""")
|
|
@@ -2657,7 +2641,6 @@ with block:
|
|
| 2657 |
with gr.Row():
|
| 2658 |
gr.Markdown("""
|
| 2659 |
**CMMLU Full Leaderboard** ๐ฎ
|
| 2660 |
-
|
| 2661 |
- **Metric:** Accuracy.
|
| 2662 |
- **Languages:** Chinese
|
| 2663 |
""")
|
|
@@ -2684,7 +2667,6 @@ with block:
|
|
| 2684 |
with gr.Row():
|
| 2685 |
gr.Markdown("""
|
| 2686 |
**ZBench Leaderboard** ๐ฎ
|
| 2687 |
-
|
| 2688 |
- **Metric:** Accuracy.
|
| 2689 |
- **Languages:** Chinese
|
| 2690 |
""")
|
|
@@ -2710,7 +2692,6 @@ with block:
|
|
| 2710 |
with gr.Row():
|
| 2711 |
gr.Markdown("""
|
| 2712 |
**IndoMMLU Leaderboard** ๐ฎ
|
| 2713 |
-
|
| 2714 |
- **Metric:** Accuracy.
|
| 2715 |
- **Languages:** Bahasa Indonesian
|
| 2716 |
""")
|
|
@@ -2740,7 +2721,6 @@ with block:
|
|
| 2740 |
with gr.Row():
|
| 2741 |
gr.Markdown("""
|
| 2742 |
**flores_ind2eng Leaderboard** ๐ฎ
|
| 2743 |
-
|
| 2744 |
- **Metric:** BLEU Avg.
|
| 2745 |
- **Languages:** English
|
| 2746 |
""")
|
|
@@ -2767,7 +2747,6 @@ with block:
|
|
| 2767 |
with gr.Row():
|
| 2768 |
gr.Markdown("""
|
| 2769 |
**flores_vie2eng Leaderboard** ๐ฎ
|
| 2770 |
-
|
| 2771 |
- **Metric:** BLEU Avg.
|
| 2772 |
- **Languages:** English
|
| 2773 |
""")
|
|
@@ -2795,7 +2774,6 @@ with block:
|
|
| 2795 |
with gr.Row():
|
| 2796 |
gr.Markdown("""
|
| 2797 |
**flores_zho2eng Leaderboard** ๐ฎ
|
| 2798 |
-
|
| 2799 |
- **Metric:** BLEU Avg.
|
| 2800 |
- **Languages:** English
|
| 2801 |
""")
|
|
@@ -2822,7 +2800,6 @@ with block:
|
|
| 2822 |
with gr.Row():
|
| 2823 |
gr.Markdown("""
|
| 2824 |
**flores_zsm2eng Leaderboard** ๐ฎ
|
| 2825 |
-
|
| 2826 |
- **Metric:** BLEU Avg.
|
| 2827 |
- **Languages:** English
|
| 2828 |
""")
|
|
@@ -2850,8 +2827,7 @@ with block:
|
|
| 2850 |
)
|
| 2851 |
with gr.Row():
|
| 2852 |
gr.Markdown("""
|
| 2853 |
-
**
|
| 2854 |
-
|
| 2855 |
- **Metric:** Accuracy.
|
| 2856 |
- **Languages:** Indonesian
|
| 2857 |
""")
|
|
@@ -2878,7 +2854,6 @@ with block:
|
|
| 2878 |
with gr.Row():
|
| 2879 |
gr.Markdown("""
|
| 2880 |
**SST2 Leaderboard** ๐ฎ
|
| 2881 |
-
|
| 2882 |
- **Metric:** Accuracy.
|
| 2883 |
- **Languages:** English
|
| 2884 |
""")
|
|
@@ -2909,7 +2884,6 @@ with block:
|
|
| 2909 |
with gr.Row():
|
| 2910 |
gr.Markdown("""
|
| 2911 |
**DREAM Leaderboard** ๐ฎ
|
| 2912 |
-
|
| 2913 |
- **Metric:** Accuracy.
|
| 2914 |
- **Languages:** English
|
| 2915 |
""")
|
|
@@ -2935,7 +2909,6 @@ with block:
|
|
| 2935 |
with gr.Row():
|
| 2936 |
gr.Markdown("""
|
| 2937 |
**SAMSum Leaderboard** ๐ฎ
|
| 2938 |
-
|
| 2939 |
- **Metric:** ROUGE.
|
| 2940 |
- **Languages:** English
|
| 2941 |
""")
|
|
@@ -2962,7 +2935,6 @@ with block:
|
|
| 2962 |
with gr.Row():
|
| 2963 |
gr.Markdown("""
|
| 2964 |
**DialogSum Leaderboard** ๐ฎ
|
| 2965 |
-
|
| 2966 |
- **Metric:** ROUGE.
|
| 2967 |
- **Languages:** English
|
| 2968 |
""")
|
|
@@ -2993,7 +2965,6 @@ with block:
|
|
| 2993 |
with gr.Row():
|
| 2994 |
gr.Markdown("""
|
| 2995 |
**OCNLI Leaderboard** ๐ฎ
|
| 2996 |
-
|
| 2997 |
- **Metric:** Accuracy.
|
| 2998 |
- **Languages:** Chinese
|
| 2999 |
""")
|
|
@@ -3020,7 +2991,6 @@ with block:
|
|
| 3020 |
with gr.Row():
|
| 3021 |
gr.Markdown("""
|
| 3022 |
**C3 Leaderboard** ๐ฎ
|
| 3023 |
-
|
| 3024 |
- **Metric:** Accuracy.
|
| 3025 |
- **Languages:** Chinese
|
| 3026 |
""")
|
|
@@ -3049,7 +3019,6 @@ with block:
|
|
| 3049 |
with gr.Row():
|
| 3050 |
gr.Markdown("""
|
| 3051 |
**COLA Leaderboard** ๐ฎ
|
| 3052 |
-
|
| 3053 |
- **Metric:** Accuracy.
|
| 3054 |
- **Languages:** English
|
| 3055 |
""")
|
|
@@ -3076,7 +3045,6 @@ with block:
|
|
| 3076 |
with gr.Row():
|
| 3077 |
gr.Markdown("""
|
| 3078 |
**QQP Leaderboard** ๐ฎ
|
| 3079 |
-
|
| 3080 |
- **Metric:** Accuracy.
|
| 3081 |
- **Languages:** English
|
| 3082 |
""")
|
|
@@ -3103,7 +3071,6 @@ with block:
|
|
| 3103 |
with gr.Row():
|
| 3104 |
gr.Markdown("""
|
| 3105 |
**MNLI Leaderboard** ๐ฎ
|
| 3106 |
-
|
| 3107 |
- **Metric:** Accuracy.
|
| 3108 |
- **Languages:** English
|
| 3109 |
""")
|
|
@@ -3130,7 +3097,6 @@ with block:
|
|
| 3130 |
with gr.Row():
|
| 3131 |
gr.Markdown("""
|
| 3132 |
**QNLI Leaderboard** ๐ฎ
|
| 3133 |
-
|
| 3134 |
- **Metric:** Accuracy.
|
| 3135 |
- **Languages:** English
|
| 3136 |
""")
|
|
@@ -3158,7 +3124,6 @@ with block:
|
|
| 3158 |
with gr.Row():
|
| 3159 |
gr.Markdown("""
|
| 3160 |
**WNLI Leaderboard** ๐ฎ
|
| 3161 |
-
|
| 3162 |
- **Metric:** Accuracy.
|
| 3163 |
- **Languages:** English
|
| 3164 |
""")
|
|
@@ -3186,13 +3151,10 @@ with block:
|
|
| 3186 |
with gr.Row():
|
| 3187 |
gr.Markdown("""
|
| 3188 |
**RTE Leaderboard** ๐ฎ
|
| 3189 |
-
|
| 3190 |
- **Metric:** Accuracy.
|
| 3191 |
- **Languages:** English
|
| 3192 |
""")
|
| 3193 |
|
| 3194 |
-
|
| 3195 |
-
|
| 3196 |
# dataset
|
| 3197 |
with gr.TabItem("MRPC"):
|
| 3198 |
with gr.TabItem("Zero Shot"):
|
|
@@ -3214,12 +3176,10 @@ with block:
|
|
| 3214 |
with gr.Row():
|
| 3215 |
gr.Markdown("""
|
| 3216 |
**MRPC Leaderboard** ๐ฎ
|
| 3217 |
-
|
| 3218 |
- **Metric:** Accuracy.
|
| 3219 |
- **Languages:** English
|
| 3220 |
""")
|
| 3221 |
|
| 3222 |
-
|
| 3223 |
gr.Markdown(r"""
|
| 3224 |
### If our datasets and leaderboard are useful, please consider cite:
|
| 3225 |
```bibtex
|
|
|
|
| 2204 |
with block:
|
| 2205 |
gr.Markdown(f"""
|
| 2206 |
### SeaEval Leaderboard. To submit, refer to the <a href="https://seaeval.github.io/" target="_blank" style="text-decoration: underline">SeaEval Website</a>. Refer to the [SeaEval paper](https://arxiv.org/abs/2309.04766) for details on metrics, tasks and models.
|
| 2207 |
+
- **Number of Datasets**: > 30, **Number of Languages**: > 8, **Number of Models**: {NUM_MODELS}, **Mode of Evaluation**: Zero-Shot, Five-Shot
|
|
|
|
|
|
|
|
|
|
| 2208 |
|
| 2209 |
+
### Possible Issues:
|
| 2210 |
- For base models, the output of base model is not truncated as no EOS detected. Evaluation could be affected, especially with length-aware metrics.
|
| 2211 |
|
| 2212 |
### The following table shows the performance of the models on the SeaEval benchmark.
|
|
|
|
| 2258 |
with gr.Row():
|
| 2259 |
gr.Markdown("""
|
| 2260 |
**Cross-XQUAD Leaderboard** ๐ฎ
|
|
|
|
| 2261 |
- **Metric:** Cross-Lingual Consistency, Accuracy, AC3
|
| 2262 |
- **Languages:** English, Chinese, Spanish, Vietnamese
|
| 2263 |
""")
|
|
|
|
| 2301 |
with gr.Row():
|
| 2302 |
gr.Markdown("""
|
| 2303 |
**Cross-MMLU Leaderboard** ๐ฎ
|
|
|
|
| 2304 |
- **Metric:** Cross-Lingual Consistency, Accuracy, AC3
|
| 2305 |
- **Languages:** English, Chinese, Malay, Indonesian, Spanish, Vietnamese, Filipino
|
| 2306 |
""")
|
|
|
|
| 2342 |
with gr.Row():
|
| 2343 |
gr.Markdown("""
|
| 2344 |
**Cross-LogiQA Leaderboard** ๐ฎ
|
|
|
|
| 2345 |
- **Metric:** Cross-Lingual Consistency, Accuracy, AC3
|
| 2346 |
- **Languages:** English, Chinese, Malay, Indonesian, Spanish, Vietnamese, Filipino
|
| 2347 |
""")
|
|
|
|
| 2371 |
with gr.Row():
|
| 2372 |
gr.Markdown("""
|
| 2373 |
**SG_EVAL Leaderboard** ๐ฎ
|
|
|
|
| 2374 |
- **Metric:** Accuracy
|
| 2375 |
- **Languages:** English
|
| 2376 |
""")
|
|
|
|
| 2399 |
with gr.Row():
|
| 2400 |
gr.Markdown("""
|
| 2401 |
**US_EVAL Leaderboard** ๐ฎ
|
|
|
|
| 2402 |
- **Metric:** Accuracy
|
| 2403 |
- **Languages:** English
|
| 2404 |
""")
|
|
|
|
| 2426 |
with gr.Row():
|
| 2427 |
gr.Markdown("""
|
| 2428 |
**CN_EVAL Leaderboard** ๐ฎ
|
|
|
|
| 2429 |
- **Metric:** Accuracy
|
| 2430 |
- **Languages:** Chinese
|
| 2431 |
""")
|
|
|
|
| 2452 |
with gr.Row():
|
| 2453 |
gr.Markdown("""
|
| 2454 |
**PH_EVAL Leaderboard** ๐ฎ
|
|
|
|
| 2455 |
- **Metric:** Accuracy
|
| 2456 |
- **Languages:** English
|
| 2457 |
""")
|
|
|
|
| 2478 |
with gr.Row():
|
| 2479 |
gr.Markdown("""
|
| 2480 |
**SING2ENG Leaderboard** ๐ฎ
|
|
|
|
| 2481 |
- **Metric:** BLEU Avg.
|
| 2482 |
- **Languages:** English
|
| 2483 |
""")
|
|
|
|
| 2507 |
with gr.Row():
|
| 2508 |
gr.Markdown("""
|
| 2509 |
**MMLU Leaderboard** ๐ฎ
|
|
|
|
| 2510 |
- **Metric:** Accuracy.
|
| 2511 |
- **Languages:** English
|
| 2512 |
""")
|
|
|
|
| 2534 |
with gr.Row():
|
| 2535 |
gr.Markdown("""
|
| 2536 |
**MMLU Full Leaderboard** ๐ฎ
|
|
|
|
| 2537 |
- **Metric:** Accuracy.
|
| 2538 |
- **Languages:** English
|
| 2539 |
""")
|
|
|
|
| 2561 |
with gr.Row():
|
| 2562 |
gr.Markdown("""
|
| 2563 |
**C_EVAL Leaderboard** ๐ฎ
|
|
|
|
| 2564 |
- **Metric:** Accuracy.
|
| 2565 |
- **Languages:** Chinese
|
| 2566 |
""")
|
|
|
|
| 2588 |
with gr.Row():
|
| 2589 |
gr.Markdown("""
|
| 2590 |
**C_EVAL Full Leaderboard** ๐ฎ
|
|
|
|
| 2591 |
- **Metric:** Accuracy.
|
| 2592 |
- **Languages:** Chinese
|
| 2593 |
""")
|
|
|
|
| 2614 |
with gr.Row():
|
| 2615 |
gr.Markdown("""
|
| 2616 |
**CMMLU Leaderboard** ๐ฎ
|
|
|
|
| 2617 |
- **Metric:** Accuracy.
|
| 2618 |
- **Languages:** Chinese
|
| 2619 |
""")
|
|
|
|
| 2641 |
with gr.Row():
|
| 2642 |
gr.Markdown("""
|
| 2643 |
**CMMLU Full Leaderboard** ๐ฎ
|
|
|
|
| 2644 |
- **Metric:** Accuracy.
|
| 2645 |
- **Languages:** Chinese
|
| 2646 |
""")
|
|
|
|
| 2667 |
with gr.Row():
|
| 2668 |
gr.Markdown("""
|
| 2669 |
**ZBench Leaderboard** ๐ฎ
|
|
|
|
| 2670 |
- **Metric:** Accuracy.
|
| 2671 |
- **Languages:** Chinese
|
| 2672 |
""")
|
|
|
|
| 2692 |
with gr.Row():
|
| 2693 |
gr.Markdown("""
|
| 2694 |
**IndoMMLU Leaderboard** ๐ฎ
|
|
|
|
| 2695 |
- **Metric:** Accuracy.
|
| 2696 |
- **Languages:** Bahasa Indonesian
|
| 2697 |
""")
|
|
|
|
| 2721 |
with gr.Row():
|
| 2722 |
gr.Markdown("""
|
| 2723 |
**flores_ind2eng Leaderboard** ๐ฎ
|
|
|
|
| 2724 |
- **Metric:** BLEU Avg.
|
| 2725 |
- **Languages:** English
|
| 2726 |
""")
|
|
|
|
| 2747 |
with gr.Row():
|
| 2748 |
gr.Markdown("""
|
| 2749 |
**flores_vie2eng Leaderboard** ๐ฎ
|
|
|
|
| 2750 |
- **Metric:** BLEU Avg.
|
| 2751 |
- **Languages:** English
|
| 2752 |
""")
|
|
|
|
| 2774 |
with gr.Row():
|
| 2775 |
gr.Markdown("""
|
| 2776 |
**flores_zho2eng Leaderboard** ๐ฎ
|
|
|
|
| 2777 |
- **Metric:** BLEU Avg.
|
| 2778 |
- **Languages:** English
|
| 2779 |
""")
|
|
|
|
| 2800 |
with gr.Row():
|
| 2801 |
gr.Markdown("""
|
| 2802 |
**flores_zsm2eng Leaderboard** ๐ฎ
|
|
|
|
| 2803 |
- **Metric:** BLEU Avg.
|
| 2804 |
- **Languages:** English
|
| 2805 |
""")
|
|
|
|
| 2827 |
)
|
| 2828 |
with gr.Row():
|
| 2829 |
gr.Markdown("""
|
| 2830 |
+
**Ind_emotion Leaderboard** ๐ฎ
|
|
|
|
| 2831 |
- **Metric:** Accuracy.
|
| 2832 |
- **Languages:** Indonesian
|
| 2833 |
""")
|
|
|
|
| 2854 |
with gr.Row():
|
| 2855 |
gr.Markdown("""
|
| 2856 |
**SST2 Leaderboard** ๐ฎ
|
|
|
|
| 2857 |
- **Metric:** Accuracy.
|
| 2858 |
- **Languages:** English
|
| 2859 |
""")
|
|
|
|
| 2884 |
with gr.Row():
|
| 2885 |
gr.Markdown("""
|
| 2886 |
**DREAM Leaderboard** ๐ฎ
|
|
|
|
| 2887 |
- **Metric:** Accuracy.
|
| 2888 |
- **Languages:** English
|
| 2889 |
""")
|
|
|
|
| 2909 |
with gr.Row():
|
| 2910 |
gr.Markdown("""
|
| 2911 |
**SAMSum Leaderboard** ๐ฎ
|
|
|
|
| 2912 |
- **Metric:** ROUGE.
|
| 2913 |
- **Languages:** English
|
| 2914 |
""")
|
|
|
|
| 2935 |
with gr.Row():
|
| 2936 |
gr.Markdown("""
|
| 2937 |
**DialogSum Leaderboard** ๐ฎ
|
|
|
|
| 2938 |
- **Metric:** ROUGE.
|
| 2939 |
- **Languages:** English
|
| 2940 |
""")
|
|
|
|
| 2965 |
with gr.Row():
|
| 2966 |
gr.Markdown("""
|
| 2967 |
**OCNLI Leaderboard** ๐ฎ
|
|
|
|
| 2968 |
- **Metric:** Accuracy.
|
| 2969 |
- **Languages:** Chinese
|
| 2970 |
""")
|
|
|
|
| 2991 |
with gr.Row():
|
| 2992 |
gr.Markdown("""
|
| 2993 |
**C3 Leaderboard** ๐ฎ
|
|
|
|
| 2994 |
- **Metric:** Accuracy.
|
| 2995 |
- **Languages:** Chinese
|
| 2996 |
""")
|
|
|
|
| 3019 |
with gr.Row():
|
| 3020 |
gr.Markdown("""
|
| 3021 |
**COLA Leaderboard** ๐ฎ
|
|
|
|
| 3022 |
- **Metric:** Accuracy.
|
| 3023 |
- **Languages:** English
|
| 3024 |
""")
|
|
|
|
| 3045 |
with gr.Row():
|
| 3046 |
gr.Markdown("""
|
| 3047 |
**QQP Leaderboard** ๐ฎ
|
|
|
|
| 3048 |
- **Metric:** Accuracy.
|
| 3049 |
- **Languages:** English
|
| 3050 |
""")
|
|
|
|
| 3071 |
with gr.Row():
|
| 3072 |
gr.Markdown("""
|
| 3073 |
**MNLI Leaderboard** ๐ฎ
|
|
|
|
| 3074 |
- **Metric:** Accuracy.
|
| 3075 |
- **Languages:** English
|
| 3076 |
""")
|
|
|
|
| 3097 |
with gr.Row():
|
| 3098 |
gr.Markdown("""
|
| 3099 |
**QNLI Leaderboard** ๐ฎ
|
|
|
|
| 3100 |
- **Metric:** Accuracy.
|
| 3101 |
- **Languages:** English
|
| 3102 |
""")
|
|
|
|
| 3124 |
with gr.Row():
|
| 3125 |
gr.Markdown("""
|
| 3126 |
**WNLI Leaderboard** ๐ฎ
|
|
|
|
| 3127 |
- **Metric:** Accuracy.
|
| 3128 |
- **Languages:** English
|
| 3129 |
""")
|
|
|
|
| 3151 |
with gr.Row():
|
| 3152 |
gr.Markdown("""
|
| 3153 |
**RTE Leaderboard** ๐ฎ
|
|
|
|
| 3154 |
- **Metric:** Accuracy.
|
| 3155 |
- **Languages:** English
|
| 3156 |
""")
|
| 3157 |
|
|
|
|
|
|
|
| 3158 |
# dataset
|
| 3159 |
with gr.TabItem("MRPC"):
|
| 3160 |
with gr.TabItem("Zero Shot"):
|
|
|
|
| 3176 |
with gr.Row():
|
| 3177 |
gr.Markdown("""
|
| 3178 |
**MRPC Leaderboard** ๐ฎ
|
|
|
|
| 3179 |
- **Metric:** Accuracy.
|
| 3180 |
- **Languages:** English
|
| 3181 |
""")
|
| 3182 |
|
|
|
|
| 3183 |
gr.Markdown(r"""
|
| 3184 |
### If our datasets and leaderboard are useful, please consider cite:
|
| 3185 |
```bibtex
|