Lev Israel
committed on
Commit
·
9060c03
1
Parent(s):
102be2e
Leaderboard default
Browse files
- app.py +16 -15
- dataset/README.md +2 -10
app.py
CHANGED
|
@@ -369,6 +369,7 @@ def create_app():
|
|
| 369 |
- **Total Pairs:** {benchmark_stats.get('total_pairs', 'N/A'):,}
|
| 370 |
- **Categories:** {len(benchmark_stats.get('categories', {}))}
|
| 371 |
- **Avg Hebrew Length:** {benchmark_stats.get('avg_he_length', 0):.0f} chars
|
|
|
|
| 372 |
""")
|
| 373 |
|
| 374 |
with gr.Column(scale=1):
|
|
@@ -381,7 +382,21 @@ def create_app():
|
|
| 381 |
|
| 382 |
gr.Markdown("---")
|
| 383 |
|
| 384 |
-
with gr.Tabs(selected=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
with gr.TabItem("π¬ Evaluate Model"):
|
| 386 |
with gr.Row():
|
| 387 |
with gr.Column(scale=2):
|
|
@@ -426,20 +441,6 @@ def create_app():
|
|
| 426 |
status_text = gr.Markdown("")
|
| 427 |
|
| 428 |
results_markdown = gr.Markdown("")
|
| 429 |
-
|
| 430 |
-
with gr.TabItem("π Leaderboard"):
|
| 431 |
-
leaderboard_table = gr.Dataframe(
|
| 432 |
-
value=format_leaderboard_df(),
|
| 433 |
-
label="Model Rankings",
|
| 434 |
-
interactive=False,
|
| 435 |
-
)
|
| 436 |
-
|
| 437 |
-
refresh_btn = gr.Button("π Refresh Leaderboard")
|
| 438 |
-
|
| 439 |
-
comparison_plot = gr.Plot(
|
| 440 |
-
value=create_leaderboard_comparison(),
|
| 441 |
-
label="Model Comparison"
|
| 442 |
-
)
|
| 443 |
|
| 444 |
gr.Markdown("""
|
| 445 |
---
|
|
|
|
| 369 |
- **Total Pairs:** {benchmark_stats.get('total_pairs', 'N/A'):,}
|
| 370 |
- **Categories:** {len(benchmark_stats.get('categories', {}))}
|
| 371 |
- **Avg Hebrew Length:** {benchmark_stats.get('avg_he_length', 0):.0f} chars
|
| 372 |
+
- **Dataset:** [View on Hugging Face](https://huggingface.co/datasets/{BENCHMARK_DATASET_ID})
|
| 373 |
""")
|
| 374 |
|
| 375 |
with gr.Column(scale=1):
|
|
|
|
| 382 |
|
| 383 |
gr.Markdown("---")
|
| 384 |
|
| 385 |
+
with gr.Tabs(selected=0): # Default to Leaderboard tab
|
| 386 |
+
with gr.TabItem("π Leaderboard"):
|
| 387 |
+
leaderboard_table = gr.Dataframe(
|
| 388 |
+
value=format_leaderboard_df(),
|
| 389 |
+
label="Model Rankings",
|
| 390 |
+
interactive=False,
|
| 391 |
+
)
|
| 392 |
+
|
| 393 |
+
refresh_btn = gr.Button("π Refresh Leaderboard")
|
| 394 |
+
|
| 395 |
+
comparison_plot = gr.Plot(
|
| 396 |
+
value=create_leaderboard_comparison(),
|
| 397 |
+
label="Model Comparison"
|
| 398 |
+
)
|
| 399 |
+
|
| 400 |
with gr.TabItem("π¬ Evaluate Model"):
|
| 401 |
with gr.Row():
|
| 402 |
with gr.Column(scale=2):
|
|
|
|
| 441 |
status_text = gr.Markdown("")
|
| 442 |
|
| 443 |
results_markdown = gr.Markdown("")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 444 |
|
| 445 |
gr.Markdown("""
|
| 446 |
---
|
dataset/README.md
CHANGED
|
@@ -25,7 +25,7 @@ A benchmark dataset for evaluating embedding models on Rabbinic Hebrew and Arama
|
|
| 25 |
|
| 26 |
## Dataset Description
|
| 27 |
|
| 28 |
-
This dataset contains
|
| 29 |
|
| 30 |
### Languages
|
| 31 |
|
|
@@ -57,18 +57,10 @@ Each example contains:
|
|
| 57 |
|
| 58 |
## Intended Use
|
| 59 |
|
| 60 |
-
### Primary Use Case
|
| 61 |
-
|
| 62 |
Evaluating embedding models for cross-lingual retrieval:
|
| 63 |
- Given a Hebrew/Aramaic text, can the model find its English translation from a pool of candidates?
|
| 64 |
- Models that excel at this task likely capture the semantics of Rabbinic literature well.
|
| 65 |
|
| 66 |
-
### Evaluation Metrics
|
| 67 |
-
|
| 68 |
-
- **Recall@k**: Percentage of queries where correct translation is in top k results
|
| 69 |
-
- **MRR**: Mean Reciprocal Rank
|
| 70 |
-
- **Bitext Accuracy**: True pair vs random pair classification
|
| 71 |
-
|
| 72 |
## Source
|
| 73 |
|
| 74 |
All texts and translations are from [Sefaria](https://www.sefaria.org), a free library of Jewish texts.
|
|
@@ -88,7 +80,7 @@ If you use this dataset, please cite Sefaria:
|
|
| 88 |
@misc{sefaria,
|
| 89 |
title = {Sefaria: A Living Library of Jewish Texts},
|
| 90 |
url = {https://www.sefaria.org},
|
| 91 |
-
year = {
|
| 92 |
}
|
| 93 |
```
|
| 94 |
|
|
|
|
| 25 |
|
| 26 |
## Dataset Description
|
| 27 |
|
| 28 |
+
This dataset contains parallel text pairs spanning diverse Rabbinic literature across multiple centuries and genres. It is designed for evaluating cross-lingual embedding models on their ability to align Hebrew/Aramaic source texts with English translations.
|
| 29 |
|
| 30 |
### Languages
|
| 31 |
|
|
|
|
| 57 |
|
| 58 |
## Intended Use
|
| 59 |
|
|
|
|
|
|
|
| 60 |
Evaluating embedding models for cross-lingual retrieval:
|
| 61 |
- Given a Hebrew/Aramaic text, can the model find its English translation from a pool of candidates?
|
| 62 |
- Models that excel at this task likely capture the semantics of Rabbinic literature well.
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
## Source
|
| 65 |
|
| 66 |
All texts and translations are from [Sefaria](https://www.sefaria.org), a free library of Jewish texts.
|
|
|
|
| 80 |
@misc{sefaria,
|
| 81 |
title = {Sefaria: A Living Library of Jewish Texts},
|
| 82 |
url = {https://www.sefaria.org},
|
| 83 |
+
year = {2026}
|
| 84 |
}
|
| 85 |
```
|
| 86 |
|