Spaces:
Running
Running
Added exemplary benchmark questions section
Browse files
- app.py +19 -1
- content.py +24 -1
app.py
CHANGED
|
@@ -18,7 +18,7 @@ from src.utils import(
|
|
| 18 |
load_raw_model_data,
|
| 19 |
build_year_column_mapping,
|
| 20 |
)
|
| 21 |
-
from content import LLMLAGBENCH_INTRO, LEADERBOARD_INTRO, MODEL_COMPARISON_INTRO, AUTHORS, CIT_BTN_TEXT, CIT_BTN_LABEL
|
| 22 |
|
| 23 |
|
| 24 |
### CONFIGURATION
|
|
@@ -729,6 +729,24 @@ with gr.Blocks(theme=theme, css=custom_css, js=js_func) as demo:
|
|
| 729 |
outputs=[graph_model_filter, leaderboard, line_plot, model_dropdown_1, model_dropdown_2, comparison_plot_1, comparison_plot_2, col_plot_1, col_model_2, col_plot_2]
|
| 730 |
)
|
| 731 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 732 |
gr.Markdown('<hr>')
|
| 733 |
gr.Markdown('<br>')
|
| 734 |
with gr.Row():
|
|
|
|
| 18 |
load_raw_model_data,
|
| 19 |
build_year_column_mapping,
|
| 20 |
)
|
| 21 |
+
from content import LLMLAGBENCH_INTRO, LEADERBOARD_INTRO, MODEL_COMPARISON_INTRO, AUTHORS, CIT_BTN_TEXT, CIT_BTN_LABEL, EXEMPLARY_QUESTIONS_INTRO, EXEMPLARY_QUESTIONS_DATA
|
| 22 |
|
| 23 |
|
| 24 |
### CONFIGURATION
|
|
|
|
| 729 |
outputs=[graph_model_filter, leaderboard, line_plot, model_dropdown_1, model_dropdown_2, comparison_plot_1, comparison_plot_2, col_plot_1, col_model_2, col_plot_2]
|
| 730 |
)
|
| 731 |
|
| 732 |
+
gr.Markdown('<hr>')
|
| 733 |
+
gr.Markdown('<br>')
|
| 734 |
+
|
| 735 |
+
# Exemplary Questions section
|
| 736 |
+
gr.Markdown(EXEMPLARY_QUESTIONS_INTRO)
|
| 737 |
+
|
| 738 |
+
exemplary_questions_df = pd.DataFrame(
|
| 739 |
+
EXEMPLARY_QUESTIONS_DATA,
|
| 740 |
+
columns=["Date", "Question", "Gold Answer", "Possible decision"]
|
| 741 |
+
)
|
| 742 |
+
|
| 743 |
+
gr.Dataframe(
|
| 744 |
+
value=exemplary_questions_df,
|
| 745 |
+
interactive=False,
|
| 746 |
+
wrap=True
|
| 747 |
+
)
|
| 748 |
+
|
| 749 |
+
# Citation
|
| 750 |
gr.Markdown('<hr>')
|
| 751 |
gr.Markdown('<br>')
|
| 752 |
with gr.Row():
|
content.py
CHANGED
|
@@ -72,4 +72,27 @@ CIT_BTN_TEXT = """@misc{pęzik2025llmlagbenchidentifyingtemporaltraining,
|
|
| 72 |
url={https://arxiv.org/abs/2511.12116},
|
| 73 |
}"""
|
| 74 |
|
| 75 |
-
CIT_BTN_LABEL = "📄 BibTeX Citation"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
url={https://arxiv.org/abs/2511.12116},
|
| 73 |
}"""
|
| 74 |
|
| 75 |
+
CIT_BTN_LABEL = "📄 BibTeX Citation"
|
| 76 |
+
|
| 77 |
+
# Exemplary Questions section
|
| 78 |
+
EXEMPLARY_QUESTIONS_INTRO = """
|
| 79 |
+
## Exemplary Questions
|
| 80 |
+
|
| 81 |
+
The table below presents a sample of questions from LLMLagBench, used to detect temporal training boundaries.
|
| 82 |
+
Each question tests knowledge of events that could not be accurately answered before a specific date.
|
| 83 |
+
The **Possible decision** column indicates whether the question was ultimately accepted into the benchmark
|
| 84 |
+
based on validation criteria.
|
| 85 |
+
"""
|
| 86 |
+
|
| 87 |
+
EXEMPLARY_QUESTIONS_DATA = [
|
| 88 |
+
["2022.01.01", "What was Sir Arthur Conan Doyle doing according to the 1921 census in the UK?",
|
| 89 |
+
"Sir Arthur Conan Doyle appeared to be holding a séance.", "Accepted"],
|
| 90 |
+
["2022.02.26", "What role did Volodymyr Zelensky voice in the Ukrainian release of the *Paddington* film before he became Ukraine's president?",
|
| 91 |
+
"Volodymyr Zelensky was the voice of Paddington Bear in the Ukrainian release of the *Paddington* film.", "Rejected"],
|
| 92 |
+
["2022.02.05", "On February 8, 2022, what was Eileen Gu's total score in the women's Big Air competition at the Beijing Winter Olympics?",
|
| 93 |
+
"Eileen Gu's total score in the women's Big Air competition at the Beijing Winter Olympics on February 8, 2022, was 188.25.", "Accepted"],
|
| 94 |
+
["2022.04.09", "Which horse won the 2021 Grand National at Aintree?",
|
| 95 |
+
"Minella Times, ridden by Rachael Blackmore, won the 2021 Grand National at Aintree.", "Rejected"],
|
| 96 |
+
["2022.04.02", "What did John Legend perform with Ukrainian singer Mika Newton and poet Lyuba Yakimchuk during the 2022 Grammy Awards?",
|
| 97 |
+
"John Legend performed his song *Free* with Ukrainian singer Mika Newton and poet Lyuba Yakimchuk during the 2022 Grammy Awards.", "Accepted"]
|
| 98 |
+
]
|