Spaces:

pelcra
/

llmlagbench

Running

App Files Files Community

fzarnecki committed on 17 days ago

Commit

289d1b8

1 Parent(s): 9f6639a

Added exemplary benchmark questions section

Browse files

Files changed (2) hide show

app.py +19 -1
content.py +24 -1

app.py CHANGED Viewed

@@ -18,7 +18,7 @@ from src.utils import(
     load_raw_model_data,
     build_year_column_mapping,
 )
-from content import LLMLAGBENCH_INTRO, LEADERBOARD_INTRO, MODEL_COMPARISON_INTRO, AUTHORS, CIT_BTN_TEXT, CIT_BTN_LABEL
 ### CONFIGURATION
@@ -729,6 +729,24 @@ with gr.Blocks(theme=theme, css=custom_css, js=js_func) as demo:
         outputs=[graph_model_filter, leaderboard, line_plot, model_dropdown_1, model_dropdown_2, comparison_plot_1, comparison_plot_2, col_plot_1, col_model_2, col_plot_2]
     )
     gr.Markdown('<hr>')
     gr.Markdown('<br>')
     with gr.Row():

     load_raw_model_data,
     build_year_column_mapping,
 )
+from content import LLMLAGBENCH_INTRO, LEADERBOARD_INTRO, MODEL_COMPARISON_INTRO, AUTHORS, CIT_BTN_TEXT, CIT_BTN_LABEL, EXEMPLARY_QUESTIONS_INTRO, EXEMPLARY_QUESTIONS_DATA
 ### CONFIGURATION
         outputs=[graph_model_filter, leaderboard, line_plot, model_dropdown_1, model_dropdown_2, comparison_plot_1, comparison_plot_2, col_plot_1, col_model_2, col_plot_2]
     )
+    gr.Markdown('<hr>')
+    gr.Markdown('<br>')
+    # Exemplary Questions section
+    gr.Markdown(EXEMPLARY_QUESTIONS_INTRO)
+    exemplary_questions_df = pd.DataFrame(
+        EXEMPLARY_QUESTIONS_DATA,
+        columns=["Date", "Question", "Gold Answer", "Possible decision"]
+    )
+    gr.Dataframe(
+        value=exemplary_questions_df,
+        interactive=False,
+        wrap=True
+    )
+    # Citation
     gr.Markdown('<hr>')
     gr.Markdown('<br>')
     with gr.Row():

content.py CHANGED Viewed

@@ -72,4 +72,27 @@ CIT_BTN_TEXT = """@misc{pęzik2025llmlagbenchidentifyingtemporaltraining,
       url={https://arxiv.org/abs/2511.12116},
 }"""
-CIT_BTN_LABEL = "📄 BibTeX Citation"

       url={https://arxiv.org/abs/2511.12116},
 }"""
+CIT_BTN_LABEL = "📄 BibTeX Citation"
+# Exemplary questions section
+EXEMPLARY_QUESTIONS_INTRO = """
+## Exemplary Questions
+The table below presents a sample of questions from LLMLagBench, used to detect temporal training boundaries.
+Each question tests knowledge of events that could not be accurately answered before a specific date.
+The **Possible decision** column indicates whether the question was ultimately accepted into the benchmark
+based on validation criteria.
+"""
+EXEMPLARY_QUESTIONS_DATA = [
+    ["2022.01.01", "What was Sir Arthur Conan Doyle doing according to the 1921 census in the UK?",
+     "Sir Arthur Conan Doyle appeared to be holding a séance.", "Accepted"],
+    ["2022.02.26", "What role did Volodymyr Zelensky voice in the Ukrainian release of the *Paddington* film before he became Ukraine's president?",
+     "Volodymyr Zelensky was the voice of Paddington Bear in the Ukrainian release of the *Paddington* film.", "Rejected"],
+    ["2022.02.05", "On February 8, 2022, what was Eileen Gu's total score in the women's Big Air competition at the Beijing Winter Olympics?",
+     "Eileen Gu's total score in the women's Big Air competition at the Beijing Winter Olympics on February 8, 2022, was 188.25.", "Accepted"],
+    ["2022.04.09", "Which horse won the 2021 Grand National at Aintree?",
+     "Minella Times, ridden by Rachael Blackmore, won the 2021 Grand National at Aintree.", "Rejected"],
+    ["2022.04.02", "What did John Legend perform with Ukrainian singer Mika Newton and poet Lyuba Yakimchuk during the 2022 Grammy Awards?",
+     "John Legend performed his song *Free* with Ukrainian singer Mika Newton and poet Lyuba Yakimchuk during the 2022 Grammy Awards.", "Accepted"]
+]