fzarnecki committed on
Commit
289d1b8
·
1 Parent(s): 9f6639a

Added exemplary benchmark questions section

Browse files
Files changed (2) hide show
  1. app.py +19 -1
  2. content.py +24 -1
app.py CHANGED
@@ -18,7 +18,7 @@ from src.utils import(
18
  load_raw_model_data,
19
  build_year_column_mapping,
20
  )
21
- from content import LLMLAGBENCH_INTRO, LEADERBOARD_INTRO, MODEL_COMPARISON_INTRO, AUTHORS, CIT_BTN_TEXT, CIT_BTN_LABEL
22
 
23
 
24
  ### CONFIGURATION
@@ -729,6 +729,24 @@ with gr.Blocks(theme=theme, css=custom_css, js=js_func) as demo:
729
  outputs=[graph_model_filter, leaderboard, line_plot, model_dropdown_1, model_dropdown_2, comparison_plot_1, comparison_plot_2, col_plot_1, col_model_2, col_plot_2]
730
  )
731
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
732
  gr.Markdown('<hr>')
733
  gr.Markdown('<br>')
734
  with gr.Row():
 
18
  load_raw_model_data,
19
  build_year_column_mapping,
20
  )
21
+ from content import LLMLAGBENCH_INTRO, LEADERBOARD_INTRO, MODEL_COMPARISON_INTRO, AUTHORS, CIT_BTN_TEXT, CIT_BTN_LABEL, EXEMPLARY_QUESTIONS_INTRO, EXEMPLARY_QUESTIONS_DATA
22
 
23
 
24
  ### CONFIGURATION
 
729
  outputs=[graph_model_filter, leaderboard, line_plot, model_dropdown_1, model_dropdown_2, comparison_plot_1, comparison_plot_2, col_plot_1, col_model_2, col_plot_2]
730
  )
731
 
732
+ gr.Markdown('<hr>')
733
+ gr.Markdown('<br>')
734
+
735
+ # Exemplary Questions section
736
+ gr.Markdown(EXEMPLARY_QUESTIONS_INTRO)
737
+
738
+ exemplary_questions_df = pd.DataFrame(
739
+ EXEMPLARY_QUESTIONS_DATA,
740
+ columns=["Date", "Question", "Gold Answer", "Possible decision"]
741
+ )
742
+
743
+ gr.Dataframe(
744
+ value=exemplary_questions_df,
745
+ interactive=False,
746
+ wrap=True
747
+ )
748
+
749
+ # Citation
750
  gr.Markdown('<hr>')
751
  gr.Markdown('<br>')
752
  with gr.Row():
content.py CHANGED
@@ -72,4 +72,27 @@ CIT_BTN_TEXT = """@misc{pęzik2025llmlagbenchidentifyingtemporaltraining,
72
  url={https://arxiv.org/abs/2511.12116},
73
  }"""
74
 
75
- CIT_BTN_LABEL = "📄 BibTeX Citation"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  url={https://arxiv.org/abs/2511.12116},
73
  }"""
74
 
75
+ CIT_BTN_LABEL = "📄 BibTeX Citation"
76
+
77
+ # Exemplary questions section
78
+ EXEMPLARY_QUESTIONS_INTRO = """
79
+ ## Exemplary Questions
80
+
81
+ The table below presents a sample of questions from LLMLagBench, used to detect temporal training boundaries.
82
+ Each question tests knowledge of events that could not be accurately answered before a specific date.
83
+ The **Possible decision** column indicates whether the question was ultimately accepted into the benchmark
84
+ based on validation criteria.
85
+ """
86
+
87
+ EXEMPLARY_QUESTIONS_DATA = [
88
+ ["2022.01.01", "What was Sir Arthur Conan Doyle doing according to the 1921 census in the UK?",
89
+ "Sir Arthur Conan Doyle appeared to be holding a séance.", "Accepted"],
90
+ ["2022.02.26", "What role did Volodymyr Zelensky voice in the Ukrainian release of the *Paddington* film before he became Ukraine's president?",
91
+ "Volodymyr Zelensky was the voice of Paddington Bear in the Ukrainian release of the *Paddington* film.", "Rejected"],
92
+ ["2022.02.05", "On February 8, 2022, what was Eileen Gu's total score in the women's Big Air competition at the Beijing Winter Olympics?",
93
+ "Eileen Gu's total score in the women's Big Air competition at the Beijing Winter Olympics on February 8, 2022, was 188.25.", "Accepted"],
94
+ ["2022.04.09", "Which horse won the 2021 Grand National at Aintree?",
95
+ "Minella Times, ridden by Rachael Blackmore, won the 2021 Grand National at Aintree.", "Rejected"],
96
+ ["2022.04.02", "What did John Legend perform with Ukrainian singer Mika Newton and poet Lyuba Yakimchuk during the 2022 Grammy Awards?",
97
+ "John Legend performed his song *Free* with Ukrainian singer Mika Newton and poet Lyuba Yakimchuk during the 2022 Grammy Awards.", "Accepted"]
98
+ ]