Jitin Krishnan committed on
Commit
262916e
·
1 Parent(s): db3375e
Files changed (3) hide show
  1. app.py +2 -2
  2. leaderboard_gaia.csv +4 -1
  3. leaderboard_swe.csv +4 -3
app.py CHANGED
@@ -30,10 +30,10 @@ with gr.Blocks() as demo:
30
  gr.Markdown("# Leaderboards")
31
  with gr.Row():
32
  with gr.Column():
33
- gr.Markdown("## Leaderboard 1")
34
  gr.Dataframe(leaderboard1)
35
  with gr.Column():
36
- gr.Markdown("## Leaderboard 2")
37
  gr.Dataframe(leaderboard2)
38
  gr.Markdown("# Submit Here")
39
  with gr.Row():
 
30
  gr.Markdown("# Leaderboards")
31
  with gr.Row():
32
  with gr.Column():
33
+ gr.Markdown("## SWE Leaderboard")
34
  gr.Dataframe(leaderboard1)
35
  with gr.Column():
36
+ gr.Markdown("## GAIA Leaderboard")
37
  gr.Dataframe(leaderboard2)
38
  gr.Markdown("# Submit Here")
39
  with gr.Row():
leaderboard_gaia.csv CHANGED
@@ -3,4 +3,7 @@ Rank,Model,Joint F1,Categorical F1,Location Accuracy,Date
3
  2,Gemini-2.5-Flash-Preview-04-17,0.100,0.337,0.372,2025-05-14
4
  3,Open AI o3,0.092,0.296,0.535,2025-05-14
5
  4,Anthropic Claude-3.7-Sonnet,0.047,0.254,0.204,2025-05-14
6
- 5,GPT-4.1,0.028,0.218 ,0.107,2025-05-14
 
 
 
 
3
  2,Gemini-2.5-Flash-Preview-04-17,0.100,0.337,0.372,2025-05-14
4
  3,Open AI o3,0.092,0.296,0.535,2025-05-14
5
  4,Anthropic Claude-3.7-Sonnet,0.047,0.254,0.204,2025-05-14
6
+ 5,GPT-4.1,0.028,0.218,0.107,2025-05-14
7
+ 6,Open AI o1,0.013,0.138,0.040,2025-05-14
8
+ 7,Llama-4-Maverick-17B-128E-Instruct,0.122,0.023,0.000,2025-05-14
9
+ 8,Llama-4-Scout-17B-16E-Instruct,0.041,0.000,0.000,2025-05-14
leaderboard_swe.csv CHANGED
@@ -3,7 +3,8 @@ Rank,Model,Joint F1,Categorical F1,Location Accuracy,Date
3
  2,Gemini-2.5-Flash-Preview-04-17,0.000,0.213,0.060,2025-05-14
4
  3,Llama-4-Maverick-17B-128E-Instruct,0.000,0.191,0.083,2025-05-14
5
  4,GPT-4.1,0.000,0.166,0.000,2025-05-14
6
- 5,Open AI o1,CLE,CLE,CLE,2025-05-14
7
- 6,Open AI o3,CLE,CLE,CLE,2025-05-14
8
- 7,Anthropic Claude-3.7-Sonnet,CLE,CLE,CLE,2025-05-14
 
9
 
 
3
  2,Gemini-2.5-Flash-Preview-04-17,0.000,0.213,0.060,2025-05-14
4
  3,Llama-4-Maverick-17B-128E-Instruct,0.000,0.191,0.083,2025-05-14
5
  4,GPT-4.1,0.000,0.166,0.000,2025-05-14
6
+ 5,Llama-4-Scout-17B-16E-Instruct,0.000,0.050,0.000,2025-05-14
7
+ 6,Open AI o1,CLE,CLE,CLE,2025-05-14
8
+ 7,Open AI o3,CLE,CLE,CLE,2025-05-14
9
+ 8,Anthropic Claude-3.7-Sonnet,CLE,CLE,CLE,2025-05-14
10