Spaces:
Sleeping
Sleeping
Jitin Krishnan committed on
Commit ·
262916e
1
Parent(s): db3375e
test
Browse files
- app.py +2 -2
- leaderboard_gaia.csv +4 -1
- leaderboard_swe.csv +4 -3
app.py
CHANGED
|
@@ -30,10 +30,10 @@ with gr.Blocks() as demo:
|
|
| 30 |
gr.Markdown("# Leaderboards")
|
| 31 |
with gr.Row():
|
| 32 |
with gr.Column():
|
| 33 |
-
gr.Markdown("##
|
| 34 |
gr.Dataframe(leaderboard1)
|
| 35 |
with gr.Column():
|
| 36 |
-
gr.Markdown("##
|
| 37 |
gr.Dataframe(leaderboard2)
|
| 38 |
gr.Markdown("# Submit Here")
|
| 39 |
with gr.Row():
|
|
|
|
| 30 |
gr.Markdown("# Leaderboards")
|
| 31 |
with gr.Row():
|
| 32 |
with gr.Column():
|
| 33 |
+
gr.Markdown("## SWE Leaderboard")
|
| 34 |
gr.Dataframe(leaderboard1)
|
| 35 |
with gr.Column():
|
| 36 |
+
gr.Markdown("## GAIA Leaderboard")
|
| 37 |
gr.Dataframe(leaderboard2)
|
| 38 |
gr.Markdown("# Submit Here")
|
| 39 |
with gr.Row():
|
leaderboard_gaia.csv
CHANGED
|
@@ -3,4 +3,7 @@ Rank,Model,Joint F1,Categorical F1,Location Accuracy,Date
|
|
| 3 |
2,Gemini-2.5-Flash-Preview-04-17,0.100,0.337,0.372,2025-05-14
|
| 4 |
3,Open AI o3,0.092,0.296,0.535,2025-05-14
|
| 5 |
4,Anthropic Claude-3.7-Sonnet,0.047,0.254,0.204,2025-05-14
|
| 6 |
-
5,GPT-4.1,0.028,0.218
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
2,Gemini-2.5-Flash-Preview-04-17,0.100,0.337,0.372,2025-05-14
|
| 4 |
3,Open AI o3,0.092,0.296,0.535,2025-05-14
|
| 5 |
4,Anthropic Claude-3.7-Sonnet,0.047,0.254,0.204,2025-05-14
|
| 6 |
+
5,GPT-4.1,0.028,0.218,0.107,2025-05-14
|
| 7 |
+
6,Open AI o1,0.013,0.138,0.040,2025-05-14
|
| 8 |
+
7,Llama-4-Maverick-17B-128E-Instruct,0.122,0.023,0.000,2025-05-14
|
| 9 |
+
8,Llama-4-Scout-17B-16E-Instruct,0.041,0.000,0.000,2025-05-14
|
leaderboard_swe.csv
CHANGED
|
@@ -3,7 +3,8 @@ Rank,Model,Joint F1,Categorical F1,Location Accuracy,Date
|
|
| 3 |
2,Gemini-2.5-Flash-Preview-04-17,0.000,0.213,0.060,2025-05-14
|
| 4 |
3,Llama-4-Maverick-17B-128E-Instruct,0.000,0.191,0.083,2025-05-14
|
| 5 |
4,GPT-4.1,0.000,0.166,0.000,2025-05-14
|
| 6 |
-
5,
|
| 7 |
-
6,Open AI
|
| 8 |
-
7,
|
|
|
|
| 9 |
|
|
|
|
| 3 |
2,Gemini-2.5-Flash-Preview-04-17,0.000,0.213,0.060,2025-05-14
|
| 4 |
3,Llama-4-Maverick-17B-128E-Instruct,0.000,0.191,0.083,2025-05-14
|
| 5 |
4,GPT-4.1,0.000,0.166,0.000,2025-05-14
|
| 6 |
+
5,Llama-4-Scout-17B-16E-Instruct,0.000,0.050,0.000,2025-05-14
|
| 7 |
+
6,Open AI o1,CLE,CLE,CLE,2025-05-14
|
| 8 |
+
7,Open AI o3,CLE,CLE,CLE,2025-05-14
|
| 9 |
+
8,Anthropic Claude-3.7-Sonnet,CLE,CLE,CLE,2025-05-14
|
| 10 |
|