Spaces:
Running
Running
cleaner update
Browse files
app.py
CHANGED
|
@@ -1,66 +1,66 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
|
| 4 |
-
|
| 5 |
-
CITATION_BUTTON_LABEL,
|
| 6 |
-
CITATION_BUTTON_TEXT,
|
| 7 |
-
EVALUATION_QUEUE_TEXT,
|
| 8 |
-
INTRODUCTION_TEXT,
|
| 9 |
-
LLM_BENCHMARKS_TEXT,
|
| 10 |
-
TITLE,
|
| 11 |
-
)
|
| 12 |
-
|
| 13 |
-
# Simplified DataFrame for the leaderboard
|
| 14 |
data = {
|
| 15 |
"Model": [
|
| 16 |
"Handwritten TAG",
|
| 17 |
"Zero-shot Text2SQL",
|
| 18 |
"Zero-shot Text2SQL + LM Generation",
|
| 19 |
"RAG (E5)",
|
| 20 |
-
"RAG (E5) + LM Rerank"
|
| 21 |
-
],
|
| 22 |
-
"Code": [
|
| 23 |
-
"", # Handwritten TAG doesn't have a code link
|
| 24 |
-
"", # Zero-shot Text2SQL doesn't have a code link
|
| 25 |
-
"", # Zero-shot Text2SQL + LM Generation doesn't have a code link
|
| 26 |
-
"", # RAG (E5) doesn't have a code link
|
| 27 |
-
"" # RAG (E5) + LM Rerank doesn't have a code link
|
| 28 |
],
|
| 29 |
-
"Execution Accuracy": [
|
| 30 |
-
"55%", # Handwritten TAG
|
| 31 |
-
"17%", # Zero-shot Text2SQL
|
| 32 |
-
"13%", # Zero-shot Text2SQL + LM Generation
|
| 33 |
-
"0%", # RAG (E5)
|
| 34 |
-
"2%" # RAG (E5) + LM Rerank
|
| 35 |
-
]
|
| 36 |
}
|
| 37 |
|
|
|
|
| 38 |
leaderboard_df = pd.DataFrame(data)
|
| 39 |
|
| 40 |
-
#
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
# Highlight the top row in green for "Handwritten TAG"
|
| 48 |
-
with gr.Row():
|
| 49 |
-
gr.Dataframe(
|
| 50 |
-
value=leaderboard_df,
|
| 51 |
-
headers=["Model", "Code", "Execution Accuracy"],
|
| 52 |
-
datatype=["str", "str", "str"],
|
| 53 |
-
row_count=(5, "dynamic"),
|
| 54 |
-
wrap=True,
|
| 55 |
-
elem_id="leaderboard",
|
| 56 |
-
type="pandas"
|
| 57 |
-
)
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
|
| 4 |
+
# Simplified leaderboard data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
# Raw leaderboard entries: the i-th model name pairs with the i-th accuracy.
data = {
    "Model": [
        "Handwritten TAG",
        "Zero-shot Text2SQL",
        "Zero-shot Text2SQL + LM Generation",
        "RAG (E5)",
        "RAG (E5) + LM Rerank",
    ],
    "Execution Accuracy": ["55%", "17%", "13%", "0%", "2%"],
}

# Create a DataFrame
leaderboard_df = pd.DataFrame(data)

# The accuracies are stored as strings such as "55%", which would sort
# lexicographically ("2%" > "17%"). Sort on a temporary numeric copy instead
# and drop it again so only the original display strings remain.
_numeric_col = "Execution Accuracy (numeric)"
leaderboard_df = (
    leaderboard_df.assign(
        **{
            _numeric_col: leaderboard_df["Execution Accuracy"]
            .str.rstrip("%")
            .astype(float)
        }
    )
    # A stable sort keeps models with equal accuracy in their listed order,
    # so the ranking is deterministic even when scores tie.
    .sort_values(_numeric_col, ascending=False, kind="stable")
    .reset_index(drop=True)
    .drop(columns=[_numeric_col])
)

# Add the Rank column (1-based position after sorting) as the first column.
leaderboard_df.insert(0, "Rank", leaderboard_df.index + 1)
|
| 32 |
+
|
| 33 |
+
# Add hyperlinks to the Model column
|
| 34 |
+
def hyperlink_model(model):
    """Wrap a model name in an HTML link to the TAG-Bench repository.

    Every model links to the same repository URL. The name is HTML-escaped
    because the leaderboard table is rendered without escaping; an unescaped
    ``&``, ``<`` or ``>`` in a name would otherwise be parsed as markup.

    Args:
        model: Display name of the model.

    Returns:
        A string holding an ``<a>`` element that opens the repository in a
        new tab and shows the (escaped) model name as its text.
    """
    from html import escape  # local import keeps the block self-contained

    base_url = "https://github.com/TAG-Research/TAG-Bench/tree/main"
    return f'<a href="{base_url}" target="_blank">{escape(str(model))}</a>'
|
| 37 |
+
|
| 38 |
+
# Replace each plain model name with its HTML link before rendering.
leaderboard_df["Model"] = leaderboard_df["Model"].apply(hyperlink_model)

# Gradio app: a static page made of a heading, the leaderboard table and a
# short explanatory note.
with gr.Blocks() as demo:
    # Title and subtitle
    gr.HTML(
        """
<div style="text-align: center;">
<h1 style="font-size: 2.5rem; margin-bottom: 0.5rem;">Execution Accuracy Leaderboard</h1>
<p style="font-size: 1.25rem; color: gray;">Comparing baseline approaches for structured data queries</p>
</div>
"""
    )

    # Leaderboard table, rendered as raw HTML rather than a Dataframe
    # component so the links in the Model column stay clickable.
    gr.HTML(
        leaderboard_df.to_html(
            index=False,  # the Rank column already numbers the rows
            escape=False,  # keep the <a> tags as markup instead of escaping them
            classes="leaderboard-table",
        )
    )

    # Footer or additional info (optional)
    gr.Markdown(
        "Note: Execution accuracy is based on the percentage of correctly answered queries."
    )
|
| 65 |
|
| 66 |
demo.launch()
|