# Source: DABstep-temp / app.py
# Last change (5cc9741, justinlangsethgenesis):
#   Fix: use dash instead of pipe in org+user format
"""
DABstep Leaderboard - Genesis Edition
A self-contained leaderboard for the DABstep benchmark.
Original source: https://huggingface.co/spaces/adyen/DABstep
"""
import os
import gradio as gr
from dabstep_benchmark.content import (
CITATION_BUTTON_LABEL,
CITATION_BUTTON_TEXT,
INTRODUCTION_TEXT,
SUBMISSION_TEXT,
TEMPORARY_NOTICE,
TITLE,
VALIDATION_GUIDELINES,
)
from dabstep_benchmark.leaderboard import (
generate_leaderboard_df,
process_submission,
refresh,
sync_from_hf_dataset,
)
def download_leaderboard(lb_type: str) -> str:
    """Write the requested leaderboard to a CSV file and return its path.

    Args:
        lb_type: ``"validated"`` selects the validated leaderboard; any other
            value selects the unvalidated one.

    Returns:
        Relative path (under ``data/``) of the CSV file that was written.
    """
    validated_lb, unvalidated_lb = generate_leaderboard_df()

    # ``lb_type`` arrives from a UI component value, so it must never be
    # interpolated into the file name as-is: a value such as "../x" would
    # otherwise let a caller pick where the file is written. Only the two
    # known names are ever used for the path.
    if lb_type == "validated":
        kind, df_to_download = "validated", validated_lb
    else:
        kind, df_to_download = "unvalidated", unvalidated_lb

    os.makedirs("data", exist_ok=True)
    path = f"data/{kind}_leaderboard.csv"

    # No explicit delete beforehand: writing replaces any previous export
    # (assumes a pandas DataFrame, whose to_csv opens the target in "w" mode),
    # and a check-then-remove step can fail when two downloads overlap.
    df_to_download.to_csv(path, index=False)
    return path
# Extra CSS for the page. The UI code injects it through a <style> tag in a
# gr.HTML component. The selectors match values set on components in this file:
#   .markdown-text    -> elem_classes of the introduction/guidelines/submission text
#   .notice-box       -> elem_classes of the temporary-notice Markdown
#   #citation-button  -> elem_id of the citation Textbox
# .gradio-container is presumably Gradio's own root wrapper class (confirm
# against the Gradio version in use).
CUSTOM_CSS = """
.markdown-text {
font-size: 16px;
}
.gradio-container {
max-width: 1200px !important;
}
#citation-button {
font-family: monospace;
font-size: 12px;
}
.notice-box {
background: transparent !important;
border: none !important;
padding: 0 !important;
margin: 8px 0 16px 0 !important;
}
.notice-box p {
margin: 0 !important;
padding: 12px 16px !important;
background: #2a2a2a !important;
border: 1px solid #444 !important;
border-radius: 6px !important;
color: #ccc !important;
font-size: 14px !important;
line-height: 1.5 !important;
}
.notice-box a {
color: #6cb6ff !important;
}
"""
if __name__ == "__main__":
    # Script entry point: prepare local data directories, sync data, build the
    # Gradio UI (leaderboard, submission form, citation) and launch the server.
    #
    # NOTE(review): the nesting of the layout containers below (Tab / Row /
    # Column) was reconstructed from statement order; confirm it against the
    # rendered UI.
    from datetime import datetime  # local import: only the startup banner needs it

    # Ensure data directories exist
    os.makedirs("data/task_scores", exist_ok=True)
    os.makedirs("data/submissions", exist_ok=True)

    # Sync latest data from HF Dataset repo on startup
    print("===== Application Startup at", datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "=====")
    sync_from_hf_dataset()

    # Build the initial leaderboard tables shown when the page first renders.
    validated_lb, unvalidated_lb = generate_leaderboard_df()

    # Build the Gradio app
    demo = gr.Blocks(title="DABstep Leaderboard - Temporary Mirror")
    with demo:
        # The stylesheet is injected as a <style> element inside the page.
        gr.HTML(f"<style>{CUSTOM_CSS}</style>")
        gr.Markdown(TITLE)
        gr.Markdown(TEMPORARY_NOTICE, elem_classes="notice-box")
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        with gr.Tab("📊 Leaderboard"):
            with gr.Tab("Validated"):
                verified_table = gr.Dataframe(
                    value=validated_lb,
                    datatype=["markdown", "str", "str", "str", "markdown", "str", "str"],
                    interactive=False,
                    column_widths=["20%"],
                    wrap=True,
                )
                verified_download = gr.DownloadButton(
                    label="📥 Download Leaderboard CSV",
                    elem_id="download-verified-lb",
                )

            with gr.Tab("Unvalidated"):
                unverified_table = gr.Dataframe(
                    value=unvalidated_lb,
                    datatype=["markdown", "str", "str", "str", "markdown", "str", "str"],
                    interactive=False,
                    column_widths=["20%"],
                    wrap=True,
                )
                unverified_download = gr.DownloadButton(
                    label="📥 Download Full Leaderboard CSV",
                    elem_id="download-unverified-lb",
                )

            # Refresh button
            refresh_button = gr.Button("🔄 Refresh Leaderboard", variant="secondary")

            def do_refresh():
                """Reload the leaderboard via ``refresh(only_leaderboard=True)``.

                The return value is passed straight to both leaderboard tables.
                """
                return refresh(only_leaderboard=True)

            refresh_button.click(
                fn=do_refresh,
                inputs=None,
                outputs=[verified_table, unverified_table],
            )

            # Download handlers. Each hidden Textbox only carries the constant
            # leaderboard type that is passed to download_leaderboard; the
            # returned file path is sent back to the button that was clicked.
            verified_download.click(
                download_leaderboard,
                inputs=[gr.Textbox(value="validated", visible=False)],
                outputs=[verified_download]
            )
            unverified_download.click(
                download_leaderboard,
                inputs=[gr.Textbox(value="unvalidated", visible=False)],
                outputs=[unverified_download]
            )

            gr.Markdown(VALIDATION_GUIDELINES, elem_classes="markdown-text")

        with gr.Tab("📤 Submit"):
            gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text")

            # Show login status
            with gr.Row():
                gr.Markdown("**Login with HuggingFace to submit:**")
                login_btn = gr.LoginButton(size="sm")

            with gr.Row():
                with gr.Column():
                    # Only one split exists, so the selector is kept hidden.
                    split = gr.Radio(["all"], value="all", label="Split", visible=False)
                    agent_name_textbox = gr.Textbox(
                        label="Agent Name",
                        placeholder="e.g., MyDataAgent-v1"
                    )
                    model_family_textbox = gr.Textbox(
                        label="Model Family",
                        placeholder="e.g., GPT-4, Claude, Llama"
                    )
                    repo_url_textbox = gr.Textbox(
                        label="Repository URL (optional)",
                        placeholder="https://github.com/..."
                    )
                with gr.Column():
                    organisation = gr.Textbox(
                        label="Organization",
                        placeholder="e.g., Genesis Computing"
                    )
                    mail = gr.Textbox(
                        label="Contact Email",
                        placeholder="your@email.com"
                    )
                    file_output = gr.File(
                        label="Upload Submission (.jsonl)",
                        file_types=[".jsonl", ".json"]
                    )

            with gr.Row():
                submit_button = gr.Button("🚀 Submit Answers", variant="primary")
            submission_result = gr.Markdown()

            def submit_with_user(split, agent_name, model_family, repo_url, file, org, mail, profile: gr.OAuthProfile | None):
                """Forward a submission to ``process_submission`` with the HF username attached.

                ``profile`` is not listed in the click ``inputs``; it is expected
                to be supplied by Gradio's OAuth support based on its type
                annotation and to be ``None`` when nobody is logged in (see the
                Gradio LoginButton documentation).

                Returns:
                    A login reminder string when ``profile`` is ``None``,
                    otherwise whatever ``process_submission`` returns; either
                    value is rendered in the result Markdown component.
                """
                if profile is None:
                    return "⚠️ Please login with HuggingFace first to submit."
                # Append HF username to organization (use dash instead of pipe to pass validation)
                # NOTE(review): an empty ``org`` still produces a non-empty
                # string here - confirm downstream validation covers that case.
                org_with_user = f"{org} - user {profile.username}"
                return process_submission(split, agent_name, model_family, repo_url, file, org_with_user, mail)

            submit_button.click(
                submit_with_user,
                inputs=[
                    split,
                    agent_name_textbox,
                    model_family_textbox,
                    repo_url_textbox,
                    file_output,
                    organisation,
                    mail
                ],
                outputs=submission_result,
            )

        with gr.Tab("📚 Citation"):
            with gr.Accordion("📙 How to Cite", open=True):
                citation_button = gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    # One visible row per line of the citation text.
                    lines=len(CITATION_BUTTON_TEXT.split("\n")),
                    elem_id="citation-button",
                )

        # Login at bottom (LoginButton shows logout when logged in)
        with gr.Row():
            gr.Markdown("---")
        with gr.Row():
            gr.LoginButton(size="sm")

    # Launch the app with HuggingFace OAuth
    demo.launch(
        server_name="0.0.0.0",  # listen on all interfaces
        server_port=7860,
        share=False,
        debug=True
    )