|
|
""" |
|
|
DABstep Leaderboard - Genesis Edition |
|
|
A self-contained leaderboard for the DABstep benchmark. |
|
|
|
|
|
Original source: https://huggingface.co/spaces/adyen/DABstep |
|
|
""" |
|
|
import os |
|
|
|
|
|
import gradio as gr |
|
|
|
|
|
from dabstep_benchmark.content import ( |
|
|
CITATION_BUTTON_LABEL, |
|
|
CITATION_BUTTON_TEXT, |
|
|
INTRODUCTION_TEXT, |
|
|
SUBMISSION_TEXT, |
|
|
TEMPORARY_NOTICE, |
|
|
TITLE, |
|
|
VALIDATION_GUIDELINES, |
|
|
) |
|
|
from dabstep_benchmark.leaderboard import ( |
|
|
generate_leaderboard_df, |
|
|
process_submission, |
|
|
refresh, |
|
|
sync_from_hf_dataset, |
|
|
) |
|
|
|
|
|
|
|
|
def download_leaderboard(lb_type: str) -> str:
    """Write one of the leaderboards to a CSV file and return the file path.

    Args:
        lb_type: ``"validated"`` selects the validated leaderboard. Any other
            value selects the unvalidated one (same fallback as before).

    Returns:
        Relative path of the CSV file written under ``data/``.
    """
    validated_lb, unvalidated_lb = generate_leaderboard_df()

    # Collapse the argument to one of two fixed names *before* it is used in
    # a filesystem path, so an unexpected value (for example one containing
    # "../") can never decide where the file is written.
    if lb_type == "validated":
        name, df_to_download = "validated", validated_lb
    else:
        name, df_to_download = "unvalidated", unvalidated_lb

    os.makedirs("data", exist_ok=True)
    path = f"data/{name}_leaderboard.csv"

    # No exists()/remove() pre-step: writing the CSV replaces any previous
    # export at this path (assumes the leaderboard object is a pandas
    # DataFrame, whose to_csv opens the target in write mode by default).
    df_to_download.to_csv(path, index=False)
    return path
|
|
|
|
|
|
|
|
|
|
|
# Stylesheet for the page; it is embedded in a <style> tag when the UI is
# built. The selectors correspond to the elem_classes / elem_id values given
# to the components: "markdown-text", "notice-box" and "citation-button".
CUSTOM_CSS = """
.markdown-text {
    font-size: 16px;
}
.gradio-container {
    max-width: 1200px !important;
}
#citation-button {
    font-family: monospace;
    font-size: 12px;
}
.notice-box {
    background: transparent !important;
    border: none !important;
    padding: 0 !important;
    margin: 8px 0 16px 0 !important;
}
.notice-box p {
    margin: 0 !important;
    padding: 12px 16px !important;
    background: #2a2a2a !important;
    border: 1px solid #444 !important;
    border-radius: 6px !important;
    color: #ccc !important;
    font-size: 14px !important;
    line-height: 1.5 !important;
}
.notice-box a {
    color: #6cb6ff !important;
}
"""
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: prepare local storage, pull the latest data, build
    # the Gradio UI and start the web server.
    from datetime import datetime

    # Working directories used for stored scores and uploaded submissions.
    os.makedirs("data/task_scores", exist_ok=True)
    os.makedirs("data/submissions", exist_ok=True)

    print("===== Application Startup at", datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "=====")
    sync_from_hf_dataset()

    # Initial table contents; the refresh button below replaces them on demand.
    validated_lb, unvalidated_lb = generate_leaderboard_df()

    demo = gr.Blocks(title="DABstep Leaderboard - Temporary Mirror")

    with demo:
        # Inject the custom stylesheet, then the page header texts.
        gr.HTML(f"<style>{CUSTOM_CSS}</style>")
        gr.Markdown(TITLE)
        gr.Markdown(TEMPORARY_NOTICE, elem_classes="notice-box")
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        with gr.Tab("📊 Leaderboard"):
            with gr.Tab("Validated"):
                verified_table = gr.Dataframe(
                    value=validated_lb,
                    datatype=["markdown", "str", "str", "str", "markdown", "str", "str"],
                    interactive=False,
                    column_widths=["20%"],
                    wrap=True,
                )
                verified_download = gr.DownloadButton(
                    label="📥 Download Leaderboard CSV",
                    elem_id="download-verified-lb",
                )

            with gr.Tab("Unvalidated"):
                unverified_table = gr.Dataframe(
                    value=unvalidated_lb,
                    datatype=["markdown", "str", "str", "str", "markdown", "str", "str"],
                    interactive=False,
                    column_widths=["20%"],
                    wrap=True,
                )
                unverified_download = gr.DownloadButton(
                    label="📥 Download Full Leaderboard CSV",
                    elem_id="download-unverified-lb",
                )

            refresh_button = gr.Button("🔄 Refresh Leaderboard", variant="secondary")

            def do_refresh():
                """Reload the leaderboard via ``refresh(only_leaderboard=True)``.

                The returned value is routed to the validated and unvalidated
                tables, in that order.
                """
                return refresh(only_leaderboard=True)

            refresh_button.click(
                fn=do_refresh,
                inputs=None,
                outputs=[verified_table, unverified_table],
            )

            # The leaderboard kind is bound here on the server instead of
            # being read from a hidden Textbox: a component value is supplied
            # by the client, and this value ends up in a file path.
            def download_validated():
                """Export the validated leaderboard and return the CSV path."""
                return download_leaderboard("validated")

            def download_unvalidated():
                """Export the unvalidated leaderboard and return the CSV path."""
                return download_leaderboard("unvalidated")

            verified_download.click(
                fn=download_validated,
                inputs=None,
                outputs=[verified_download],
            )
            unverified_download.click(
                fn=download_unvalidated,
                inputs=None,
                outputs=[unverified_download],
            )

            gr.Markdown(VALIDATION_GUIDELINES, elem_classes="markdown-text")

        with gr.Tab("📤 Submit"):
            gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("**Login with HuggingFace to submit:**")
                gr.LoginButton(size="sm")

            with gr.Row():
                with gr.Column():
                    # Single-choice, hidden selector: only the "all" split exists.
                    split = gr.Radio(["all"], value="all", label="Split", visible=False)
                    agent_name_textbox = gr.Textbox(
                        label="Agent Name",
                        placeholder="e.g., MyDataAgent-v1",
                    )
                    model_family_textbox = gr.Textbox(
                        label="Model Family",
                        placeholder="e.g., GPT-4, Claude, Llama",
                    )
                    repo_url_textbox = gr.Textbox(
                        label="Repository URL (optional)",
                        placeholder="https://github.com/...",
                    )

                with gr.Column():
                    organisation = gr.Textbox(
                        label="Organization",
                        placeholder="e.g., Genesis Computing",
                    )
                    mail = gr.Textbox(
                        label="Contact Email",
                        placeholder="your@email.com",
                    )
                    file_output = gr.File(
                        label="Upload Submission (.jsonl)",
                        file_types=[".jsonl", ".json"],
                    )

            with gr.Row():
                submit_button = gr.Button("🚀 Submit Answers", variant="primary")

            submission_result = gr.Markdown()

            def submit_with_user(split, agent_name, model_family, repo_url, file, org, mail, profile: gr.OAuthProfile | None):
                """Forward a submission, tagging the organisation with the HF username.

                The first seven arguments come from the form components listed
                in ``inputs`` below. ``profile`` is not a form input: Gradio
                supplies it from the login session based on the
                ``gr.OAuthProfile | None`` annotation, and it is ``None`` when
                nobody is logged in.

                Returns:
                    A warning message when the user is not logged in, otherwise
                    whatever ``process_submission`` returns.
                """
                if profile is None:
                    return "⚠️ Please login with HuggingFace first to submit."

                org_with_user = f"{org} - user {profile.username}"
                return process_submission(split, agent_name, model_family, repo_url, file, org_with_user, mail)

            submit_button.click(
                submit_with_user,
                inputs=[
                    split,
                    agent_name_textbox,
                    model_family_textbox,
                    repo_url_textbox,
                    file_output,
                    organisation,
                    mail,
                ],
                outputs=submission_result,
            )

        with gr.Tab("📚 Citation"):
            with gr.Accordion("📙 How to Cite", open=True):
                gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    # One visible row per line of the citation text.
                    lines=len(CITATION_BUTTON_TEXT.split("\n")),
                    elem_id="citation-button",
                )

        # Footer: a separator followed by a second login button.
        with gr.Row():
            gr.Markdown("---")
        with gr.Row():
            gr.LoginButton(size="sm")

    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=True,
    )
|
|
|
|
|
|