roc-hci's picture
Update app.py
4a3c092 verified
import pandas as pd
import gradio as gr
from about import TITLE, INTRODUCTION_TEXT, CITATION_BUTTON_TEXT, DESCRIPTION_TEXT, LEADERBOARD_DETAIL_TEXT
from theme import build_theme, CUSTOM_CSS
from utils import load_results, submit_prediction, _format_inline, markdown_to_html, _format_accuracy
# Clone command displayed read-only in the Submit tab's "Git command" textbox.
GIT_CLONE_COMMAND = "git clone https://github.com/Masum06/Turing-Bench.git"
import base64
from pathlib import Path
# Read the sponsor logo once at import time and keep it as base64 text so the
# footer can embed it directly in a data: URI.
img_path = Path(__file__).parent.joinpath("images", "locke-logo.png")
b64 = str(base64.b64encode(img_path.read_bytes()), "ascii")
def build_leaderboard_summary(df: pd.DataFrame) -> str:
    """Render the three stat cards shown alongside the leaderboard table.

    Args:
        df: Leaderboard results. The first row is treated as the current
            leader. The ``Model``, ``Accuracy`` and ``Time`` columns are each
            optional; a missing column falls back to a placeholder value.

    Returns:
        An HTML fragment (``<div class="stats-grid">``) with the number of
        rows, the leader's accuracy and model name, and the most recent
        parseable timestamp. An empty frame yields static placeholder cards.
    """
    # Function-scope import keeps the block self-contained.
    from html import escape

    if df.empty:
        return """
<div class="stats-grid">
<div class="stat-card">
<div class="stat-label">Total submissions</div>
<div class="stat-value">0</div>
<div class="stat-meta">No scored results yet</div>
</div>
<div class="stat-card">
<div class="stat-label">Top accuracy</div>
<div class="stat-value">N/A</div>
<div class="stat-meta">Waiting for evaluations</div>
</div>
<div class="stat-card">
<div class="stat-label">Latest activity</div>
<div class="stat-value">N/A</div>
<div class="stat-meta">Refresh once submissions land</div>
</div>
</div>
"""

    leader = df.iloc[0]

    # The model name is free text (presumably typed into the submission form),
    # so it must be escaped before being interpolated into markup.
    best_model = escape(str(leader["Model"])) if "Model" in df.columns else "Unknown"
    best_accuracy = _format_accuracy(leader["Accuracy"]) if "Accuracy" in df.columns else "N/A"

    latest_time = "N/A"
    if "Time" in df.columns:
        # Unparseable timestamps become NaT and are dropped rather than raising.
        latest_series = pd.to_datetime(df["Time"], errors="coerce").dropna()
        if not latest_series.empty:
            latest_time = latest_series.max().strftime("%b %d, %Y %H:%M")

    return f"""
<div class="stats-grid">
<div class="stat-card">
<div class="stat-label">Total submissions</div>
<div class="stat-value">{len(df)}</div>
<div class="stat-meta">Ranked on the live board</div>
</div>
<div class="stat-card">
<div class="stat-label">Top accuracy</div>
<div class="stat-value">{best_accuracy}</div>
<div class="stat-meta">Current leader: {best_model}</div>
</div>
<div class="stat-card">
<div class="stat-label">Latest activity</div>
<div class="stat-value">{latest_time}</div>
<div class="stat-meta">Higher accuracy ranks first</div>
</div>
</div>
"""
def refresh_leaderboard_view():
    """Reload the results and pair them with a freshly rendered summary.

    Returns:
        A ``(results, summary_html)`` tuple, in the order expected by the
        ``[leaderboard_df, leaderboard_summary]`` outputs it is wired to.
    """
    results = load_results()
    summary_html = build_leaderboard_summary(results)
    return results, summary_html
def submit_prediction_html(model_name, predictions_file, profile: gr.OAuthProfile | None):
    """Run a submission and return its status message rendered as HTML.

    Args:
        model_name: Value of the "Model name" textbox.
        predictions_file: Value of the predictions file upload.
        profile: OAuth profile of the current user, or ``None``.

    Returns:
        The message returned by ``submit_prediction`` converted with
        ``markdown_to_html`` using the status-message CSS classes.
    """
    status_text = submit_prediction(model_name, predictions_file, profile)
    status_html = markdown_to_html(status_text, "html-block status-message")
    return status_html
def update_submit_ui(profile: gr.OAuthProfile | None):
    """Show or hide the submission form according to the login state.

    Args:
        profile: OAuth profile of the current user, or ``None`` when nobody
            is logged in.

    Returns:
        A pair of ``gr.update`` objects: the first toggles visibility of the
        submission form, the second sets the (always visible) login status
        message.
    """
    logged_in = profile is not None
    if logged_in:
        status_text = f"Logged in as **{profile.name}**"
    else:
        status_text = "Please log in with Hugging Face to submit."

    return (
        gr.update(visible=logged_in),
        gr.update(
            value=markdown_to_html(status_text, "html-block status-message"),
            visible=True,
        ),
    )
# Build the whole UI: a hero banner, three tabs (Leaderboard / About / Submit),
# the event wiring, and a sponsor footer.
# NOTE(review): block nesting is reconstructed from statement order; confirm
# the column placement in the Submit tab and the footer level against the
# rendered layout.
with gr.Blocks(theme=build_theme(), css=CUSTOM_CSS, fill_width=True) as demo:
    with gr.Column(elem_classes=["app-shell"]):
        # --- Hero banner: badges, title and introduction text ---
        with gr.Group(elem_classes=["hero-panel"]):
            gr.HTML(
                """
<div class="hero-badges">
<span class="hero-badge">Live Benchmark</span>
<span class="hero-badge">Human vs AI Dialogue Detection</span>
<span class="hero-badge">Ranked by Accuracy</span>
</div>
"""
            )
            gr.HTML(markdown_to_html(TITLE, "html-block hero-title"))
            gr.HTML(markdown_to_html(INTRODUCTION_TEXT, "html-block hero-copy"))
        with gr.Tabs(elem_classes=["main-tabs"]):
            # --- Leaderboard tab: toolbar, refresh button, summary cards, table ---
            with gr.Tab("Leaderboard"):
                with gr.Group(elem_classes=["panel-card", "leaderboard-card"]):
                    with gr.Row(elem_classes="centered-row"):
                        with gr.Column(scale=6, elem_classes="centered-column"):
                            gr.HTML(
                                f"""
<div class="table-toolbar">
<div>
<div class="table-title">Leaderboard table</div>
<div class="table-subtitle">
{LEADERBOARD_DETAIL_TEXT}
</div>
</div>
</div>
"""
                            )
                        with gr.Column(scale=2, min_width=180, elem_classes="centered-column"):
                            refresh_btn = gr.Button(
                                "Refresh standings",
                                variant="secondary",
                                elem_classes=["refresh-button"],
                            )
                    # Starts empty; filled by refresh_leaderboard_view on load/refresh.
                    leaderboard_summary = gr.HTML()
                    # Initial table contents are loaded once while the UI is built.
                    leaderboard_df = gr.Dataframe(
                        value=load_results(),
                        elem_classes=["leaderboard-table"],
                        interactive=False,
                    )
                    refresh_btn.click(
                        fn=refresh_leaderboard_view,
                        outputs=[leaderboard_df, leaderboard_summary],
                    )
            # --- About tab: description, task facts, citation ---
            with gr.Tab("About"):
                with gr.Group(elem_classes=["panel-card"]):
                    gr.HTML(markdown_to_html(DESCRIPTION_TEXT, "html-block rich-copy"))
                with gr.Group(elem_classes=["panel-card", "info-grid-card"]):
                    gr.HTML(
                        """
<div class="info-grid">
<div class="info-item">
<div class="info-label">Task format</div>
<div class="info-value">Binary A/B decision</div>
</div>
<div class="info-item">
<div class="info-label">Input</div>
<div class="info-value">Paired dialogue transcripts</div>
</div>
<div class="info-item">
<div class="info-label">Outcome</div>
<div class="info-value">Human-human conversation identification</div>
</div>
</div>
"""
                    )
                with gr.Group(elem_classes=["panel-card"]):
                    gr.HTML("<div class=\"section-heading\">Citation</div>")
                    gr.HTML(markdown_to_html(CITATION_BUTTON_TEXT, "html-block rich-copy"))
            # --- Submit tab: workflow, login, clone command, upload form, dataset ---
            with gr.Tab("Submit"):
                with gr.Group(elem_classes=["panel-card"]):
                    gr.HTML(
                        """
<div class="section-kicker">Submission workflow</div>
<p>To submit your predictions, download the git repository and follow the instructions to run evaluation locally. If you want the dataset directly (no additional infrastructure), click the download link at the bottom of this page.</p>
<div class="steps-row">
<div class="step-chip">1. Log in</div>
<div class="step-chip">2. Clone git repository and run evaluation locally</div>
<div class="step-chip">3. Upload CSV</div>
</div>
"""
                    )
                with gr.Row(equal_height=True):
                    with gr.Column(scale=5):
                        with gr.Group(elem_classes=["panel-card"]):
                            gr.HTML(
                                """
<div class="html-block">
<h2>Account access</h2>
<p>Authenticate with Hugging Face to unlock the submission form.</p>
</div>
"""
                            )
                            login_btn = gr.LoginButton("Log in with Hugging Face")
                            # Same default text that update_submit_ui emits when
                            # no profile is present.
                            login_status = gr.HTML(
                                value=markdown_to_html(
                                    "Please log in with Hugging Face to submit.",
                                    "html-block status-message",
                                ),
                                visible=True,
                            )
                    with gr.Column(scale=4):
                        with gr.Group(elem_classes=["panel-card"]):
                            gr.HTML(
                                """
<div class="html-block">
<h2>Clone repository</h2>
<p>Use this command to get the evaluation code/infrastructure locally before generating your predictions.</p>
</div>
"""
                            )
                            git_command = gr.Textbox(
                                label="Git command",
                                value=GIT_CLONE_COMMAND,
                                interactive=False,
                                lines=1,
                                elem_classes=["command-box"],
                            )
                # Hidden until update_submit_ui reports a logged-in profile.
                with gr.Column(visible=False, elem_classes=["submission-panel"]) as submission_form:
                    with gr.Group(elem_classes=["panel-card"]):
                        gr.HTML(
                            """
<div class="html-block">
<h2>Upload predictions</h2>
<p>Submit a CSV containing the required <code>who_is_human</code> column.</p>
</div>
"""
                        )
                        model_name = gr.Textbox(
                            label="Model name",
                            placeholder="e.g. GPT-4.5, Claude Opus, or your custom system",
                        )
                        predictions_file = gr.File(label="Predictions file (.csv)")
                        submit_btn = gr.Button(
                            "Submit predictions",
                            variant="primary",
                            elem_classes=["submit-button"],
                        )
                        submission_status = gr.HTML()
                with gr.Column(scale=3):
                    with gr.Group(elem_classes=["panel-card"]):
                        gr.HTML(
                            """
<div class="html-block">
<h2>Raw dataset</h2>
<p>If you want to download the raw dataset (no evaluation infrastructure) click the button below.</p>
</div>
"""
                        )
                        gr.DownloadButton(
                            label="Download raw dataset",
                            value="https://huggingface.co/datasets/roc-hci/Turing-Bench-Questions/resolve/main/turing_bench_public_shuffled.csv",
                        )
    # --- Event wiring ---
    # On app load: refresh the table and summary cards.
    demo.load(
        fn=refresh_leaderboard_view,
        outputs=[leaderboard_df, leaderboard_summary],
    )
    # On app load and on login-button click: sync the submission form's
    # visibility with the login state. No explicit inputs are passed; the
    # `profile` argument is expected to be supplied by Gradio from the
    # gr.OAuthProfile annotation on update_submit_ui.
    demo.load(
        fn=update_submit_ui,
        inputs=None,
        outputs=[submission_form, login_status],
    )
    login_btn.click(
        fn=update_submit_ui,
        inputs=None,
        outputs=[submission_form, login_status],
    )
    # Only the two form fields are listed as inputs; submit_prediction_html's
    # third parameter (`profile`) is likewise left to Gradio.
    submit_btn.click(
        fn=submit_prediction_html,
        inputs=[model_name, predictions_file],
        outputs=submission_status,
    )
    # Sponsor footer; the logo is embedded as a base64 data: URI (see `b64`).
    gr.HTML(
        f"""
<div class="html-block">
<p class="p-small">Thanks Locke (https://lockeidentity.com/) for sponsoring part of this research</p>
<a href="https://lockeidentity.com/" target="_blank" rel="noopener noreferrer"><img class="logo-small" src="data:image/png;base64,{b64}"/></a>
</div>
"""
    )
# Start the server only when this file is executed as a script, so that
# importing the module (e.g. from tests or tooling) does not launch the app.
if __name__ == "__main__":
    demo.launch()