debjitpaul committed on
Commit ·
75b94d3
1
Parent(s): 95b8b77
updated submit form
Browse files
app.py
CHANGED
|
@@ -426,18 +426,25 @@ def submit_predictions(
|
|
| 426 |
code_url: str,
|
| 427 |
split: str,
|
| 428 |
) -> str:
|
|
|
|
|
|
|
|
|
|
| 429 |
if file_obj is None:
|
| 430 |
return "β **Missing file.** Please attach a predictions JSON."
|
| 431 |
-
if not agent_name.strip():
|
| 432 |
-
return "β **Missing agent name.**"
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
|
| 442 |
try:
|
| 443 |
src_path = Path(file_obj.name if hasattr(file_obj, "name") else file_obj)
|
|
@@ -501,16 +508,27 @@ def submit_predictions(
|
|
| 501 |
else "π¬ No notification channels configured on this Space β "
|
| 502 |
"if you don't hear back in 10 days, please email the paper authors.\n\n"
|
| 503 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
|
| 505 |
return (
|
| 506 |
(warning_prefix + "\n\n" if warning_prefix else "")
|
|
|
|
| 507 |
+ f"β
**Submission received** as `{fname}` for the **{split}** split "
|
| 508 |
f"(**{len(predictions)}** entries).\n\n"
|
| 509 |
+ storage_line
|
| 510 |
+ notify_line
|
| 511 |
-
+ f"A maintainer will score it against the
|
| 512 |
-
f"leaderboard within ~1 week.
|
| 513 |
-
f"reproducibility via your `code_url`.\n\n"
|
| 514 |
f"**For a permanent public record,** please also open a PR to the "
|
| 515 |
f"[benchmark repo]({REPO_URL}) with your predictions file under `submissions/`."
|
| 516 |
)
|
|
@@ -576,11 +594,13 @@ def build_app() -> gr.Blocks:
|
|
| 576 |
with gr.Tab("π€ Submit"):
|
| 577 |
gr.Markdown("## Submit your agent's predictions")
|
| 578 |
gr.Markdown(
|
| 579 |
-
"Upload a JSON file containing **your agent's output** on
|
| 580 |
-
"
|
| 581 |
-
"
|
| 582 |
-
"
|
| 583 |
-
"
|
|
|
|
|
|
|
| 584 |
)
|
| 585 |
|
| 586 |
gr.Markdown(
|
|
@@ -608,12 +628,21 @@ def build_app() -> gr.Blocks:
|
|
| 608 |
|
| 609 |
with gr.Row():
|
| 610 |
with gr.Column():
|
| 611 |
-
agent_name_in = gr.Textbox(
|
| 612 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 613 |
scaffold_in = gr.Dropdown(
|
| 614 |
choices=["none", "ReAct", "CodeAct", "Plan-and-Execute", "Reflexion", "MCTS", "Custom"],
|
| 615 |
label="Scaffold",
|
| 616 |
-
value="
|
|
|
|
| 617 |
)
|
| 618 |
split_in = gr.Dropdown(
|
| 619 |
choices=["dev", "test"],
|
|
@@ -621,15 +650,24 @@ def build_app() -> gr.Blocks:
|
|
| 621 |
value="test",
|
| 622 |
)
|
| 623 |
with gr.Column():
|
| 624 |
-
organization_in = gr.Textbox(
|
| 625 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 626 |
code_url_in = gr.Textbox(
|
| 627 |
-
label="Code URL
|
| 628 |
placeholder="https://github.com/you/your-agent",
|
|
|
|
| 629 |
)
|
| 630 |
|
| 631 |
predictions_in = gr.File(
|
| 632 |
-
label="Predictions JSON (the output file produced by your agent
|
| 633 |
file_types=[".json"],
|
| 634 |
)
|
| 635 |
submit_btn = gr.Button("Submit for review", variant="primary")
|
|
|
|
| 426 |
code_url: str,
|
| 427 |
split: str,
|
| 428 |
) -> str:
|
| 429 |
+
# Only the file and agent name are required. Everything else is optional
|
| 430 |
+
# and gets filled with sensible defaults / "Anonymous" so a curious user
|
| 431 |
+
# can try a submission without committing to a full metadata profile.
|
| 432 |
if file_obj is None:
|
| 433 |
return "β **Missing file.** Please attach a predictions JSON."
|
| 434 |
+
if not agent_name or not agent_name.strip():
|
| 435 |
+
return "β **Missing agent name.** Please give your submission a short display name."
|
| 436 |
+
|
| 437 |
+
# Soft-validate optional fields — warn but don't block.
|
| 438 |
+
soft_warnings: list[str] = []
|
| 439 |
+
if contact_email.strip() and "@" not in contact_email:
|
| 440 |
+
soft_warnings.append("contact email looks malformed")
|
| 441 |
+
if code_url.strip() and not code_url.startswith(("http://", "https://")):
|
| 442 |
+
soft_warnings.append("code URL must start with http:// or https://")
|
| 443 |
+
|
| 444 |
+
# Normalise optionals with placeholders so downstream never sees empty strings.
|
| 445 |
+
base_model = base_model.strip() or "unspecified"
|
| 446 |
+
organization = organization.strip() or "Anonymous"
|
| 447 |
+
scaffold = scaffold or "none"
|
| 448 |
|
| 449 |
try:
|
| 450 |
src_path = Path(file_obj.name if hasattr(file_obj, "name") else file_obj)
|
|
|
|
| 508 |
else "π¬ No notification channels configured on this Space β "
|
| 509 |
"if you don't hear back in 10 days, please email the paper authors.\n\n"
|
| 510 |
)
|
| 511 |
+
soft_line = ""
|
| 512 |
+
if soft_warnings:
|
| 513 |
+
soft_line = "β οΈ **Note:** " + "; ".join(soft_warnings) + ".\n\n"
|
| 514 |
+
|
| 515 |
+
email_line = (
|
| 516 |
+
f"We may email `{bundle['metadata']['contact_email']}` if we need to verify "
|
| 517 |
+
f"reproducibility via your `code_url`."
|
| 518 |
+
if bundle["metadata"]["contact_email"]
|
| 519 |
+
else "**Tip:** add a contact email next time so we can follow up about "
|
| 520 |
+
"reproducibility or questions."
|
| 521 |
+
)
|
| 522 |
|
| 523 |
return (
|
| 524 |
(warning_prefix + "\n\n" if warning_prefix else "")
|
| 525 |
+
+ soft_line
|
| 526 |
+ f"β
**Submission received** as `{fname}` for the **{split}** split "
|
| 527 |
f"(**{len(predictions)}** entries).\n\n"
|
| 528 |
+ storage_line
|
| 529 |
+ notify_line
|
| 530 |
+
+ f"A maintainer will score it against the {split}-set gold answers and merge it to the "
|
| 531 |
+
f"leaderboard within ~1 week. " + email_line + "\n\n"
|
|
|
|
| 532 |
f"**For a permanent public record,** please also open a PR to the "
|
| 533 |
f"[benchmark repo]({REPO_URL}) with your predictions file under `submissions/`."
|
| 534 |
)
|
|
|
|
| 594 |
with gr.Tab("π€ Submit"):
|
| 595 |
gr.Markdown("## Submit your agent's predictions")
|
| 596 |
gr.Markdown(
|
| 597 |
+
"Upload a JSON file containing **your agent's output** on either the "
|
| 598 |
+
"**dev** (40 public tasks) or **test** (80 held-out tasks) split of DEEPSYNTH. "
|
| 599 |
+
"For dev submissions, the evaluator scores against the publicly released gold "
|
| 600 |
+
"answers. For test submissions, we score against our private gold answers and "
|
| 601 |
+
"add your row to the held-out leaderboard. The uploaded file must be the "
|
| 602 |
+
"*predictions file* produced by running your agent on the split's questions β "
|
| 603 |
+
"not your agent's source code, and not a raw transcript."
|
| 604 |
)
|
| 605 |
|
| 606 |
gr.Markdown(
|
|
|
|
| 628 |
|
| 629 |
with gr.Row():
|
| 630 |
with gr.Column():
|
| 631 |
+
agent_name_in = gr.Textbox(
|
| 632 |
+
label="Agent name (required)",
|
| 633 |
+
placeholder="e.g. ReAct-GPT5",
|
| 634 |
+
info="Short display name shown on the leaderboard.",
|
| 635 |
+
)
|
| 636 |
+
base_model_in = gr.Textbox(
|
| 637 |
+
label="Base model",
|
| 638 |
+
placeholder="e.g. gpt-5.2-pro (2026-02)",
|
| 639 |
+
info="Optional. Defaults to 'unspecified'.",
|
| 640 |
+
)
|
| 641 |
scaffold_in = gr.Dropdown(
|
| 642 |
choices=["none", "ReAct", "CodeAct", "Plan-and-Execute", "Reflexion", "MCTS", "Custom"],
|
| 643 |
label="Scaffold",
|
| 644 |
+
value="none",
|
| 645 |
+
info="Optional. Select the agent scaffold you used.",
|
| 646 |
)
|
| 647 |
split_in = gr.Dropdown(
|
| 648 |
choices=["dev", "test"],
|
|
|
|
| 650 |
value="test",
|
| 651 |
)
|
| 652 |
with gr.Column():
|
| 653 |
+
organization_in = gr.Textbox(
|
| 654 |
+
label="Organization",
|
| 655 |
+
placeholder="e.g. MSR India",
|
| 656 |
+
info="Optional. Defaults to 'Anonymous'.",
|
| 657 |
+
)
|
| 658 |
+
contact_email_in = gr.Textbox(
|
| 659 |
+
label="Contact email",
|
| 660 |
+
placeholder="you@org.edu",
|
| 661 |
+
info="Optional but recommended β we may email you about reproducibility.",
|
| 662 |
+
)
|
| 663 |
code_url_in = gr.Textbox(
|
| 664 |
+
label="Code URL",
|
| 665 |
placeholder="https://github.com/you/your-agent",
|
| 666 |
+
info="Optional. A public URL helps us accept your submission faster.",
|
| 667 |
)
|
| 668 |
|
| 669 |
predictions_in = gr.File(
|
| 670 |
+
label="Predictions JSON (required) β the output file produced by your agent",
|
| 671 |
file_types=[".json"],
|
| 672 |
)
|
| 673 |
submit_btn = gr.Button("Submit for review", variant="primary")
|