Jasonkim8652 committed on
Commit
b34cf54
·
verified ·
1 Parent(s): bfce576

redesign: clean header banner + consistent sentence-case About text

Browse files
Files changed (1) hide show
  1. app.py +172 -155
app.py CHANGED
@@ -132,7 +132,7 @@ CUSTOM_CSS = """
132
  .dark .tabs { background: #ffffff !important; }
133
  .dark .tab-nav button { color: #2d3748 !important; }
134
  .dark .tab-nav button.selected {
135
- color: #1a365d !important;
136
  border-color: #3182ce !important;
137
  }
138
  .dark .block { background: #ffffff !important; }
@@ -184,35 +184,46 @@ def _base_layout(**overrides) -> dict:
184
 
185
 
186
  def build_header(last_updated: str, n_entries: int) -> str:
 
 
 
 
 
187
  return f"""
188
- <div style="background:linear-gradient(135deg,#1a365d 0%,#2b6cb0 100%);
189
- color:white;padding:2rem;text-align:center;border-radius:12px;
190
- margin-bottom:0.5rem">
191
- <h1 style="font-size:2rem;margin:0;font-weight:700">
192
- \U0001f9ec BioDesignBench Leaderboard</h1>
193
- <p style="opacity:0.85;margin:0.3rem 0 0;font-size:1rem">
194
- Evaluating LLM Agents on Protein Design via MCP Tools</p>
195
- <div style="margin-top:0.6rem;display:flex;justify-content:center;
196
- gap:0.8rem;flex-wrap:wrap">
 
 
 
 
 
 
197
  <a href="{PAPER_URL}" target="_blank"
198
- style="background:rgba(255,255,255,0.2);color:white;
199
- padding:0.3rem 0.8rem;border-radius:5px;
200
- text-decoration:none;font-size:0.85rem;
201
- font-weight:600">\U0001f4c4 Paper</a>
202
  <a href="{GITHUB_URL}" target="_blank"
203
- style="background:rgba(255,255,255,0.2);color:white;
204
- padding:0.3rem 0.8rem;border-radius:5px;
205
- text-decoration:none;font-size:0.85rem;
206
- font-weight:600">\U0001f4bb GitHub</a>
207
  <a href="{HF_URL}" target="_blank"
208
- style="background:rgba(255,255,255,0.2);color:white;
209
- padding:0.3rem 0.8rem;border-radius:5px;
210
- text-decoration:none;font-size:0.85rem;
211
- font-weight:600">\U0001f917 HuggingFace</a>
 
 
 
 
 
 
 
212
  </div>
213
- <div style="font-size:0.8rem;opacity:0.6;margin-top:0.5rem">
214
- Romero Lab, Duke University &middot; Last updated: {last_updated}
215
- &middot; 76 tasks &middot; {n_entries} conditions</div>
216
  </div>"""
217
 
218
 
@@ -278,9 +289,9 @@ def build_leaderboard_table(
278
  "font-size:0.9rem"
279
  )
280
  TH = (
281
- "background:#1a365d;color:white;padding:0.75rem 1rem;"
282
- "text-align:left;font-size:0.8rem;text-transform:uppercase;"
283
- "letter-spacing:0.5px"
284
  )
285
 
286
  rows = []
@@ -300,7 +311,7 @@ def build_leaderboard_table(
300
  else:
301
  llm_rank += 1
302
  rcolor = {1: "#d69e2e", 2: "#a0aec0", 3: "#c17832"}.get(
303
- llm_rank, "#1a365d"
304
  )
305
  rsize = (
306
  "1.1rem"
@@ -407,8 +418,8 @@ def build_heatmap(entry: dict) -> str:
407
  """HTML heatmap table for one agent across 17 taxonomy cells."""
408
  ts = entry.get("taxonomy_scores", {})
409
  TH = (
410
- "background:#1a365d;color:white;padding:0.6rem 0.8rem;"
411
- "text-align:center;font-size:0.75rem"
412
  )
413
  TD = (
414
  "text-align:center;padding:0.5rem;font-size:0.85rem;"
@@ -520,7 +531,7 @@ def build_mode_cards(entries: list) -> str:
520
  cards.append(
521
  '<div style="background:white;border-radius:10px;padding:1.2rem;'
522
  'box-shadow:0 1px 3px rgba(0,0,0,0.08)">'
523
- f'<h4 style="font-size:0.95rem;color:#1a365d;'
524
  f'margin:0 0 0.8rem">{name}</h4>'
525
  f'{"".join(lines)}</div>'
526
  )
@@ -536,148 +547,152 @@ def build_mode_cards(entries: list) -> str:
536
 
537
 
538
  def build_about() -> str:
539
- return """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
  <div style="max-width:900px;margin:0 auto">
541
 
542
- <div style="background:white;border-radius:10px;padding:2rem;
543
- box-shadow:0 1px 3px rgba(0,0,0,0.08);margin-bottom:1.5rem">
544
- <h2 style="color:#1a365d;margin:0 0 0.8rem;font-size:1.3rem">
545
- What is BioDesignBench?</h2>
546
- <p style="margin-bottom:0.8rem;color:#2d3748;line-height:1.6">
547
  BioDesignBench is the first comprehensive benchmark for evaluating
548
  LLM agents on protein design tasks via MCP (Model Context Protocol)
549
  tool use. Unlike existing benchmarks that focus on model-only
550
- evaluation, BioDesignBench tests the full design loop:
551
- <strong>Natural language &rarr; Design &rarr; Evaluate &rarr;
552
- Iterate</strong>.</p>
553
  <div style="display:grid;grid-template-columns:
554
- repeat(auto-fit,minmax(140px,1fr));gap:1rem;margin:1rem 0">
555
- <div style="background:#f7fafc;border-radius:8px;padding:1rem;
556
- text-align:center">
557
- <div style="font-size:1.8rem;font-weight:700;color:#3182ce">
558
  76</div>
559
- <div style="font-size:0.8rem;color:#718096">Design Tasks</div>
560
  </div>
561
- <div style="background:#f7fafc;border-radius:8px;padding:1rem;
562
- text-align:center">
563
- <div style="font-size:1.8rem;font-weight:700;color:#3182ce">
564
  17</div>
565
- <div style="font-size:0.8rem;color:#718096">Taxonomy Cells</div>
566
  </div>
567
- <div style="background:#f7fafc;border-radius:8px;padding:1rem;
568
- text-align:center">
569
- <div style="font-size:1.8rem;font-weight:700;color:#3182ce">
570
  17</div>
571
- <div style="font-size:0.8rem;color:#718096">MCP Tools</div>
572
  </div>
573
- <div style="background:#f7fafc;border-radius:8px;padding:1rem;
574
- text-align:center">
575
- <div style="font-size:1.8rem;font-weight:700;color:#3182ce">
576
  100</div>
577
- <div style="font-size:0.8rem;color:#718096">Point Rubric</div>
578
  </div>
579
  </div>
580
  </div>
581
 
582
- <div style="background:white;border-radius:10px;padding:2rem;
583
- box-shadow:0 1px 3px rgba(0,0,0,0.08);margin-bottom:1.5rem">
584
- <h2 style="color:#1a365d;margin:0 0 0.8rem;font-size:1.3rem">
585
- How to Submit</h2>
586
- <h3 style="color:#2b6cb0;margin:1.2rem 0 0.5rem;font-size:1.05rem">
587
- 1. Build Your Agent</h3>
588
- <p style="margin-bottom:0.8rem;color:#2d3748">
589
- Create a protein design agent that accepts tasks via our API spec.
590
- You may use our 17 reference MCP tools as-is, modify them, or build
591
- entirely custom tools.</p>
592
- <h3 style="color:#2b6cb0;margin:1.2rem 0 0.5rem;font-size:1.05rem">
593
- 2. Host as API Endpoint</h3>
594
- <p style="margin-bottom:0.8rem;color:#2d3748">
595
- Your agent must be accessible as a POST endpoint that accepts task
596
- descriptions and returns designed sequences.</p>
597
- <h3 style="color:#2b6cb0;margin:1.2rem 0 0.5rem;font-size:1.05rem">
598
- API Specification</h3>
599
- <pre style="background:#1a202c;color:#e2e8f0;padding:1rem;
600
- border-radius:8px;font-size:0.8rem;overflow-x:auto;
601
- line-height:1.5">POST /evaluate
602
-
603
- Input:
604
- {
605
- "task_id": "dnb_sig_001",
606
  "task_description": "Design a de novo binder for...",
607
  "available_tools": [...],
608
  "max_steps": 50,
609
  "timeout_sec": 300
610
- }
611
 
612
- Output:
613
- {
614
  "sequences": ["MKKL..."],
615
  "run_log": [...],
616
  "total_steps": 12,
617
  "total_time_sec": 142.5
618
- }</pre>
619
- <h3 style="color:#2b6cb0;margin:1.2rem 0 0.5rem;font-size:1.05rem">
620
- 3. Submit &amp; Evaluate</h3>
621
- <p style="margin-bottom:0.8rem;color:#2d3748">
622
- We run 73 hidden tasks against your endpoint. Results are
623
- independently verified with AlphaFold2.
624
- Maximum <strong>2 submissions per month</strong>.</p>
625
- <p style="color:#2d3748">
626
  3 example tasks are publicly available for development and
627
  testing.</p>
628
 
629
- <h3 style="color:#2b6cb0;margin:1.2rem 0 0.5rem;font-size:1.05rem">
630
- MCP Reference Tools</h3>
631
- <p style="margin-bottom:0.8rem;color:#2d3748">
632
- We provide 17 reference MCP tools for protein design. You may use
633
- them as-is, modify them, or build entirely custom tools.
634
- <a href="#" style="color:#3182ce">GitHub repository &rarr;</a></p>
635
-
636
- <h3 style="color:#2b6cb0;margin:1.2rem 0 0.5rem;font-size:1.05rem">
637
- Submission Limits</h3>
638
- <ul style="color:#2d3748;padding-left:1.5rem;margin-bottom:0.8rem">
639
- <li>Maximum 2 submissions per month</li>
640
- <li>Hidden test set (73 tasks) is used for ranking</li>
641
- <li>3 example tasks are publicly available for development</li>
642
  </ul>
643
  </div>
644
 
645
- <div style="background:white;border-radius:10px;padding:2rem;
646
- box-shadow:0 1px 3px rgba(0,0,0,0.08);margin-bottom:1.5rem">
647
- <h2 style="color:#1a365d;margin:0 0 0.8rem;font-size:1.3rem">
648
- Scoring Rubric (100 points)</h2>
649
- <p style="margin-bottom:0.5rem;color:#2d3748">
650
- <strong>Approach (20 pts)</strong> &mdash; Function-based design
651
- methodology evaluation across 10 DesignFunctions</p>
652
- <p style="margin-bottom:0.5rem;color:#2d3748">
653
- <strong>Orchestration (15 pts)</strong> &mdash; Pipeline ordering
654
- and intermediate validation</p>
655
- <p style="margin-bottom:0.5rem;color:#2d3748">
656
- <strong>Quality (35 pts)</strong> &mdash; 3-tier graduated scoring:
657
- structure confidence, interface confidence, interface physics</p>
658
- <p style="margin-bottom:0.5rem;color:#2d3748">
659
- <strong>Feasibility (15 pts)</strong> &mdash; Valid amino acids,
660
- length, composition, biophysical checks</p>
661
- <p style="margin-bottom:0.5rem;color:#2d3748">
662
- <strong>Novelty (5 pts)</strong> &mdash; Sequence identity to
663
- reference (lower = more novel = better)</p>
664
- <p style="margin-bottom:0.5rem;color:#2d3748">
665
- <strong>Diversity (10 pts)</strong> &mdash; Number and diversity
666
- of generated designs</p>
667
  </div>
668
 
669
- <div style="background:white;border-radius:10px;padding:2rem;
670
- box-shadow:0 1px 3px rgba(0,0,0,0.08);margin-bottom:1.5rem">
671
- <h2 style="color:#1a365d;margin:0 0 0.8rem;font-size:1.3rem">
672
- Citation</h2>
673
- <pre style="background:#1a202c;color:#e2e8f0;padding:1rem;
674
- border-radius:8px;font-size:0.8rem;
675
- line-height:1.5">@article{biodesignbench2026,
676
- title={BioDesignBench: Evaluating LLM Agents on
677
- Protein Design via MCP Tools},
678
- author={Kim, Jason et al.},
679
- year={2026}
680
- }</pre>
681
  </div>
682
 
683
  </div>"""
@@ -981,17 +996,18 @@ def create_app() -> gr.Blocks:
981
  with gr.Tab("\U0001f4e4 Submit"):
982
  gr.HTML("""
983
  <div style="max-width:700px;margin:0 auto;padding:1rem">
984
- <h2 style="color:#1a365d;margin:0 0 0.5rem">
985
- Submit Your Agent</h2>
986
- <p style="color:#4a5568;margin-bottom:1rem;line-height:1.5">
 
987
  Submit your protein design agent for benchmarking.
988
  Your agent must be hosted as a POST endpoint that accepts
989
- task descriptions and returns designed sequences.
990
- <strong>You bear all LLM and MCP tool costs</strong>;
991
- we only run Boltz structure prediction on our end.</p>
992
- <div style="background:#fefcbf;border-left:4px solid #d69e2e;
993
- padding:0.8rem;border-radius:4px;margin-bottom:1rem;
994
- font-size:0.85rem;color:#744210">
995
  <strong>Rate limit:</strong> 2 submissions per calendar
996
  month per organization.</div>
997
  </div>""")
@@ -1065,9 +1081,10 @@ def create_app() -> gr.Blocks:
1065
  with gr.Tab("\U0001f6e0 Status"):
1066
  gr.HTML("""
1067
  <div style="max-width:800px;margin:0 auto;padding:1rem">
1068
- <h2 style="color:#1a365d;margin:0 0 0.5rem">
1069
- Submission Status & Admin</h2>
1070
- <p style="color:#4a5568;margin-bottom:0.5rem">
 
1071
  Check your submission status or manage the pipeline
1072
  (admin only).</p>
1073
  </div>""")
@@ -1100,7 +1117,7 @@ def create_app() -> gr.Blocks:
1100
  if sub.get("overall_score") is not None:
1101
  score_html = (
1102
  f'<div style="font-size:1.2rem;'
1103
- f'font-weight:700;color:#1a365d;'
1104
  f'margin-top:0.5rem">'
1105
  f'Score: {sub["overall_score"]:.1f}/100'
1106
  f'</div>'
@@ -1132,7 +1149,7 @@ def create_app() -> gr.Blocks:
1132
  admin_msg = gr.HTML()
1133
 
1134
  with admin_panel:
1135
- gr.HTML('<h3 style="color:#1a365d">'
1136
  'Pending Submissions</h3>')
1137
  pending_html = gr.HTML()
1138
  refresh_btn = gr.Button("Refresh List")
@@ -1150,7 +1167,7 @@ def create_app() -> gr.Blocks:
1150
  )
1151
  approve_msg = gr.HTML()
1152
 
1153
- gr.HTML('<h3 style="color:#1a365d;margin-top:1rem">'
1154
  'Pipeline Control</h3>')
1155
  with gr.Row():
1156
  dispatch_id = gr.Textbox(
 
132
  .dark .tabs { background: #ffffff !important; }
133
  .dark .tab-nav button { color: #2d3748 !important; }
134
  .dark .tab-nav button.selected {
135
+ color: #0f172a !important;
136
  border-color: #3182ce !important;
137
  }
138
  .dark .block { background: #ffffff !important; }
 
184
 
185
 
186
  def build_header(last_updated: str, n_entries: int) -> str:
187
+ btn = (
188
+ "display:inline-block;padding:0.45rem 1.1rem;border-radius:8px;"
189
+ "text-decoration:none;font-size:0.82rem;font-weight:600;"
190
+ "transition:opacity 0.15s"
191
+ )
192
  return f"""
193
+ <div style="background:#ffffff;border:1px solid #e2e8f0;
194
+ padding:2.2rem 2rem 1.8rem;text-align:center;
195
+ border-radius:16px;margin-bottom:0.8rem;
196
+ box-shadow:0 1px 4px rgba(0,0,0,0.04)">
197
+ <p style="margin:0 0 0.3rem;font-size:0.75rem;font-weight:700;
198
+ letter-spacing:0.12em;text-transform:uppercase;
199
+ color:#3182ce">Romero Lab &middot; Duke University</p>
200
+ <h1 style="font-size:2rem;margin:0;font-weight:800;color:#0f172a;
201
+ letter-spacing:-0.02em">
202
+ \U0001f9ec BioDesignBench</h1>
203
+ <p style="color:#64748b;margin:0.4rem 0 0;font-size:1rem;
204
+ font-weight:400">
205
+ Evaluating LLM agents on protein design via MCP tools</p>
206
+ <div style="margin-top:1rem;display:flex;justify-content:center;
207
+ gap:0.6rem;flex-wrap:wrap">
208
  <a href="{PAPER_URL}" target="_blank"
209
+ style="{btn};background:#0f172a;color:#ffffff">
210
+ \U0001f4c4 Paper</a>
 
 
211
  <a href="{GITHUB_URL}" target="_blank"
212
+ style="{btn};background:#f1f5f9;color:#334155">
213
+ \U0001f4bb GitHub</a>
 
 
214
  <a href="{HF_URL}" target="_blank"
215
+ style="{btn};background:#f1f5f9;color:#334155">
216
+ \U0001f917 HuggingFace</a>
217
+ </div>
218
+ <div style="margin-top:1rem;display:flex;justify-content:center;
219
+ gap:1.5rem;flex-wrap:wrap">
220
+ <span style="font-size:0.78rem;color:#94a3b8">
221
+ 76 tasks</span>
222
+ <span style="font-size:0.78rem;color:#94a3b8">
223
+ {n_entries} agents</span>
224
+ <span style="font-size:0.78rem;color:#94a3b8">
225
+ Updated {last_updated}</span>
226
  </div>
 
 
 
227
  </div>"""
228
 
229
 
 
289
  "font-size:0.9rem"
290
  )
291
  TH = (
292
+ "background:#0f172a;color:white;padding:0.75rem 1rem;"
293
+ "text-align:left;font-size:0.75rem;text-transform:uppercase;"
294
+ "letter-spacing:0.05em;font-weight:600"
295
  )
296
 
297
  rows = []
 
311
  else:
312
  llm_rank += 1
313
  rcolor = {1: "#d69e2e", 2: "#a0aec0", 3: "#c17832"}.get(
314
+ llm_rank, "#0f172a"
315
  )
316
  rsize = (
317
  "1.1rem"
 
418
  """HTML heatmap table for one agent across 17 taxonomy cells."""
419
  ts = entry.get("taxonomy_scores", {})
420
  TH = (
421
+ "background:#0f172a;color:white;padding:0.6rem 0.8rem;"
422
+ "text-align:center;font-size:0.75rem;font-weight:600"
423
  )
424
  TD = (
425
  "text-align:center;padding:0.5rem;font-size:0.85rem;"
 
531
  cards.append(
532
  '<div style="background:white;border-radius:10px;padding:1.2rem;'
533
  'box-shadow:0 1px 3px rgba(0,0,0,0.08)">'
534
+ f'<h4 style="font-size:0.95rem;color:#0f172a;'
535
  f'margin:0 0 0.8rem">{name}</h4>'
536
  f'{"".join(lines)}</div>'
537
  )
 
547
 
548
 
549
  def build_about() -> str:
550
+ h2 = (
551
+ 'style="color:#0f172a;margin:0 0 0.8rem;font-size:1.25rem;'
552
+ 'font-weight:700"'
553
+ )
554
+ h3 = (
555
+ 'style="color:#334155;margin:1.2rem 0 0.5rem;font-size:1rem;'
556
+ 'font-weight:600"'
557
+ )
558
+ p = 'style="margin-bottom:0.8rem;color:#475569;line-height:1.6"'
559
+ card = (
560
+ 'style="background:#ffffff;border:1px solid #e2e8f0;'
561
+ 'border-radius:12px;padding:2rem;margin-bottom:1.2rem"'
562
+ )
563
+ stat_box = (
564
+ 'style="background:#f8fafc;border:1px solid #e2e8f0;'
565
+ 'border-radius:10px;padding:1rem;text-align:center"'
566
+ )
567
+ return f"""
568
  <div style="max-width:900px;margin:0 auto">
569
 
570
+ <div {card}>
571
+ <h2 {h2}>What is BioDesignBench?</h2>
572
+ <p {p}>
 
 
573
  BioDesignBench is the first comprehensive benchmark for evaluating
574
  LLM agents on protein design tasks via MCP (Model Context Protocol)
575
  tool use. Unlike existing benchmarks that focus on model-only
576
+ metrics, BioDesignBench tests the full agentic design loop:
577
+ <strong>prompt &rarr; design &rarr; validate &rarr;
578
+ iterate</strong>.</p>
579
  <div style="display:grid;grid-template-columns:
580
+ repeat(auto-fit,minmax(140px,1fr));gap:0.8rem;
581
+ margin:1rem 0">
582
+ <div {stat_box}>
583
+ <div style="font-size:1.8rem;font-weight:800;color:#0f172a">
584
  76</div>
585
+ <div style="font-size:0.78rem;color:#64748b">design tasks</div>
586
  </div>
587
+ <div {stat_box}>
588
+ <div style="font-size:1.8rem;font-weight:800;color:#0f172a">
 
589
  17</div>
590
+ <div style="font-size:0.78rem;color:#64748b">taxonomy cells</div>
591
  </div>
592
+ <div {stat_box}>
593
+ <div style="font-size:1.8rem;font-weight:800;color:#0f172a">
 
594
  17</div>
595
+ <div style="font-size:0.78rem;color:#64748b">MCP tools</div>
596
  </div>
597
+ <div {stat_box}>
598
+ <div style="font-size:1.8rem;font-weight:800;color:#0f172a">
 
599
  100</div>
600
+ <div style="font-size:0.78rem;color:#64748b">point rubric</div>
601
  </div>
602
  </div>
603
  </div>
604
 
605
+ <div {card}>
606
+ <h2 {h2}>How to submit</h2>
607
+ <h3 {h3}>1. Build your agent</h3>
608
+ <p {p}>
609
+ Create a protein design agent that accepts tasks via our API.
610
+ You can use our 17 reference MCP tools as-is, extend them, or
611
+ build entirely custom tools.</p>
612
+ <h3 {h3}>2. Host an API endpoint</h3>
613
+ <p {p}>
614
+ Your agent must be accessible as a POST endpoint that accepts
615
+ task payloads and returns designed protein sequences.</p>
616
+ <h3 {h3}>API specification</h3>
617
+ <pre style="background:#0f172a;color:#e2e8f0;padding:1.2rem;
618
+ border-radius:10px;font-size:0.8rem;overflow-x:auto;
619
+ line-height:1.6">POST /api/run
620
+
621
+ Request:
622
+ {{
623
+ "task_id": "dnb_ab_001",
 
 
 
 
 
624
  "task_description": "Design a de novo binder for...",
625
  "available_tools": [...],
626
  "max_steps": 50,
627
  "timeout_sec": 300
628
+ }}
629
 
630
+ Response:
631
+ {{
632
  "sequences": ["MKKL..."],
633
  "run_log": [...],
634
  "total_steps": 12,
635
  "total_time_sec": 142.5
636
+ }}</pre>
637
+ <h3 {h3}>3. Submit and wait</h3>
638
+ <p {p}>
639
+ We dispatch 73 hidden tasks to your endpoint and verify results
640
+ with Boltz structure prediction.
641
+ Maximum <strong>2 submissions per month</strong> per organization.</p>
642
+ <p {p}>
 
643
  3 example tasks are publicly available for development and
644
  testing.</p>
645
 
646
+ <h3 {h3}>Reference MCP tools</h3>
647
+ <p {p}>
648
+ We provide 17 reference MCP tools for protein design. You can use
649
+ them as-is, extend them, or build entirely custom tools.
650
+ <a href="#" style="color:#2563eb;font-weight:500">
651
+ GitHub repository &rarr;</a></p>
652
+
653
+ <h3 {h3}>Limits</h3>
654
+ <ul style="color:#475569;padding-left:1.5rem;margin-bottom:0.8rem;
655
+ line-height:1.7">
656
+ <li>Maximum 2 submissions per calendar month per organization</li>
657
+ <li>73 hidden tasks are used for ranking</li>
658
+ <li>3 public example tasks are available for development</li>
659
  </ul>
660
  </div>
661
 
662
+ <div {card}>
663
+ <h2 {h2}>Scoring rubric (100 points)</h2>
664
+ <p {p}>
665
+ <strong>Approach (20 pts)</strong> &mdash; design methodology
666
+ coverage across 10 functional categories
667
+ (backbone generation, sequence design, structure prediction, etc.)</p>
668
+ <p {p}>
669
+ <strong>Orchestration (15 pts)</strong> &mdash; pipeline ordering,
670
+ intermediate validation, and iteration quality</p>
671
+ <p {p}>
672
+ <strong>Quality (35 pts)</strong> &mdash; three-tier graduated
673
+ scoring based on structure confidence (pLDDT, pTM), interface
674
+ metrics (ipTM, i_pAE), and interface physics</p>
675
+ <p {p}>
676
+ <strong>Feasibility (15 pts)</strong> &mdash; valid amino acids,
677
+ length constraints, composition, and biophysical plausibility</p>
678
+ <p {p}>
679
+ <strong>Novelty (5 pts)</strong> &mdash; sequence identity to
680
+ reference (lower identity = more novel = higher score)</p>
681
+ <p {p}>
682
+ <strong>Diversity (10 pts)</strong> &mdash; number and pairwise
683
+ diversity of generated designs</p>
684
  </div>
685
 
686
+ <div {card}>
687
+ <h2 {h2}>Citation</h2>
688
+ <pre style="background:#0f172a;color:#e2e8f0;padding:1.2rem;
689
+ border-radius:10px;font-size:0.8rem;
690
+ line-height:1.6">@article{{biodesignbench2026,
691
+ title={{BioDesignBench: Evaluating LLM Agents on
692
+ Protein Design via MCP Tools}},
693
+ author={{Kim, Jason et al.}},
694
+ year={{2026}}
695
+ }}</pre>
 
 
696
  </div>
697
 
698
  </div>"""
 
996
  with gr.Tab("\U0001f4e4 Submit"):
997
  gr.HTML("""
998
  <div style="max-width:700px;margin:0 auto;padding:1rem">
999
+ <h2 style="color:#0f172a;margin:0 0 0.5rem;
1000
+ font-weight:700;font-size:1.25rem">
1001
+ Submit your agent</h2>
1002
+ <p style="color:#475569;margin-bottom:1rem;line-height:1.6">
1003
  Submit your protein design agent for benchmarking.
1004
  Your agent must be hosted as a POST endpoint that accepts
1005
+ task payloads and returns designed sequences.
1006
+ <strong>You bear all LLM and tool costs</strong> &mdash;
1007
+ we only run Boltz structure prediction on our side.</p>
1008
+ <div style="background:#fefce8;border-left:3px solid #ca8a04;
1009
+ padding:0.8rem 1rem;border-radius:6px;
1010
+ margin-bottom:1rem;font-size:0.85rem;color:#713f12">
1011
  <strong>Rate limit:</strong> 2 submissions per calendar
1012
  month per organization.</div>
1013
  </div>""")
 
1081
  with gr.Tab("\U0001f6e0 Status"):
1082
  gr.HTML("""
1083
  <div style="max-width:800px;margin:0 auto;padding:1rem">
1084
+ <h2 style="color:#0f172a;margin:0 0 0.5rem;
1085
+ font-weight:700;font-size:1.25rem">
1086
+ Submission status</h2>
1087
+ <p style="color:#475569;margin-bottom:0.5rem;line-height:1.6">
1088
  Check your submission status or manage the pipeline
1089
  (admin only).</p>
1090
  </div>""")
 
1117
  if sub.get("overall_score") is not None:
1118
  score_html = (
1119
  f'<div style="font-size:1.2rem;'
1120
+ f'font-weight:700;color:#0f172a;'
1121
  f'margin-top:0.5rem">'
1122
  f'Score: {sub["overall_score"]:.1f}/100'
1123
  f'</div>'
 
1149
  admin_msg = gr.HTML()
1150
 
1151
  with admin_panel:
1152
+ gr.HTML('<h3 style="color:#0f172a">'
1153
  'Pending Submissions</h3>')
1154
  pending_html = gr.HTML()
1155
  refresh_btn = gr.Button("Refresh List")
 
1167
  )
1168
  approve_msg = gr.HTML()
1169
 
1170
+ gr.HTML('<h3 style="color:#0f172a;margin-top:1rem">'
1171
  'Pipeline Control</h3>')
1172
  with gr.Row():
1173
  dispatch_id = gr.Textbox(