Sync about.py from kosmonautical/openhands-index-paul

#26
by juan-all-hands - opened
Files changed (1) hide show
  1. about.py +95 -94
about.py CHANGED
@@ -2,110 +2,111 @@ import gradio as gr
2
 
3
 
4
  def build_page():
5
- with gr.Column(elem_id="about-page-content-wrapper"):
6
- # --- Section 1: About ---
7
- gr.HTML(
8
- """
9
- <h2>About</h2>
10
- <p>
11
- OpenHands Index tracks AI coding agent performance across software engineering benchmarks, providing a unified view of both accuracy and cost efficiency.
12
- </p>
13
- """
14
- )
15
- gr.Markdown("---", elem_classes="divider-line")
 
16
 
17
- # --- Section 2: Benchmark Details ---
18
- gr.HTML(
19
- """
20
- <h2>Benchmark Details</h2>
21
- <p>We evaluate agents across five categories:</p>
22
- <ul class="info-list">
23
- <li><strong>Issue Resolution:</strong> <a href="https://www.swebench.com/" target="_blank">SWE-bench Verified</a> — 500 instances</li>
24
- <li><strong>Frontend:</strong> <a href="https://github.com/OpenHands/SWE-bench-multimodal" target="_blank">SWE-bench Multimodal</a> — 617 instances</li>
25
- <li><strong>Greenfield:</strong> <a href="https://github.com/commit-0/commit0" target="_blank">Commit0</a> — 16 libraries (lite split)</li>
26
- <li><strong>Testing:</strong> <a href="https://github.com/logic-star-ai/swt-bench" target="_blank">SWT-bench Verified</a> — 433 instances</li>
27
- <li><strong>Information Gathering:</strong> <a href="https://huggingface.co/gaia-benchmark" target="_blank">GAIA</a> — 165 questions (validation split)</li>
28
- </ul>
29
- """
30
- )
31
- gr.Markdown("---", elem_classes="divider-line")
32
 
33
- # --- Section 3: Methodology ---
34
- gr.HTML(
35
- """
36
- <h2>Methodology</h2>
37
- <p><strong>Per-benchmark scores:</strong> Each benchmark reports a percentage metric (resolve rate, accuracy, or test pass rate), making scores comparable regardless of dataset size.</p>
38
- <p><strong>Average score:</strong> Macro-average across all five categories with equal weighting.</p>
39
- <p><strong>Cost &amp; Runtime:</strong> Average USD and seconds per task instance.</p>
40
- <p>All evaluations use the <a href="https://github.com/OpenHands/software-agent-sdk" target="_blank">OpenHands Agent SDK</a> with identical configurations per model.</p>
41
- """
42
- )
43
- gr.Markdown("---", elem_classes="divider-line")
44
 
45
- # --- Section 4: API Access ---
46
- gr.HTML(
47
- """
48
- <h2>API Access</h2>
49
- <p>Access leaderboard data programmatically via our REST API:</p>
50
- <ul class="info-list">
51
- <li><a href="https://index.openhands.dev/api/docs" target="_blank">Interactive API Documentation</a> - Swagger UI with all endpoints</li>
52
- <li><a href="https://index.openhands.dev/api/leaderboard" target="_blank">/api/leaderboard</a> - Full leaderboard with scores and metadata</li>
53
- <li><a href="https://index.openhands.dev/api/categories" target="_blank">/api/categories</a> - List of benchmark categories</li>
54
- </ul>
55
- <p style="margin-top: 10px;"><strong>Example:</strong></p>
56
- <pre class="citation-block" style="font-size: 0.9em;">curl "https://index.openhands.dev/api/leaderboard?limit=5"</pre>
57
- """
58
- )
59
- gr.Markdown("---", elem_classes="divider-line")
60
 
61
- # --- Section 5: Resources ---
62
- gr.HTML(
63
- """
64
- <h2>Resources</h2>
65
- <ul class="info-list">
66
- <li><a href="https://github.com/OpenHands/OpenHands" target="_blank">OpenHands</a> - The main OpenHands repository</li>
67
- <li><a href="https://github.com/OpenHands/software-agent-sdk" target="_blank">Software Agent SDK</a> - The agent code used for evaluation</li>
68
- <li><a href="https://github.com/OpenHands/benchmarks" target="_blank">Benchmarks</a> - The benchmarking code</li>
69
- <li><a href="https://github.com/OpenHands/openhands-index-results" target="_blank">Results</a> - Raw evaluation results</li>
70
- </ul>
71
- """
72
- )
73
- gr.Markdown("---", elem_classes="divider-line")
74
 
75
- # --- Section 5: Contact ---
76
- gr.HTML(
77
- """
78
- <h2>Contact</h2>
79
- <p>
80
- Questions or feedback? Join us on <a href="https://dub.sh/openhands" target="_blank">Slack</a>.
81
- </p>
82
- """
83
- )
84
- gr.Markdown("---", elem_classes="divider-line")
85
 
86
- # --- Section 6: Acknowledgements ---
87
- gr.HTML(
88
- """
89
- <h2>Acknowledgements</h2>
90
- <p>
91
- The leaderboard interface is adapted from the
92
- <a href="https://huggingface.co/spaces/allenai/asta-bench-leaderboard" target="_blank">AstaBench Leaderboard</a>
93
- by Allen Institute for AI.
94
- </p>
95
- """
96
- )
97
- gr.Markdown("---", elem_classes="divider-line")
98
 
99
- # --- Section 7: Citation ---
100
- gr.HTML(
101
- """
102
- <h2>Citation</h2>
103
- <pre class="citation-block">
104
  @misc{openhandsindex2025,
105
  title={OpenHands Index: A Comprehensive Leaderboard for AI Coding Agents},
106
  author={OpenHands Team},
107
  year={2025},
108
  howpublished={https://index.openhands.dev}
109
  }</pre>
110
- """
111
- )
 
2
 
3
 
4
  def build_page():
5
+ with gr.Column(elem_id="page-content-wrapper"):
6
+ with gr.Column(elem_id="about-page-content-wrapper"):
7
+ # --- Section 1: About ---
8
+ gr.HTML(
9
+ """
10
+ <h2>About</h2>
11
+ <p>
12
+ OpenHands Index tracks AI coding agent performance across software engineering benchmarks, providing a unified view of both accuracy and cost efficiency.
13
+ </p>
14
+ """
15
+ )
16
+ gr.Markdown("---", elem_classes="divider-line")
17
 
18
+ # --- Section 2: Benchmark Details ---
19
+ gr.HTML(
20
+ """
21
+ <h2>Benchmark Details</h2>
22
+ <p>We evaluate agents across five categories:</p>
23
+ <ul class="info-list">
24
+ <li><strong>Issue Resolution:</strong> <a href="https://www.swebench.com/" target="_blank">SWE-bench Verified</a> — 500 instances</li>
25
+ <li><strong>Frontend:</strong> <a href="https://github.com/OpenHands/SWE-bench-multimodal" target="_blank">SWE-bench Multimodal</a> — 617 instances</li>
26
+ <li><strong>Greenfield:</strong> <a href="https://github.com/commit-0/commit0" target="_blank">Commit0</a> — 16 libraries (lite split)</li>
27
+ <li><strong>Testing:</strong> <a href="https://github.com/logic-star-ai/swt-bench" target="_blank">SWT-bench Verified</a> — 433 instances</li>
28
+ <li><strong>Information Gathering:</strong> <a href="https://huggingface.co/gaia-benchmark" target="_blank">GAIA</a> — 165 questions (validation split)</li>
29
+ </ul>
30
+ """
31
+ )
32
+ gr.Markdown("---", elem_classes="divider-line")
33
 
34
+ # --- Section 3: Methodology ---
35
+ gr.HTML(
36
+ """
37
+ <h2>Methodology</h2>
38
+ <p><strong>Per-benchmark scores:</strong> Each benchmark reports a percentage metric (resolve rate, accuracy, or test pass rate), making scores comparable regardless of dataset size.</p>
39
+ <p><strong>Average score:</strong> Macro-average across all five categories with equal weighting.</p>
40
+ <p><strong>Cost &amp; Runtime:</strong> Average USD and seconds per task instance.</p>
41
+ <p>All evaluations use the <a href="https://github.com/OpenHands/software-agent-sdk" target="_blank">OpenHands Agent SDK</a> with identical configurations per model.</p>
42
+ """
43
+ )
44
+ gr.Markdown("---", elem_classes="divider-line")
45
 
46
+ # --- Section 4: API Access ---
47
+ gr.HTML(
48
+ """
49
+ <h2>API Access</h2>
50
+ <p>Access leaderboard data programmatically via our REST API:</p>
51
+ <ul class="info-list">
52
+ <li><a href="https://index.openhands.dev/api/docs" target="_blank">Interactive API Documentation</a> - Swagger UI with all endpoints</li>
53
+ <li><a href="https://index.openhands.dev/api/leaderboard" target="_blank">/api/leaderboard</a> - Full leaderboard with scores and metadata</li>
54
+ <li><a href="https://index.openhands.dev/api/categories" target="_blank">/api/categories</a> - List of benchmark categories</li>
55
+ </ul>
56
+ <p style="margin-top: 10px;"><strong>Example:</strong></p>
57
+ <pre class="citation-block" style="font-size: 0.9em;">curl "https://index.openhands.dev/api/leaderboard?limit=5"</pre>
58
+ """
59
+ )
60
+ gr.Markdown("---", elem_classes="divider-line")
61
 
62
+ # --- Section 5: Resources ---
63
+ gr.HTML(
64
+ """
65
+ <h2>Resources</h2>
66
+ <ul class="info-list">
67
+ <li><a href="https://github.com/OpenHands/OpenHands" target="_blank">OpenHands</a> - The main OpenHands repository</li>
68
+ <li><a href="https://github.com/OpenHands/software-agent-sdk" target="_blank">Software Agent SDK</a> - The agent code used for evaluation</li>
69
+ <li><a href="https://github.com/OpenHands/benchmarks" target="_blank">Benchmarks</a> - The benchmarking code</li>
70
+ <li><a href="https://github.com/OpenHands/openhands-index-results" target="_blank">Results</a> - Raw evaluation results</li>
71
+ </ul>
72
+ """
73
+ )
74
+ gr.Markdown("---", elem_classes="divider-line")
75
 
76
+ # --- Section 5: Contact ---
77
+ gr.HTML(
78
+ """
79
+ <h2>Contact</h2>
80
+ <p>
81
+ Questions or feedback? Join us on <a href="https://dub.sh/openhands" target="_blank">Slack</a>.
82
+ </p>
83
+ """
84
+ )
85
+ gr.Markdown("---", elem_classes="divider-line")
86
 
87
+ # --- Section 6: Acknowledgements ---
88
+ gr.HTML(
89
+ """
90
+ <h2>Acknowledgements</h2>
91
+ <p>
92
+ The leaderboard interface is adapted from the
93
+ <a href="https://huggingface.co/spaces/allenai/asta-bench-leaderboard" target="_blank">AstaBench Leaderboard</a>
94
+ by Allen Institute for AI.
95
+ </p>
96
+ """
97
+ )
98
+ gr.Markdown("---", elem_classes="divider-line")
99
 
100
+ # --- Section 7: Citation ---
101
+ gr.HTML(
102
+ """
103
+ <h2>Citation</h2>
104
+ <pre class="citation-block">
105
  @misc{openhandsindex2025,
106
  title={OpenHands Index: A Comprehensive Leaderboard for AI Coding Agents},
107
  author={OpenHands Team},
108
  year={2025},
109
  howpublished={https://index.openhands.dev}
110
  }</pre>
111
+ """
112
+ )