Spaces:
Running
Running
Sync about.py from kosmonautical/openhands-index-paul
#26
by juan-all-hands - opened
about.py
CHANGED
|
@@ -2,110 +2,111 @@ import gradio as gr
|
|
| 2 |
|
| 3 |
|
| 4 |
def build_page():
|
| 5 |
-
with gr.Column(elem_id="
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
|
|
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
@misc{openhandsindex2025,
|
| 105 |
title={OpenHands Index: A Comprehensive Leaderboard for AI Coding Agents},
|
| 106 |
author={OpenHands Team},
|
| 107 |
year={2025},
|
| 108 |
howpublished={https://index.openhands.dev}
|
| 109 |
}</pre>
|
| 110 |
-
|
| 111 |
-
|
|
|
|
| 2 |
|
| 3 |
|
| 4 |
def build_page():
|
| 5 |
+
with gr.Column(elem_id="page-content-wrapper"):
|
| 6 |
+
with gr.Column(elem_id="about-page-content-wrapper"):
|
| 7 |
+
# --- Section 1: About ---
|
| 8 |
+
gr.HTML(
|
| 9 |
+
"""
|
| 10 |
+
<h2>About</h2>
|
| 11 |
+
<p>
|
| 12 |
+
OpenHands Index tracks AI coding agent performance across software engineering benchmarks, providing a unified view of both accuracy and cost efficiency.
|
| 13 |
+
</p>
|
| 14 |
+
"""
|
| 15 |
+
)
|
| 16 |
+
gr.Markdown("---", elem_classes="divider-line")
|
| 17 |
|
| 18 |
+
# --- Section 2: Benchmark Details ---
|
| 19 |
+
gr.HTML(
|
| 20 |
+
"""
|
| 21 |
+
<h2>Benchmark Details</h2>
|
| 22 |
+
<p>We evaluate agents across five categories:</p>
|
| 23 |
+
<ul class="info-list">
|
| 24 |
+
<li><strong>Issue Resolution:</strong> <a href="https://www.swebench.com/" target="_blank">SWE-bench Verified</a> — 500 instances</li>
|
| 25 |
+
<li><strong>Frontend:</strong> <a href="https://github.com/OpenHands/SWE-bench-multimodal" target="_blank">SWE-bench Multimodal</a> — 617 instances</li>
|
| 26 |
+
<li><strong>Greenfield:</strong> <a href="https://github.com/commit-0/commit0" target="_blank">Commit0</a> — 16 libraries (lite split)</li>
|
| 27 |
+
<li><strong>Testing:</strong> <a href="https://github.com/logic-star-ai/swt-bench" target="_blank">SWT-bench Verified</a> — 433 instances</li>
|
| 28 |
+
<li><strong>Information Gathering:</strong> <a href="https://huggingface.co/gaia-benchmark" target="_blank">GAIA</a> — 165 questions (validation split)</li>
|
| 29 |
+
</ul>
|
| 30 |
+
"""
|
| 31 |
+
)
|
| 32 |
+
gr.Markdown("---", elem_classes="divider-line")
|
| 33 |
|
| 34 |
+
# --- Section 3: Methodology ---
|
| 35 |
+
gr.HTML(
|
| 36 |
+
"""
|
| 37 |
+
<h2>Methodology</h2>
|
| 38 |
+
<p><strong>Per-benchmark scores:</strong> Each benchmark reports a percentage metric (resolve rate, accuracy, or test pass rate), making scores comparable regardless of dataset size.</p>
|
| 39 |
+
<p><strong>Average score:</strong> Macro-average across all five categories with equal weighting.</p>
|
| 40 |
+
<p><strong>Cost & Runtime:</strong> Average USD and seconds per task instance.</p>
|
| 41 |
+
<p>All evaluations use the <a href="https://github.com/OpenHands/software-agent-sdk" target="_blank">OpenHands Agent SDK</a> with identical configurations per model.</p>
|
| 42 |
+
"""
|
| 43 |
+
)
|
| 44 |
+
gr.Markdown("---", elem_classes="divider-line")
|
| 45 |
|
| 46 |
+
# --- Section 4: API Access ---
|
| 47 |
+
gr.HTML(
|
| 48 |
+
"""
|
| 49 |
+
<h2>API Access</h2>
|
| 50 |
+
<p>Access leaderboard data programmatically via our REST API:</p>
|
| 51 |
+
<ul class="info-list">
|
| 52 |
+
<li><a href="https://index.openhands.dev/api/docs" target="_blank">Interactive API Documentation</a> - Swagger UI with all endpoints</li>
|
| 53 |
+
<li><a href="https://index.openhands.dev/api/leaderboard" target="_blank">/api/leaderboard</a> - Full leaderboard with scores and metadata</li>
|
| 54 |
+
<li><a href="https://index.openhands.dev/api/categories" target="_blank">/api/categories</a> - List of benchmark categories</li>
|
| 55 |
+
</ul>
|
| 56 |
+
<p style="margin-top: 10px;"><strong>Example:</strong></p>
|
| 57 |
+
<pre class="citation-block" style="font-size: 0.9em;">curl "https://index.openhands.dev/api/leaderboard?limit=5"</pre>
|
| 58 |
+
"""
|
| 59 |
+
)
|
| 60 |
+
gr.Markdown("---", elem_classes="divider-line")
|
| 61 |
|
| 62 |
+
# --- Section 5: Resources ---
|
| 63 |
+
gr.HTML(
|
| 64 |
+
"""
|
| 65 |
+
<h2>Resources</h2>
|
| 66 |
+
<ul class="info-list">
|
| 67 |
+
<li><a href="https://github.com/OpenHands/OpenHands" target="_blank">OpenHands</a> - The main OpenHands repository</li>
|
| 68 |
+
<li><a href="https://github.com/OpenHands/software-agent-sdk" target="_blank">Software Agent SDK</a> - The agent code used for evaluation</li>
|
| 69 |
+
<li><a href="https://github.com/OpenHands/benchmarks" target="_blank">Benchmarks</a> - The benchmarking code</li>
|
| 70 |
+
<li><a href="https://github.com/OpenHands/openhands-index-results" target="_blank">Results</a> - Raw evaluation results</li>
|
| 71 |
+
</ul>
|
| 72 |
+
"""
|
| 73 |
+
)
|
| 74 |
+
gr.Markdown("---", elem_classes="divider-line")
|
| 75 |
|
| 76 |
+
# --- Section 5: Contact ---
|
| 77 |
+
gr.HTML(
|
| 78 |
+
"""
|
| 79 |
+
<h2>Contact</h2>
|
| 80 |
+
<p>
|
| 81 |
+
Questions or feedback? Join us on <a href="https://dub.sh/openhands" target="_blank">Slack</a>.
|
| 82 |
+
</p>
|
| 83 |
+
"""
|
| 84 |
+
)
|
| 85 |
+
gr.Markdown("---", elem_classes="divider-line")
|
| 86 |
|
| 87 |
+
# --- Section 6: Acknowledgements ---
|
| 88 |
+
gr.HTML(
|
| 89 |
+
"""
|
| 90 |
+
<h2>Acknowledgements</h2>
|
| 91 |
+
<p>
|
| 92 |
+
The leaderboard interface is adapted from the
|
| 93 |
+
<a href="https://huggingface.co/spaces/allenai/asta-bench-leaderboard" target="_blank">AstaBench Leaderboard</a>
|
| 94 |
+
by Allen Institute for AI.
|
| 95 |
+
</p>
|
| 96 |
+
"""
|
| 97 |
+
)
|
| 98 |
+
gr.Markdown("---", elem_classes="divider-line")
|
| 99 |
|
| 100 |
+
# --- Section 7: Citation ---
|
| 101 |
+
gr.HTML(
|
| 102 |
+
"""
|
| 103 |
+
<h2>Citation</h2>
|
| 104 |
+
<pre class="citation-block">
|
| 105 |
@misc{openhandsindex2025,
|
| 106 |
title={OpenHands Index: A Comprehensive Leaderboard for AI Coding Agents},
|
| 107 |
author={OpenHands Team},
|
| 108 |
year={2025},
|
| 109 |
howpublished={https://index.openhands.dev}
|
| 110 |
}</pre>
|
| 111 |
+
"""
|
| 112 |
+
)
|