File size: 5,779 Bytes
085a012
 
 
 
734891b
 
 
 
 
 
 
 
 
 
 
 
085a012
734891b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d8b77db
734891b
 
 
 
 
 
 
 
 
 
 
085a012
734891b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168fd64
734891b
 
 
 
 
 
 
 
 
 
 
 
 
085a012
734891b
 
 
 
 
 
 
 
 
 
701c496
734891b
 
 
 
 
 
 
 
 
 
 
 
737a3f2
734891b
 
 
 
 
6737ff3
 
cfd4f2a
6737ff3
b6f00ad
7904c2d
734891b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import gradio as gr


def build_page():
    """Render the static "About" page of the leaderboard.

    Creates two nested ``gr.Column`` containers (``page-content-wrapper`` and
    ``about-page-content-wrapper``) and fills the inner one with a fixed
    sequence of ``gr.HTML`` sections, each followed by a horizontal divider
    except the final Citation section.

    The function takes no arguments and returns nothing; its only effect is
    the creation of Gradio components.
    NOTE(review): presumably called while a ``gr.Blocks`` context is active so
    the components attach to the app layout — confirm against the caller.
    """

    def _divider():
        # Horizontal rule between sections; styled through the CSS class.
        gr.Markdown("---", elem_classes="divider-line")

    with gr.Column(elem_id="page-content-wrapper"):
        with gr.Column(elem_id="about-page-content-wrapper"):
            # About: one-paragraph summary of what the index tracks.
            gr.HTML(
                """
                <h2>About</h2>
                <p>
                    OpenHands Index tracks AI coding agent performance across software engineering benchmarks, providing a unified view of both accuracy and cost efficiency.
                </p>
                """
            )
            _divider()

            # Benchmark Details: the five categories and their datasets.
            gr.HTML(
                """
                <h2>Benchmark Details</h2>
                <p>We evaluate agents across five categories:</p>
                <ul class="info-list">
                    <li><strong>Issue Resolution:</strong> <a href="https://www.swebench.com/" target="_blank">SWE-bench Verified</a> — 500 instances</li>
                    <li><strong>Frontend:</strong> <a href="https://github.com/OpenHands/SWE-bench-multimodal" target="_blank">SWE-bench Multimodal</a> — 617 instances</li>
                    <li><strong>Greenfield:</strong> <a href="https://github.com/commit-0/commit0" target="_blank">Commit0</a> — 16 libraries (lite split)</li>
                    <li><strong>Testing:</strong> <a href="https://github.com/logic-star-ai/swt-bench" target="_blank">SWT-bench Verified</a> — 433 instances</li>
                    <li><strong>Information Gathering:</strong> <a href="https://huggingface.co/gaia-benchmark" target="_blank">GAIA</a> — 165 questions (validation split)</li>
                </ul>
                """
            )
            _divider()

            # Methodology: how scores, averages, cost and runtime are reported.
            gr.HTML(
                """
                <h2>Methodology</h2>
                <p><strong>Per-benchmark scores:</strong> Each benchmark reports a percentage metric (resolve rate, accuracy, or test pass rate), making scores comparable regardless of dataset size.</p>
                <p><strong>Average score:</strong> Macro-average across all five categories with equal weighting.</p>
                <p><strong>Cost &amp; Runtime:</strong> Average USD and seconds per task instance.</p>
                <p>All evaluations use the <a href="https://github.com/OpenHands/software-agent-sdk" target="_blank">OpenHands Agent SDK</a> with identical configurations per model.</p>
                """
            )
            _divider()

            # API Access: links to the REST endpoints plus a curl example.
            gr.HTML(
                """
                <h2>API Access</h2>
                <p>Access leaderboard data programmatically via our REST API:</p>
                <ul class="info-list">
                    <li><a href="https://index.openhands.dev/api/docs" target="_blank">Interactive API Documentation</a> - Swagger UI with all endpoints</li>
                    <li><a href="https://index.openhands.dev/api/leaderboard" target="_blank">/api/leaderboard</a> - Full leaderboard with scores and metadata</li>
                    <li><a href="https://index.openhands.dev/api/categories" target="_blank">/api/categories</a> - List of benchmark categories</li>
                </ul>
                <p style="margin-top: 10px;"><strong>Example:</strong></p>
                <pre class="citation-block" style="font-size: 0.9em;">curl "https://index.openhands.dev/api/leaderboard?limit=5"</pre>
                """
            )
            _divider()

            # Resources: related repositories.
            gr.HTML(
                """
                <h2>Resources</h2>
                <ul class="info-list">
                    <li><a href="https://github.com/OpenHands/OpenHands" target="_blank">OpenHands</a> - The main OpenHands repository</li>
                    <li><a href="https://github.com/OpenHands/software-agent-sdk" target="_blank">Software Agent SDK</a> - The agent code used for evaluation</li>
                    <li><a href="https://github.com/OpenHands/benchmarks" target="_blank">Benchmarks</a> - The benchmarking code</li>
                    <li><a href="https://github.com/OpenHands/openhands-index-results" target="_blank">Results</a> - Raw evaluation results</li>
                </ul>
                """
            )
            _divider()

            # Contact: where to send questions or feedback.
            gr.HTML(
                """
                <h2>Contact</h2>
                <p>
                    Questions or feedback? Join us on <a href="https://dub.sh/openhands" target="_blank">Slack</a>.
                </p>
                """
            )
            _divider()

            # Acknowledgements: credit for the adapted leaderboard interface.
            gr.HTML(
                """
                <h2>Acknowledgements</h2>
                <p>
                    The leaderboard interface is adapted from the
                    <a href="https://huggingface.co/spaces/allenai/asta-bench-leaderboard" target="_blank">AstaBench Leaderboard</a>
                    by Allen Institute for AI.
                </p>
                """
            )
            _divider()

            # Citation: BibTeX entry. The entry lines sit at column 0 on
            # purpose, because whitespace inside <pre> is rendered verbatim.
            # This is the last section, so no divider follows it.
            gr.HTML(
                """
                <h2>Citation</h2>
                <pre class="citation-block">
@misc{openhandsindex2025,
    title={OpenHands Index: A Comprehensive Leaderboard for AI Coding Agents},
    author={OpenHands Team},
    year={2025},
    howpublished={https://index.openhands.dev}
}</pre>
                """
            )