File size: 6,186 Bytes
085a012
 
 
0b126a8
085a012
 
c14a283
 
 
 
 
085a012
 
 
 
 
 
 
a4b9436
 
 
 
 
d731ad4
 
085a012
 
 
 
0b126a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
085a012
6252817
 
 
 
 
 
 
 
 
 
b8aea20
0b126a8
6252817
 
 
 
 
 
 
 
0b126a8
6252817
 
0b126a8
6252817
 
 
 
 
0b126a8
6252817
0b126a8
6252817
 
 
 
0b126a8
6252817
 
 
 
0b126a8
6252817
 
0b126a8
6252817
 
 
0b126a8
6252817
 
 
 
0b126a8
6252817
0b126a8
6252817
 
 
0b126a8
6252817
 
 
 
0b126a8
6252817
 
 
 
 
0b126a8
6252817
 
 
0b126a8
6252817
0b126a8
6252817
 
0b126a8
6252817
 
d731ad4
6252817
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b126a8
6252817
 
085a012
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import matplotlib
matplotlib.use('Agg')
import gradio as gr
import pandas as pd


from ui_components import (
    create_leaderboard_display,
    get_full_leaderboard_data,
    create_winners_by_category_html,
)

from content import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    INTRO_PARAGRAPH
)

from visualizations import (
    create_evolution_over_time_chart,
    create_accuracy_by_size_chart
)

from constants import MARK_BY_DEFAULT

# --- Global State for Viewers (simple caching) ---
# Two module-level dictionaries that start out empty. Nothing in the code
# visible in this module reads or writes them.
# NOTE(review): presumably kept as caches for other modules or older code
# paths -- confirm they are still used before relying on or removing them.
CACHED_VIEWERS = {}
CACHED_TAG_MAPS = {}


def filter_complete_entries(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` limited to rows that cover all five categories.

    The completeness criterion is chosen from the columns that are present,
    in this order of preference:

    1. All five per-category score columns exist: keep rows in which none of
       those scores is missing.
    2. ``'Categories Completed'`` exists: keep rows whose value, coerced to a
       number, is at least 5 (values that cannot be parsed are dropped).
    3. ``'Categories Attempted'`` exists: keep rows equal to ``'5/5'``.

    An empty frame, or a frame with none of these columns, is returned as an
    unfiltered copy. The input frame is never modified.
    """
    if df.empty:
        return df.copy()

    score_fields = [
        'Issue Resolution Score',
        'Frontend Score',
        'Greenfield Score',
        'Testing Score',
        'Information Gathering Score',
    ]
    present = df.columns

    if set(score_fields).issubset(present):
        # A row is complete when every category score is populated.
        row_mask = df[score_fields].notna().all(axis=1)
    elif 'Categories Completed' in present:
        completed_count = pd.to_numeric(df['Categories Completed'], errors='coerce')
        row_mask = completed_count >= 5
    elif 'Categories Attempted' in present:
        row_mask = df['Categories Attempted'] == '5/5'
    else:
        # No completeness information available: keep everything.
        return df.copy()

    return df[row_mask].copy()


def build_page():
    """Build the "Overall" leaderboard page out of Gradio components.

    The page contains the intro paragraph, the main leaderboard (rendered by
    ``create_leaderboard_display`` from the "test" split) and three extra
    sections -- winners by category, evolution over time and accuracy by
    size. The extra sections are re-rendered whenever one of the controls
    returned by ``create_leaderboard_display`` changes. If the "test" split
    is empty, only a "No data available." message follows the header.

    Components are created for their effect on the surrounding layout; the
    function returns nothing.
    NOTE(review): presumably this must be called inside an active
    ``gr.Blocks`` context -- confirm against the app entry point.
    """
    with gr.Column(elem_id="page-content-wrapper"):
        with gr.Row(elem_id="intro-row"):
            with gr.Column(scale=1):
                gr.HTML(INTRO_PARAGRAPH, elem_id="intro-paragraph")

        # --- Leaderboard Display Section ---
        CATEGORY_NAME = "Overall"
        gr.HTML(
            f'<h2>OpenHands Index {CATEGORY_NAME} Leaderboard <span style="font-weight: normal; color: inherit;">(Aggregate)</span></h2>',
            elem_id="main-header",
        )

        test_df, test_tag_map = get_full_leaderboard_data("test")
        if not test_df.empty:
            # Any of the three returned controls may be None; this is handled
            # below when the event handlers are wired up.
            show_incomplete_checkbox, show_open_only_checkbox, mark_by_dropdown = create_leaderboard_display(
                full_df=test_df,
                tag_map=test_tag_map,
                category_name=CATEGORY_NAME,
                split_name="test",
            )

            # Pre-compute every data variant once so the change handler only
            # has to select among them.
            test_df_complete = filter_complete_entries(test_df)
            has_complete_entries = len(test_df_complete) > 0

            if "Openness" in test_df.columns:
                test_df_open = test_df[test_df["Openness"].str.lower() == "open"].copy()
            else:
                test_df_open = test_df.copy()
            test_df_complete_open = filter_complete_entries(test_df_open)

            # Default view: complete entries only, falling back to all entries
            # when no entry is complete.
            initial_df = test_df_complete if has_complete_entries else test_df

            # --- Winners by Category Section ---
            gr.Markdown("---")
            gr.HTML('<h2>Winners by Category</h2>', elem_id="winners-header")
            gr.Markdown("Top 5 performing systems in each benchmark category.")

            winners_component = gr.HTML(
                create_winners_by_category_html(initial_df, top_n=5),
                elem_id="winners-by-category",
            )

            # --- New Visualization Sections ---
            gr.Markdown("---")

            # Evolution Over Time Section
            gr.HTML('<h2>Evolution Over Time</h2>', elem_id="evolution-header")
            gr.Markdown("Track how model performance has improved over time based on release dates.")

            evolution_component = gr.Plot(
                value=create_evolution_over_time_chart(initial_df, MARK_BY_DEFAULT),
                elem_id="evolution-chart",
            )

            gr.Markdown("---")

            # Open Model Accuracy by Size Section (always shows open models only by design)
            gr.HTML('<h2>Open Model Accuracy by Size</h2>', elem_id="size-accuracy-header")
            gr.Markdown("Compare open-weights model performance against their parameter count.")

            size_component = gr.Plot(
                value=create_accuracy_by_size_chart(initial_df, MARK_BY_DEFAULT),
                elem_id="size-accuracy-chart",
            )

            def update_extra_sections(show_incomplete, show_open_only, mark_by):
                """Re-render the three extra sections for the current controls.

                Args:
                    show_incomplete: Whether incomplete entries are included.
                    show_open_only: Whether winners/evolution are limited to
                        rows whose "Openness" is "open".
                    mark_by: Marker option forwarded to both chart builders.

                Returns:
                    Tuple ``(winners_html, evolution_fig, size_fig)`` matching
                    the order of the handler outputs.
                """
                # With no complete entries the filtered frames would be empty,
                # so incomplete rows are always included in that case.
                include_incomplete = show_incomplete or not has_complete_entries
                base_df = test_df if include_incomplete else test_df_complete
                base_df_open = test_df_open if include_incomplete else test_df_complete_open
                winners_df = base_df_open if show_open_only else base_df

                winners_html = create_winners_by_category_html(winners_df, top_n=5)
                evolution_fig = create_evolution_over_time_chart(winners_df, mark_by)
                # The size chart is fed base_df, so the open-only checkbox
                # does not change its input.
                size_fig = create_accuracy_by_size_chart(base_df, mark_by)

                return winners_html, evolution_fig, size_fig

            # Every handler receives all three values. A control that was not
            # created is replaced by a gr.State holding its default so the
            # inputs list never contains None.
            show_incomplete_input = (
                show_incomplete_checkbox if show_incomplete_checkbox is not None else gr.State(value=True)
            )
            show_open_only_input = (
                show_open_only_checkbox if show_open_only_checkbox is not None else gr.State(value=False)
            )
            mark_by_input = (
                mark_by_dropdown if mark_by_dropdown is not None else gr.State(value=MARK_BY_DEFAULT)
            )
            extra_section_inputs = [show_incomplete_input, show_open_only_input, mark_by_input]
            extra_section_outputs = [winners_component, evolution_component, size_component]

            # Only real controls emit change events; State fallbacks are
            # passive inputs and get no handler.
            for control in (show_incomplete_checkbox, show_open_only_checkbox, mark_by_dropdown):
                if control is not None:
                    control.change(
                        fn=update_extra_sections,
                        inputs=extra_section_inputs,
                        outputs=extra_section_outputs,
                    )

        else:
            gr.Markdown("No data available.")

if __name__ == "__main__":
    # No `demo` object is defined or imported in this module, so a Blocks app
    # is built around the page here to make the file runnable on its own.
    with gr.Blocks() as demo:
        build_page()
    demo.launch()