Commit
·
f81e3b1
1
Parent(s):
403adb5
Rename and reorder chart sections, fix metadata ONE TRAJECTORY display (v0.3.39)
Browse files
- Rename all chart sections with clearer naming convention
- Reorder: REPORTED sections first, then CALCULATED sections
- Open Leaderboard data by default, close REPORTED [AGGREGATED ALL]
- Add routing explanation text under button and in routing accordion
- Fix metadata ONE TRAJECTORY charts not showing on first load
app.py
CHANGED
|
@@ -1839,7 +1839,7 @@ def build_app():
|
|
| 1839 |
""")
|
| 1840 |
trajectories_state = gr.State(None)
|
| 1841 |
|
| 1842 |
-
gr.Markdown("# ๐งฎ SWE-bench Bash-Only Leaderboard `v0.3.
|
| 1843 |
gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
|
| 1844 |
|
| 1845 |
with gr.Row():
|
|
@@ -1860,37 +1860,37 @@ def build_app():
|
|
| 1860 |
plot_steps = gr.Plot(label="Distribution of API Calls (Steps) per Trajectory")
|
| 1861 |
plot_cost = gr.Plot(label="Distribution of Cost Reported by Leaderboard ($)")
|
| 1862 |
|
| 1863 |
-
with gr.Accordion("
|
| 1864 |
with gr.Row():
|
| 1865 |
plot_tokens_meta = gr.Plot(label="Total Tokens by Type")
|
| 1866 |
plot_tokens_cost_meta = gr.Plot(label="Total Cost by Token Type ($)")
|
| 1867 |
|
| 1868 |
-
with gr.Accordion("
|
| 1869 |
with gr.Row():
|
| 1870 |
plot_stacked_meta = gr.Plot(label="Tokens per Trajectory (stacked)")
|
| 1871 |
with gr.Row():
|
| 1872 |
plot_cost_breakdown_meta = gr.Plot(label="Cost per Trajectory")
|
| 1873 |
|
| 1874 |
-
with gr.Accordion("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1875 |
with gr.Row():
|
| 1876 |
plot_tokens_calc = gr.Plot(label="Total Tokens by Type")
|
| 1877 |
plot_tokens_cost_calc = gr.Plot(label="Total Cost by Token Type ($)")
|
| 1878 |
|
| 1879 |
-
with gr.Accordion("
|
| 1880 |
with gr.Row():
|
| 1881 |
plot_stacked_calc = gr.Plot(label="Tokens per Trajectory (stacked)")
|
| 1882 |
with gr.Row():
|
| 1883 |
plot_cost_breakdown_calc = gr.Plot(label="Cost per Trajectory")
|
| 1884 |
|
| 1885 |
-
with gr.Accordion("
|
| 1886 |
-
with gr.Row():
|
| 1887 |
-
single_traj_meta_dropdown = gr.Dropdown(label="Select Issue", choices=[], interactive=True)
|
| 1888 |
-
with gr.Row():
|
| 1889 |
-
single_traj_meta_plot = gr.Plot(label="Tokens per Step (stacked)")
|
| 1890 |
-
with gr.Row():
|
| 1891 |
-
single_traj_meta_cost_plot = gr.Plot(label="Cost per Step (stacked) ($)")
|
| 1892 |
-
|
| 1893 |
-
with gr.Accordion("One trajectory statistics. Calculated from .traj messages", open=False, visible=False) as single_traj_accordion:
|
| 1894 |
with gr.Row():
|
| 1895 |
single_traj_dropdown = gr.Dropdown(label="Select Issue", choices=[], interactive=True)
|
| 1896 |
with gr.Row():
|
|
@@ -1898,7 +1898,8 @@ def build_app():
|
|
| 1898 |
with gr.Row():
|
| 1899 |
single_traj_cost_plot = gr.Plot(label="Cost per Step (stacked) ($)")
|
| 1900 |
|
| 1901 |
-
with gr.Accordion("
|
|
|
|
| 1902 |
with gr.Row():
|
| 1903 |
routing_tokens_plot = gr.Plot(label="Tokens by Type (per Model)")
|
| 1904 |
routing_cost_plot = gr.Plot(label="Cost by Type (per Model) ($)")
|
|
@@ -2087,6 +2088,7 @@ def build_app():
|
|
| 2087 |
|
| 2088 |
gr.Markdown("---")
|
| 2089 |
route_btn = gr.Button("๐ Let's ROUTE!!", variant="primary", size="lg", interactive=False)
|
|
|
|
| 2090 |
routing_result = gr.Markdown(visible=False)
|
| 2091 |
|
| 2092 |
|
|
@@ -2862,6 +2864,13 @@ def build_app():
|
|
| 2862 |
first_meta_issue = meta_issue_ids[0] if meta_issue_ids else None
|
| 2863 |
has_meta_steps = len(meta_issue_ids) > 0
|
| 2864 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2865 |
progress(1, desc="Done")
|
| 2866 |
yield (
|
| 2867 |
f"✅ Loaded {len(df_meta)} trajectories",
|
|
@@ -2877,8 +2886,8 @@ def build_app():
|
|
| 2877 |
gr.update(),
|
| 2878 |
gr.update(visible=has_meta_steps),
|
| 2879 |
gr.update(choices=meta_issue_ids, value=first_meta_issue),
|
| 2880 |
-
|
| 2881 |
-
|
| 2882 |
)
|
| 2883 |
|
| 2884 |
def on_single_traj_select(state_data, issue_id, input_price, cache_read_price, cache_creation_price, completion_price, overhead, with_cache):
|
|
|
|
| 1839 |
""")
|
| 1840 |
trajectories_state = gr.State(None)
|
| 1841 |
|
| 1842 |
+
gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.39`")
|
| 1843 |
gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
|
| 1844 |
|
| 1845 |
with gr.Row():
|
|
|
|
| 1860 |
plot_steps = gr.Plot(label="Distribution of API Calls (Steps) per Trajectory")
|
| 1861 |
plot_cost = gr.Plot(label="Distribution of Cost Reported by Leaderboard ($)")
|
| 1862 |
|
| 1863 |
+
with gr.Accordion("Token counts REPORTED in the metadata of .traj files [AGGREGATED ALL]", open=False):
|
| 1864 |
with gr.Row():
|
| 1865 |
plot_tokens_meta = gr.Plot(label="Total Tokens by Type")
|
| 1866 |
plot_tokens_cost_meta = gr.Plot(label="Total Cost by Token Type ($)")
|
| 1867 |
|
| 1868 |
+
with gr.Accordion("Token counts REPORTED in the metadata of .traj files [AGGREGATED BY TRAJECTORY]", open=False):
|
| 1869 |
with gr.Row():
|
| 1870 |
plot_stacked_meta = gr.Plot(label="Tokens per Trajectory (stacked)")
|
| 1871 |
with gr.Row():
|
| 1872 |
plot_cost_breakdown_meta = gr.Plot(label="Cost per Trajectory")
|
| 1873 |
|
| 1874 |
+
with gr.Accordion("Token counts REPORTED in the metadata of .traj files [ONE TRAJECTORY]", open=False, visible=False) as single_traj_meta_accordion:
|
| 1875 |
+
with gr.Row():
|
| 1876 |
+
single_traj_meta_dropdown = gr.Dropdown(label="Select Issue", choices=[], interactive=True)
|
| 1877 |
+
with gr.Row():
|
| 1878 |
+
single_traj_meta_plot = gr.Plot(label="Tokens per Step (stacked)")
|
| 1879 |
+
with gr.Row():
|
| 1880 |
+
single_traj_meta_cost_plot = gr.Plot(label="Cost per Step (stacked) ($)")
|
| 1881 |
+
|
| 1882 |
+
with gr.Accordion("Token counts CALCULATED from .traj files [AGGREGATED ALL]", open=False):
|
| 1883 |
with gr.Row():
|
| 1884 |
plot_tokens_calc = gr.Plot(label="Total Tokens by Type")
|
| 1885 |
plot_tokens_cost_calc = gr.Plot(label="Total Cost by Token Type ($)")
|
| 1886 |
|
| 1887 |
+
with gr.Accordion("Token counts CALCULATED from .traj files [AGGREGATED BY TRAJECTORY]", open=False):
|
| 1888 |
with gr.Row():
|
| 1889 |
plot_stacked_calc = gr.Plot(label="Tokens per Trajectory (stacked)")
|
| 1890 |
with gr.Row():
|
| 1891 |
plot_cost_breakdown_calc = gr.Plot(label="Cost per Trajectory")
|
| 1892 |
|
| 1893 |
+
with gr.Accordion("Token counts CALCULATED from .traj files [ONE TRAJECTORY]", open=False, visible=False) as single_traj_accordion:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1894 |
with gr.Row():
|
| 1895 |
single_traj_dropdown = gr.Dropdown(label="Select Issue", choices=[], interactive=True)
|
| 1896 |
with gr.Row():
|
|
|
|
| 1898 |
with gr.Row():
|
| 1899 |
single_traj_cost_plot = gr.Plot(label="Cost per Step (stacked) ($)")
|
| 1900 |
|
| 1901 |
+
with gr.Accordion("Token counts CALCULATED from .traj files, with ROUTING [AGGREGATED ALL]", open=True, visible=False) as routing_plots_row:
|
| 1902 |
+
gr.Markdown("*With routing all messages in the trajectory remain as they are, but messages that match the selected filters are assigned to selected models for routing to.*")
|
| 1903 |
with gr.Row():
|
| 1904 |
routing_tokens_plot = gr.Plot(label="Tokens by Type (per Model)")
|
| 1905 |
routing_cost_plot = gr.Plot(label="Cost by Type (per Model) ($)")
|
|
|
|
| 2088 |
|
| 2089 |
gr.Markdown("---")
|
| 2090 |
route_btn = gr.Button("๐ Let's ROUTE!!", variant="primary", size="lg", interactive=False)
|
| 2091 |
+
gr.Markdown("*With routing all messages in the trajectory remain as they are, but messages that match the selected filters are assigned to selected models for routing to.*")
|
| 2092 |
routing_result = gr.Markdown(visible=False)
|
| 2093 |
|
| 2094 |
|
|
|
|
| 2864 |
first_meta_issue = meta_issue_ids[0] if meta_issue_ids else None
|
| 2865 |
has_meta_steps = len(meta_issue_ids) > 0
|
| 2866 |
|
| 2867 |
+
fig_single_traj_meta = None
|
| 2868 |
+
fig_single_traj_meta_cost = None
|
| 2869 |
+
if first_meta_issue and first_meta_issue in metadata_steps:
|
| 2870 |
+
meta_steps = metadata_steps[first_meta_issue]
|
| 2871 |
+
fig_single_traj_meta = create_single_trajectory_meta_chart(meta_steps)
|
| 2872 |
+
fig_single_traj_meta_cost = create_single_trajectory_meta_cost_chart(meta_steps, input_price, cache_read_price, cache_creation_price, completion_price)
|
| 2873 |
+
|
| 2874 |
progress(1, desc="Done")
|
| 2875 |
yield (
|
| 2876 |
f"✅ Loaded {len(df_meta)} trajectories",
|
|
|
|
| 2886 |
gr.update(),
|
| 2887 |
gr.update(visible=has_meta_steps),
|
| 2888 |
gr.update(choices=meta_issue_ids, value=first_meta_issue),
|
| 2889 |
+
fig_single_traj_meta,
|
| 2890 |
+
fig_single_traj_meta_cost,
|
| 2891 |
)
|
| 2892 |
|
| 2893 |
def on_single_traj_select(state_data, issue_id, input_price, cache_read_price, cache_creation_price, completion_price, overhead, with_cache):
|