IgorSlinko committed on
Commit
f81e3b1
·
1 Parent(s): 403adb5

Rename and reorder chart sections, fix metadata ONE TRAJECTORY display (v0.3.39)

Browse files

- Rename all chart sections with clearer naming convention
- Reorder: REPORTED sections first, then CALCULATED sections
- Open Leaderboard data by default, close REPORTED [AGGREGATED ALL]
- Add routing explanation text under button and in routing accordion
- Fix metadata ONE TRAJECTORY charts not showing on first load

Files changed (1) hide show
  1. app.py +26 -17
app.py CHANGED
@@ -1839,7 +1839,7 @@ def build_app():
1839
  """)
1840
  trajectories_state = gr.State(None)
1841
 
1842
- gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.38`")
1843
  gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
1844
 
1845
  with gr.Row():
@@ -1860,37 +1860,37 @@ def build_app():
1860
  plot_steps = gr.Plot(label="Distribution of API Calls (Steps) per Trajectory")
1861
  plot_cost = gr.Plot(label="Distribution of Cost Reported by Leaderboard ($)")
1862
 
1863
- with gr.Accordion("Metadata from .traj", open=True):
1864
  with gr.Row():
1865
  plot_tokens_meta = gr.Plot(label="Total Tokens by Type")
1866
  plot_tokens_cost_meta = gr.Plot(label="Total Cost by Token Type ($)")
1867
 
1868
- with gr.Accordion("Metadata from .traj by trajectory", open=False):
1869
  with gr.Row():
1870
  plot_stacked_meta = gr.Plot(label="Tokens per Trajectory (stacked)")
1871
  with gr.Row():
1872
  plot_cost_breakdown_meta = gr.Plot(label="Cost per Trajectory")
1873
 
1874
- with gr.Accordion("Calculated from .traj messages", open=True):
 
 
 
 
 
 
 
 
1875
  with gr.Row():
1876
  plot_tokens_calc = gr.Plot(label="Total Tokens by Type")
1877
  plot_tokens_cost_calc = gr.Plot(label="Total Cost by Token Type ($)")
1878
 
1879
- with gr.Accordion("Calculated from .traj messages by trajectory", open=False):
1880
  with gr.Row():
1881
  plot_stacked_calc = gr.Plot(label="Tokens per Trajectory (stacked)")
1882
  with gr.Row():
1883
  plot_cost_breakdown_calc = gr.Plot(label="Cost per Trajectory")
1884
 
1885
- with gr.Accordion("One trajectory statistics. Metadata from .traj", open=False, visible=False) as single_traj_meta_accordion:
1886
- with gr.Row():
1887
- single_traj_meta_dropdown = gr.Dropdown(label="Select Issue", choices=[], interactive=True)
1888
- with gr.Row():
1889
- single_traj_meta_plot = gr.Plot(label="Tokens per Step (stacked)")
1890
- with gr.Row():
1891
- single_traj_meta_cost_plot = gr.Plot(label="Cost per Step (stacked) ($)")
1892
-
1893
- with gr.Accordion("One trajectory statistics. Calculated from .traj messages", open=False, visible=False) as single_traj_accordion:
1894
  with gr.Row():
1895
  single_traj_dropdown = gr.Dropdown(label="Select Issue", choices=[], interactive=True)
1896
  with gr.Row():
@@ -1898,7 +1898,8 @@ def build_app():
1898
  with gr.Row():
1899
  single_traj_cost_plot = gr.Plot(label="Cost per Step (stacked) ($)")
1900
 
1901
- with gr.Accordion("Calculated with routing", open=True, visible=False) as routing_plots_row:
 
1902
  with gr.Row():
1903
  routing_tokens_plot = gr.Plot(label="Tokens by Type (per Model)")
1904
  routing_cost_plot = gr.Plot(label="Cost by Type (per Model) ($)")
@@ -2087,6 +2088,7 @@ def build_app():
2087
 
2088
  gr.Markdown("---")
2089
  route_btn = gr.Button("🚀 Let's ROUTE!!", variant="primary", size="lg", interactive=False)
 
2090
  routing_result = gr.Markdown(visible=False)
2091
 
2092
 
@@ -2862,6 +2864,13 @@ def build_app():
2862
  first_meta_issue = meta_issue_ids[0] if meta_issue_ids else None
2863
  has_meta_steps = len(meta_issue_ids) > 0
2864
 
 
 
 
 
 
 
 
2865
  progress(1, desc="Done")
2866
  yield (
2867
  f"✅ Loaded {len(df_meta)} trajectories",
@@ -2877,8 +2886,8 @@ def build_app():
2877
  gr.update(),
2878
  gr.update(visible=has_meta_steps),
2879
  gr.update(choices=meta_issue_ids, value=first_meta_issue),
2880
- gr.update(),
2881
- gr.update(),
2882
  )
2883
 
2884
  def on_single_traj_select(state_data, issue_id, input_price, cache_read_price, cache_creation_price, completion_price, overhead, with_cache):
 
1839
  """)
1840
  trajectories_state = gr.State(None)
1841
 
1842
+ gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.39`")
1843
  gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
1844
 
1845
  with gr.Row():
 
1860
  plot_steps = gr.Plot(label="Distribution of API Calls (Steps) per Trajectory")
1861
  plot_cost = gr.Plot(label="Distribution of Cost Reported by Leaderboard ($)")
1862
 
1863
+ with gr.Accordion("Token counts REPORTED in the metadata of .traj files [AGGREGATED ALL]", open=False):
1864
  with gr.Row():
1865
  plot_tokens_meta = gr.Plot(label="Total Tokens by Type")
1866
  plot_tokens_cost_meta = gr.Plot(label="Total Cost by Token Type ($)")
1867
 
1868
+ with gr.Accordion("Token counts REPORTED in the metadata of .traj files [AGGREGATED BY TRAJECTORY]", open=False):
1869
  with gr.Row():
1870
  plot_stacked_meta = gr.Plot(label="Tokens per Trajectory (stacked)")
1871
  with gr.Row():
1872
  plot_cost_breakdown_meta = gr.Plot(label="Cost per Trajectory")
1873
 
1874
+ with gr.Accordion("Token counts REPORTED in the metadata of .traj files [ONE TRAJECTORY]", open=False, visible=False) as single_traj_meta_accordion:
1875
+ with gr.Row():
1876
+ single_traj_meta_dropdown = gr.Dropdown(label="Select Issue", choices=[], interactive=True)
1877
+ with gr.Row():
1878
+ single_traj_meta_plot = gr.Plot(label="Tokens per Step (stacked)")
1879
+ with gr.Row():
1880
+ single_traj_meta_cost_plot = gr.Plot(label="Cost per Step (stacked) ($)")
1881
+
1882
+ with gr.Accordion("Token counts CALCULATED from .traj files [AGGREGATED ALL]", open=False):
1883
  with gr.Row():
1884
  plot_tokens_calc = gr.Plot(label="Total Tokens by Type")
1885
  plot_tokens_cost_calc = gr.Plot(label="Total Cost by Token Type ($)")
1886
 
1887
+ with gr.Accordion("Token counts CALCULATED from .traj files [AGGREGATED BY TRAJECTORY]", open=False):
1888
  with gr.Row():
1889
  plot_stacked_calc = gr.Plot(label="Tokens per Trajectory (stacked)")
1890
  with gr.Row():
1891
  plot_cost_breakdown_calc = gr.Plot(label="Cost per Trajectory")
1892
 
1893
+ with gr.Accordion("Token counts CALCULATED from .traj files [ONE TRAJECTORY]", open=False, visible=False) as single_traj_accordion:
 
 
 
 
 
 
 
 
1894
  with gr.Row():
1895
  single_traj_dropdown = gr.Dropdown(label="Select Issue", choices=[], interactive=True)
1896
  with gr.Row():
 
1898
  with gr.Row():
1899
  single_traj_cost_plot = gr.Plot(label="Cost per Step (stacked) ($)")
1900
 
1901
+ with gr.Accordion("Token counts CALCULATED from .traj files, with ROUTING [AGGREGATED ALL]", open=True, visible=False) as routing_plots_row:
1902
+ gr.Markdown("*With routing all messages in the trajectory remain as they are, but messages that match the selected filters are assigned to selected models for routing to.*")
1903
  with gr.Row():
1904
  routing_tokens_plot = gr.Plot(label="Tokens by Type (per Model)")
1905
  routing_cost_plot = gr.Plot(label="Cost by Type (per Model) ($)")
 
2088
 
2089
  gr.Markdown("---")
2090
  route_btn = gr.Button("🚀 Let's ROUTE!!", variant="primary", size="lg", interactive=False)
2091
+ gr.Markdown("*With routing all messages in the trajectory remain as they are, but messages that match the selected filters are assigned to selected models for routing to.*")
2092
  routing_result = gr.Markdown(visible=False)
2093
 
2094
 
 
2864
  first_meta_issue = meta_issue_ids[0] if meta_issue_ids else None
2865
  has_meta_steps = len(meta_issue_ids) > 0
2866
 
2867
+ fig_single_traj_meta = None
2868
+ fig_single_traj_meta_cost = None
2869
+ if first_meta_issue and first_meta_issue in metadata_steps:
2870
+ meta_steps = metadata_steps[first_meta_issue]
2871
+ fig_single_traj_meta = create_single_trajectory_meta_chart(meta_steps)
2872
+ fig_single_traj_meta_cost = create_single_trajectory_meta_cost_chart(meta_steps, input_price, cache_read_price, cache_creation_price, completion_price)
2873
+
2874
  progress(1, desc="Done")
2875
  yield (
2876
  f"✅ Loaded {len(df_meta)} trajectories",
 
2886
  gr.update(),
2887
  gr.update(visible=has_meta_steps),
2888
  gr.update(choices=meta_issue_ids, value=first_meta_issue),
2889
+ fig_single_traj_meta,
2890
+ fig_single_traj_meta_cost,
2891
  )
2892
 
2893
  def on_single_traj_select(state_data, issue_id, input_price, cache_read_price, cache_creation_price, completion_price, overhead, with_cache):