Spaces:

JetBrains-Research
/

SWE-bench-Costs-Calculator

Sleeping

IgorSlinko committed 10 days ago

Commit

7e7e3a1

1 Parent(s): 781ed01

Add trajectory analysis with cost breakdown

- Add 6 analysis plots: API calls, cost distribution, token usage, cost by token type, billable tokens per instance, cost breakdown per instance
- Load token prices from litellm model_prices_and_context_window.json
- Show ✅/❌ indicators for auto-loaded vs manual price fields
- Move analysis section under leaderboard table in left column
- Add tight margins to Plotly charts for better layout
- Use gr.State for folder storage instead of hidden textbox

Files changed (3) hide show

app.py +487 -19
pyproject.toml +1 -0
uv.lock +24 -0

app.py CHANGED Viewed

@@ -5,13 +5,71 @@ from pathlib import Path
 import gradio as gr
 import pandas as pd
-from src.download_swebench_leaderboard import download_leaderboard, get_leaderboard
 DATA_DIR = Path("data")
 TRAJS_DIR = DATA_DIR / "swebench_trajs"
 LEADERBOARD_CACHE = DATA_DIR / "swebench_leaderboard_latest.json"
 S3_BUCKET = "s3://swe-bench-experiments/bash-only"
 def load_or_download_leaderboard():
@@ -46,8 +104,7 @@ def get_bash_only_df():
             "os_system": "✅" if r.get("os_system") else "❌",
         })
-    df = pd.DataFrame(rows)
-    return df
 def get_model_details(folder: str):
@@ -68,18 +125,27 @@ def get_model_details(folder: str):
     return model, None
 def download_trajectories_from_s3(folder: str, progress=gr.Progress()):
     if not folder:
-        return "❌ No model selected"
     model, error = get_model_details(folder)
     if error:
-        return f"❌ {error}"
     output_dir = TRAJS_DIR / folder
     if output_dir.exists() and any(output_dir.iterdir()):
         file_count = len(list(output_dir.glob("*/*.traj.json")))
-        return f"✅ Already downloaded: {output_dir}\n\n{file_count} trajectory files"
     s3_path = f"{S3_BUCKET}/{folder}/trajs/"
     output_dir.mkdir(parents=True, exist_ok=True)
@@ -95,7 +161,7 @@ def download_trajectories_from_s3(folder: str, progress=gr.Progress()):
         )
         if result.returncode != 0:
-            return f"❌ S3 download failed:\n{result.stderr}"
         file_count = len(list(output_dir.glob("*/*.traj.json")))
         if file_count == 0:
@@ -105,62 +171,464 @@ def download_trajectories_from_s3(folder: str, progress=gr.Progress()):
         resolved_count = sum(1 for v in per_instance.values() if v.get("resolved"))
         total_count = len(per_instance)
-        return f"✅ Downloaded to {output_dir}\n\n{file_count} trajectory files\nResolved: {resolved_count}/{total_count} ({100*resolved_count/total_count:.1f}%)"
     except subprocess.TimeoutExpired:
-        return "❌ Download timed out (>10 min)"
     except FileNotFoundError:
-        return "❌ AWS CLI not found. Install with: pip install awscli"
     except Exception as e:
-        return f"❌ Error: {e}"
 def on_row_select(evt: gr.SelectData, df: pd.DataFrame):
     if evt.index is None:
-        return "", "", gr.update()
     row_idx = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
     row = df.iloc[row_idx]
     folder = row["folder"]
     name = row["name"]
-    return folder, name, gr.update(interactive=True)
 def build_app():
-    df = get_bash_only_df()
     with gr.Blocks(title="SWE-bench Routing Cost Calculator") as app:
         gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard")
         gr.Markdown("Select a model to use as base for cost analysis")
         with gr.Row():
             with gr.Column(scale=3):
                 leaderboard_table = gr.Dataframe(
-                    value=df,
                     label="Bash-Only Leaderboard",
                     interactive=False,
                     wrap=True,
                 )
             with gr.Column(scale=1):
                 gr.Markdown("### Selected Model")
                 selected_name = gr.Textbox(label="Model Name", interactive=False)
-                selected_folder = gr.Textbox(label="Folder ID", interactive=False)
                 download_btn = gr.Button("📥 Download Trajectories", interactive=False)
                 download_status = gr.Textbox(label="Status", interactive=False, lines=3)
         leaderboard_table.select(
             fn=on_row_select,
             inputs=[leaderboard_table],
-            outputs=[selected_folder, selected_name, download_btn],
         )
         download_btn.click(
             fn=download_trajectories_from_s3,
             inputs=[selected_folder],
-            outputs=[download_status],
         )
     return app
@@ -168,5 +636,5 @@ def build_app():
 if __name__ == "__main__":
     app = build_app()
     app.launch()

 import gradio as gr
 import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+import requests
+from src.download_swebench_leaderboard import download_leaderboard
 DATA_DIR = Path("data")
 TRAJS_DIR = DATA_DIR / "swebench_trajs"
 LEADERBOARD_CACHE = DATA_DIR / "swebench_leaderboard_latest.json"
+LITELLM_PRICES_CACHE = DATA_DIR / "litellm_prices.json"
 S3_BUCKET = "s3://swe-bench-experiments/bash-only"
+LITELLM_PRICES_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
+_litellm_prices_cache = None
+_trajectories_cache = {}
def get_litellm_prices() -> dict:
    """Return the litellm model price table, loading it at most once per process.

    Lookup order: the in-memory cache, then the on-disk cache file
    (``LITELLM_PRICES_CACHE``), then a download from ``LITELLM_PRICES_URL``.
    A successful download is written back to the on-disk cache on a
    best-effort basis.

    Returns:
        The decoded price table, or an empty dict when neither the cache file
        nor the download yields a usable table. The result (including the
        empty fallback) is remembered for the rest of the process.
    """
    global _litellm_prices_cache
    if _litellm_prices_cache is not None:
        return _litellm_prices_cache

    if LITELLM_PRICES_CACHE.exists():
        try:
            with open(LITELLM_PRICES_CACHE, encoding="utf-8") as f:
                cached = json.load(f)
        except (OSError, ValueError) as e:
            # A truncated or corrupt cache file must not break every price
            # lookup; fall through and fetch a fresh copy instead.
            print(f"Ignoring unreadable price cache {LITELLM_PRICES_CACHE}: {e}")
        else:
            if isinstance(cached, dict):
                _litellm_prices_cache = cached
                return _litellm_prices_cache

    try:
        response = requests.get(LITELLM_PRICES_URL, timeout=30)
        response.raise_for_status()
        downloaded = response.json()
    except Exception as e:
        # Deliberately best-effort: without prices the UI falls back to
        # manually entered values.
        print(f"Could not download litellm prices: {e}")
        _litellm_prices_cache = {}
        return _litellm_prices_cache

    _litellm_prices_cache = downloaded if isinstance(downloaded, dict) else {}

    if _litellm_prices_cache:
        try:
            DATA_DIR.mkdir(parents=True, exist_ok=True)
            with open(LITELLM_PRICES_CACHE, "w", encoding="utf-8") as f:
                json.dump(_litellm_prices_cache, f)
        except OSError as e:
            # Failing to persist must not discard the table we just fetched.
            print(f"Could not write price cache {LITELLM_PRICES_CACHE}: {e}")

    return _litellm_prices_cache
def get_model_prices(model_name: str) -> dict | None:
    """Look up the litellm price entry for ``model_name``.

    Exact keys are tried first, in this order: the name as given, the name
    with any ``anthropic/`` / ``openai/`` text removed, and that bare name
    with each of the two provider prefixes. If none of them is a key of the
    price table, the first key that contains the bare or the original name
    as a substring is used.

    Args:
        model_name: Model identifier, with or without a provider prefix.

    Returns:
        The matching price entry, or ``None`` when ``model_name`` is empty or
        nothing matches.
    """
    if not model_name:
        return None

    prices = get_litellm_prices()
    clean_name = model_name.replace("anthropic/", "").replace("openai/", "")

    candidates = [
        model_name,
        clean_name,
        f"anthropic/{clean_name}",
        f"openai/{clean_name}",
    ]
    for key in candidates:
        if key in prices:
            return prices[key]

    if not clean_name:
        # The name was nothing but a provider prefix. An empty string is a
        # substring of every key, so a fuzzy scan would "match" the first
        # table entry regardless of what it is.
        return None

    for key, value in prices.items():
        if clean_name in key or model_name in key:
            return value
    return None
 def load_or_download_leaderboard():
             "os_system": "✅" if r.get("os_system") else "❌",
         })
+    return pd.DataFrame(rows)
 def get_model_details(folder: str):
     return model, None
def check_trajectories_downloaded(folder: str) -> bool:
    """Tell whether ``TRAJS_DIR / folder`` exists and holds at least one entry.

    An empty ``folder`` is reported as not downloaded.
    """
    if folder:
        target = TRAJS_DIR / folder
        if target.exists():
            # Any directory entry counts; the file type is not inspected here.
            return any(target.iterdir())
    return False
 def download_trajectories_from_s3(folder: str, progress=gr.Progress()):
     if not folder:
+        return "❌ No model selected", gr.update(visible=False)
     model, error = get_model_details(folder)
     if error:
+        return f"❌ {error}", gr.update(visible=False)
     output_dir = TRAJS_DIR / folder
     if output_dir.exists() and any(output_dir.iterdir()):
         file_count = len(list(output_dir.glob("*/*.traj.json")))
+        if file_count == 0:
+            file_count = len(list(output_dir.glob("*.json")))
+        return f"✅ Already downloaded: {output_dir}\n\n{file_count} trajectory files", gr.update(visible=True)
     s3_path = f"{S3_BUCKET}/{folder}/trajs/"
     output_dir.mkdir(parents=True, exist_ok=True)
         )
         if result.returncode != 0:
+            return f"❌ S3 download failed:\n{result.stderr}", gr.update(visible=False)
         file_count = len(list(output_dir.glob("*/*.traj.json")))
         if file_count == 0:
         resolved_count = sum(1 for v in per_instance.values() if v.get("resolved"))
         total_count = len(per_instance)
+        status = f"✅ Downloaded to {output_dir}\n\n{file_count} trajectory files\nResolved: {resolved_count}/{total_count} ({100*resolved_count/total_count:.1f}%)"
+        return status, gr.update(visible=True)
     except subprocess.TimeoutExpired:
+        return "❌ Download timed out (>10 min)", gr.update(visible=False)
     except FileNotFoundError:
+        return "❌ AWS CLI not found. Install with: pip install awscli", gr.update(visible=False)
     except Exception as e:
+        return f"❌ Error: {e}", gr.update(visible=False)
def _as_dict(value) -> dict:
    """Return ``value`` when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _message_usage(msg) -> dict:
    """Return the token-usage dict recorded on one message, or ``{}``.

    A non-empty top-level ``usage`` dict wins; otherwise
    ``extra.response.usage`` is consulted.
    """
    if not isinstance(msg, dict):
        return {}
    usage = msg.get("usage")
    if isinstance(usage, dict) and usage:
        return usage
    # A missing, null or empty top-level ``usage`` must not hide the nested one.
    response = _as_dict(msg.get("extra")).get("response")
    return _as_dict(_as_dict(response).get("usage"))


def parse_trajectory(traj_path: Path) -> dict:
    """Read one trajectory JSON file and summarise its cost and token usage.

    Args:
        traj_path: Path to a trajectory file containing a JSON object.

    Returns:
        A dict with ``instance_id``, ``model_name``, ``api_calls``,
        ``instance_cost`` and the token counters ``prompt_tokens``,
        ``completion_tokens``, ``total_tokens``, ``cache_read_tokens`` and
        ``cache_creation_tokens`` summed over all messages.

    Raises:
        ValueError: If the file is not valid JSON or its top level is not a
            JSON object.
        OSError: If the file cannot be read.
    """
    with open(traj_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    if not isinstance(data, dict):
        raise ValueError(f"{traj_path} does not contain a JSON object")

    info = _as_dict(data.get("info"))
    model_stats = _as_dict(info.get("model_stats"))
    model_config = _as_dict(_as_dict(info.get("config")).get("model"))
    # A present-but-empty override must not mask the configured model name.
    model_name = (
        model_config.get("cost_calc_model_override")
        or model_config.get("model_name")
        or ""
    )

    # ``Path.stem`` only drops the last suffix, so strip ``.traj`` explicitly.
    fallback_id = traj_path.name.removesuffix(".json").removesuffix(".traj")

    result = {
        "instance_id": data.get("instance_id") or fallback_id,
        "model_name": model_name,
        "api_calls": model_stats.get("api_calls") or 0,
        "instance_cost": model_stats.get("instance_cost") or 0,
        "prompt_tokens": 0,
        "completion_tokens": 0,
        "total_tokens": 0,
        "cache_read_tokens": 0,
        "cache_creation_tokens": 0,
    }

    # Result key -> key inside the usage dict.
    counters = {
        "prompt_tokens": "prompt_tokens",
        "completion_tokens": "completion_tokens",
        "total_tokens": "total_tokens",
        "cache_read_tokens": "cache_read_input_tokens",
        "cache_creation_tokens": "cache_creation_input_tokens",
    }

    messages = data.get("messages")
    if not isinstance(messages, list):
        messages = []
    for msg in messages:
        usage = _message_usage(msg)
        if not usage:
            continue
        for result_key, usage_key in counters.items():
            # ``or 0`` turns explicit nulls into zero.
            result[result_key] += usage.get(usage_key, 0) or 0
    return result
def load_all_trajectories(folder: str) -> pd.DataFrame:
    """Parse every trajectory file under ``TRAJS_DIR / folder`` into a DataFrame.

    Files are discovered with the first of these glob patterns that matches
    anything: ``*/*.traj.json``, ``*.traj.json``, ``*.json``. Files that fail
    to parse are reported on stdout and skipped.

    Args:
        folder: Name of the model folder inside ``TRAJS_DIR``.

    Returns:
        One row per successfully parsed trajectory (see ``parse_trajectory``
        for the columns); an empty DataFrame when nothing could be parsed.
        Non-empty results are memoised per folder for the process lifetime.
    """
    if folder in _trajectories_cache:
        return _trajectories_cache[folder]

    output_dir = TRAJS_DIR / folder
    traj_files = []
    for pattern in ("*/*.traj.json", "*.traj.json", "*.json"):
        # Sorted so that row order does not depend on directory listing order.
        traj_files = sorted(output_dir.glob(pattern))
        if traj_files:
            break

    rows = []
    for traj_path in traj_files:
        try:
            rows.append(parse_trajectory(traj_path))
        except Exception as e:
            print(f"Error parsing {traj_path}: {e}")

    df = pd.DataFrame(rows)
    if not df.empty:
        # Never memoise an empty result: the folder may be populated later
        # (e.g. by a download), and a cached empty frame would hide it for the
        # rest of the process.
        _trajectories_cache[folder] = df
    return df
def _add_corner_note(fig, text: str) -> None:
    """Pin a one-line summary to the top-right corner of ``fig``."""
    fig.add_annotation(
        text=text,
        xref="paper", yref="paper",
        x=0.95, y=0.95, showarrow=False,
        font=dict(size=12),
    )


def _token_bar(df_sorted: pd.DataFrame, column: str, label: str, color: str):
    """Build one bar trace of ``column`` over the sorted instance index."""
    return go.Bar(
        name=label,
        x=df_sorted["instance_idx"],
        y=df_sorted[column],
        marker_color=color,
        hovertemplate=f"Instance: %{{x}}<br>{label}: %{{y:,.0f}}<extra></extra>",
    )


def create_basic_histograms(df: pd.DataFrame, cache_read_price: float, cache_creation_price: float, completion_price: float):
    """Build the five overview figures for a set of parsed trajectories.

    Args:
        df: One row per instance with the columns ``api_calls``,
            ``instance_cost``, ``prompt_tokens``, ``completion_tokens``,
            ``cache_read_tokens`` and ``cache_creation_tokens``.
        cache_read_price: Price per 1M cache-read tokens.
        cache_creation_price: Price per 1M cache-creation tokens.
        completion_price: Price per 1M completion tokens.
            A price of ``None`` is treated as 0.

    Returns:
        ``(fig_steps, fig_cost, fig_tokens, fig_tokens_cost, fig_stacked)``,
        or five ``None`` values when ``df`` is empty.
    """
    if df.empty:
        return None, None, None, None, None

    # Prices come from user-editable numeric inputs and may arrive as None
    # (e.g. a cleared field); treat that as "no price" rather than crashing.
    cache_read_price = cache_read_price or 0.0
    cache_creation_price = cache_creation_price or 0.0
    completion_price = completion_price or 0.0

    compact_margin = dict(l=40, r=20, t=40, b=40)

    fig_steps = px.histogram(
        df,
        x="api_calls",
        nbins=30,
        title="Distribution of API Calls (Steps) per Instance",
        color_discrete_sequence=["#636EFA"],
    )
    fig_steps.update_layout(
        xaxis_title="API Calls (Steps)",
        yaxis_title="Number of Instances",
        showlegend=False,
        margin=compact_margin,
    )
    _add_corner_note(
        fig_steps,
        f"Mean: {df['api_calls'].mean():.1f} | Median: {df['api_calls'].median():.0f}",
    )

    fig_cost = px.histogram(
        df,
        x="instance_cost",
        nbins=30,
        title="Distribution of Cost per Instance ($)",
        color_discrete_sequence=["#00CC96"],
    )
    fig_cost.update_layout(
        xaxis_title="Cost ($)",
        yaxis_title="Number of Instances",
        showlegend=False,
        margin=compact_margin,
    )
    _add_corner_note(
        fig_cost,
        f"Mean: ${df['instance_cost'].mean():.4f} | Total: ${df['instance_cost'].sum():.2f}",
    )

    total_prompt = df["prompt_tokens"].sum()
    total_completion = df["completion_tokens"].sum()
    total_cache_read = df["cache_read_tokens"].sum()
    total_cache_creation = df["cache_creation_tokens"].sum()

    token_data = pd.DataFrame({
        "Token Type": ["Prompt", "Completion", "Cache Read", "Cache Creation"],
        "Total Tokens": [total_prompt, total_completion, total_cache_read, total_cache_creation],
    })
    fig_tokens = px.bar(
        token_data,
        x="Token Type",
        y="Total Tokens",
        title="Total Tokens by Type",
        color="Token Type",
        color_discrete_sequence=["#EF553B", "#AB63FA", "#19D3F3", "#FFA15A"],
    )
    fig_tokens.update_layout(
        xaxis_title="Token Type",
        yaxis_title="Total Tokens",
        showlegend=False,
        margin=compact_margin,
    )
    total_all = token_data["Total Tokens"].sum()
    _add_corner_note(fig_tokens, f"Total: {total_all:,.0f}")

    # Cost by token type (prompt tokens not billed separately, included in cache)
    cost_completion = total_completion * completion_price / 1e6
    cost_cache_read = total_cache_read * cache_read_price / 1e6
    cost_cache_creation = total_cache_creation * cache_creation_price / 1e6

    cost_data = pd.DataFrame({
        "Token Type": ["Completion", "Cache Read", "Cache Creation"],
        "Cost ($)": [cost_completion, cost_cache_read, cost_cache_creation],
    })
    fig_tokens_cost = px.bar(
        cost_data,
        x="Token Type",
        y="Cost ($)",
        title="Total Cost by Token Type ($)",
        color="Token Type",
        color_discrete_sequence=["#AB63FA", "#19D3F3", "#FFA15A"],
    )
    fig_tokens_cost.update_layout(
        xaxis_title="Token Type",
        yaxis_title="Cost ($)",
        showlegend=False,
        margin=compact_margin,
    )
    total_cost = cost_completion + cost_cache_read + cost_cache_creation
    _add_corner_note(fig_tokens_cost, f"Total: ${total_cost:.2f}")

    # Per-instance stacked bars, heaviest cache readers first.
    df_sorted = df.sort_values("cache_read_tokens", ascending=False).reset_index(drop=True)
    df_sorted["instance_idx"] = range(len(df_sorted))

    fig_stacked = go.Figure()
    fig_stacked.add_trace(_token_bar(df_sorted, "cache_read_tokens", "Cache Read", "#19D3F3"))
    fig_stacked.add_trace(_token_bar(df_sorted, "cache_creation_tokens", "Cache Creation", "#FFA15A"))
    fig_stacked.add_trace(_token_bar(df_sorted, "completion_tokens", "Completion", "#AB63FA"))
    fig_stacked.update_layout(
        barmode="stack",
        title="Billable Tokens per Instance (stacked)",
        xaxis_title="Instance (sorted by cache read)",
        yaxis_title="Tokens",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        margin=dict(l=50, r=20, t=60, b=40),
    )

    return fig_steps, fig_cost, fig_tokens, fig_tokens_cost, fig_stacked
def create_cost_breakdown(df: pd.DataFrame, cache_read_price: float, cache_creation_price: float, completion_price: float):
    """Build a stacked per-instance cost chart from token counts and prices.

    Args:
        df: One row per instance with the columns ``cache_read_tokens``,
            ``cache_creation_tokens`` and ``completion_tokens``.
        cache_read_price: Price per 1M cache-read tokens.
        cache_creation_price: Price per 1M cache-creation tokens.
        completion_price: Price per 1M completion tokens.
            A price of ``None`` is treated as 0.

    Returns:
        A stacked bar figure with instances ordered by descending cache-read
        tokens, or ``None`` when ``df`` is empty.
    """
    if df.empty:
        return None

    # Prices come from user-editable numeric inputs and may arrive as None
    # (e.g. a cleared field); both the arithmetic and the ``:.2f`` legend
    # labels below would raise on None.
    cache_read_price = cache_read_price or 0.0
    cache_creation_price = cache_creation_price or 0.0
    completion_price = completion_price or 0.0

    df_sorted = df.sort_values("cache_read_tokens", ascending=False).reset_index(drop=True)
    df_sorted["instance_idx"] = range(len(df_sorted))

    # (legend label, cost column, token column, price per 1M, bar colour)
    series = [
        ("Cache Read", "cost_cache_read", "cache_read_tokens", cache_read_price, "#19D3F3"),
        ("Cache Creation", "cost_cache_creation", "cache_creation_tokens", cache_creation_price, "#FFA15A"),
        ("Completion", "cost_completion", "completion_tokens", completion_price, "#AB63FA"),
    ]

    fig = go.Figure()
    total_cost = 0
    for label, cost_col, token_col, price, color in series:
        df_sorted[cost_col] = df_sorted[token_col] * price / 1e6
        fig.add_trace(go.Bar(
            name=f"{label} (${price:.2f}/1M)",
            x=df_sorted["instance_idx"],
            y=df_sorted[cost_col],
            marker_color=color,
            hovertemplate="Instance: %{x}<br>Cost: $%{y:.4f}<extra></extra>",
        ))
        total_cost += df_sorted[cost_col].sum()

    fig.update_layout(
        barmode="stack",
        title="Cost Breakdown per Instance",
        xaxis_title="Instance (sorted by cache read)",
        yaxis_title="Cost ($)",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        margin=dict(l=50, r=20, t=60, b=40),
    )
    fig.add_annotation(
        text=f"Total: ${total_cost:.2f}",
        xref="paper", yref="paper",
        x=0.95, y=0.95, showarrow=False,
        font=dict(size=14),
        bgcolor="white",
    )
    return fig
def extract_model_from_folder(folder: str) -> str:
    """Return the model portion of a leaderboard folder name.

    Everything after the second underscore is taken as the model, e.g.
    '20251124_mini-v1.16.0_claude-opus-4-5-20251101' gives
    'claude-opus-4-5-20251101'. Names with fewer than three
    underscore-separated fields are returned unchanged, and an empty
    ``folder`` yields an empty string.
    """
    if not folder:
        return ""
    # Splitting at most twice leaves any further underscores inside the tail.
    pieces = folder.split("_", 2)
    return pieces[2] if len(pieces) == 3 else folder
def get_prices_for_folder(folder: str) -> tuple[float, float, float, str]:
    """Look up litellm prices for the model encoded in a folder name.

    Args:
        folder: Leaderboard folder name; the model hint is taken from it with
            ``extract_model_from_folder``.

    Returns:
        ``(cache_read, cache_creation, completion, model_hint)`` with prices
        in dollars per 1M tokens. A price is ``0.0`` when no entry matches the
        model or when the entry lacks a usable numeric value for that field.
        ``model_hint`` is ``""`` when no hint could be derived from ``folder``.
    """
    model_hint = extract_model_from_folder(folder)
    if not model_hint:
        return 0.0, 0.0, 0.0, ""

    prices = get_model_prices(model_hint)
    if not isinstance(prices, dict) or not prices:
        return 0.0, 0.0, 0.0, model_hint

    def per_million(field: str) -> float:
        """Convert a per-token cost field to $/1M, tolerating bad values."""
        value = prices.get(field)
        # The field may be absent, null or non-numeric in the price table;
        # multiplying such a value would raise or give nonsense.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return 0.0
        return value * 1e6

    return (
        per_million("cache_read_input_token_cost"),
        per_million("cache_creation_input_token_cost"),
        per_million("output_cost_per_token"),
        model_hint,
    )
 def on_row_select(evt: gr.SelectData, df: pd.DataFrame):
     if evt.index is None:
+        return (
+            "", "",
+            gr.update(interactive=False),
+            gr.update(visible=False),
+            gr.update(value=0, label="💲 Cache Read"),
+            gr.update(value=0, label="💲 Cache Creation"),
+            gr.update(value=0, label="💲 Completion"),
+            ""
+        )
     row_idx = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
     row = df.iloc[row_idx]
     folder = row["folder"]
     name = row["name"]
+    show_analyze = check_trajectories_downloaded(folder)
+    cache_read, cache_creation, completion, model_hint = get_prices_for_folder(folder)
+    def price_update(value, name):
+        if value > 0:
+            return gr.update(value=value, label=f"✅ {name}")
+        else:
+            return gr.update(value=value, label=f"❌ {name}")
+    return (
+        folder, name,
+        gr.update(interactive=True),
+        gr.update(visible=show_analyze),
+        price_update(cache_read, "Cache Read"),
+        price_update(cache_creation, "Cache Creation"),
+        price_update(completion, "Completion"),
+        model_hint
+    )
 def build_app():
+    leaderboard_df = get_bash_only_df()
     with gr.Blocks(title="SWE-bench Routing Cost Calculator") as app:
+        trajectories_state = gr.State(None)
         gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard")
         gr.Markdown("Select a model to use as base for cost analysis")
         with gr.Row():
             with gr.Column(scale=3):
                 leaderboard_table = gr.Dataframe(
+                    value=leaderboard_df,
                     label="Bash-Only Leaderboard",
                     interactive=False,
                     wrap=True,
                 )
+                with gr.Column(visible=False) as analysis_section:
+                    gr.Markdown("## 📊 Trajectory Analysis")
+                    with gr.Row():
+                        plot_steps = gr.Plot(label="API Calls Distribution")
+                        plot_cost = gr.Plot(label="Cost Distribution")
+                    with gr.Row():
+                        plot_tokens = gr.Plot(label="Token Usage by Type")
+                        plot_tokens_cost = gr.Plot(label="Cost by Token Type ($)")
+                    with gr.Row():
+                        plot_stacked = gr.Plot(label="Billable Tokens per Instance")
+                    with gr.Row():
+                        plot_cost_breakdown = gr.Plot(label="Cost Breakdown per Instance ($)")
             with gr.Column(scale=1):
+                selected_folder = gr.State("")
                 gr.Markdown("### Selected Model")
                 selected_name = gr.Textbox(label="Model Name", interactive=False)
                 download_btn = gr.Button("📥 Download Trajectories", interactive=False)
                 download_status = gr.Textbox(label="Status", interactive=False, lines=3)
+                analyze_btn = gr.Button("📊 Load & Analyze", visible=False, variant="primary")
+                gr.Markdown("---")
+                gr.Markdown("### 💰 Token Prices ($/1M) · *[litellm](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json)*")
+                detected_model = gr.Textbox(label="Detected Model", interactive=False)
+                price_cache_read = gr.Number(label="💲 Cache Read", value=0, precision=2)
+                price_cache_creation = gr.Number(label="💲 Cache Creation", value=0, precision=2)
+                price_completion = gr.Number(label="💲 Completion", value=0, precision=2)
         leaderboard_table.select(
             fn=on_row_select,
             inputs=[leaderboard_table],
+            outputs=[selected_folder, selected_name, download_btn, analyze_btn, price_cache_read, price_cache_creation, price_completion, detected_model],
         )
         download_btn.click(
             fn=download_trajectories_from_s3,
             inputs=[selected_folder],
+            outputs=[download_status, analyze_btn],
+        )
+        def load_and_analyze(folder, cache_read_price, cache_creation_price, completion_price):
+            empty_result = (
+                gr.update(visible=False),
+                None, None, None, None, None, None,
+            )
+            if not folder:
+                yield empty_result
+                return
+            yield (
+                gr.update(visible=True),
+                None, None, None, None, None, None,
+            )
+            df = load_all_trajectories(folder)
+            if df.empty:
+                yield empty_result
+                return
+            fig_steps, fig_cost, fig_tokens, fig_tokens_cost, fig_stacked = create_basic_histograms(
+                df, cache_read_price, cache_creation_price, completion_price
+            )
+            fig_cost_breakdown = create_cost_breakdown(df, cache_read_price, cache_creation_price, completion_price)
+            yield (
+                gr.update(visible=True),
+                fig_steps, fig_cost, fig_tokens, fig_tokens_cost, fig_stacked, fig_cost_breakdown,
+            )
+        analyze_btn.click(
+            fn=load_and_analyze,
+            inputs=[selected_folder, price_cache_read, price_cache_creation, price_completion],
+            outputs=[
+                analysis_section,
+                plot_steps, plot_cost, plot_tokens, plot_tokens_cost, plot_stacked, plot_cost_breakdown,
+            ],
         )
     return app
 if __name__ == "__main__":
     app = build_app()
+    app.queue()
     app.launch()

pyproject.toml CHANGED Viewed

@@ -8,6 +8,7 @@ requires-python = ">=3.10"
 dependencies = [
     "gradio>=6.0.2",
     "pandas>=2.0.0",
     "requests>=2.31.0",
     "python-dotenv>=1.0.0",
 ]

 dependencies = [
     "gradio>=6.0.2",
     "pandas>=2.0.0",
+    "plotly>=5.18.0",
     "requests>=2.31.0",
     "python-dotenv>=1.0.0",
 ]

uv.lock CHANGED Viewed

@@ -615,6 +615,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
 ]
 [[package]]
 name = "numpy"
 version = "2.2.6"
@@ -1016,6 +1025,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/95/7e/f896623c3c635a90537ac093c6a618ebe1a90d87206e42309cb5d98a1b9e/pillow-12.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b290fd8aa38422444d4b50d579de197557f182ef1068b75f5aa8558638b8d0a5", size = 6997850, upload-time = "2025-10-15T18:24:11.495Z" },
 ]
 [[package]]
 name = "pydantic"
 version = "2.12.4"
@@ -1305,6 +1327,7 @@ source = { virtual = "." }
 dependencies = [
     { name = "gradio" },
     { name = "pandas" },
     { name = "python-dotenv" },
     { name = "requests" },
 ]
@@ -1318,6 +1341,7 @@ dev = [
 requires-dist = [
     { name = "gradio", specifier = ">=6.0.2" },
     { name = "pandas", specifier = ">=2.0.0" },
     { name = "python-dotenv", specifier = ">=1.0.0" },
     { name = "requests", specifier = ">=2.31.0" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.8.0" },

     { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
 ]
+[[package]]
+name = "narwhals"
+version = "2.13.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/89/ea/f82ef99ced4d03c33bb314c9b84a08a0a86c448aaa11ffd6256b99538aa5/narwhals-2.13.0.tar.gz", hash = "sha256:ee94c97f4cf7cfeebbeca8d274784df8b3d7fd3f955ce418af998d405576fdd9", size = 594555, upload-time = "2025-12-01T13:54:05.329Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/87/0d/1861d1599571974b15b025e12b142d8e6b42ad66c8a07a89cb0fc21f1e03/narwhals-2.13.0-py3-none-any.whl", hash = "sha256:9b795523c179ca78204e3be53726da374168f906e38de2ff174c2363baaaf481", size = 426407, upload-time = "2025-12-01T13:54:03.861Z" },
+]
 [[package]]
 name = "numpy"
 version = "2.2.6"
     { url = "https://files.pythonhosted.org/packages/95/7e/f896623c3c635a90537ac093c6a618ebe1a90d87206e42309cb5d98a1b9e/pillow-12.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b290fd8aa38422444d4b50d579de197557f182ef1068b75f5aa8558638b8d0a5", size = 6997850, upload-time = "2025-10-15T18:24:11.495Z" },
 ]
+[[package]]
+name = "plotly"
+version = "6.5.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "narwhals" },
+    { name = "packaging" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/94/05/1199e2a03ce6637960bc1e951ca0f928209a48cfceb57355806a88f214cf/plotly-6.5.0.tar.gz", hash = "sha256:d5d38224883fd38c1409bef7d6a8dc32b74348d39313f3c52ca998b8e447f5c8", size = 7013624, upload-time = "2025-11-17T18:39:24.523Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e7/c3/3031c931098de393393e1f93a38dc9ed6805d86bb801acc3cf2d5bd1e6b7/plotly-6.5.0-py3-none-any.whl", hash = "sha256:5ac851e100367735250206788a2b1325412aa4a4917a4fe3e6f0bc5aa6f3d90a", size = 9893174, upload-time = "2025-11-17T18:39:20.351Z" },
+]
 [[package]]
 name = "pydantic"
 version = "2.12.4"
 dependencies = [
     { name = "gradio" },
     { name = "pandas" },
+    { name = "plotly" },
     { name = "python-dotenv" },
     { name = "requests" },
 ]
 requires-dist = [
     { name = "gradio", specifier = ">=6.0.2" },
     { name = "pandas", specifier = ">=2.0.0" },
+    { name = "plotly", specifier = ">=5.18.0" },
     { name = "python-dotenv", specifier = ">=1.0.0" },
     { name = "requests", specifier = ">=2.31.0" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.8.0" },