Spaces:

JetBrains-Research
/

SWE-bench-Costs-Calculator

Running

IgorSlinko committed 9 days ago

Commit

0cafd55

1 Parent(s): ad3271e

Improve price detection with fallbacks and date stripping

- Strip date suffix from model name (e.g. gemini-3-pro-preview-20251118 -> gemini-3-pro-preview)
- Add fallback price estimates based on standard ratios:
  - Cache Read = Input × 0.1
  - Cache Creation = Input × 1.25
  - Completion = Input × 5
- Show '(estimated)' label for fallback prices with ❌
- Rename cost chart to 'Distribution of Cost Reported by Leaderboard'

Files changed (1) hide show

app.py +62 -16

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import json
 import os
 import subprocess
 from pathlib import Path
@@ -53,20 +54,27 @@ def get_model_prices(model_name: str) -> dict | None:
     prices = get_litellm_prices()
     clean_name = model_name.replace("anthropic/", "").replace("openai/", "")
     candidates = [
         model_name,
         clean_name,
         f"anthropic/{clean_name}",
         f"openai/{clean_name}",
     ]
     for key in candidates:
         if key in prices:
             return prices[key]
     for key, value in prices.items():
-        if clean_name in key or model_name in key:
             return value
     return None
@@ -326,7 +334,7 @@ def create_basic_histograms(df: pd.DataFrame, input_price: float, cache_read_pri
         df,
         x="instance_cost",
         nbins=30,
-        title="Distribution of Cost per Instance ($)",
         color_discrete_sequence=["#00CC96"],
     )
     fig_cost.update_layout(
@@ -518,21 +526,56 @@ def extract_model_from_folder(folder: str) -> str:
     return folder
-def get_prices_for_folder(folder: str) -> tuple[float, float, float, float, str]:
-    """Get prices from litellm based on folder name. Returns (input, cache_read, cache_creation, completion, model_name)"""
     model_hint = extract_model_from_folder(folder)
     if not model_hint:
-        return 0, 0, 0, 0, ""
     prices = get_model_prices(model_hint)
     if prices:
         input_price = prices.get("input_cost_per_token", 0) * 1e6
         cache_read = prices.get("cache_read_input_token_cost", 0) * 1e6
         cache_creation = prices.get("cache_creation_input_token_cost", 0) * 1e6
         completion = prices.get("output_cost_per_token", 0) * 1e6
-        return input_price, cache_read, cache_creation, completion, model_hint
-    return 0, 0, 0, 0, model_hint
 def on_row_select(evt: gr.SelectData, df: pd.DataFrame):
@@ -555,22 +598,25 @@ def on_row_select(evt: gr.SelectData, df: pd.DataFrame):
     show_analyze = check_trajectories_downloaded(folder)
-    input_price, cache_read, cache_creation, completion, model_hint = get_prices_for_folder(folder)
-    def price_update(value, name):
-        if value > 0:
             return gr.update(value=value, label=f"✅ {name}")
         else:
-            return gr.update(value=value, label=f"❌ {name}")
     return (
         folder, name,
         gr.update(interactive=True),
         gr.update(visible=show_analyze),
-        price_update(input_price, "Input"),
-        price_update(cache_read, "Cache Read"),
-        price_update(cache_creation, "Cache Creation"),
-        price_update(completion, "Completion"),
         model_hint
     )

 import json
 import os
+import re
 import subprocess
 from pathlib import Path
     prices = get_litellm_prices()
     clean_name = model_name.replace("anthropic/", "").replace("openai/", "")
+    # Try without date suffix (e.g., gemini-3-pro-preview-20251118 -> gemini-3-pro-preview)
+    name_without_date = re.sub(r'-\d{8}$', '', clean_name)
     candidates = [
         model_name,
         clean_name,
+        name_without_date,
         f"anthropic/{clean_name}",
         f"openai/{clean_name}",
+        f"anthropic/{name_without_date}",
+        f"openai/{name_without_date}",
     ]
     for key in candidates:
         if key in prices:
             return prices[key]
+    # Fuzzy match
     for key, value in prices.items():
+        if clean_name in key or model_name in key or name_without_date in key:
             return value
     return None
         df,
         x="instance_cost",
         nbins=30,
+        title="Distribution of Cost Reported by Leaderboard ($)",
         color_discrete_sequence=["#00CC96"],
     )
     fig_cost.update_layout(
     return folder
+def get_prices_for_folder(folder: str) -> tuple[dict, str]:
+    """Get prices from litellm based on folder name.
+    Returns (prices_dict, model_name) where prices_dict has 'value' and 'found' for each price type."""
     model_hint = extract_model_from_folder(folder)
+    result = {
+        "input": {"value": 0, "found": False},
+        "cache_read": {"value": 0, "found": False},
+        "cache_creation": {"value": 0, "found": False},
+        "completion": {"value": 0, "found": False},
+    }
     if not model_hint:
+        return result, ""
     prices = get_model_prices(model_hint)
     if prices:
+        # Get values from litellm
         input_price = prices.get("input_cost_per_token", 0) * 1e6
         cache_read = prices.get("cache_read_input_token_cost", 0) * 1e6
         cache_creation = prices.get("cache_creation_input_token_cost", 0) * 1e6
         completion = prices.get("output_cost_per_token", 0) * 1e6
+        result["input"] = {"value": input_price, "found": input_price > 0}
+        result["cache_read"] = {"value": cache_read, "found": cache_read > 0}
+        result["cache_creation"] = {"value": cache_creation, "found": cache_creation > 0}
+        result["completion"] = {"value": completion, "found": completion > 0}
+        # Apply fallback estimates based on standard ratios
+        # Cache Read = Input * 0.1 (90% discount)
+        # Cache Creation = Input * 1.25 (25% premium)
+        # Completion = Input * 5 (typical ratio)
+        if input_price > 0:
+            if not result["cache_read"]["found"]:
+                result["cache_read"]["value"] = input_price * 0.1
+            if not result["cache_creation"]["found"]:
+                result["cache_creation"]["value"] = input_price * 1.25
+            if not result["completion"]["found"]:
+                result["completion"]["value"] = input_price * 5
+        elif completion > 0:
+            # If we only have completion, estimate input from it
+            estimated_input = completion / 5
+            if not result["input"]["found"]:
+                result["input"]["value"] = estimated_input
+            if not result["cache_read"]["found"]:
+                result["cache_read"]["value"] = estimated_input * 0.1
+            if not result["cache_creation"]["found"]:
+                result["cache_creation"]["value"] = estimated_input * 1.25
+    return result, model_hint
 def on_row_select(evt: gr.SelectData, df: pd.DataFrame):
     show_analyze = check_trajectories_downloaded(folder)
+    prices_dict, model_hint = get_prices_for_folder(folder)
+    def price_update(price_info, name):
+        value = price_info["value"]
+        if price_info["found"]:
             return gr.update(value=value, label=f"✅ {name}")
+        elif value > 0:
+            return gr.update(value=value, label=f"❌ {name} (estimated)")
         else:
+            return gr.update(value=0, label=f"❌ {name}")
     return (
         folder, name,
         gr.update(interactive=True),
         gr.update(visible=show_analyze),
+        price_update(prices_dict["input"], "Input"),
+        price_update(prices_dict["cache_read"], "Cache Read"),
+        price_update(prices_dict["cache_creation"], "Cache Creation"),
+        price_update(prices_dict["completion"], "Completion"),
         model_hint
     )