Commit
·
0cafd55
1
Parent(s):
ad3271e
Improve price detection with fallbacks and date stripping
Browse files
- Strip date suffix from model name (e.g. gemini-3-pro-preview-20251118 -> gemini-3-pro-preview)
- Add fallback price estimates based on standard ratios:
  - Cache Read = Input × 0.1
  - Cache Creation = Input × 1.25
  - Completion = Input × 5
- Show '(estimated)' label for fallback prices with ⚠
- Rename cost chart to 'Distribution of Cost Reported by Leaderboard'
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import json
|
| 2 |
import os
|
|
|
|
| 3 |
import subprocess
|
| 4 |
from pathlib import Path
|
| 5 |
|
|
@@ -53,20 +54,27 @@ def get_model_prices(model_name: str) -> dict | None:
|
|
| 53 |
prices = get_litellm_prices()
|
| 54 |
|
| 55 |
clean_name = model_name.replace("anthropic/", "").replace("openai/", "")
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
candidates = [
|
| 58 |
model_name,
|
| 59 |
clean_name,
|
|
|
|
| 60 |
f"anthropic/{clean_name}",
|
| 61 |
f"openai/{clean_name}",
|
|
|
|
|
|
|
| 62 |
]
|
| 63 |
|
| 64 |
for key in candidates:
|
| 65 |
if key in prices:
|
| 66 |
return prices[key]
|
| 67 |
|
|
|
|
| 68 |
for key, value in prices.items():
|
| 69 |
-
if clean_name in key or model_name in key:
|
| 70 |
return value
|
| 71 |
|
| 72 |
return None
|
|
@@ -326,7 +334,7 @@ def create_basic_histograms(df: pd.DataFrame, input_price: float, cache_read_pri
|
|
| 326 |
df,
|
| 327 |
x="instance_cost",
|
| 328 |
nbins=30,
|
| 329 |
-
title="Distribution of Cost
|
| 330 |
color_discrete_sequence=["#00CC96"],
|
| 331 |
)
|
| 332 |
fig_cost.update_layout(
|
|
@@ -518,21 +526,56 @@ def extract_model_from_folder(folder: str) -> str:
|
|
| 518 |
return folder
|
| 519 |
|
| 520 |
|
| 521 |
-
def get_prices_for_folder(folder: str) -> tuple[
|
| 522 |
-
"""Get prices from litellm based on folder name.
|
|
|
|
| 523 |
model_hint = extract_model_from_folder(folder)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 524 |
if not model_hint:
|
| 525 |
-
return
|
| 526 |
|
| 527 |
prices = get_model_prices(model_hint)
|
| 528 |
if prices:
|
|
|
|
| 529 |
input_price = prices.get("input_cost_per_token", 0) * 1e6
|
| 530 |
cache_read = prices.get("cache_read_input_token_cost", 0) * 1e6
|
| 531 |
cache_creation = prices.get("cache_creation_input_token_cost", 0) * 1e6
|
| 532 |
completion = prices.get("output_cost_per_token", 0) * 1e6
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 536 |
|
| 537 |
|
| 538 |
def on_row_select(evt: gr.SelectData, df: pd.DataFrame):
|
|
@@ -555,22 +598,25 @@ def on_row_select(evt: gr.SelectData, df: pd.DataFrame):
|
|
| 555 |
|
| 556 |
show_analyze = check_trajectories_downloaded(folder)
|
| 557 |
|
| 558 |
-
|
| 559 |
|
| 560 |
-
def price_update(
|
| 561 |
-
|
|
|
|
| 562 |
return gr.update(value=value, label=f"✅ {name}")
|
|
|
|
|
|
|
| 563 |
else:
|
| 564 |
-
return gr.update(value=
|
| 565 |
|
| 566 |
return (
|
| 567 |
folder, name,
|
| 568 |
gr.update(interactive=True),
|
| 569 |
gr.update(visible=show_analyze),
|
| 570 |
-
price_update(
|
| 571 |
-
price_update(cache_read, "Cache Read"),
|
| 572 |
-
price_update(cache_creation, "Cache Creation"),
|
| 573 |
-
price_update(completion, "Completion"),
|
| 574 |
model_hint
|
| 575 |
)
|
| 576 |
|
|
|
|
| 1 |
import json
|
| 2 |
import os
|
| 3 |
+
import re
|
| 4 |
import subprocess
|
| 5 |
from pathlib import Path
|
| 6 |
|
|
|
|
| 54 |
prices = get_litellm_prices()
|
| 55 |
|
| 56 |
clean_name = model_name.replace("anthropic/", "").replace("openai/", "")
|
| 57 |
+
|
| 58 |
+
# Try without date suffix (e.g., gemini-3-pro-preview-20251118 -> gemini-3-pro-preview)
|
| 59 |
+
name_without_date = re.sub(r'-\d{8}$', '', clean_name)
|
| 60 |
|
| 61 |
candidates = [
|
| 62 |
model_name,
|
| 63 |
clean_name,
|
| 64 |
+
name_without_date,
|
| 65 |
f"anthropic/{clean_name}",
|
| 66 |
f"openai/{clean_name}",
|
| 67 |
+
f"anthropic/{name_without_date}",
|
| 68 |
+
f"openai/{name_without_date}",
|
| 69 |
]
|
| 70 |
|
| 71 |
for key in candidates:
|
| 72 |
if key in prices:
|
| 73 |
return prices[key]
|
| 74 |
|
| 75 |
+
# Fuzzy match
|
| 76 |
for key, value in prices.items():
|
| 77 |
+
if clean_name in key or model_name in key or name_without_date in key:
|
| 78 |
return value
|
| 79 |
|
| 80 |
return None
|
|
|
|
| 334 |
df,
|
| 335 |
x="instance_cost",
|
| 336 |
nbins=30,
|
| 337 |
+
title="Distribution of Cost Reported by Leaderboard ($)",
|
| 338 |
color_discrete_sequence=["#00CC96"],
|
| 339 |
)
|
| 340 |
fig_cost.update_layout(
|
|
|
|
| 526 |
return folder
|
| 527 |
|
| 528 |
|
| 529 |
+
def get_prices_for_folder(folder: str) -> tuple[dict, str]:
|
| 530 |
+
"""Get prices from litellm based on folder name.
|
| 531 |
+
Returns (prices_dict, model_name) where prices_dict has 'value' and 'found' for each price type."""
|
| 532 |
model_hint = extract_model_from_folder(folder)
|
| 533 |
+
|
| 534 |
+
result = {
|
| 535 |
+
"input": {"value": 0, "found": False},
|
| 536 |
+
"cache_read": {"value": 0, "found": False},
|
| 537 |
+
"cache_creation": {"value": 0, "found": False},
|
| 538 |
+
"completion": {"value": 0, "found": False},
|
| 539 |
+
}
|
| 540 |
+
|
| 541 |
if not model_hint:
|
| 542 |
+
return result, ""
|
| 543 |
|
| 544 |
prices = get_model_prices(model_hint)
|
| 545 |
if prices:
|
| 546 |
+
# Get values from litellm
|
| 547 |
input_price = prices.get("input_cost_per_token", 0) * 1e6
|
| 548 |
cache_read = prices.get("cache_read_input_token_cost", 0) * 1e6
|
| 549 |
cache_creation = prices.get("cache_creation_input_token_cost", 0) * 1e6
|
| 550 |
completion = prices.get("output_cost_per_token", 0) * 1e6
|
| 551 |
+
|
| 552 |
+
result["input"] = {"value": input_price, "found": input_price > 0}
|
| 553 |
+
result["cache_read"] = {"value": cache_read, "found": cache_read > 0}
|
| 554 |
+
result["cache_creation"] = {"value": cache_creation, "found": cache_creation > 0}
|
| 555 |
+
result["completion"] = {"value": completion, "found": completion > 0}
|
| 556 |
+
|
| 557 |
+
# Apply fallback estimates based on standard ratios
|
| 558 |
+
# Cache Read = Input * 0.1 (90% discount)
|
| 559 |
+
# Cache Creation = Input * 1.25 (25% premium)
|
| 560 |
+
# Completion = Input * 5 (typical ratio)
|
| 561 |
+
if input_price > 0:
|
| 562 |
+
if not result["cache_read"]["found"]:
|
| 563 |
+
result["cache_read"]["value"] = input_price * 0.1
|
| 564 |
+
if not result["cache_creation"]["found"]:
|
| 565 |
+
result["cache_creation"]["value"] = input_price * 1.25
|
| 566 |
+
if not result["completion"]["found"]:
|
| 567 |
+
result["completion"]["value"] = input_price * 5
|
| 568 |
+
elif completion > 0:
|
| 569 |
+
# If we only have completion, estimate input from it
|
| 570 |
+
estimated_input = completion / 5
|
| 571 |
+
if not result["input"]["found"]:
|
| 572 |
+
result["input"]["value"] = estimated_input
|
| 573 |
+
if not result["cache_read"]["found"]:
|
| 574 |
+
result["cache_read"]["value"] = estimated_input * 0.1
|
| 575 |
+
if not result["cache_creation"]["found"]:
|
| 576 |
+
result["cache_creation"]["value"] = estimated_input * 1.25
|
| 577 |
+
|
| 578 |
+
return result, model_hint
|
| 579 |
|
| 580 |
|
| 581 |
def on_row_select(evt: gr.SelectData, df: pd.DataFrame):
|
|
|
|
| 598 |
|
| 599 |
show_analyze = check_trajectories_downloaded(folder)
|
| 600 |
|
| 601 |
+
prices_dict, model_hint = get_prices_for_folder(folder)
|
| 602 |
|
| 603 |
+
def price_update(price_info, name):
|
| 604 |
+
value = price_info["value"]
|
| 605 |
+
if price_info["found"]:
|
| 606 |
return gr.update(value=value, label=f"✅ {name}")
|
| 607 |
+
elif value > 0:
|
| 608 |
+
return gr.update(value=value, label=f"⚠ {name} (estimated)")
|
| 609 |
else:
|
| 610 |
+
return gr.update(value=0, label=f"❌ {name}")
|
| 611 |
|
| 612 |
return (
|
| 613 |
folder, name,
|
| 614 |
gr.update(interactive=True),
|
| 615 |
gr.update(visible=show_analyze),
|
| 616 |
+
price_update(prices_dict["input"], "Input"),
|
| 617 |
+
price_update(prices_dict["cache_read"], "Cache Read"),
|
| 618 |
+
price_update(prices_dict["cache_creation"], "Cache Creation"),
|
| 619 |
+
price_update(prices_dict["completion"], "Completion"),
|
| 620 |
model_hint
|
| 621 |
)
|
| 622 |
|