afridialeval / src /generation_rates.py
millicentochieng's picture
Upload folder using huggingface_hub
edf8cae verified
# Per-million-token pricing (USD). Rates are subject to change.
# Models not listed here will report tokens but not estimated cost.
# Using paid-tier rates (not free tier) for budgeting.
# USD per one million uncached prompt (input) tokens, keyed by model alias.
input_rates = {
    "gemini-3-flash": 1.50,  # paid tier: $1.50 per 1M input tokens
    "gpt-5.1": 1.25,
    "gemma-3-27b-it": 0.0,  # free via the HF Inference API
}
# USD per one million prompt tokens that were served from the provider's
# prompt cache, keyed by model alias.
# NOTE(review): cache discounts change often -- confirm these figures against
# the providers' current pricing pages before relying on them for budgeting.
cached_input_rates = {
    "gemini-3-flash": 0.15,
    "gpt-5.1": 0.3125,
    "gemma-3-27b-it": 0.0,
}
# USD per one million completion (output) tokens, keyed by model alias.
output_rates = {
    "gemini-3-flash": 9.00,  # paid tier: $9.00 per 1M output tokens (incl. thinking)
    "gpt-5.1": 10.0,
    "gemma-3-27b-it": 0.0,  # free via the HF Inference API
}
def estimate_cost(
model_alias: str,
prompt_tokens: int,
completion_tokens: int,
cached_tokens: int = 0,
) -> float | None:
"""Return estimated cost in USD, or None if pricing unavailable."""
if model_alias not in input_rates:
return None
uncached = max(prompt_tokens - cached_tokens, 0)
cost = (
uncached / 1_000_000 * input_rates[model_alias]
+ cached_tokens / 1_000_000 * cached_input_rates.get(model_alias, 0)
+ completion_tokens / 1_000_000 * output_rates[model_alias]
)
return cost