# Per-million-token pricing (USD). Rates are subject to change. # Models not listed here will report tokens but not estimated cost. # Using paid-tier rates (not free tier) for budgeting. input_rates = { "gemini-3-flash": 1.50, # Paid tier: $1.50/M input "gpt-5.1": 1.25, "gemma-3-27b-it": 0.0, # Free via HF Inference API } cached_input_rates = { "gemini-3-flash": 0.15, "gpt-5.1": 0.3125, "gemma-3-27b-it": 0.0, } output_rates = { "gemini-3-flash": 9.00, # Paid tier: $9.00/M output (incl. thinking) "gpt-5.1": 10.0, "gemma-3-27b-it": 0.0, # Free via HF Inference API } def estimate_cost( model_alias: str, prompt_tokens: int, completion_tokens: int, cached_tokens: int = 0, ) -> float | None: """Return estimated cost in USD, or None if pricing unavailable.""" if model_alias not in input_rates: return None uncached = max(prompt_tokens - cached_tokens, 0) cost = ( uncached / 1_000_000 * input_rates[model_alias] + cached_tokens / 1_000_000 * cached_input_rates.get(model_alias, 0) + completion_tokens / 1_000_000 * output_rates[model_alias] ) return cost