Spaces:
Running
Running
# Per-million-token pricing (USD). Rates are subject to change.
# Models not listed here will report tokens but not estimated cost.
# Using paid-tier rates (not free tier) for budgeting.
#
# One row per model is the single source of truth for the three lookup
# tables below, so a model can never have an input rate without also having
# a cached-input rate and an output rate.
_MODEL_RATES: tuple[tuple[str, float, float, float], ...] = (
    # (model alias, input, cached input, output)
    # Paid tier: $1.50/M input, $9.00/M output (incl. thinking)
    ("gemini-3-flash", 1.50, 0.15, 9.00),
    ("gpt-5.1", 1.25, 0.3125, 10.0),
    # Free via HF Inference API
    ("gemma-3-27b-it", 0.0, 0.0, 0.0),
)

# USD per million uncached prompt tokens, keyed by model alias.
input_rates: dict[str, float] = {
    alias: rate for alias, rate, _, _ in _MODEL_RATES
}
# USD per million cached prompt tokens, keyed by model alias.
cached_input_rates: dict[str, float] = {
    alias: rate for alias, _, rate, _ in _MODEL_RATES
}
# USD per million completion tokens, keyed by model alias.
output_rates: dict[str, float] = {
    alias: rate for alias, _, _, rate in _MODEL_RATES
}
| def estimate_cost( | |
| model_alias: str, | |
| prompt_tokens: int, | |
| completion_tokens: int, | |
| cached_tokens: int = 0, | |
| ) -> float | None: | |
| """Return estimated cost in USD, or None if pricing unavailable.""" | |
| if model_alias not in input_rates: | |
| return None | |
| uncached = max(prompt_tokens - cached_tokens, 0) | |
| cost = ( | |
| uncached / 1_000_000 * input_rates[model_alias] | |
| + cached_tokens / 1_000_000 * cached_input_rates.get(model_alias, 0) | |
| + completion_tokens / 1_000_000 * output_rates[model_alias] | |
| ) | |
| return cost |