# Per-million-token pricing (USD). Rates are subject to change.
# Models not listed here will report tokens but not estimated cost.
# Using paid-tier rates (not free tier) for budgeting.

# One row per model: (input, cached input, output), each in USD per million
# tokens. Keeping a model's three rates side by side makes it harder for the
# derived tables below to drift apart.
_RATES_PER_MILLION = {
    # Paid tier: $1.50/M input, $9.00/M output (incl. thinking)
    "gemini-3-flash": (1.50, 0.15, 9.00),
    "gpt-5.1": (1.25, 0.3125, 10.0),
    # Free via HF Inference API
    "gemma-3-27b-it": (0.0, 0.0, 0.0),
}

input_rates = {
    model: rates[0] for model, rates in _RATES_PER_MILLION.items()
}

cached_input_rates = {
    model: rates[1] for model, rates in _RATES_PER_MILLION.items()
}

output_rates = {
    model: rates[2] for model, rates in _RATES_PER_MILLION.items()
}


def estimate_cost(
    model_alias: str,
    prompt_tokens: int,
    completion_tokens: int,
    cached_tokens: int = 0,
) -> float | None:
    """Return the estimated cost in USD, or None if pricing is unavailable.

    Args:
        model_alias: Key looked up in ``input_rates``, ``cached_input_rates``
            and ``output_rates``.
        prompt_tokens: Total input tokens, including any cached ones.
        completion_tokens: Output tokens.
        cached_tokens: Portion of ``prompt_tokens`` billed at the cached
            rate. Clamped to the range ``[0, prompt_tokens]``.

    Returns:
        The estimated cost in USD, or ``None`` when the model has no input
        rate or no output rate.
    """
    input_rate = input_rates.get(model_alias)
    output_rate = output_rates.get(model_alias)
    # Require both rates: a model present in only one table must yield None
    # rather than a KeyError.
    if input_rate is None or output_rate is None:
        return None

    # Cached tokens are still input tokens; when no discounted rate is known,
    # bill them at the full input rate instead of treating them as free.
    cached_rate = cached_input_rates.get(model_alias, input_rate)

    # Cached tokens are a subset of the prompt, so never bill more of them
    # than the prompt contains, and never a negative amount.
    cached = min(max(cached_tokens, 0), max(prompt_tokens, 0))
    uncached = max(prompt_tokens - cached, 0)

    return (
        uncached / 1_000_000 * input_rate
        + cached / 1_000_000 * cached_rate
        + completion_tokens / 1_000_000 * output_rate
    )