Spaces:

alejandro-ao
/

design-evals

Running

alejandro

feat: add pricing pipeline and API cost metadata

e7b6107 2 months ago

11.9 kB

	import type {
	GenerationAttempt,
	GenerationCost,
	GenerationResult,
	GenerationUsage,
	} from "@/lib/generation-types";
	import type { ProviderId } from "@/lib/providers";

	// Divisor used by the cost calculation to turn the per-1M-token rates of the
	// pricing table into a charge for an actual token count.
	const TOKENS_PER_MILLION = 1_000_000;

	// Version tag copied into every resolved pricing record and cost record.
	// Update this when prices are refreshed from provider pricing pages.
	export const PRICING_VERSION = "2026-02-21";

	/**
	 * One row of the static pricing table.
	 *
	 * Rates are expressed in USD per one million tokens.
	 */
	interface ModelPricingEntry {
		/** Provider this row applies to; rows of other providers are never considered. */
		provider: ProviderId;
		/**
		 * How `model` is compared with the requested model id:
		 * `"exact"` requires equality, `"prefix"` requires the id to start with `model`.
		 */
		matchType: "exact" | "prefix";
		/** Model id (or id prefix). Lookups lowercase the requested id, so keep this lowercase. */
		model: string;
		/** Rate for regular (uncached) input tokens. */
		inputUsdPer1M: number;
		/** Rate for output tokens. */
		outputUsdPer1M: number;
		/** Rate for cached input tokens; when absent, cached tokens are billed at the input rate. */
		cachedInputUsdPer1M?: number;
		/**
		 * Optional allow-list of routing providers. When missing or empty the row
		 * matches any routing provider; otherwise the routing provider must be listed.
		 */
		routingProviders?: string[];
	}

	/**
	 * Pricing selected for a concrete model id by resolveModelPricing.
	 * Rates are copied from the matched table row (USD per one million tokens).
	 */
	interface ResolvedModelPricing {
	// Rate applied to input tokens that are not billed at the cached rate.
	inputUsdPer1M: number;
	// Rate applied to output tokens.
	outputUsdPer1M: number;
	// Cached-input rate; undefined when the matched row does not define one.
	cachedInputUsdPer1M?: number;
	// The `model` value of the table row that matched (an exact id or a prefix).
	pricingMatchedModel: string;
	// Set to PRICING_VERSION at resolution time.
	pricingVersion: string;
	}

	// Static pricing table consulted by resolveModelPricing.
	// - Rates are USD per one million tokens (see the field names).
	// - Model ids are written in lowercase because the lookup lowercases the
	//   requested id before comparing.
	// - "exact" rows must equal the requested id; "prefix" rows match any id that
	//   starts with `model`, and the longest matching prefix wins.
	// - No row currently sets `routingProviders`, so every row matches regardless
	//   of the routing provider.
	const MODEL_PRICING_TABLE: ModelPricingEntry[] = [
	// OpenAI: exact ids; these rows also define a cached-input rate.
	{
	provider: "openai",
	matchType: "exact",
	model: "gpt-5.2",
	inputUsdPer1M: 1.5,
	outputUsdPer1M: 6,
	cachedInputUsdPer1M: 0.375,
	},
	{
	provider: "openai",
	matchType: "exact",
	model: "gpt-5.1",
	inputUsdPer1M: 1.25,
	outputUsdPer1M: 5,
	cachedInputUsdPer1M: 0.3125,
	},
	{
	provider: "openai",
	matchType: "exact",
	model: "gpt-5-mini",
	inputUsdPer1M: 0.25,
	outputUsdPer1M: 1,
	cachedInputUsdPer1M: 0.0625,
	},
	{
	provider: "openai",
	matchType: "exact",
	model: "gpt-5-nano",
	inputUsdPer1M: 0.05,
	outputUsdPer1M: 0.2,
	cachedInputUsdPer1M: 0.0125,
	},
	{
	provider: "openai",
	matchType: "exact",
	model: "gpt-4.1",
	inputUsdPer1M: 2,
	outputUsdPer1M: 8,
	cachedInputUsdPer1M: 0.5,
	},
	// Anthropic: exact ids; no cached-input rate is defined, so the cost
	// calculation bills all input tokens at the regular input rate.
	{
	provider: "anthropic",
	matchType: "exact",
	model: "claude-opus-4-6",
	inputUsdPer1M: 15,
	outputUsdPer1M: 75,
	},
	{
	provider: "anthropic",
	matchType: "exact",
	model: "claude-sonnet-4-6",
	inputUsdPer1M: 3,
	outputUsdPer1M: 15,
	},
	{
	provider: "anthropic",
	matchType: "exact",
	model: "claude-opus-4-1-20250805",
	inputUsdPer1M: 15,
	outputUsdPer1M: 75,
	},
	{
	provider: "anthropic",
	matchType: "exact",
	model: "claude-sonnet-4-20250514",
	inputUsdPer1M: 3,
	outputUsdPer1M: 15,
	},
	{
	provider: "anthropic",
	matchType: "exact",
	model: "claude-3-5-haiku-latest",
	inputUsdPer1M: 0.8,
	outputUsdPer1M: 4,
	},
	// Google: exact ids; no cached-input rate is defined.
	{
	provider: "google",
	matchType: "exact",
	model: "gemini-3-pro-preview",
	inputUsdPer1M: 3.5,
	outputUsdPer1M: 10.5,
	},
	{
	provider: "google",
	matchType: "exact",
	model: "gemini-3-flash-preview",
	inputUsdPer1M: 0.35,
	outputUsdPer1M: 1.05,
	},
	{
	provider: "google",
	matchType: "exact",
	model: "gemini-2.5-flash",
	inputUsdPer1M: 0.35,
	outputUsdPer1M: 1.05,
	},
	// Hugging Face: prefix rows, so one row covers every model id that starts
	// with the given "<org>/<family>" string.
	{
	provider: "huggingface",
	matchType: "prefix",
	model: "moonshotai/kimi-k2",
	inputUsdPer1M: 0.8,
	outputUsdPer1M: 2.4,
	},
	{
	provider: "huggingface",
	matchType: "prefix",
	model: "minimax/minimax-m1",
	inputUsdPer1M: 0.6,
	outputUsdPer1M: 2,
	},
	{
	provider: "huggingface",
	matchType: "prefix",
	model: "minimaxai/minimax-m2",
	inputUsdPer1M: 0.6,
	outputUsdPer1M: 2,
	},
	{
	provider: "huggingface",
	matchType: "prefix",
	model: "qwen/qwen",
	inputUsdPer1M: 0.3,
	outputUsdPer1M: 0.9,
	},
	{
	provider: "huggingface",
	matchType: "prefix",
	model: "deepseek-ai/deepseek",
	inputUsdPer1M: 0.55,
	outputUsdPer1M: 1.65,
	},
	{
	provider: "huggingface",
	matchType: "prefix",
	model: "meta-llama/llama-3.3-70b-instruct",
	inputUsdPer1M: 0.9,
	outputUsdPer1M: 0.9,
	},
	];

	/**
	 * Converts an untrusted usage field into a non-negative integer token count.
	 *
	 * @param value Raw value as reported by a provider SDK; may be anything.
	 * @returns The value rounded down to an integer, or `null` when it is not a
	 *   finite number or is negative.
	 */
	function parseTokenCount(value: unknown): number | null {
		// Reject non-numbers as well as NaN / ±Infinity.
		if (typeof value !== "number" || !Number.isFinite(value)) {
			return null;
		}

		// Fractional counts are rounded down; anything below zero is invalid.
		const normalized = Math.floor(value);
		return normalized < 0 ? null : normalized;
	}

	/** Rounds a USD amount to six decimal places. */
	function roundUsd(value: number): number {
		const scaled = Math.round(value * 1_000_000);
		return scaled / 1_000_000;
	}

	/**
	 * Splits a model id of the form `"<model>:<routing provider>"` at its last colon.
	 *
	 * Both parts are trimmed and lowercased. An id without a colon, or one that
	 * starts with its only colon, is returned whole as `baseModelId`. A dangling
	 * trailing colon (`"gpt-5.2:"`) is dropped so the base id can still match the
	 * pricing table.
	 *
	 * @param modelId Model id as supplied by the caller.
	 * @returns The lowercase base id and, when present, the lowercase routing provider.
	 */
	function splitRoutedModelId(modelId: string): {
		baseModelId: string;
		routingProvider?: string;
	} {
		const trimmed = modelId.trim();
		const suffixIndex = trimmed.lastIndexOf(":");

		// No separator, or nothing in front of it: the whole string is the model id.
		if (suffixIndex <= 0) {
			return { baseModelId: trimmed.toLowerCase() };
		}

		const baseModelId = trimmed.slice(0, suffixIndex).trim().toLowerCase();
		const routingProvider = trimmed.slice(suffixIndex + 1).trim().toLowerCase();

		// An empty suffix means the colon was dangling; report no routing provider.
		return routingProvider ? { baseModelId, routingProvider } : { baseModelId };
	}

	/**
	 * Checks whether a pricing row applies to the given routing provider.
	 *
	 * @param entry Pricing row under consideration.
	 * @param routingProvider Lowercase routing provider, if one is known.
	 * @returns `true` when the row has no (or an empty) `routingProviders` list;
	 *   otherwise `true` only when `routingProvider` is non-empty and listed.
	 */
	function isRoutingProviderMatch(
		entry: ModelPricingEntry,
		routingProvider: string | undefined,
	): boolean {
		const allowedProviders = entry.routingProviders;

		// Rows without an allow-list are not restricted to any routing provider.
		if (!allowedProviders?.length) {
			return true;
		}

		// A restricted row cannot match when the routing provider is unknown or empty.
		return routingProvider ? allowedProviders.includes(routingProvider) : false;
	}

	/**
	 * Looks up the pricing row for a provider/model pair.
	 *
	 * The model id is normalised with `splitRoutedModelId`. Rows are restricted to
	 * the given provider and to those accepted by `isRoutingProviderMatch`. An
	 * exact row wins; otherwise the longest matching prefix row is used (the
	 * earliest row in table order wins a tie).
	 *
	 * @param provider Provider whose rows are searched.
	 * @param modelId Requested model id, optionally suffixed with `":<routing provider>"`.
	 * @param routingProviderInput Explicit routing provider; when non-blank it takes
	 *   precedence over the suffix embedded in `modelId`.
	 * @returns The resolved pricing, or `null` when the id is empty or no row matches.
	 */
	function resolveModelPricing(
		provider: ProviderId,
		modelId: string,
		routingProviderInput?: string,
	): ResolvedModelPricing | null {
		const { baseModelId, routingProvider: routedFromModel } = splitRoutedModelId(modelId);
		if (!baseModelId) {
			return null;
		}

		// `||` on purpose: a blank explicit provider must fall back to the id suffix.
		const routingProvider = routingProviderInput?.trim().toLowerCase() || routedFromModel;

		const candidates = MODEL_PRICING_TABLE.filter(
			(entry) => entry.provider === provider && isRoutingProviderMatch(entry, routingProvider),
		);

		let matched = candidates.find(
			(entry) => entry.matchType === "exact" && entry.model === baseModelId,
		);

		if (!matched) {
			// Keep the longest prefix; strict `>` preserves table order on ties.
			for (const entry of candidates) {
				if (entry.matchType !== "prefix" || !baseModelId.startsWith(entry.model)) {
					continue;
				}
				if (!matched || entry.model.length > matched.model.length) {
					matched = entry;
				}
			}
		}

		if (!matched) {
			return null;
		}

		return {
			inputUsdPer1M: matched.inputUsdPer1M,
			outputUsdPer1M: matched.outputUsdPer1M,
			cachedInputUsdPer1M: matched.cachedInputUsdPer1M,
			pricingMatchedModel: matched.model,
			pricingVersion: PRICING_VERSION,
		};
	}

	/**
	 * Builds a consistent usage record from raw, untrusted provider counters.
	 *
	 * Each field is validated with `parseTokenCount`; invalid values count as zero.
	 *
	 * @param input Raw counters as reported by the provider; any field may be
	 *   missing or malformed.
	 * @returns `null` when no counter is positive. Otherwise a usage record where
	 *   `totalTokens` is at least `inputTokens + outputTokens`, and
	 *   `cachedInputTokens` is capped at `inputTokens` and omitted when the capped
	 *   value is zero.
	 */
	export function normalizeGenerationUsage(input: {
		inputTokens?: unknown;
		outputTokens?: unknown;
		totalTokens?: unknown;
		cachedInputTokens?: unknown;
	}): GenerationUsage | null {
		const inputTokens = parseTokenCount(input.inputTokens) ?? 0;
		const outputTokens = parseTokenCount(input.outputTokens) ?? 0;
		const reportedTotalTokens = parseTokenCount(input.totalTokens) ?? 0;
		const reportedCachedTokens = parseTokenCount(input.cachedInputTokens) ?? 0;

		// Counts are never negative here, so "nothing positive" means "all zero".
		const hasAnyUsage =
			inputTokens > 0 || outputTokens > 0 || reportedTotalTokens > 0 || reportedCachedTokens > 0;
		if (!hasAnyUsage) {
			return null;
		}

		// A reported total smaller than input + output is raised to that sum.
		const totalTokens = Math.max(reportedTotalTokens, inputTokens + outputTokens);

		// Cached tokens cannot exceed input tokens; a zero result is reported as
		// "no cached tokens" (undefined) rather than as an explicit 0.
		const cappedCachedTokens = Math.min(reportedCachedTokens, inputTokens);
		const cachedInputTokens = cappedCachedTokens > 0 ? cappedCachedTokens : undefined;

		return {
			inputTokens,
			outputTokens,
			cachedInputTokens,
			totalTokens,
		};
	}

	/**
	 * Prices a usage record with the given per-1M-token rates.
	 *
	 * Cached input tokens are billed separately only when the pricing defines a
	 * cached-input rate; otherwise all input tokens are billed at the input rate
	 * and `cachedInputUsd` stays undefined. Every component and the total are
	 * rounded with `roundUsd`.
	 *
	 * @param usage Token counts to price.
	 * @param pricing Rates resolved for the model.
	 * @returns The cost breakdown in USD, tagged with the pricing version and matched model.
	 */
	function calculateGenerationCost(
		usage: GenerationUsage,
		pricing: ResolvedModelPricing,
	): GenerationCost {
		const toUsd = (tokens: number, usdPer1M: number): number =>
			roundUsd((tokens / TOKENS_PER_MILLION) * usdPer1M);

		const reportedCachedTokens = usage.cachedInputTokens ?? 0;
		const cachedInputTokens = reportedCachedTokens > 0 ? reportedCachedTokens : 0;

		let uncachedInputTokens = usage.inputTokens;
		let cachedInputUsd: number | undefined;
		if (cachedInputTokens > 0 && typeof pricing.cachedInputUsdPer1M === "number") {
			// Never let the uncached share go negative if cached exceeds input.
			uncachedInputTokens = Math.max(usage.inputTokens - cachedInputTokens, 0);
			cachedInputUsd = toUsd(cachedInputTokens, pricing.cachedInputUsdPer1M);
		}

		const inputUsd = toUsd(uncachedInputTokens, pricing.inputUsdPer1M);
		const outputUsd = toUsd(usage.outputTokens, pricing.outputUsdPer1M);
		const totalUsd = roundUsd(inputUsd + outputUsd + (cachedInputUsd ?? 0));

		return {
			currency: "USD",
			inputUsd,
			outputUsd,
			cachedInputUsd,
			totalUsd,
			pricingVersion: pricing.pricingVersion,
			pricingMatchedModel: pricing.pricingMatchedModel,
		};
	}

	/**
	 * Returns the attempt with a `cost` field derived from its usage.
	 *
	 * Attempts without usage are returned unchanged. When no pricing can be
	 * resolved for the attempt's model, `cost` is set to `null`. For the
	 * "huggingface" provider the attempt's own `provider` value is passed along as
	 * the routing provider.
	 */
	function enrichAttemptCost(provider: ProviderId, attempt: GenerationAttempt): GenerationAttempt {
		const { usage } = attempt;
		if (!usage) {
			return attempt;
		}

		const routingHint = provider === "huggingface" ? attempt.provider : undefined;
		const pricing = resolveModelPricing(provider, attempt.model, routingHint);
		const cost = pricing ? calculateGenerationCost(usage, pricing) : null;

		return { ...attempt, cost };
	}

	/**
	 * Sums the token usage of all attempts that report usage.
	 *
	 * @param attempts Attempts to aggregate; those without usage are skipped.
	 * @returns The summed counters, or `null` when no attempt has usage.
	 *   `cachedInputTokens` is `undefined` when the sum is zero.
	 */
	function aggregateUsage(attempts: GenerationAttempt[]): GenerationUsage | null {
		let inputTokens = 0;
		let outputTokens = 0;
		let totalTokens = 0;
		let cachedInputTokens = 0;
		let sawUsage = false;

		for (const { usage } of attempts) {
			if (!usage) {
				continue;
			}
			sawUsage = true;
			inputTokens += usage.inputTokens;
			outputTokens += usage.outputTokens;
			totalTokens += usage.totalTokens;
			cachedInputTokens += usage.cachedInputTokens ?? 0;
		}

		if (!sawUsage) {
			return null;
		}

		return {
			inputTokens,
			outputTokens,
			totalTokens,
			cachedInputTokens: cachedInputTokens > 0 ? cachedInputTokens : undefined,
		};
	}

	/**
	 * Sums the cost of all attempts that report usage.
	 *
	 * @param attempts Attempts to aggregate; those without usage are ignored.
	 * @returns `null` when no attempt has usage, or when any attempt with usage
	 *   has no cost (a partial total would understate the spend). Otherwise the
	 *   summed, rounded cost. `pricingVersion` / `pricingMatchedModel` carry the
	 *   single shared value, or a "mixed" marker when the attempts differ.
	 */
	function aggregateCost(attempts: GenerationAttempt[]): GenerationCost | null {
		let inputUsd = 0;
		let outputUsd = 0;
		let cachedInputUsd = 0;
		let totalUsd = 0;
		let pricedAttemptCount = 0;
		const pricingMatchedModels = new Set<string>();
		const pricingVersions = new Set<string>();

		for (const attempt of attempts) {
			if (!attempt.usage) {
				continue;
			}

			const cost = attempt.cost;
			if (!cost) {
				// An attempt consumed tokens but could not be priced.
				return null;
			}

			pricedAttemptCount += 1;
			inputUsd += cost.inputUsd;
			outputUsd += cost.outputUsd;
			cachedInputUsd += cost.cachedInputUsd ?? 0;
			totalUsd += cost.totalUsd;
			pricingMatchedModels.add(cost.pricingMatchedModel);
			pricingVersions.add(cost.pricingVersion);
		}

		if (pricedAttemptCount === 0) {
			return null;
		}

		// Destructuring yields the first (and, when size is 1, only) element.
		const [firstVersion] = pricingVersions;
		const [firstMatchedModel] = pricingMatchedModels;

		return {
			currency: "USD",
			inputUsd: roundUsd(inputUsd),
			outputUsd: roundUsd(outputUsd),
			cachedInputUsd: cachedInputUsd > 0 ? roundUsd(cachedInputUsd) : undefined,
			totalUsd: roundUsd(totalUsd),
			pricingVersion:
				pricingVersions.size === 1 && firstVersion !== undefined
					? firstVersion
					: `${PRICING_VERSION}-mixed`,
			pricingMatchedModel:
				pricingMatchedModels.size === 1 && firstMatchedModel !== undefined
					? firstMatchedModel
					: "mixed",
		};
	}

	/**
	 * Returns a copy of the generation result with pricing information attached.
	 *
	 * Every attempt is passed through `enrichAttemptCost`; the result's `usage`
	 * and `cost` are then recomputed from those attempts with `aggregateUsage`
	 * and `aggregateCost`.
	 *
	 * @param provider Provider that produced the result.
	 * @param result Generation result to enrich; it is not mutated.
	 */
	export function applyPricingToGenerationResult(
		provider: ProviderId,
		result: GenerationResult,
	): GenerationResult {
		const pricedAttempts = result.attempts.map((attempt) => enrichAttemptCost(provider, attempt));
		const totalUsage = aggregateUsage(pricedAttempts);
		const totalCost = aggregateCost(pricedAttempts);

		return {
			...result,
			attempts: pricedAttempts,
			usage: totalUsage,
			cost: totalCost,
		};
	}