|
|
const pricingService = require('../services/pricingService') |
|
|
|
|
|
|
|
|
/**
 * Static fallback price table, keyed by model id.
 *
 * Each entry holds four rates expressed per 1,000,000 tokens (the cost
 * calculator divides token counts by 1,000,000 before multiplying by them):
 *   - input:      regular input tokens
 *   - output:     output tokens
 *   - cacheWrite: cache-creation input tokens
 *   - cacheRead:  cache-read input tokens
 *
 * The table is only consulted when pricingService has no data for a model;
 * the `unknown` entry is the default for models that are not listed here.
 */
const MODEL_PRICING = {
  // Sonnet family
  'claude-3-5-sonnet-20241022': {
    input: 3.0,
    output: 15.0,
    cacheWrite: 3.75,
    cacheRead: 0.3
  },
  'claude-sonnet-4-20250514': {
    input: 3.0,
    output: 15.0,
    cacheWrite: 3.75,
    cacheRead: 0.3
  },
  'claude-sonnet-4-5-20250929': {
    input: 3.0,
    output: 15.0,
    cacheWrite: 3.75,
    cacheRead: 0.3
  },

  // Claude 3.5 Haiku is priced higher than Claude 3 Haiku; it previously
  // reused the Claude 3 Haiku rates, which under-reported its cost.
  'claude-3-5-haiku-20241022': {
    input: 0.8,
    output: 4.0,
    cacheWrite: 1.0,
    cacheRead: 0.08
  },

  // Opus family
  'claude-3-opus-20240229': {
    input: 15.0,
    output: 75.0,
    cacheWrite: 18.75,
    cacheRead: 1.5
  },
  'claude-opus-4-1-20250805': {
    input: 15.0,
    output: 75.0,
    cacheWrite: 18.75,
    cacheRead: 1.5
  },

  // Claude 3 Sonnet
  'claude-3-sonnet-20240229': {
    input: 3.0,
    output: 15.0,
    cacheWrite: 3.75,
    cacheRead: 0.3
  },

  // Claude 3 Haiku
  'claude-3-haiku-20240307': {
    input: 0.25,
    output: 1.25,
    cacheWrite: 0.3,
    cacheRead: 0.03
  },

  // Default rates applied to any model id that has no entry of its own.
  unknown: {
    input: 3.0,
    output: 15.0,
    cacheWrite: 3.75,
    cacheRead: 0.3
  }
}
|
|
|
|
|
/** Prices are quoted per this many tokens. */
const TOKENS_PER_MILLION = 1000000

/**
 * True when `model` is an own key of the static MODEL_PRICING table.
 * An own-property check is used so inherited names such as `constructor`
 * or `toString` are never mistaken for a priced model.
 */
function hasStaticPricing(model) {
  return Object.prototype.hasOwnProperty.call(MODEL_PRICING, model)
}

/** Static table entry for `model`, or the `unknown` entry when it has none. */
function lookupStaticPricing(model) {
  return hasStaticPricing(model) ? MODEL_PRICING[model] : MODEL_PRICING['unknown']
}

/** Convert a per-token price (possibly missing) to a per-million-token price. */
function toPerMillion(perTokenPrice) {
  return (perTokenPrice || 0) * TOKENS_PER_MILLION
}

/** Read the four token counters from an API-style usage object, defaulting to 0. */
function readTokenCounts(usage) {
  const source = usage || {}
  return {
    inputTokens: source.input_tokens || 0,
    outputTokens: source.output_tokens || 0,
    cacheCreateTokens: source.cache_creation_input_tokens || 0,
    cacheReadTokens: source.cache_read_input_tokens || 0
  }
}

/** Name-based OpenAI detection: the model id contains `gpt` or `o1`. */
function hasOpenAIName(modelName) {
  return modelName.includes('gpt') || modelName.includes('o1')
}

/**
 * Static helpers that turn token usage into cost figures.
 *
 * Prices come from pricingService when it knows the model and otherwise from
 * the static MODEL_PRICING table (per-million-token rates).
 */
class CostCalculator {
  /**
   * Calculate the cost of a single request.
   *
   * When `usage.cache_creation` is an object, or the model id contains
   * `[1m]`, the whole calculation is delegated to
   * `pricingService.calculateCost` and its result is reshaped into this
   * class's format. Otherwise per-token prices from
   * `pricingService.getModelPricing` are used, falling back to the static
   * table when the service returns nothing.
   *
   * @param {Object} usage - Usage with `input_tokens`, `output_tokens`,
   *   `cache_creation_input_tokens`, `cache_read_input_tokens` and optionally
   *   a `cache_creation` object. A null/undefined value is treated as empty.
   * @param {string} [model='unknown'] - Model id.
   * @returns {Object} `{ model, pricing, usingDynamicPricing, usage, costs,
   *   formatted, debug }`; the delegated path also sets `isLongContextRequest`.
   */
  static calculateCost(usage, model = 'unknown') {
    // The default parameter only covers `undefined`; normalise null and
    // non-string values so the substring checks below cannot throw.
    const safeUsage = usage || {}
    const modelName = typeof model === 'string' ? model : ''
    const isLongContextModel = modelName.includes('[1m]')

    const tokens = readTokenCounts(safeUsage)
    const usageSummary = {
      ...tokens,
      totalTokens:
        tokens.inputTokens +
        tokens.outputTokens +
        tokens.cacheCreateTokens +
        tokens.cacheReadTokens
    }

    const formatBreakdown = (costs) => ({
      input: this.formatCost(costs.input),
      output: this.formatCost(costs.output),
      cacheWrite: this.formatCost(costs.cacheWrite),
      cacheRead: this.formatCost(costs.cacheRead),
      total: this.formatCost(costs.total)
    })

    const hasDetailedCacheCreation =
      Boolean(safeUsage.cache_creation) && typeof safeUsage.cache_creation === 'object'

    if (hasDetailedCacheCreation || isLongContextModel) {
      const result = pricingService.calculateCost(safeUsage, model)

      // A price may be absent from the service result (see
      // hasCacheCreatePrice below); toPerMillion reports it as 0, not NaN.
      const rates = result.pricing || {}
      const cacheWritePrice = toPerMillion(rates.cacheCreate)
      const costs = {
        input: result.inputCost,
        output: result.outputCost,
        cacheWrite: result.cacheCreateCost,
        cacheRead: result.cacheReadCost,
        total: result.totalCost
      }

      return {
        model,
        pricing: {
          input: toPerMillion(rates.input),
          output: toPerMillion(rates.output),
          cacheWrite: cacheWritePrice,
          cacheRead: toPerMillion(rates.cacheRead)
        },
        usingDynamicPricing: true,
        isLongContextRequest: result.isLongContextRequest || false,
        usage: usageSummary,
        costs,
        formatted: formatBreakdown(costs),
        debug: {
          isOpenAIModel: hasOpenAIName(modelName),
          hasCacheCreatePrice: !!rates.cacheCreate,
          cacheCreateTokens: tokens.cacheCreateTokens,
          cacheWritePriceUsed: cacheWritePrice,
          isLongContextModel,
          isLongContextRequest: result.isLongContextRequest || false
        }
      }
    }

    const pricingData = pricingService.getModelPricing(model)
    const usingDynamicPricing = Boolean(pricingData)

    // One flag drives both the pricing decision and the debug output, so the
    // debug section always reflects the rule that was actually applied.
    const isOpenAIModel =
      hasOpenAIName(modelName) || pricingData?.litellm_provider === 'openai'

    let pricing
    if (pricingData) {
      const inputPrice = toPerMillion(pricingData.input_cost_per_token)

      // For OpenAI models without a cache-creation price, cache-creation
      // tokens are charged at the regular input price.
      const chargeCacheWriteAtInputPrice =
        isOpenAIModel &&
        !pricingData.cache_creation_input_token_cost &&
        tokens.cacheCreateTokens > 0

      pricing = {
        input: inputPrice,
        output: toPerMillion(pricingData.output_cost_per_token),
        cacheWrite: chargeCacheWriteAtInputPrice
          ? inputPrice
          : toPerMillion(pricingData.cache_creation_input_token_cost),
        cacheRead: toPerMillion(pricingData.cache_read_input_token_cost)
      }
    } else {
      pricing = lookupStaticPricing(model)
    }

    const inputCost = (tokens.inputTokens / TOKENS_PER_MILLION) * pricing.input
    const outputCost = (tokens.outputTokens / TOKENS_PER_MILLION) * pricing.output
    const cacheWriteCost = (tokens.cacheCreateTokens / TOKENS_PER_MILLION) * pricing.cacheWrite
    const cacheReadCost = (tokens.cacheReadTokens / TOKENS_PER_MILLION) * pricing.cacheRead
    const costs = {
      input: inputCost,
      output: outputCost,
      cacheWrite: cacheWriteCost,
      cacheRead: cacheReadCost,
      total: inputCost + outputCost + cacheWriteCost + cacheReadCost
    }

    return {
      model,
      pricing,
      usingDynamicPricing,
      usage: usageSummary,
      costs,
      formatted: formatBreakdown(costs),
      debug: {
        isOpenAIModel,
        hasCacheCreatePrice: !!pricingData?.cache_creation_input_token_cost,
        cacheCreateTokens: tokens.cacheCreateTokens,
        cacheWritePriceUsed: pricing.cacheWrite
      }
    }
  }

  /**
   * Calculate the cost of aggregated usage.
   *
   * Accepts either the short field names (`inputTokens`, `outputTokens`,
   * `cacheCreateTokens`, `cacheReadTokens`) or their `total*` variants,
   * converts them to the API-style usage shape and calls `calculateCost`.
   *
   * @param {Object} aggregatedUsage - Aggregated token counters; a
   *   null/undefined value is treated as empty.
   * @param {string} [model='unknown'] - Model id.
   * @returns {Object} Same shape as `calculateCost`.
   */
  static calculateAggregatedCost(aggregatedUsage, model = 'unknown') {
    const totals = aggregatedUsage || {}
    const usage = {
      input_tokens: totals.inputTokens || totals.totalInputTokens || 0,
      output_tokens: totals.outputTokens || totals.totalOutputTokens || 0,
      cache_creation_input_tokens: totals.cacheCreateTokens || totals.totalCacheCreateTokens || 0,
      cache_read_input_tokens: totals.cacheReadTokens || totals.totalCacheReadTokens || 0
    }

    return this.calculateCost(usage, model)
  }

  /**
   * Get the static fallback pricing for a model.
   *
   * `gpt-5-codex` uses the `gpt-5` entry when it has no entry of its own and
   * a `gpt-5` entry exists. Any other model without an entry gets the
   * `unknown` rates.
   *
   * @param {string} [model='unknown'] - Model id.
   * @returns {{input: number, output: number, cacheWrite: number, cacheRead: number}}
   *   Per-million-token rates.
   */
  static getModelPricing(model = 'unknown') {
    if (model === 'gpt-5-codex' && !hasStaticPricing('gpt-5-codex') && hasStaticPricing('gpt-5')) {
      console.log(`Using gpt-5 pricing as fallback for ${model}`)
      return MODEL_PRICING['gpt-5']
    }
    return lookupStaticPricing(model)
  }

  /**
   * Get a copy of the whole static fallback table.
   *
   * Each entry is copied as well, so callers can modify the result without
   * altering the rates used by later calculations.
   *
   * @returns {Object} Map of model id to per-million-token rates.
   */
  static getAllModelPricing() {
    return Object.fromEntries(
      Object.entries(MODEL_PRICING).map(([name, rates]) => [name, { ...rates }])
    )
  }

  /**
   * Check whether the static fallback table has its own entry for a model.
   *
   * @param {string} model - Model id.
   * @returns {boolean} True when the table contains the model.
   */
  static isModelSupported(model) {
    return hasStaticPricing(model) && !!MODEL_PRICING[model]
  }

  /**
   * Format a cost as a `$`-prefixed string.
   *
   * Amounts of 1 or more use 2 decimals, amounts of 0.001 or more use 4, and
   * everything else (including negative amounts) uses `decimals`.
   * Values that are missing or not numeric are formatted as zero.
   *
   * @param {number} cost - Amount to format.
   * @param {number} [decimals=6] - Decimals used for amounts below 0.001.
   * @returns {string} Formatted amount, e.g. `$0.0123`.
   */
  static formatCost(cost, decimals = 6) {
    const parsed = Number(cost)
    const amount = Number.isNaN(parsed) ? 0 : parsed

    if (amount >= 1) {
      return `$${amount.toFixed(2)}`
    }
    if (amount >= 0.001) {
      return `$${amount.toFixed(4)}`
    }
    return `$${amount.toFixed(decimals)}`
  }

  /**
   * Estimate how much was saved by reading tokens from cache.
   *
   * Compares billing the cache-read tokens at the input rate against billing
   * them at the cache-read rate. Rates come from `getModelPricing`, i.e. the
   * static fallback table only.
   *
   * @param {Object} usage - Usage with `cache_read_input_tokens`; a
   *   null/undefined value is treated as empty.
   * @param {string} [model='unknown'] - Model id.
   * @returns {Object} `{ normalCost, cacheCost, savings, savingsPercentage,
   *   formatted }`.
   */
  static calculateCacheSavings(usage, model = 'unknown') {
    const pricing = this.getModelPricing(model)
    const cacheReadTokens = (usage || {}).cache_read_input_tokens || 0

    const normalCost = (cacheReadTokens / TOKENS_PER_MILLION) * pricing.input
    const cacheCost = (cacheReadTokens / TOKENS_PER_MILLION) * pricing.cacheRead
    const savings = normalCost - cacheCost
    // Avoid dividing by zero when there were no cache reads.
    const savingsPercentage = normalCost > 0 ? (savings / normalCost) * 100 : 0

    return {
      normalCost,
      cacheCost,
      savings,
      savingsPercentage,
      formatted: {
        normalCost: this.formatCost(normalCost),
        cacheCost: this.formatCost(cacheCost),
        savings: this.formatCost(savings),
        savingsPercentage: `${savingsPercentage.toFixed(1)}%`
      }
    }
  }
}
|
|
|
|
|
module.exports = CostCalculator |
|
|
|