File size: 6,105 Bytes
/**
 * Request statistics collector with debounced JSON persistence.
 */
import { readFileSync, existsSync } from 'fs';
import { writeJsonAtomic } from '../fs-atomic.js';
import { join } from 'path';
import { config } from '../config.js';
// Path of the persisted stats snapshot: "stats.json" inside config.dataDir.
const STATS_FILE = join(config.dataDir, 'stats.json');
// Single in-memory stats object for this module. The record*/reset functions
// mutate it in place, and scheduleSave() hands this same object to
// writeJsonAtomic(), so every field below is also what gets persisted.
const _state = {
// Date.now() at module load; replaced by the persisted value when a saved
// file is merged in, and refreshed by resetStats().
startedAt: Date.now(),
// Global request counters, bumped once per recordRequest() call.
totalRequests: 0,
successCount: 0,
errorCount: 0,
// Keyed by model name. recentMs is a bounded list of recent positive
// durations that getStats() sorts to derive p50/p95.
modelCounts: {}, // { "gpt-4o-mini": { requests, success, errors, totalMs, recentMs } }
// Keyed by account id; string ids are truncated to their first 8 characters.
accountCounts: {}, // { "abc123": { requests, success, errors } }
// One entry per hour key produced by getHourKey(); recordRequest() caps the
// list at 720 entries.
hourlyBuckets: [], // [{ hour: "2026-04-09T07:00:00.000Z", requests, errors }]
// v2.0.69 (#118 wnfilm) — bucket-level token totals so the dashboard
// can show fresh_input / cache_read / cache_write / output without
// having to recompute from the per-request usage stream. Keyed by
// bucket so summing across the proxy lifetime is just `totals[k]`.
// `total` accumulates the sum of the four buckets; `requests_with_usage`
// counts the recordTokenUsage() calls that contributed a non-zero amount.
tokenTotals: {
fresh_input: 0,
cache_read: 0,
cache_write: 0,
output: 0,
total: 0,
requests_with_usage: 0,
},
// v2.0.91 — track upstream rejection/cooldown events. Incremented by
// recordPolicyBlocked() and recordRateLimited() respectively.
policyBlockedCount: 0,
rateLimitedCount: 0,
};
// Load persisted stats (best-effort). A missing, unreadable or corrupt file
// must never stop the module from loading, so every failure is caught here.
// The failure is reported rather than swallowed: otherwise the counters
// silently restart from zero and the next debounced save overwrites the file.
try {
  if (existsSync(STATS_FILE)) {
    const saved = JSON.parse(readFileSync(STATS_FILE, 'utf-8'));
    // Merge plain objects only. Object.assign would copy index keys from an
    // array or a string payload straight into the state object.
    if (saved !== null && typeof saved === 'object' && !Array.isArray(saved)) {
      // Shallow merge: fields present in the file replace the defaults,
      // fields absent from the file keep their default value.
      Object.assign(_state, saved);
    } else {
      console.warn('[stats] ignoring persisted stats: expected a JSON object');
    }
  }
} catch (err) {
  console.warn('[stats] failed to load persisted stats:', err?.message ?? err);
}
// Debounced persistence: handle of the most recently armed save timer
// (null until the first call to scheduleSave()).
let _saveTimer = null;

/**
 * Arm (or re-arm) the save timer. Each call cancels the previously armed
 * timer, so a burst of updates results in one write 5 seconds after the
 * last call. Write failures are deliberately ignored (best-effort).
 */
function scheduleSave() {
  const persist = () => {
    try {
      writeJsonAtomic(STATS_FILE, _state);
    } catch {}
  };
  clearTimeout(_saveTimer);
  _saveTimer = setTimeout(persist, 5000);
}
/**
 * Build the hourly-bucket key for an instant: that instant truncated to the
 * start of its UTC hour, rendered as an ISO-8601 string.
 *
 * @param {Date|number} [now] - Instant to bucket (Date or epoch ms).
 *   Defaults to the current time. The argument is never mutated.
 * @returns {string} e.g. "2026-04-09T07:00:00.000Z".
 */
function getHourKey(now = new Date()) {
  const hourStart = new Date(now);
  // Truncate with the UTC setter. toISOString() renders UTC, so the
  // local-time setMinutes() would produce keys at :30 or :45 past the hour
  // on hosts whose UTC offset is not a whole number of hours.
  hourStart.setUTCMinutes(0, 0, 0);
  return hourStart.toISOString();
}
/**
 * Return the entry stored under `key` as an OWN property of `table`,
 * creating it with `makeEntry()` when it is missing or falsy.
 *
 * A plain `table[key]` lookup also resolves inherited members, so keys such
 * as "constructor" or "__proto__" would hand back `Object` /
 * `Object.prototype` and the counters would be written onto those shared
 * objects. defineProperty is used for creation because assigning to
 * `table['__proto__']` would change the prototype instead of adding a key.
 */
function ownEntry(table, key, makeEntry) {
  const isOwn = Object.prototype.hasOwnProperty.call(table, key);
  if (!isOwn || !table[key]) {
    Object.defineProperty(table, key, {
      value: makeEntry(),
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }
  return table[key];
}

/**
 * Record a completed request.
 *
 * Updates the global counters, the per-model counters (including a list of
 * at most 200 recent positive durations used for p50/p95), the per-account
 * counters when an account id is given, and the current hourly bucket, then
 * schedules a debounced save.
 *
 * @param {string} model - Model name used as the per-model key.
 * @param {boolean} success - Truthy counts as success, falsy as error.
 * @param {number} durationMs - Request duration in ms. Non-finite values
 *   (undefined, NaN, non-numbers) count as 0 so they cannot turn the running
 *   total into NaN.
 * @param {string|number} [accountId] - Optional account id. String ids are
 *   truncated to their first 8 characters; other values are stringified.
 */
export function recordRequest(model, success, durationMs, accountId) {
  const elapsedMs = Number.isFinite(durationMs) ? durationMs : 0;

  _state.totalRequests++;
  if (success) _state.successCount++;
  else _state.errorCount++;

  // Per-model stats (includes a small ring buffer for p50/p95 latency)
  const mc = ownEntry(_state.modelCounts, model, () => ({
    requests: 0,
    success: 0,
    errors: 0,
    totalMs: 0,
    recentMs: [],
  }));
  mc.requests++;
  if (success) mc.success++;
  else mc.errors++;
  mc.totalMs += elapsedMs;
  // Entries may exist without the buffer, so create it on demand.
  if (!mc.recentMs) mc.recentMs = [];
  if (elapsedMs > 0) {
    mc.recentMs.push(elapsedMs);
    if (mc.recentMs.length > 200) mc.recentMs.shift();
  }

  // Per-account stats
  if (accountId) {
    const aid = typeof accountId === 'string' ? accountId.slice(0, 8) : String(accountId);
    const ac = ownEntry(_state.accountCounts, aid, () => ({
      requests: 0,
      success: 0,
      errors: 0,
    }));
    ac.requests++;
    if (success) ac.success++;
    else ac.errors++;
  }

  // Hourly bucket
  const hourKey = getHourKey();
  let bucket = _state.hourlyBuckets.find((b) => b.hour === hourKey);
  if (!bucket) {
    bucket = { hour: hourKey, requests: 0, errors: 0 };
    _state.hourlyBuckets.push(bucket);
    // Keep last 30 days of hourly data (720 buckets)
    if (_state.hourlyBuckets.length > 720) _state.hourlyBuckets.shift();
  }
  bucket.requests++;
  if (!success) bucket.errors++;

  scheduleSave();
}
/**
 * Pick the p-th percentile from an ascending-sorted array.
 *
 * The element at index floor(length * p) is returned, clamped to the last
 * element. An empty array yields 0.
 */
function percentile(sortedArr, p) {
  const count = sortedArr.length;
  if (!count) return 0;
  const lastIdx = count - 1;
  const rank = Math.floor(count * p);
  return sortedArr[rank > lastIdx ? lastIdx : rank];
}
/**
 * Snapshot the collector state for reporting.
 *
 * The result is a shallow copy of the internal state in which `modelCounts`
 * is rebuilt: each model keeps its raw counters and gains `avgMs`, `p50Ms`
 * and `p95Ms` (rounded), while the raw `recentMs` samples are left out.
 * All other nested values are shared with the internal state, not cloned.
 */
export function getStats() {
  const snapshot = { ..._state };
  const perModel = {};
  Object.entries(_state.modelCounts).forEach(([name, raw]) => {
    // Sort a copy so the stored sample order is left untouched.
    const latencies = (raw.recentMs || []).slice();
    latencies.sort((lo, hi) => lo - hi);

    const { requests, success, errors, totalMs } = raw;
    let avgMs = 0;
    if (requests > 0) avgMs = Math.round(totalMs / requests);

    perModel[name] = {
      requests,
      success,
      errors,
      totalMs,
      avgMs,
      p50Ms: Math.round(percentile(latencies, 0.5)),
      p95Ms: Math.round(percentile(latencies, 0.95)),
    };
  });
  snapshot.modelCounts = perModel;
  return snapshot;
}
/**
 * Reset all stats.
 *
 * Zeroes every counter, empties the per-model, per-account and hourly
 * collections, replaces the token totals with a fresh zeroed object, restarts
 * `startedAt` at the current time, and schedules a debounced save.
 */
export function resetStats() {
  _state.totalRequests = 0;
  _state.successCount = 0;
  _state.errorCount = 0;
  _state.modelCounts = {};
  _state.accountCounts = {};
  _state.hourlyBuckets = [];
  _state.tokenTotals = {
    fresh_input: 0,
    cache_read: 0,
    cache_write: 0,
    output: 0,
    total: 0,
    requests_with_usage: 0,
  };
  // The rejection/cooldown counters are stats too; without clearing them a
  // reset left stale values next to otherwise zeroed counters.
  _state.policyBlockedCount = 0;
  _state.rateLimitedCount = 0;
  _state.startedAt = Date.now();
  scheduleSave();
}
/**
 * v2.0.69 (#118): fold one request's token usage into the running bucket
 * totals (fresh input / cache read / cache write / output) so the dashboard
 * can show them separately instead of a single conflated prompt_tokens figure.
 *
 * Takes the OpenAI-shaped usage object that buildUsageBody returns. Each
 * bucket is read from `usage.cascade_breakdown` when that field is present
 * there; otherwise it is derived from the standard usage fields. Non-object
 * input and usage whose four buckets are all zero/empty are ignored.
 */
export function recordTokenUsage(usage) {
  if (!usage || typeof usage !== 'object') return;

  const breakdown = usage.cascade_breakdown || null;
  // Cached-prompt count from the standard fields; feeds both the fresh-input
  // and the cache-read fallbacks.
  const cachedFallback =
    usage.prompt_tokens_details?.cached_tokens || usage.cache_read_input_tokens || 0;

  const counts = {
    fresh_input:
      breakdown?.fresh_input_tokens ??
      Math.max(0, (usage.prompt_tokens || 0) - cachedFallback),
    cache_read: breakdown?.cache_read_tokens ?? cachedFallback,
    cache_write: breakdown?.cache_write_tokens ?? (usage.cache_creation_input_tokens || 0),
    output: breakdown?.output_tokens ?? (usage.completion_tokens || usage.output_tokens || 0),
  };

  const bucketNames = ['fresh_input', 'cache_read', 'cache_write', 'output'];
  if (bucketNames.every((bucketName) => !counts[bucketName])) return;

  // The totals object can be absent from the state; recreate it zeroed.
  if (!_state.tokenTotals) {
    _state.tokenTotals = {
      fresh_input: 0,
      cache_read: 0,
      cache_write: 0,
      output: 0,
      total: 0,
      requests_with_usage: 0,
    };
  }

  const totals = _state.tokenTotals;
  for (const bucketName of bucketNames) {
    totals[bucketName] += counts[bucketName];
  }
  totals.total += counts.fresh_input + counts.cache_read + counts.cache_write + counts.output;
  totals.requests_with_usage += 1;
  scheduleSave();
}
/**
 * Count one policy-blocked event and schedule a debounced save.
 * A missing or falsy stored counter is treated as 0 before incrementing.
 */
export function recordPolicyBlocked() {
  const previous = _state.policyBlockedCount || 0;
  _state.policyBlockedCount = previous + 1;
  scheduleSave();
}
/**
 * Count one rate-limited event and schedule a debounced save.
 * A missing or falsy stored counter is treated as 0 before incrementing.
 */
export function recordRateLimited() {
  const previous = _state.rateLimitedCount || 0;
  _state.rateLimitedCount = previous + 1;
  scheduleSave();
}
|