W / src /dashboard /stats.js
Ac66's picture
Upload folder using huggingface_hub
2b64d42 verified
/**
* Request statistics collector with debounced JSON persistence.
*/
import { readFileSync, existsSync } from 'fs';
import { writeJsonAtomic } from '../fs-atomic.js';
import { join } from 'path';
import { config } from '../config.js';
// Location of the persisted snapshot: `stats.json` inside config.dataDir.
const STATS_FILE = join(config.dataDir, 'stats.json');
// Single mutable stats record for this module. The whole object is what gets
// written to STATS_FILE and what a previously saved file is overlaid onto at
// load time, so its property names double as the on-disk JSON schema.
const _state = {
// Epoch milliseconds; set when the module loads and again by resetStats().
startedAt: Date.now(),
// Global request counters, bumped once per recordRequest() call.
totalRequests: 0,
successCount: 0,
errorCount: 0,
modelCounts: {}, // { "gpt-4o-mini": { requests, success, errors, totalMs, recentMs: [ms, ...] } } — recentMs keeps at most 200 samples
accountCounts: {}, // { "abc123": { requests, success, errors } } — string account ids are keyed by their first 8 characters
hourlyBuckets: [], // [{ hour: "2026-04-09T07:00:00.000Z", requests, errors }] — hour comes from getHourKey(); capped at 720 entries
// v2.0.69 (#118 wnfilm) — bucket-level token totals so the dashboard
// can show fresh_input / cache_read / cache_write / output without
// having to recompute from the per-request usage stream. Keyed by
// bucket so summing across the proxy lifetime is just `totals[k]`.
// `total` is the sum of the four buckets; `requests_with_usage` counts the
// recordTokenUsage() calls that contributed a non-zero amount.
tokenTotals: {
fresh_input: 0,
cache_read: 0,
cache_write: 0,
output: 0,
total: 0,
requests_with_usage: 0,
},
// v2.0.91 — track upstream rejection/cooldown events
// (incremented by recordPolicyBlocked() / recordRateLimited()).
policyBlockedCount: 0,
rateLimitedCount: 0,
};
/**
 * True for non-null, non-array objects (the shape JSON.parse yields for `{…}`).
 * @param {unknown} value
 * @returns {boolean}
 */
function isPlainRecord(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}

/**
 * Overlay a parsed stats snapshot onto the in-memory defaults, field by field.
 *
 * A blind `Object.assign` lets a null, wrong-typed or partial field from disk
 * replace a default wholesale, after which `+=` produces NaN and `.find` /
 * `.push` throw. Here a saved value is only accepted when its type matches the
 * default it replaces:
 *   - number defaults accept finite numbers only,
 *   - array defaults accept arrays only,
 *   - object defaults accept plain objects only; `tokenTotals` is merged key
 *     by key so counters missing from the file keep their 0 default,
 *   - fields with no default in `target` are carried over unchanged.
 *
 * @param {object} target - Live state object; mutated in place.
 * @param {unknown} saved - Result of JSON.parse on the stats file.
 * @returns {void}
 */
function mergeSavedStats(target, saved) {
  if (!isPlainRecord(saved)) return;
  for (const [key, value] of Object.entries(saved)) {
    // Assigning to "__proto__" would swap the state object's prototype.
    if (key === '__proto__') continue;
    const fallback = target[key];
    if (typeof fallback === 'number') {
      if (Number.isFinite(value)) target[key] = value;
    } else if (Array.isArray(fallback)) {
      if (Array.isArray(value)) target[key] = value;
    } else if (isPlainRecord(fallback)) {
      if (!isPlainRecord(value)) continue;
      if (key === 'tokenTotals') {
        const merged = { ...fallback };
        for (const [bucket, amount] of Object.entries(value)) {
          if (Number.isFinite(amount)) merged[bucket] = amount;
        }
        target[key] = merged;
      } else {
        target[key] = value;
      }
    } else {
      // No typed default to validate against: keep the plain-overlay behavior.
      target[key] = value;
    }
  }
}

// Load persisted stats
try {
  if (existsSync(STATS_FILE)) {
    mergeSavedStats(_state, JSON.parse(readFileSync(STATS_FILE, 'utf-8')));
  }
} catch {
  // Best-effort: an unreadable or malformed file means we start from defaults.
}
// Coalesced save: handle of the pending flush timer, or null when none is armed.
let _saveTimer = null;
/**
 * Request that the current stats be persisted to STATS_FILE.
 *
 * At most one timer is pending at any time; it fires 5 seconds after the
 * first unsaved change and later calls inside that window ride along with it.
 * Re-arming the timer on every call (a pure debounce) would starve the write
 * for as long as requests keep arriving less than 5 s apart, so a busy process
 * would never persist anything.
 *
 * Write failures are deliberately swallowed: stats persistence is best-effort
 * and must not break request handling. The next change re-arms the timer.
 *
 * @returns {void}
 */
function scheduleSave() {
  if (_saveTimer !== null) return;
  _saveTimer = setTimeout(() => {
    // Clear first so a change made while (or after) writing schedules a new flush.
    _saveTimer = null;
    try {
      writeJsonAtomic(STATS_FILE, _state);
    } catch {
      // Best-effort persistence; see function comment.
    }
  }, 5000);
}
/**
 * Build the bucket key for the current hour.
 *
 * The key is rendered with toISOString(), which always prints UTC, so the
 * truncation must happen in UTC as well. Truncating with the local-time
 * setMinutes() leaves keys ending in ":30" or ":45" on hosts whose zone has a
 * fractional-hour offset (e.g. +05:30).
 *
 * @returns {string} ISO-8601 timestamp of the start of the current UTC hour,
 *   e.g. "2026-04-09T07:00:00.000Z".
 */
function getHourKey() {
  const now = new Date();
  now.setUTCMinutes(0, 0, 0);
  return now.toISOString();
}
/**
 * Record a completed request.
 *
 * Updates the global counters, the per-model counters (including a ring
 * buffer of the last 200 positive latencies used for p50/p95), the
 * per-account counters and the current hourly bucket, then schedules a save.
 *
 * @param {string} model - Model name; used as the key in `modelCounts`.
 * @param {boolean} success - Truthy when the request succeeded.
 * @param {number} durationMs - Request latency in milliseconds. Values that
 *   are not finite numbers count as 0 so they cannot turn `totalMs` into NaN.
 * @param {string|number} [accountId] - Optional account id. Strings are
 *   truncated to their first 8 characters before being used as a key.
 * @returns {void}
 */
export function recordRequest(model, success, durationMs, accountId) {
  _state.totalRequests++;
  if (success) _state.successCount++;
  else _state.errorCount++;

  const elapsed = Number(durationMs);
  const ms = Number.isFinite(elapsed) ? elapsed : 0;

  // Per-model stats (includes a small ring buffer for p50/p95 latency)
  const mc = ownCounter(_state.modelCounts, model, () => ({
    requests: 0, success: 0, errors: 0, totalMs: 0, recentMs: [],
  }));
  mc.requests++;
  if (success) mc.success++;
  else mc.errors++;
  mc.totalMs += ms;
  // Entries restored from an older snapshot may lack the ring buffer.
  if (!mc.recentMs) mc.recentMs = [];
  if (ms > 0) {
    mc.recentMs.push(ms);
    if (mc.recentMs.length > 200) mc.recentMs.shift();
  }

  // Per-account stats
  if (accountId) {
    const aid = typeof accountId === 'string' ? accountId.slice(0, 8) : String(accountId);
    const ac = ownCounter(_state.accountCounts, aid, () => ({ requests: 0, success: 0, errors: 0 }));
    ac.requests++;
    if (success) ac.success++;
    else ac.errors++;
  }

  // Hourly bucket
  const hourKey = getHourKey();
  let bucket = _state.hourlyBuckets.find((b) => b.hour === hourKey);
  if (!bucket) {
    bucket = { hour: hourKey, requests: 0, errors: 0 };
    _state.hourlyBuckets.push(bucket);
    // Keep last 30 days of hourly data (720 buckets)
    if (_state.hourlyBuckets.length > 720) _state.hourlyBuckets.shift();
  }
  bucket.requests++;
  if (!success) bucket.errors++;

  scheduleSave();
}

/**
 * Return the counter record stored under `key` as an OWN property of `table`,
 * creating it with `makeEmpty()` when it is missing or not an object.
 *
 * A plain `table[key]` lookup also sees inherited members, so keys such as
 * "__proto__", "constructor" or "toString" would resolve to Object.prototype
 * members and have the counters written onto them. defineProperty is used for
 * creation because assigning to "__proto__" would change the prototype
 * instead of adding an entry.
 *
 * @param {object} table - Dictionary of counter records.
 * @param {*} key - Property key (coerced like any property name).
 * @param {() => object} makeEmpty - Factory for a zeroed record.
 * @returns {object} The live record stored in `table`.
 */
function ownCounter(table, key, makeEmpty) {
  if (Object.prototype.hasOwnProperty.call(table, key)) {
    const existing = table[key];
    if (existing && typeof existing === 'object') return existing;
  }
  const fresh = makeEmpty();
  Object.defineProperty(table, key, {
    value: fresh,
    writable: true,
    enumerable: true,
    configurable: true,
  });
  return fresh;
}
/**
 * Pick the value at fraction `p` of an ascending-sorted array.
 *
 * The index is floor(length * p), clamped to the last element; an empty
 * array yields 0.
 *
 * @param {number[]} sortedArr - Samples, already sorted ascending.
 * @param {number} p - Fraction in [0, 1], e.g. 0.95.
 * @returns {number}
 */
function percentile(sortedArr, p) {
  const count = sortedArr.length;
  if (!count) return 0;
  const rank = Math.floor(count * p);
  const last = count - 1;
  return sortedArr[rank >= count ? last : rank];
}
/**
 * Snapshot of the collected stats for display.
 *
 * Returns a shallow copy of the state in which `modelCounts` is rebuilt:
 * each model keeps requests/success/errors/totalMs and gains `avgMs`,
 * `p50Ms` and `p95Ms` (all rounded), while the raw `recentMs` samples are
 * left out. Other nested values are shared with the live state, not cloned.
 *
 * @returns {object}
 */
export function getStats() {
  const perModel = {};
  for (const name of Object.keys(_state.modelCounts)) {
    const raw = _state.modelCounts[name];
    const { requests, success, errors, totalMs } = raw;
    // Sort a copy so the stored insertion-ordered ring buffer stays intact.
    const ordered = (raw.recentMs || []).slice();
    ordered.sort((lo, hi) => lo - hi);
    const avgMs = requests > 0 ? Math.round(totalMs / requests) : 0;
    perModel[name] = {
      requests,
      success,
      errors,
      totalMs,
      avgMs,
      p50Ms: Math.round(percentile(ordered, 0.5)),
      p95Ms: Math.round(percentile(ordered, 0.95)),
    };
  }
  return { ..._state, modelCounts: perModel };
}
/**
 * Reset all stats.
 *
 * Zeroes every counter — including the policy-blocked and rate-limited
 * counters, which would otherwise survive a reset — empties the per-model,
 * per-account and hourly collections, restarts `startedAt` at the current
 * time and schedules a save.
 *
 * @returns {void}
 */
export function resetStats() {
  _state.totalRequests = 0;
  _state.successCount = 0;
  _state.errorCount = 0;
  _state.modelCounts = {};
  _state.accountCounts = {};
  _state.hourlyBuckets = [];
  _state.tokenTotals = {
    fresh_input: 0, cache_read: 0, cache_write: 0,
    output: 0, total: 0, requests_with_usage: 0,
  };
  _state.policyBlockedCount = 0;
  _state.rateLimitedCount = 0;
  _state.startedAt = Date.now();
  scheduleSave();
}
/**
 * v2.0.69 (#118): accumulate one request's token usage into the lifetime
 * totals, split into fresh input, cache read, cache write and output, so the
 * dashboard does not have to show a single conflated prompt_tokens figure.
 *
 * Takes the OpenAI-shaped usage object produced by buildUsageBody. Each
 * bucket prefers the matching `cascade_breakdown` field when it is present
 * (not null/undefined) and otherwise derives the value from the standard
 * usage fields. Non-object input and usage with all four buckets empty are
 * ignored without scheduling a save.
 *
 * @param {object} usage
 * @returns {void}
 */
export function recordTokenUsage(usage) {
  if (!usage || typeof usage !== 'object') return;

  const {
    fresh_input_tokens: bdFresh,
    cache_read_tokens: bdCacheRead,
    cache_write_tokens: bdCacheWrite,
    output_tokens: bdOutput,
  } = usage.cascade_breakdown || {};

  // Cached-prompt count from the standard fields, shared by two fallbacks.
  const cachedFallback =
    usage.prompt_tokens_details?.cached_tokens || usage.cache_read_input_tokens || 0;

  const freshIn = bdFresh ?? Math.max(0, (usage.prompt_tokens || 0) - cachedFallback);
  const cacheRead = bdCacheRead ?? cachedFallback;
  const cacheWrite = bdCacheWrite ?? (usage.cache_creation_input_tokens || 0);
  const generated = bdOutput ?? (usage.completion_tokens || usage.output_tokens || 0);

  if (!(freshIn || cacheRead || cacheWrite || generated)) return;

  // State restored from a snapshot that predates token tracking has no totals.
  const totals = _state.tokenTotals || (_state.tokenTotals = {
    fresh_input: 0,
    cache_read: 0,
    cache_write: 0,
    output: 0,
    total: 0,
    requests_with_usage: 0,
  });
  totals.fresh_input += freshIn;
  totals.cache_read += cacheRead;
  totals.cache_write += cacheWrite;
  totals.output += generated;
  totals.total += freshIn + cacheRead + cacheWrite + generated;
  totals.requests_with_usage += 1;

  scheduleSave();
}
/**
 * Count one policy-blocked event and schedule a save.
 * A missing or falsy counter (e.g. state restored from an older snapshot)
 * is treated as 0.
 *
 * @returns {void}
 */
export function recordPolicyBlocked() {
  const previous = _state.policyBlockedCount || 0;
  _state.policyBlockedCount = previous + 1;
  scheduleSave();
}
/**
 * Count one rate-limited event and schedule a save.
 * A missing or falsy counter (e.g. state restored from an older snapshot)
 * is treated as 0.
 *
 * @returns {void}
 */
export function recordRateLimited() {
  const previous = _state.rateLimitedCount || 0;
  _state.rateLimitedCount = previous + 1;
  scheduleSave();
}