Spaces:

Ac66
/

W

Sleeping

App Files Files Community

W / src /dashboard /stats.js

Ac66's picture

Upload folder using huggingface_hub

2b64d42 verified 11 days ago

history blame contribute delete

6.11 kB

	/**
	* Request statistics collector with debounced JSON persistence.
	*/

	import { readFileSync, existsSync } from 'fs';
	import { writeJsonAtomic } from '../fs-atomic.js';
	import { join } from 'path';
	import { config } from '../config.js';

	const STATS_FILE = join(config.dataDir, 'stats.json');

	/**
	 * Live, in-memory statistics for this process. The recorder functions in
	 * this module mutate this one object, and it is the payload passed to
	 * writeJsonAtomic when a save fires.
	 */
	const _state = {
	  // Epoch milliseconds at which counting began.
	  startedAt: Date.now(),

	  // Overall request tallies.
	  totalRequests: 0,
	  successCount: 0,
	  errorCount: 0,

	  // Per-model tallies: { "gpt-4o-mini": { requests, success, errors, totalMs } }
	  modelCounts: {},
	  // Per-account tallies: { "abc123": { requests, success, errors } }
	  accountCounts: {},
	  // One entry per hour that saw traffic:
	  // [{ hour: "2026-04-09T07:00:00.000Z", requests, errors }]
	  hourlyBuckets: [],

	  // v2.0.69 (#118 wnfilm) — running token totals split by bucket, so the
	  // dashboard can show fresh_input / cache_read / cache_write / output
	  // directly instead of recomputing them from the per-request usage
	  // stream. A lifetime sum for bucket `k` is simply `tokenTotals[k]`.
	  tokenTotals: {
	    fresh_input: 0, cache_read: 0, cache_write: 0,
	    output: 0, total: 0, requests_with_usage: 0,
	  },

	  // v2.0.91 — upstream rejection / cooldown events.
	  policyBlockedCount: 0,
	  rateLimitedCount: 0,
	};

	// Load persisted stats (best effort: a missing, unreadable or malformed
	// file leaves the freshly initialised defaults untouched).

	/**
	 * Copy persisted values from `saved` onto `target`, skipping anything whose
	 * shape disagrees with the default already present on `target`.
	 *
	 * - A `saved` that is not a plain (non-array) object is ignored entirely.
	 * - A key that already exists on `target` is only overwritten by a value of
	 *   the same kind (number/number, array/array, object/object), so a stale or
	 *   hand-edited file cannot turn e.g. `hourlyBuckets` into `null`.
	 * - `tokenTotals` is merged over the existing counters instead of replacing
	 *   them, so counters absent from the file keep their zero default.
	 * - Keys unknown to `target` are copied as-is.
	 *
	 * @param {object} target - Object to update in place.
	 * @param {*} saved - Parsed JSON payload.
	 * @returns {object} `target`, for convenience.
	 */
	function mergeSavedStats(target, saved) {
	  if (saved === null || typeof saved !== 'object' || Array.isArray(saved)) return target;
	  for (const [key, value] of Object.entries(saved)) {
	    // JSON.parse yields "__proto__" as an own key; assigning it through
	    // `target[key] = …` would replace target's prototype instead.
	    if (key === '__proto__') continue;
	    const current = target[key];
	    if (current === undefined) {
	      target[key] = value;
	      continue;
	    }
	    const sameShape = value !== null
	      && typeof value === typeof current
	      && Array.isArray(value) === Array.isArray(current);
	    if (!sameShape) continue;
	    target[key] = key === 'tokenTotals' ? { ...current, ...value } : value;
	  }
	  return target;
	}

	try {
	  if (existsSync(STATS_FILE)) {
	    mergeSavedStats(_state, JSON.parse(readFileSync(STATS_FILE, 'utf-8')));
	  }
	} catch (err) {
	  // Stats are non-critical: keep running with defaults, but say why.
	  console.warn('[stats] could not load persisted stats:', err?.message ?? err);
	}

	// Debounced save
	const SAVE_DEBOUNCE_MS = 5000; // quiet period required before writing
	const SAVE_MAX_WAIT_MS = 30000; // upper bound on how long a write may be postponed
	let _saveTimer = null;
	let _saveDeadline = 0;

	/**
	 * Schedule a write of `_state` to STATS_FILE.
	 *
	 * Calls are debounced: each call pushes the write back by SAVE_DEBOUNCE_MS.
	 * The first call of a burst also fixes a deadline SAVE_MAX_WAIT_MS ahead and
	 * later calls never push the write past it, so continuous traffic (calls
	 * arriving less than SAVE_DEBOUNCE_MS apart) cannot postpone persistence
	 * indefinitely.
	 *
	 * Write failures are logged and otherwise ignored; they never propagate to
	 * the caller.
	 */
	function scheduleSave() {
	  const now = Date.now();
	  if (_saveTimer === null) {
	    _saveDeadline = now + SAVE_MAX_WAIT_MS;
	  } else {
	    clearTimeout(_saveTimer);
	  }
	  const delay = Math.max(0, Math.min(SAVE_DEBOUNCE_MS, _saveDeadline - now));
	  _saveTimer = setTimeout(() => {
	    // Mark the burst as finished so the next call starts a fresh deadline.
	    _saveTimer = null;
	    try {
	      writeJsonAtomic(STATS_FILE, _state);
	    } catch (err) {
	      console.warn('[stats] failed to persist stats:', err?.message ?? err);
	    }
	  }, delay);
	}

	/**
	 * Build the key of the hourly bucket that contains `now`.
	 *
	 * @param {Date} [now] - Instant to bucket; defaults to the current time.
	 *   The passed Date is not modified.
	 * @returns {string} ISO-8601 timestamp of the start of that UTC hour,
	 *   e.g. "2026-04-09T07:00:00.000Z".
	 */
	function getHourKey(now = new Date()) {
	  const hourStart = new Date(now.getTime());
	  // Truncate on the UTC clock: toISOString() renders UTC, so truncating the
	  // local clock would produce ":30"/":45" keys in time zones whose offset is
	  // not a whole number of hours.
	  hourStart.setUTCMinutes(0, 0, 0);
	  return hourStart.toISOString();
	}

	const MAX_RECENT_LATENCIES = 200; // per-model latency samples kept for p50/p95
	const MAX_HOURLY_BUCKETS = 720; // 30 days of hourly data

	/**
	 * Return the counters stored under `key` in `table`, creating them with
	 * `create()` on first use.
	 *
	 * Only own properties count as existing. A plain truthiness lookup would
	 * treat inherited members ("constructor", "toString", "__proto__") as
	 * existing entries and the counters would then be written onto built-in
	 * objects. defineProperty is used so that "__proto__" becomes an ordinary
	 * own key rather than replacing the table's prototype.
	 */
	function ownCounters(table, key, create) {
	  if (!Object.prototype.hasOwnProperty.call(table, key)) {
	    Object.defineProperty(table, key, {
	      value: create(),
	      writable: true,
	      enumerable: true,
	      configurable: true,
	    });
	  }
	  return table[key];
	}

	/**
	 * Record a completed request.
	 *
	 * Updates the global totals, the per-model counters (including the recent
	 * latency samples used for percentiles), the per-account counters and the
	 * current hourly bucket, then schedules a save.
	 *
	 * @param {string} model - Model name the request was served with.
	 * @param {boolean} success - Truthy when the request succeeded.
	 * @param {number} durationMs - Request duration in milliseconds. Values
	 *   that are not finite numbers are counted as 0 so they cannot turn
	 *   `totalMs` into NaN.
	 * @param {string|number} [accountId] - Account identifier. Strings are
	 *   truncated to their first 8 characters, other truthy values are
	 *   stringified; a falsy value skips the per-account counters.
	 */
	export function recordRequest(model, success, durationMs, accountId) {
	  const parsedMs = Number(durationMs);
	  const elapsedMs = Number.isFinite(parsedMs) ? parsedMs : 0;

	  _state.totalRequests++;
	  if (success) _state.successCount++;
	  else _state.errorCount++;

	  // Per-model stats (includes a small ring buffer for p50/p95 latency)
	  const mc = ownCounters(_state.modelCounts, model, () => (
	    { requests: 0, success: 0, errors: 0, totalMs: 0, recentMs: [] }
	  ));
	  mc.requests++;
	  if (success) mc.success++;
	  else mc.errors++;
	  mc.totalMs += elapsedMs;
	  // Existing entries may lack the buffer (presumably ones persisted before
	  // it was introduced — confirm against older stats files).
	  if (!mc.recentMs) mc.recentMs = [];
	  if (elapsedMs > 0) {
	    mc.recentMs.push(elapsedMs);
	    if (mc.recentMs.length > MAX_RECENT_LATENCIES) {
	      mc.recentMs.splice(0, mc.recentMs.length - MAX_RECENT_LATENCIES);
	    }
	  }

	  // Per-account stats
	  if (accountId) {
	    const aid = typeof accountId === 'string' ? accountId.slice(0, 8) : String(accountId);
	    const ac = ownCounters(_state.accountCounts, aid, () => (
	      { requests: 0, success: 0, errors: 0 }
	    ));
	    ac.requests++;
	    if (success) ac.success++;
	    else ac.errors++;
	  }

	  // Hourly bucket. Buckets are appended in time order, so the current hour
	  // is almost always the last entry; fall back to a scan otherwise.
	  const buckets = _state.hourlyBuckets;
	  const hourKey = getHourKey();
	  const newest = buckets[buckets.length - 1];
	  let bucket = newest?.hour === hourKey ? newest : buckets.find((b) => b.hour === hourKey);
	  if (!bucket) {
	    bucket = { hour: hourKey, requests: 0, errors: 0 };
	    buckets.push(bucket);
	    // Drop the oldest entries, including any surplus inherited from a
	    // previously loaded, oversized history.
	    if (buckets.length > MAX_HOURLY_BUCKETS) {
	      buckets.splice(0, buckets.length - MAX_HOURLY_BUCKETS);
	    }
	  }
	  bucket.requests++;
	  if (!success) bucket.errors++;

	  scheduleSave();
	}

	/**
	 * Pick the value at fraction `p` of an ascending-sorted array.
	 *
	 * The index is floor(length * p), clamped to the last element; an empty
	 * array yields 0.
	 */
	function percentile(sortedArr, p) {
	  const size = sortedArr.length;
	  if (!size) return 0;
	  const lastIndex = size - 1;
	  const rank = Math.floor(size * p);
	  return sortedArr[Math.min(lastIndex, rank)];
	}

	/**
	 * Get all stats, with computed latency percentiles per model.
	 *
	 * The result is a shallow copy of the internal state in which `modelCounts`
	 * is replaced by a derived view: each model keeps requests / success /
	 * errors / totalMs and gains `avgMs` (totalMs / requests, rounded),
	 * `p50Ms` and `p95Ms` (from the recent latency samples). The raw sample
	 * buffer itself is not exposed.
	 *
	 * Every other field (accountCounts, hourlyBuckets, tokenTotals, …) is the
	 * live internal object, so callers must treat it as read-only.
	 *
	 * @returns {object} Snapshot of the collected statistics.
	 */
	export function getStats() {
	  // fromEntries defines own properties, so even a model literally named
	  // "__proto__" stays an ordinary entry of the view.
	  const modelCounts = Object.fromEntries(
	    Object.entries(_state.modelCounts ?? {}).map(([model, counts]) => {
	      // Sort a copy: the stored buffer must stay in arrival order because
	      // the oldest sample is the one evicted when it is full.
	      const sorted = [...(counts.recentMs || [])].sort((a, b) => a - b);
	      return [model, {
	        requests: counts.requests,
	        success: counts.success,
	        errors: counts.errors,
	        totalMs: counts.totalMs,
	        avgMs: counts.requests > 0 ? Math.round(counts.totalMs / counts.requests) : 0,
	        p50Ms: Math.round(percentile(sorted, 0.5)),
	        p95Ms: Math.round(percentile(sorted, 0.95)),
	      }];
	    }),
	  );
	  return { ..._state, modelCounts };
	}

	/**
	 * Reset all stats.
	 *
	 * Every counter goes back to zero — including the policy-blocked and
	 * rate-limited event counts — the collections are emptied, `startedAt`
	 * is set to the current time, and a save is scheduled.
	 */
	export function resetStats() {
	  _state.totalRequests = 0;
	  _state.successCount = 0;
	  _state.errorCount = 0;
	  _state.modelCounts = {};
	  _state.accountCounts = {};
	  _state.hourlyBuckets = [];
	  _state.tokenTotals = {
	    fresh_input: 0, cache_read: 0, cache_write: 0,
	    output: 0, total: 0, requests_with_usage: 0,
	  };
	  // These were previously left untouched, so a "reset" dashboard kept
	  // showing stale upstream rejection / cooldown counts.
	  _state.policyBlockedCount = 0;
	  _state.rateLimitedCount = 0;
	  _state.startedAt = Date.now();
	  scheduleSave();
	}

	/**
	 * v2.0.69 (#118): record per-request token bucket totals so the dashboard
	 * can show real fresh-input vs cache-read vs cache-write breakdown
	 * instead of the conflated prompt_tokens number.
	 *
	 * Accepts the OpenAI-shaped usage object that buildUsageBody returns —
	 * cascade_breakdown is the authoritative source when present, otherwise
	 * fall back to standard fields:
	 *   - fresh input : prompt_tokens minus cached tokens (never below 0)
	 *   - cache read  : prompt_tokens_details.cached_tokens, else cache_read_input_tokens
	 *   - cache write : cache_creation_input_tokens
	 *   - output      : completion_tokens, else output_tokens
	 *
	 * Each value is coerced to a non-negative finite number, so numeric strings
	 * add up instead of being concatenated and junk values count as 0. A usage
	 * object with no tokens in any bucket is ignored and does not count toward
	 * `requests_with_usage`.
	 *
	 * @param {object} usage - Usage payload; non-objects are ignored.
	 */
	export function recordTokenUsage(usage) {
	  if (!usage || typeof usage !== 'object') return;

	  const toCount = (value) => {
	    const n = Number(value);
	    return Number.isFinite(n) && n > 0 ? n : 0;
	  };

	  const bd = usage.cascade_breakdown || null;
	  const cachedFallback = usage.prompt_tokens_details?.cached_tokens || usage.cache_read_input_tokens || 0;
	  const fresh = toCount(bd?.fresh_input_tokens ?? (toCount(usage.prompt_tokens) - toCount(cachedFallback)));
	  const cacheR = toCount(bd?.cache_read_tokens ?? cachedFallback);
	  const cacheW = toCount(bd?.cache_write_tokens ?? usage.cache_creation_input_tokens);
	  const output = toCount(bd?.output_tokens ?? (usage.completion_tokens || usage.output_tokens));
	  if (!fresh && !cacheR && !cacheW && !output) return;

	  if (!_state.tokenTotals) {
	    _state.tokenTotals = { fresh_input: 0, cache_read: 0, cache_write: 0, output: 0, total: 0, requests_with_usage: 0 };
	  }
	  const totals = _state.tokenTotals;
	  // `|| 0` guards against an existing totals object that lacks a counter,
	  // which would otherwise turn the sum into NaN.
	  totals.fresh_input = (totals.fresh_input || 0) + fresh;
	  totals.cache_read = (totals.cache_read || 0) + cacheR;
	  totals.cache_write = (totals.cache_write || 0) + cacheW;
	  totals.output = (totals.output || 0) + output;
	  totals.total = (totals.total || 0) + fresh + cacheR + cacheW + output;
	  totals.requests_with_usage = (totals.requests_with_usage || 0) + 1;
	  scheduleSave();
	}

	/**
	 * Count one policy-blocked event and schedule a save.
	 * A missing counter (e.g. absent from the loaded state) starts at 0.
	 */
	export function recordPolicyBlocked() {
	  _state.policyBlockedCount = (_state.policyBlockedCount || 0) + 1;
	  scheduleSave();
	}

	/**
	 * Count one rate-limited event and schedule a save.
	 * A missing counter (e.g. absent from the loaded state) starts at 0.
	 */
	export function recordRateLimited() {
	  _state.rateLimitedCount = (_state.rateLimitedCount || 0) + 1;
	  scheduleSave();
	}