Spaces:

Ac66
/

W

Sleeping

File size: 6,105 Bytes

2b64d42

/**
 * Request statistics collector with debounced JSON persistence.
 */

import { readFileSync, existsSync } from 'fs';
import { writeJsonAtomic } from '../fs-atomic.js';
import { join } from 'path';
import { config } from '../config.js';

const STATS_FILE = join(config.dataDir, 'stats.json');

const _state = {
  startedAt: Date.now(),
  totalRequests: 0,
  successCount: 0,
  errorCount: 0,
  modelCounts: {},    // { "gpt-4o-mini": { requests, success, errors, totalMs } }
  accountCounts: {},  // { "abc123": { requests, success, errors } }
  hourlyBuckets: [],  // [{ hour: "2026-04-09T07:00:00Z", requests, errors }]
  // v2.0.69 (#118 wnfilm) — bucket-level token totals so the dashboard
  // can show fresh_input / cache_read / cache_write / output without
  // having to recompute from the per-request usage stream. Keyed by
  // bucket so summing across the proxy lifetime is just `totals[k]`.
  tokenTotals: {
    fresh_input: 0,
    cache_read: 0,
    cache_write: 0,
    output: 0,
    total: 0,
    requests_with_usage: 0,
  },
  // v2.0.91 — track upstream rejection/cooldown events
  policyBlockedCount: 0,
  rateLimitedCount: 0,
};

// Load persisted stats
try {
  if (existsSync(STATS_FILE)) {
    const saved = JSON.parse(readFileSync(STATS_FILE, 'utf-8'));
    Object.assign(_state, saved);
  }
} catch {}

// Debounced save
let _saveTimer = null;
function scheduleSave() {
  clearTimeout(_saveTimer);
  _saveTimer = setTimeout(() => {
    try {
      writeJsonAtomic(STATS_FILE, _state);
    } catch {}
  }, 5000);
}

function getHourKey() {
  const d = new Date();
  d.setMinutes(0, 0, 0);
  return d.toISOString();
}

/**
 * Record a completed request.
 */
export function recordRequest(model, success, durationMs, accountId) {
  _state.totalRequests++;
  if (success) _state.successCount++;
  else _state.errorCount++;

  // Per-model stats (includes a small ring buffer for p50/p95 latency)
  if (!_state.modelCounts[model]) {
    _state.modelCounts[model] = { requests: 0, success: 0, errors: 0, totalMs: 0, recentMs: [] };
  }
  const mc = _state.modelCounts[model];
  mc.requests++;
  if (success) mc.success++;
  else mc.errors++;
  mc.totalMs += durationMs;
  if (!mc.recentMs) mc.recentMs = [];
  if (durationMs > 0) {
    mc.recentMs.push(durationMs);
    if (mc.recentMs.length > 200) mc.recentMs.shift();
  }

  // Per-account stats
  if (accountId) {
    const aid = typeof accountId === 'string' ? accountId.slice(0, 8) : String(accountId);
    if (!_state.accountCounts[aid]) {
      _state.accountCounts[aid] = { requests: 0, success: 0, errors: 0 };
    }
    const ac = _state.accountCounts[aid];
    ac.requests++;
    if (success) ac.success++;
    else ac.errors++;
  }

  // Hourly bucket
  const hourKey = getHourKey();
  let bucket = _state.hourlyBuckets.find(b => b.hour === hourKey);
  if (!bucket) {
    bucket = { hour: hourKey, requests: 0, errors: 0 };
    _state.hourlyBuckets.push(bucket);
    // Keep last 30 days of hourly data (720 buckets)
    if (_state.hourlyBuckets.length > 720) _state.hourlyBuckets.shift();
  }
  bucket.requests++;
  if (!success) bucket.errors++;

  scheduleSave();
}

function percentile(sortedArr, p) {
  if (!sortedArr.length) return 0;
  const idx = Math.min(sortedArr.length - 1, Math.floor(sortedArr.length * p));
  return sortedArr[idx];
}

/** Get all stats, with computed latency percentiles per model. */
export function getStats() {
  const out = { ..._state };
  out.modelCounts = {};
  for (const [m, s] of Object.entries(_state.modelCounts)) {
    const sorted = (s.recentMs || []).slice().sort((a, b) => a - b);
    out.modelCounts[m] = {
      requests: s.requests,
      success: s.success,
      errors: s.errors,
      totalMs: s.totalMs,
      avgMs: s.requests > 0 ? Math.round(s.totalMs / s.requests) : 0,
      p50Ms: Math.round(percentile(sorted, 0.5)),
      p95Ms: Math.round(percentile(sorted, 0.95)),
    };
  }
  return out;
}

/** Reset all stats. */
export function resetStats() {
  _state.totalRequests = 0;
  _state.successCount = 0;
  _state.errorCount = 0;
  _state.modelCounts = {};
  _state.accountCounts = {};
  _state.hourlyBuckets = [];
  _state.tokenTotals = {
    fresh_input: 0, cache_read: 0, cache_write: 0,
    output: 0, total: 0, requests_with_usage: 0,
  };
  _state.startedAt = Date.now();
  scheduleSave();
}

/**
 * v2.0.69 (#118): record per-request token bucket totals so the dashboard
 * can show real fresh-input vs cache-read vs cache-write breakdown
 * instead of the conflated prompt_tokens number.
 *
 * Accepts the OpenAI-shaped usage object that buildUsageBody returns —
 * cascade_breakdown is the authoritative source when present, otherwise
 * fall back to standard fields.
 */
export function recordTokenUsage(usage) {
  if (!usage || typeof usage !== 'object') return;
  const bd = usage.cascade_breakdown || null;
  const fresh = bd?.fresh_input_tokens ?? Math.max(0, (usage.prompt_tokens || 0) - (usage.prompt_tokens_details?.cached_tokens || usage.cache_read_input_tokens || 0));
  const cacheR = bd?.cache_read_tokens ?? (usage.prompt_tokens_details?.cached_tokens || usage.cache_read_input_tokens || 0);
  const cacheW = bd?.cache_write_tokens ?? (usage.cache_creation_input_tokens || 0);
  const output = bd?.output_tokens ?? (usage.completion_tokens || usage.output_tokens || 0);
  if (!fresh && !cacheR && !cacheW && !output) return;
  if (!_state.tokenTotals) {
    _state.tokenTotals = { fresh_input: 0, cache_read: 0, cache_write: 0, output: 0, total: 0, requests_with_usage: 0 };
  }
  _state.tokenTotals.fresh_input += fresh;
  _state.tokenTotals.cache_read += cacheR;
  _state.tokenTotals.cache_write += cacheW;
  _state.tokenTotals.output += output;
  _state.tokenTotals.total += fresh + cacheR + cacheW + output;
  _state.tokenTotals.requests_with_usage += 1;
  scheduleSave();
}

export function recordPolicyBlocked() {
  _state.policyBlockedCount = (_state.policyBlockedCount || 0) + 1;
  scheduleSave();
}

export function recordRateLimited() {
  _state.rateLimitedCount = (_state.rateLimitedCount || 0) + 1;
  scheduleSave();
}