icebear (icebear0828) and Claude Opus 4.6 committed
Commit fadda70 · unverified · 1 parent: b87bedf

fix: pass through cached_tokens from Codex API (#58)


* fix: pass through cached_tokens and reasoning_tokens from Codex API (#55)

The proxy was only extracting input_tokens/output_tokens from Codex
response usage, dropping input_tokens_details.cached_tokens and
output_tokens_details.reasoning_tokens. This caused cached token
counts to always show as 0 for downstream clients.
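
For reference, the relevant shape of the Codex response usage block; the field names come from the commit description, the values are illustrative:

  // Codex usage block as returned by the API (values illustrative):
  const codexUsage = {
    input_tokens: 1200,
    output_tokens: 85,
    input_tokens_details: { cached_tokens: 1024 },   // was dropped by the proxy
    output_tokens_details: { reasoning_tokens: 40 }, // was dropped by the proxy
  };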

Fixed across all three output formats (OpenAI, Anthropic, Gemini)
in both streaming and non-streaming modes.

Closes #55

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: pass reasoning_tokens through onUsage in Anthropic/Gemini translators

The Anthropic and Gemini streaming translators were not forwarding
reasoning_tokens via onUsage, unlike the OpenAI translator. Also
fixes the Gemini non-streaming path leaking cached_tokens: undefined
by using a conditional spread, consistent with the other translators.
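
The conditional-spread pattern referred to here, as a minimal sketch:

  // Omit the key entirely instead of emitting `cached_tokens: undefined`:
  const usage = {
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    ...(cachedTokens != null ? { cached_tokens: cachedTokens } : {}),
  };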

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: icebear0828 <icebear0828@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>

src/routes/shared/proxy-handler.ts CHANGED
@@ -35,7 +35,7 @@ export interface FormatAdapter {
     api: CodexApi,
     response: Response,
     model: string,
-    onUsage: (u: { input_tokens: number; output_tokens: number }) => void,
+    onUsage: (u: { input_tokens: number; output_tokens: number; cached_tokens?: number; reasoning_tokens?: number }) => void,
     onResponseId: (id: string) => void,
   ) => AsyncGenerator<string>;
   collectTranslator: (
@@ -44,7 +44,7 @@ export interface FormatAdapter {
     model: string,
   ) => Promise<{
     response: unknown;
-    usage: { input_tokens: number; output_tokens: number };
+    usage: { input_tokens: number; output_tokens: number; cached_tokens?: number; reasoning_tokens?: number };
     responseId: string | null;
   }>;
 }
@@ -94,7 +94,7 @@ export async function handleProxyRequest(
     JSON.stringify(req.codexRequest).slice(0, 300),
   );
 
-  let usageInfo: { input_tokens: number; output_tokens: number } | undefined;
+  let usageInfo: { input_tokens: number; output_tokens: number; cached_tokens?: number; reasoning_tokens?: number } | undefined;
 
   // P0-2: AbortController to kill curl when client disconnects
   const abortController = new AbortController();
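
For context, a minimal sketch of how the streaming path might feed this widened callback; the member name streamTranslator and the surrounding variables (adapter, responseId) are assumptions for illustration, not taken from this diff:

  // Hypothetical wiring, assuming streamTranslator is the generator member above:
  let responseId: string | null = null;
  const stream = adapter.streamTranslator(
    api,
    response,
    model,
    (u) => {
      usageInfo = u; // now carries cached_tokens / reasoning_tokens when Codex reports them
    },
    (id) => {
      responseId = id;
    },
  );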
src/translation/codex-event-extractor.ts CHANGED
@@ -14,6 +14,8 @@ import {
 export interface UsageInfo {
   input_tokens: number;
   output_tokens: number;
+  cached_tokens?: number;
+  reasoning_tokens?: number;
 }
 
 export interface FunctionCallStart {
src/translation/codex-to-anthropic.ts CHANGED
@@ -22,6 +22,8 @@ import { iterateCodexEvents, EmptyResponseError } from "./codex-event-extractor.
 export interface AnthropicUsageInfo {
   input_tokens: number;
   output_tokens: number;
+  cached_tokens?: number;
+  reasoning_tokens?: number;
 }
 
 /** Format an Anthropic SSE event with named event type */
@@ -47,6 +49,7 @@ export async function* streamCodexToAnthropic(
   const msgId = `msg_${randomUUID().replace(/-/g, "").slice(0, 24)}`;
   let outputTokens = 0;
   let inputTokens = 0;
+  let cachedTokens: number | undefined;
   let hasToolCalls = false;
   let hasContent = false;
   let contentIndex = 0;
@@ -218,7 +221,8 @@ export async function* streamCodexToAnthropic(
     if (evt.usage) {
       inputTokens = evt.usage.input_tokens;
       outputTokens = evt.usage.output_tokens;
-      onUsage?.({ input_tokens: inputTokens, output_tokens: outputTokens });
+      cachedTokens = evt.usage.cached_tokens;
+      onUsage?.({ input_tokens: inputTokens, output_tokens: outputTokens, cached_tokens: cachedTokens, reasoning_tokens: evt.usage.reasoning_tokens });
     }
     // Inject error text if stream completed with no content
     if (!hasContent) {
@@ -242,7 +246,11 @@ export async function* streamCodexToAnthropic(
   yield formatSSE("message_delta", {
     type: "message_delta",
     delta: { stop_reason: hasToolCalls ? "tool_use" : "end_turn" },
-    usage: { input_tokens: inputTokens, output_tokens: outputTokens },
+    usage: {
+      input_tokens: inputTokens,
+      output_tokens: outputTokens,
+      ...(cachedTokens != null ? { cache_read_input_tokens: cachedTokens } : {}),
+    },
   });
 
   // 5. message_stop
@@ -270,6 +278,7 @@ export async function collectCodexToAnthropicResponse(
   let fullReasoning = "";
   let inputTokens = 0;
   let outputTokens = 0;
+  let cachedTokens: number | undefined;
   let responseId: string | null = null;
 
   // Collect tool calls
@@ -285,6 +294,7 @@ export async function collectCodexToAnthropicResponse(
     if (evt.usage) {
       inputTokens = evt.usage.input_tokens;
       outputTokens = evt.usage.output_tokens;
+      cachedTokens = evt.usage.cached_tokens;
     }
     if (evt.functionCallDone) {
       let parsedInput: Record<string, unknown> = {};
@@ -323,6 +333,7 @@ export async function collectCodexToAnthropicResponse(
   const usage: AnthropicUsage = {
     input_tokens: inputTokens,
     output_tokens: outputTokens,
+    ...(cachedTokens != null ? { cache_read_input_tokens: cachedTokens } : {}),
   };
 
   return {
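
Illustrative only: with a prompt-cache hit, the final message_delta event emitted by this translator would now look like the following (token counts are made up):

  // Example message_delta payload after this change (values illustrative):
  const exampleDelta = {
    type: "message_delta",
    delta: { stop_reason: "end_turn" },
    usage: {
      input_tokens: 1200,
      output_tokens: 85,
      cache_read_input_tokens: 1024, // present only when Codex reported cached_tokens
    },
  };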
src/translation/codex-to-gemini.ts CHANGED
@@ -20,6 +20,8 @@ import { iterateCodexEvents, EmptyResponseError } from "./codex-event-extractor.
 export interface GeminiUsageInfo {
   input_tokens: number;
   output_tokens: number;
+  cached_tokens?: number;
+  reasoning_tokens?: number;
 }
 
 /**
@@ -35,6 +37,7 @@ export async function* streamCodexToGemini(
 ): AsyncGenerator<string> {
   let inputTokens = 0;
   let outputTokens = 0;
+  let cachedTokens: number | undefined;
   let hasContent = false;
 
   for await (const evt of iterateCodexEvents(codexApi, rawResponse)) {
@@ -112,7 +115,8 @@ export async function* streamCodexToGemini(
     if (evt.usage) {
       inputTokens = evt.usage.input_tokens;
       outputTokens = evt.usage.output_tokens;
-      onUsage?.({ input_tokens: inputTokens, output_tokens: outputTokens });
+      cachedTokens = evt.usage.cached_tokens;
+      onUsage?.({ input_tokens: inputTokens, output_tokens: outputTokens, cached_tokens: cachedTokens, reasoning_tokens: evt.usage.reasoning_tokens });
     }
 
     // Inject error text if stream completed with no content
@@ -148,6 +152,7 @@ export async function* streamCodexToGemini(
       promptTokenCount: inputTokens,
       candidatesTokenCount: outputTokens,
       totalTokenCount: inputTokens + outputTokens,
+      ...(cachedTokens != null ? { cachedContentTokenCount: cachedTokens } : {}),
     },
     modelVersion: model,
   };
@@ -174,6 +179,7 @@ export async function collectCodexToGeminiResponse(
   let fullText = "";
   let inputTokens = 0;
   let outputTokens = 0;
+  let cachedTokens: number | undefined;
   let responseId: string | null = null;
   const functionCallParts: GeminiPart[] = [];
 
@@ -186,6 +192,7 @@ export async function collectCodexToGeminiResponse(
     if (evt.usage) {
       inputTokens = evt.usage.input_tokens;
       outputTokens = evt.usage.output_tokens;
+      cachedTokens = evt.usage.cached_tokens;
     }
     if (evt.functionCallDone) {
       let args: Record<string, unknown> = {};
@@ -201,12 +208,14 @@ export async function collectCodexToGeminiResponse(
   const usage: GeminiUsageInfo = {
     input_tokens: inputTokens,
     output_tokens: outputTokens,
+    ...(cachedTokens != null ? { cached_tokens: cachedTokens } : {}),
   };
 
   const usageMetadata: GeminiUsageMetadata = {
     promptTokenCount: inputTokens,
     candidatesTokenCount: outputTokens,
     totalTokenCount: inputTokens + outputTokens,
+    ...(cachedTokens != null ? { cachedContentTokenCount: cachedTokens } : {}),
   };
 
   // Detect empty response (HTTP 200 but no content)
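
Illustrative only: the Gemini usageMetadata emitted for the same cache-hit scenario (token counts made up):

  // Example usageMetadata after this change (values illustrative):
  const exampleUsageMetadata = {
    promptTokenCount: 1200,
    candidatesTokenCount: 85,
    totalTokenCount: 1285,
    cachedContentTokenCount: 1024, // omitted entirely when cached_tokens is null/undefined
  };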
src/translation/codex-to-openai.ts CHANGED
@@ -240,6 +240,20 @@ export async function* streamCodexToOpenAI(
       ],
     });
   }
+  // Build usage object for final chunk (OpenAI includes usage in last streaming chunk)
+  const chunkUsage: ChatCompletionChunk["usage"] = evt.usage
+    ? {
+        prompt_tokens: evt.usage.input_tokens,
+        completion_tokens: evt.usage.output_tokens,
+        total_tokens: evt.usage.input_tokens + evt.usage.output_tokens,
+        ...(evt.usage.cached_tokens != null
+          ? { prompt_tokens_details: { cached_tokens: evt.usage.cached_tokens } }
+          : {}),
+        ...(evt.usage.reasoning_tokens != null
+          ? { completion_tokens_details: { reasoning_tokens: evt.usage.reasoning_tokens } }
+          : {}),
+      }
+    : null;
   yield formatSSE({
     id: chunkId,
     object: "chat.completion.chunk",
@@ -252,6 +266,7 @@ export async function* streamCodexToOpenAI(
         finish_reason: hasToolCalls ? "tool_calls" : "stop",
       },
     ],
+    usage: chunkUsage,
   });
   break;
 }
@@ -278,6 +293,8 @@ export async function collectCodexResponse(
   let fullReasoning = "";
   let promptTokens = 0;
   let completionTokens = 0;
+  let cachedTokens: number | undefined;
+  let reasoningTokens: number | undefined;
   let responseId: string | null = null;
 
   // Collect tool calls
@@ -293,6 +310,8 @@ export async function collectCodexResponse(
     if (evt.usage) {
       promptTokens = evt.usage.input_tokens;
       completionTokens = evt.usage.output_tokens;
+      cachedTokens = evt.usage.cached_tokens;
+      reasoningTokens = evt.usage.reasoning_tokens;
     }
     if (evt.functionCallDone) {
       toolCalls.push({
@@ -340,11 +359,19 @@ export async function collectCodexResponse(
         prompt_tokens: promptTokens,
         completion_tokens: completionTokens,
         total_tokens: promptTokens + completionTokens,
+        ...(cachedTokens != null
+          ? { prompt_tokens_details: { cached_tokens: cachedTokens } }
+          : {}),
+        ...(reasoningTokens != null
+          ? { completion_tokens_details: { reasoning_tokens: reasoningTokens } }
+          : {}),
       },
     },
     usage: {
       input_tokens: promptTokens,
      output_tokens: completionTokens,
+      cached_tokens: cachedTokens,
+      reasoning_tokens: reasoningTokens,
     },
     responseId,
   };
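
Illustrative only: the final OpenAI streaming chunk now carries usage in the shape clients expect (id and token counts made up):

  // Example final chat.completion.chunk after this change (values illustrative):
  const exampleFinalChunk = {
    id: "chatcmpl-abc123",
    object: "chat.completion.chunk",
    choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
    usage: {
      prompt_tokens: 1200,
      completion_tokens: 85,
      total_tokens: 1285,
      prompt_tokens_details: { cached_tokens: 1024 },
      completion_tokens_details: { reasoning_tokens: 40 },
    },
  };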
src/types/anthropic.ts CHANGED
@@ -133,6 +133,8 @@ export interface AnthropicContentBlock {
 export interface AnthropicUsage {
   input_tokens: number;
   output_tokens: number;
+  cache_creation_input_tokens?: number;
+  cache_read_input_tokens?: number;
 }
 
 export interface AnthropicMessagesResponse {
src/types/codex-events.ts CHANGED
@@ -14,6 +14,8 @@ export interface CodexResponseData {
   usage?: {
     input_tokens: number;
     output_tokens: number;
+    cached_tokens?: number;
+    reasoning_tokens?: number;
   };
   [key: string]: unknown;
 }
@@ -156,6 +158,16 @@ function parseResponseData(data: unknown): CodexResponseData | undefined {
       input_tokens: typeof resp.usage.input_tokens === "number" ? resp.usage.input_tokens : 0,
       output_tokens: typeof resp.usage.output_tokens === "number" ? resp.usage.output_tokens : 0,
     };
+    // Extract cached_tokens from input_tokens_details
+    const inputDetails = isRecord(resp.usage.input_tokens_details) ? resp.usage.input_tokens_details : undefined;
+    if (inputDetails && typeof inputDetails.cached_tokens === "number") {
+      result.usage.cached_tokens = inputDetails.cached_tokens;
+    }
+    // Extract reasoning_tokens from output_tokens_details
+    const outputDetails = isRecord(resp.usage.output_tokens_details) ? resp.usage.output_tokens_details : undefined;
+    if (outputDetails && typeof outputDetails.reasoning_tokens === "number") {
+      result.usage.reasoning_tokens = outputDetails.reasoning_tokens;
+    }
   }
   return result;
 }
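
A sample of the raw Codex usage block this parser now handles, and the flattened result (values illustrative):

  // Raw Codex response usage (values illustrative):
  const rawUsage = {
    input_tokens: 1200,
    output_tokens: 85,
    input_tokens_details: { cached_tokens: 1024 },
    output_tokens_details: { reasoning_tokens: 40 },
  };
  // parseResponseData flattens it to:
  // { input_tokens: 1200, output_tokens: 85, cached_tokens: 1024, reasoning_tokens: 40 }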
src/types/gemini.ts CHANGED
@@ -105,6 +105,7 @@ export interface GeminiUsageMetadata {
   promptTokenCount: number;
   candidatesTokenCount: number;
   totalTokenCount: number;
+  cachedContentTokenCount?: number;
 }
 
 export interface GeminiGenerateContentResponse {
src/types/openai.ts CHANGED
@@ -101,6 +101,12 @@ export interface ChatCompletionUsage {
   prompt_tokens: number;
   completion_tokens: number;
   total_tokens: number;
+  prompt_tokens_details?: {
+    cached_tokens?: number;
+  };
+  completion_tokens_details?: {
+    reasoning_tokens?: number;
+  };
 }
 
 export interface ChatCompletionResponse {
@@ -143,6 +149,7 @@ export interface ChatCompletionChunk {
   created: number;
   model: string;
   choices: ChatCompletionChunkChoice[];
+  usage?: ChatCompletionUsage | null;
 }
 
 // --- Error ---