magicboris committed on
Commit
90bd459
·
verified ·
1 Parent(s): b70ac17

Create deno_julep_proxy.ts

Browse files
Files changed (1) hide show
  1. deno_julep_proxy.ts +569 -0
deno_julep_proxy.ts ADDED
@@ -0,0 +1,569 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // deno_julep_proxy.ts
2
+
3
+ import { serve } from "https://deno.land/std@0.224.0/http/server.ts"; // Deno standard HTTP server
4
+
5
/** Root URL of the Julep REST API; upstream request paths are appended to it. */
const JULEP_API_BASE = "https://api.julep.ai/api";

/**
 * Model identifiers known to this proxy.
 * The list is served by the models endpoint and used to validate the `model`
 * field of chat completion requests.
 */
const HARDCODED_MODELS = [
  "mistral-large-2411",
  "o1",
  "text-embedding-3-large",
  "vertex_ai/text-embedding-004",
  "claude-3.5-haiku",
  "cerebras/llama-4-scout-17b-16e-instruct",
  "llama-3.1-8b",
  "magnum-v4-72b",
  "voyage-multilingual-2",
  "claude-3-haiku",
  "gpt-4o",
  "BAAI/bge-m3",
  "openrouter/meta-llama/llama-4-maverick",
  "openrouter/meta-llama/llama-4-scout",
  "claude-3.5-sonnet",
  "hermes-3-llama-3.1-70b",
  "claude-3.5-sonnet-20240620",
  "qwen-2.5-72b-instruct",
  "l3.3-euryale-70b",
  "gpt-4o-mini",
  "cerebras/llama-3.3-70b",
  "o1-preview",
  "gemini-1.5-pro-latest",
  "l3.1-euryale-70b",
  "claude-3-sonnet",
  "Alibaba-NLP/gte-large-en-v1.5",
  "openrouter/meta-llama/llama-4-scout:free",
  "llama-3.1-70b",
  "eva-qwen-2.5-72b",
  "claude-3.5-sonnet-20241022",
  "gemini-2.0-flash",
  "deepseek-chat",
  "o1-mini",
  "eva-llama-3.33-70b",
  "gemini-2.5-pro-preview-03-25",
  "gemini-1.5-pro",
  "gpt-4-turbo",
  "openrouter/meta-llama/llama-4-maverick:free",
  "o3-mini",
  "claude-3.7-sonnet",
  "voyage-3",
  "cerebras/llama-3.1-8b",
  "claude-3-opus",
];
23
+
24
+ // --- Helper Functions ---
25
+
26
/** Severity levels accepted by `log`; each one selects the console method of the same name. */
type LogLevel = 'debug' | 'info' | 'warn' | 'error' | 'trace';

/**
 * Renders a log payload as pretty-printed JSON without ever throwing.
 *
 * `Error` instances (at any depth) are expanded to `{ name, message, stack }`
 * because `JSON.stringify` would otherwise render them as `{}`. BigInt values
 * are converted to strings. If serialization still fails (for example on a
 * circular structure) a placeholder describing the failure is returned.
 */
function serializeLogData(data: unknown): string {
  try {
    const text = JSON.stringify(
      data,
      (_key: string, value: unknown) => {
        if (value instanceof Error) {
          return { name: value.name, message: value.message, stack: value.stack };
        }
        if (typeof value === 'bigint') {
          return value.toString();
        }
        return value;
      },
      2,
    );
    // JSON.stringify yields undefined for functions/symbols; fall back to String().
    return text ?? String(data);
  } catch (err) {
    return `[Unserializable log data: ${err instanceof Error ? err.message : String(err)}]`;
  }
}

/**
 * Writes one timestamped line to the console.
 *
 * @param level   Severity; falls back to `console.log` if the matching console method is missing.
 * @param message Human-readable message.
 * @param data    Optional payload appended as JSON. `null` and `undefined` mean "no payload";
 *                falsy values such as `0`, `''` and `false` are still printed.
 */
function log(level: LogLevel, message: string, data: unknown = null): void {
  const hasData = data !== null && data !== undefined;
  const prefix = `[${level.toUpperCase()}] ${new Date().toISOString()} - ${message}${hasData ? ':' : ''}`;
  // Resolve the console method at call time so replaced/missing methods are respected.
  const emit = typeof console[level] === 'function' ? console[level] : console.log;
  const args: unknown[] = hasData ? [prefix, serializeLogData(data)] : [prefix];
  emit.apply(console, args);
}
38
+
39
+
40
/**
 * Extracts the Julep API key from the request's `Authorization` header.
 *
 * The header must use the Bearer scheme. The scheme name is matched
 * case-insensitively (HTTP auth scheme names are case-insensitive) and
 * surrounding whitespace is ignored.
 *
 * @param req Incoming client request.
 * @returns The token that follows `Bearer`, or `null` when the header is
 *          absent, uses another scheme, or carries an empty token.
 */
function getJulepApiKey(req: Request): string | null {
  const authHeader = req.headers.get("Authorization");
  const match = authHeader ? /^Bearer\s+(.+)$/i.exec(authHeader.trim()) : null;
  const token = match?.[1]?.trim() ?? '';
  if (token.length > 0) {
    log('debug', 'Extracted Julep API Key successfully.');
    return token;
  }
  log('warn', 'Could not extract Julep API Key from Authorization header.');
  return null;
}
49
+
50
/**
 * Best-effort deletion of the temporary Julep session and agent created for a request.
 *
 * The session is deleted first and its request is awaited before the agent is
 * deleted, so the two DELETE calls can no longer race. Failures are logged and
 * never thrown; the returned promise resolves once every deletion attempt has
 * finished. Callers that do not want to wait can leave the promise un-awaited.
 *
 * @param agentId   Id of the agent to delete, or `null` if none was created.
 * @param sessionId Id of the session to delete, or `null` if none was created.
 * @param headers   Headers (including authorization) sent with each DELETE request.
 */
async function cleanupJulepResources(agentId: string | null, sessionId: string | null, headers: HeadersInit): Promise<void> {
  log('info', 'Attempting Julep resource cleanup.', { agentId, sessionId });

  if (!sessionId && !agentId) {
    log('info', 'No Julep resources to clean up.');
    return;
  }

  // Sends one DELETE request; resolves to true on a 2xx response and never rejects.
  const deleteResource = async (url: string, type: string, id: string): Promise<boolean> => {
    try {
      log('debug', `Sending DELETE request for ${type} ${id} to: ${url}`);
      const response = await fetch(url, { method: "DELETE", headers });
      // Always read the body so the response is fully consumed.
      const responseText = await response.text();
      if (!response.ok) {
        log('warn', `Cleanup failed for ${type} ${id}: ${response.status} ${response.statusText}`, { body: responseText });
        return false;
      }
      log('info', `Cleanup successful for ${type} ${id}.`, { status: response.status, body: responseText });
      return true;
    } catch (err) {
      log('error', `Cleanup error during fetch for ${type} ${id}: ${err instanceof Error ? err.message : String(err)}`, { error: err });
      return false;
    }
  };

  const outcome: { session?: boolean; agent?: boolean } = {};

  if (sessionId) {
    const sessionDeleteUrl = `${JULEP_API_BASE}/sessions/${encodeURIComponent(sessionId)}`;
    outcome.session = await deleteResource(sessionDeleteUrl, 'session', sessionId);
  }

  if (agentId) {
    if (sessionId) {
      // Short pause after the session delete before removing its agent.
      await sleep(100);
    }
    const agentDeleteUrl = `${JULEP_API_BASE}/agents/${encodeURIComponent(agentId)}`;
    outcome.agent = await deleteResource(agentDeleteUrl, 'agent', agentId);
  }

  log('info', 'Julep resource cleanup finished.', { results: outcome });
}
96
+
97
/**
 * Returns a promise that resolves (with no value) after roughly `ms` milliseconds.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(() => done(), ms);
  });
}
101
+
102
+
103
/**
 * Converts Julep tool calls into the shape used inside an OpenAI streaming
 * `delta.tool_calls` array.
 *
 * Each entry gets an `index` (the tool call's own `index` when present,
 * otherwise its array position) and `type: "function"`. `function.arguments`
 * is emitted as a JSON string: string values pass through unchanged, other
 * non-nullish values are stringified.
 *
 * @param julepToolCalls Tool calls from a Julep message, if any.
 * @returns The converted array, or `undefined` when no tool calls were given.
 */
function toolCallDeltaToOpenAI(julepToolCalls: any[] | undefined): any[] | undefined {
  if (!julepToolCalls) return undefined;
  return julepToolCalls.map((toolCall, index) => {
    const rawArguments = toolCall?.function?.arguments;
    return {
      index: toolCall?.index ?? index,
      id: toolCall?.id,
      type: "function",
      function: {
        name: toolCall?.function?.name,
        arguments: rawArguments == null || typeof rawArguments === 'string'
          ? rawArguments
          : JSON.stringify(rawArguments),
      },
    };
  });
}
116
+
117
/**
 * Converts Julep tool calls into the shape used in a complete (non-streamed)
 * OpenAI assistant message.
 *
 * Each entry is emitted with `type: "function"`. `function.arguments` is
 * emitted as a JSON string: string values pass through unchanged, other
 * non-nullish values are stringified.
 *
 * @param julepToolCalls Tool calls from a Julep message, if any.
 * @returns The converted array, or `undefined` when no tool calls were given.
 */
function toolCallMessageToOpenAI(julepToolCalls: any[] | undefined): any[] | undefined {
  if (!julepToolCalls) return undefined;
  return julepToolCalls.map((toolCall) => {
    const rawArguments = toolCall?.function?.arguments;
    return {
      id: toolCall?.id,
      type: "function",
      function: {
        name: toolCall?.function?.name,
        arguments: rawArguments == null || typeof rawArguments === 'string'
          ? rawArguments
          : JSON.stringify(rawArguments),
      },
    };
  });
}
129
+
130
+
131
/**
 * Replays an already-complete Julep chat result as a sequence of OpenAI-style
 * `chat.completion.chunk` server-sent events.
 *
 * For every choice the events are written in this order: role, tool calls,
 * content (one event per character of string content, or one event with
 * JSON-encoded non-string content), finish reason. A final `data: [DONE]`
 * event ends the stream.
 *
 * On failure the writer is aborted and the error is rethrown. The writer is
 * always closed before the function settles.
 *
 * @param julepChatData  Parsed Julep chat response; its `choices` are iterated.
 * @param requestedModel Model name reported in every chunk.
 * @param writer         Destination for the encoded events.
 * @param encoder        Encoder used to turn event text into bytes.
 */
async function simulateStream(
  julepChatData: any,
  requestedModel: string,
  writer: WritableStreamDefaultWriter<Uint8Array>,
  encoder: TextEncoder
): Promise<void> {
  log('info', 'Starting stream simulation.');
  try {
    // Fields shared by every chunk of this response.
    const envelope = {
      id: julepChatData.id || `chatcmpl-sim-${Date.now()}`,
      object: "chat.completion.chunk",
      created: Math.floor(new Date(julepChatData.created_at || Date.now()).getTime() / 1000),
      model: requestedModel,
      system_fingerprint: julepChatData.system_fingerprint || null,
    };

    // Builds one chunk object for the given choice position.
    const frame = (position: number, delta: unknown, finishReason: unknown) => ({
      ...envelope,
      choices: [{ index: position, delta: delta, finish_reason: finishReason }],
    });

    // Writes one chunk as an SSE `data:` event, then pauses briefly.
    const emit = async (chunk: unknown, pauseMs: number): Promise<void> => {
      await writer.write(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
      await sleep(pauseMs);
    };

    for (const [position, choice] of julepChatData.choices.entries()) {
      log('debug', `Simulating stream for choice index ${position}.`);
      const message = choice.message;
      const role = message?.role;
      const content = message?.content;
      // The non-streamed response carries complete tool calls; reshape them as a delta.
      const rawToolCalls = message?.tool_calls;
      const toolCallsDelta = rawToolCalls ? toolCallDeltaToOpenAI(rawToolCalls) : undefined;
      const finishReason = choice.finish_reason;

      // 1. Role
      if (role) {
        const roleChunk = frame(position, { role: role }, null);
        log('debug', 'Sending role chunk:', roleChunk);
        await emit(roleChunk, 5);
      }

      // 2. Tool calls, sent as a single delta array
      if (toolCallsDelta && toolCallsDelta.length > 0) {
        const toolChunk = frame(position, { tool_calls: toolCallsDelta }, null);
        log('debug', 'Sending tool_calls chunk:', toolChunk);
        await emit(toolChunk, 5);
      }

      // 3. Content
      if (typeof content === 'string') {
        if (content.length > 0) {
          log('debug', `Streaming content for choice ${position} (length: ${content.length})`);
          for (const glyph of content) {
            // Per-character chunks are intentionally not logged.
            await emit(frame(position, { content: glyph }, null), 2);
          }
          log('debug', `Finished streaming content for choice ${position}`);
        }
      } else if (content) {
        // Non-string content is JSON-encoded and sent in one piece.
        const structuredChunk = frame(position, { content: JSON.stringify(content) }, null);
        log('debug', 'Sending non-string content chunk:', structuredChunk);
        await emit(structuredChunk, 5);
      }

      // 4. Finish reason
      if (finishReason) {
        const finishChunk = frame(position, {}, finishReason);
        log('debug', 'Sending finish reason chunk:', finishChunk);
        await emit(finishChunk, 5);
      }
    }

    // 5. Terminator
    log('info', 'Sending [DONE] marker.');
    await writer.write(encoder.encode('data: [DONE]\n\n'));
    log('info', 'Stream simulation completed successfully.');
  } catch (error) {
    log('error', `Error during stream simulation: ${error instanceof Error ? error.message : String(error)}`, { error: error });
    try {
      await writer.abort(error);
    } catch {
      /* ignore abort error */
    }
    // Rethrow so the caller's rejection handler sees the failure.
    throw error;
  } finally {
    try {
      await writer.close();
    } catch {
      /* ignore close error */
    }
    log('debug', 'Stream writer closed.');
  }
}
222
+
223
+
224
+ // --- Endpoint Handlers ---
225
+
226
/**
 * Serves the model list in OpenAI's `/v1/models` response shape.
 *
 * A missing API key is only logged as a warning; the list is returned either way.
 *
 * @param req Incoming client request (used only for the API key check).
 * @returns A 200 JSON response of the form `{ data: [...], object: "list" }`.
 */
async function handleModels(req: Request): Promise<Response> {
  log('info', 'Handling /v1/models request.');

  if (!getJulepApiKey(req)) {
    // Not enforced: the request proceeds without a key.
    log('warn', 'Unauthorized /v1/models request (missing API key).');
  }

  const createdAt = Math.floor(Date.now() / 1000);

  // Builds the OpenAI-style descriptor for one model id.
  const describeModel = (modelId: string) => ({
    id: modelId,
    object: "model",
    created: createdAt,
    owned_by: "julep",
    permission: [
      {
        id: `modelperm-${modelId}-${createdAt}`,
        object: "model_permission",
        created: createdAt,
        allow_create_engine: false,
        allow_sampling: true,
        allow_logprobs: true,
        allow_search_indices: false,
        allow_view: true,
        allow_fine_tuning: false,
        organization: "*",
        group: null,
        is_blocking: false,
      },
    ],
    root: modelId,
    parent: null,
  });

  const payload = {
    data: HARDCODED_MODELS.map((modelId) => describeModel(modelId)),
    object: "list",
  };

  log('debug', 'Returning hardcoded models list.');
  return new Response(JSON.stringify(payload), {
    headers: { "Content-Type": "application/json" },
    status: 200,
  });
}
246
+
247
/** Renders an unknown thrown value as a message string. */
function describeThrown(e: unknown): string {
  return e instanceof Error ? e.message : String(e);
}

/**
 * Reads a successful Julep response body exactly once and parses it as JSON.
 * The raw text is logged at debug level before parsing so malformed payloads can be inspected.
 */
async function readJulepJson(response: Response, label: string): Promise<any> {
  const rawText = await response.text();
  log('debug', `${label} raw response text:`, rawText);
  return JSON.parse(rawText);
}

/** Best-effort read of a failed Julep response body; never throws. */
async function readJulepErrorText(response: Response, label: string): Promise<string> {
  try {
    return await response.text();
  } catch (e) {
    log('warn', `Could not read error text from ${label}: ${describeThrown(e)}`);
    return "[Could not read error body]";
  }
}

/**
 * Handles `POST /v1/chat/completions` by driving the Julep API.
 *
 * Flow: validate the request, create a temporary Julep agent and session,
 * send the chat request (always non-streaming), queue deletion of the
 * temporary resources, then answer in OpenAI format. When the client asked
 * for `stream: true` the complete answer is replayed as simulated SSE chunks.
 *
 * Error mapping: 401 without a Bearer token; 400 for unreadable/invalid JSON,
 * a non-object body, missing `model`/`messages`, or an unknown model; Julep
 * error statuses are forwarded; anything else becomes 500.
 *
 * @param req Incoming client request.
 * @returns The response to send to the client (CORS headers are added by the caller).
 */
async function handleChatCompletions(req: Request): Promise<Response> {
  log('info', 'Handling /v1/chat/completions request.');
  const julepApiKey = getJulepApiKey(req);
  if (!julepApiKey) {
    log('error', 'Unauthorized chat completions request: Missing Julep API Key.');
    return new Response("Unauthorized: Missing or invalid Authorization header", { status: 401 });
  }

  // One header object reused for every Julep call, including cleanup.
  const headers: Record<string, string> = {
    "Authorization": `Bearer ${julepApiKey}`,
    "Content-Type": "application/json",
  };
  log('debug', 'Julep API request headers prepared (key omitted).', { "Content-Type": headers["Content-Type"] });

  let agentId: string | null = null;
  let sessionId: string | null = null;

  // Cleanup is fire-and-forget and must run at most once per request.
  let cleanupQueued = false;
  const queueCleanup = (context: string): void => {
    if (cleanupQueued) return;
    cleanupQueued = true;
    cleanupJulepResources(agentId, sessionId, headers)
      .catch(err => log('error', `Background cleanup failed ${context}`, err));
  };

  try {
    // 1. Parse Incoming Request Body
    // The body is read as text first: a request body can be consumed only once,
    // and the raw text is needed for diagnostics when JSON parsing fails.
    log('debug', 'Parsing incoming request body...');
    let rawBody = "[Could not read request text]";
    let requestBody: any;
    try {
      rawBody = await req.text();
      requestBody = JSON.parse(rawBody);
    } catch (e) {
      log('error', `Failed to parse incoming request JSON: ${describeThrown(e)}`, { error: e });
      log('debug', 'Raw incoming request body text:', rawBody);
      return new Response(`Bad Request: Invalid JSON format. ${describeThrown(e)}`, { status: 400 });
    }
    log('info', 'Incoming request body parsed successfully.');
    log('debug', 'Parsed request body:', requestBody);

    // Valid JSON that is not an object (null, number, array, ...) cannot carry the required fields.
    if (typeof requestBody !== 'object' || requestBody === null || Array.isArray(requestBody)) {
      log('error', 'Invalid request body: expected a JSON object.', { requestBody });
      return new Response("Invalid request body. 'model' and 'messages' are required.", { status: 400 });
    }

    const { model, messages, stream, ...rest } = requestBody;
    const clientRequestedStream = stream === true;
    log('info', `Request details: model=${model}, clientRequestedStream=${clientRequestedStream}`);

    // Validate essential parameters
    if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
      log('error', 'Invalid request body: "model" and "messages" are required.', { model, messages });
      return new Response("Invalid request body. 'model' and 'messages' are required.", { status: 400 });
    }
    if (!HARDCODED_MODELS.includes(model)) {
      log('error', `Invalid model requested: ${model}`);
      return new Response(`Invalid model: ${model}. Please use one of the available models.`, { status: 400 });
    }
    log('debug', 'Request parameters validated.');

    // 2. Create Agent
    const createAgentUrl = `${JULEP_API_BASE}/agents`;
    const createAgentBody = {
      name: `temp-openai-${model}-${Date.now()}`,
      model: model,
      about: `Temporary agent for OpenAI model ${model}`,
    };
    log('info', 'Attempting to create Julep Agent.', { url: createAgentUrl, body: createAgentBody });
    const createAgentResponse = await fetch(createAgentUrl, { method: "POST", headers, body: JSON.stringify(createAgentBody) });
    log('debug', `Create Agent response status: ${createAgentResponse.status}`);

    if (!createAgentResponse.ok) {
      const errorStatus = createAgentResponse.status;
      const errorStatusText = createAgentResponse.statusText;
      const errorText = await readJulepErrorText(createAgentResponse, 'createAgentResponse');
      log('error', `Error creating Julep Agent: ${errorStatus} - ${errorText}`);
      // No resources to clean up yet.
      return new Response(`Error creating Julep Agent: ${errorStatusText} - ${errorText}`, { status: errorStatus });
    }

    try {
      const agentData = await readJulepJson(createAgentResponse, 'Create Agent');
      if (!agentData?.id) {
        throw new Error('response did not include an agent id');
      }
      log('info', 'Julep Agent created successfully.', { agentData });
      agentId = agentData.id;
    } catch (e) {
      log('error', `Failed to parse Julep Agent creation response JSON: ${describeThrown(e)}`, { error: e });
      queueCleanup('after agent parse error');
      return new Response(`Internal Server Error: Failed to parse Julep Agent response. ${describeThrown(e)}`, { status: 500 });
    }

    // 3. Create Session
    const createSessionUrl = `${JULEP_API_BASE}/sessions`;
    const createSessionBody = { agent: agentId };
    log('info', 'Attempting to create Julep Session.', { url: createSessionUrl, body: createSessionBody });
    const createSessionResponse = await fetch(createSessionUrl, { method: "POST", headers, body: JSON.stringify(createSessionBody) });
    log('debug', `Create Session response status: ${createSessionResponse.status}`);

    if (!createSessionResponse.ok) {
      const errorStatus = createSessionResponse.status;
      const errorStatusText = createSessionResponse.statusText;
      const errorText = await readJulepErrorText(createSessionResponse, 'createSessionResponse');
      log('error', `Error creating Julep Session: ${errorStatus} - ${errorText}`);
      // Only the agent exists at this point.
      queueCleanup('after session creation error');
      return new Response(`Error creating Julep Session: ${errorStatusText} - ${errorText}`, { status: errorStatus });
    }

    try {
      const sessionData = await readJulepJson(createSessionResponse, 'Create Session');
      if (!sessionData?.id) {
        throw new Error('response did not include a session id');
      }
      log('info', 'Julep Session created successfully.', { sessionData });
      sessionId = sessionData.id;
    } catch (e) {
      log('error', `Failed to parse Julep Session creation response JSON: ${describeThrown(e)}`, { error: e });
      queueCleanup('after session parse error');
      return new Response(`Internal Server Error: Failed to parse Julep Session response. ${describeThrown(e)}`, { status: 500 });
    }

    // 4. Send Chat Request to Julep (ALWAYS non-streaming)
    const chatUrl = `${JULEP_API_BASE}/sessions/${sessionId}/chat`;
    const chatBodyToJulep = {
      messages: messages.map((msg: any) => ({
        role: msg.role,
        // Strings and null/undefined pass through untouched; stringifying null would
        // send the literal text "null" (e.g. for assistant messages that only carry tool calls).
        content: msg.content == null || typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content),
        tool_calls: msg.tool_calls, // NOTE(review): assumes Julep accepts OpenAI-format tool calls here
        tool_call_id: msg.tool_call_id,
      })),
      stream: false,
      ...rest, // Other client parameters (temperature, top_p, ...) are passed through.
    };
    log('info', 'Sending Chat request to Julep (forced non-stream).', { url: chatUrl });
    log('debug', 'Julep Chat Request Body:', chatBodyToJulep);
    const chatResponse = await fetch(chatUrl, { method: "POST", headers, body: JSON.stringify(chatBodyToJulep) });
    log('debug', `Julep Chat response status: ${chatResponse.status}`);

    if (!chatResponse.ok) {
      const errorStatus = chatResponse.status;
      const errorStatusText = chatResponse.statusText;
      const errorText = await readJulepErrorText(chatResponse, 'chatResponse');
      log('error', `Error during Julep Chat Completion: ${errorStatus} - ${errorText}`);
      queueCleanup('after chat error');
      return new Response(`Error during Julep Chat Completion: ${errorStatusText} - ${errorText}`, { status: errorStatus });
    }

    let julepChatData: any;
    try {
      julepChatData = await readJulepJson(chatResponse, 'Julep Chat');
      log('info', 'Julep chat completion successful.', { responseId: julepChatData?.id });
      log('debug', 'Julep Chat response data:', julepChatData);
    } catch (e) {
      log('error', `Failed to parse Julep Chat response JSON (status was OK): ${describeThrown(e)}`, { error: e });
      queueCleanup('after chat parse error');
      return new Response(`Internal Server Error: Failed to parse Julep Chat response. ${describeThrown(e)}`, { status: 500 });
    }

    // The full answer is in memory, so the temporary resources can go before we reply.
    log('info', 'Julep chat successful, queueing cleanup.');
    queueCleanup('after successful chat');

    // 5. Format and Return Response to Client
    if (!julepChatData || !Array.isArray(julepChatData.choices)) {
      log('error', 'Julep response format unexpected. Missing "choices" array.', { julepChatData });
      return new Response('Internal Server Error: Unexpected format from Julep API.', { status: 500 });
    }

    if (clientRequestedStream) {
      log('info', 'Client requested stream, starting simulation.');
      const { readable, writable } = new TransformStream();
      const writer = writable.getWriter();
      const encoder = new TextEncoder();

      // Runs in the background while the readable side is returned to the client.
      simulateStream(julepChatData, model, writer, encoder)
        .catch(streamErr => {
          // The response has already started, so the failure can only be logged.
          log('error', 'Stream simulation background task failed.', { error: streamErr });
        });

      log('debug', 'Returning readable stream to client.');
      return new Response(readable, {
        headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive" },
        status: 200,
      });
    }

    log('info', 'Client requested non-streaming response.');
    // Fall back to "now" if Julep's created_at is absent or unparsable (avoids a NaN timestamp).
    const createdMs = new Date(julepChatData.created_at || Date.now()).getTime();
    const openaiCompletion = {
      id: julepChatData.id || `chatcmpl-${Date.now()}`,
      object: "chat.completion",
      created: Math.floor((Number.isNaN(createdMs) ? Date.now() : createdMs) / 1000),
      model: model, // Report the model the client asked for.
      choices: julepChatData.choices.map((choice: any, position: number) => ({
        index: choice?.index ?? position,
        message: {
          role: choice?.message?.role,
          content: choice?.message?.content,
          tool_calls: choice?.message?.tool_calls ? toolCallMessageToOpenAI(choice.message.tool_calls) : undefined,
        },
        finish_reason: choice?.finish_reason,
      })),
      usage: julepChatData.usage ? {
        prompt_tokens: julepChatData.usage.prompt_tokens,
        completion_tokens: julepChatData.usage.completion_tokens,
        total_tokens: julepChatData.usage.total_tokens,
      } : undefined,
      system_fingerprint: julepChatData.system_fingerprint || null,
    };
    log('debug', 'Formatted non-streaming OpenAI response:', openaiCompletion);
    log('info', 'Returning non-streaming JSON response to client.');
    return new Response(JSON.stringify(openaiCompletion), {
      headers: { "Content-Type": "application/json" }, status: 200,
    });
  } catch (error) {
    // Network failures and any other unexpected error inside the flow end up here.
    log('error', `Error in handleChatCompletions (outer catch): ${describeThrown(error)}`, { error: error, agentId, sessionId });
    queueCleanup('in outer catch block');
    return new Response(`Internal Server Error: ${describeThrown(error)}`, { status: 500 });
  }
}
497
+
498
// --- CORS Handlers ---

/**
 * CORS response headers shared by the preflight handler and regular responses.
 * The wildcard origin allows any site to call the proxy; restrict it in production.
 */
const corsHeaders = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
  "Access-Control-Allow-Headers": "Authorization, Content-Type",
};
504
+
505
/**
 * Answers an `OPTIONS` request.
 *
 * A request is treated as a CORS preflight when it carries both `Origin` and
 * `Access-Control-Request-Method`. `Access-Control-Request-Headers` is not
 * required, because browsers omit it when the actual request adds no
 * non-safelisted headers. Preflights get a 204 with the CORS headers; any
 * other `OPTIONS` request gets an empty response with an `Allow` header.
 *
 * @param request Incoming `OPTIONS` request.
 * @returns The preflight or plain `OPTIONS` response.
 */
function handleOptions(request: Request): Response {
  log('debug', 'Handling OPTIONS preflight request.');

  const isCorsPreflight =
    request.headers.has('Origin') &&
    request.headers.has('Access-Control-Request-Method');

  if (isCorsPreflight) {
    log('debug', 'Returning CORS preflight headers.');
    return new Response(null, { headers: corsHeaders, status: 204 });
  }

  // Plain OPTIONS request (not a CORS preflight).
  log('debug', 'Returning standard OPTIONS Allow header.');
  return new Response(null, { headers: { 'Allow': 'GET, POST, OPTIONS' } });
}
520
+
521
/**
 * Returns a copy of `response` that additionally carries the shared CORS headers.
 *
 * A new `Response` is built around the same body, status and status text;
 * CORS header values overwrite any same-named headers already present.
 *
 * @param response Response produced by a route handler.
 * @returns A new response with the CORS headers applied.
 */
function addCorsHeaders(response: Response): Response {
  const merged = new Headers(response.headers);
  for (const [name, value] of Object.entries(corsHeaders)) {
    merged.set(name, value);
  }

  const { body, status, statusText } = response;
  return new Response(body, { status, statusText, headers: merged });
}
537
+
538
+
539
// --- Main Deno Server Entry Point ---

log('info', 'Starting Deno server...');

// Routes every request: OPTIONS is answered directly, the two API endpoints go
// to their handlers, anything else is a 404. All non-OPTIONS responses get CORS headers.
serve(async (request: Request) => {
  const { pathname, search } = new URL(request.url);
  const method = request.method;
  log('info', `Incoming request: ${method} ${pathname}${search}`);

  // Preflight and plain OPTIONS requests are answered before routing.
  if (method === 'OPTIONS') {
    return handleOptions(request);
  }

  // Picks the handler for the path/method pair.
  const dispatch = async (): Promise<Response> => {
    if (pathname === "/v1/models" && method === "GET") {
      return await handleModels(request);
    }
    if (pathname === "/v1/chat/completions" && method === "POST") {
      return await handleChatCompletions(request);
    }
    log('warn', `Path not found: ${pathname}`);
    return new Response("Not Found", { status: 404 });
  };

  let outcome: Response;
  try {
    outcome = await dispatch();
  } catch (e) {
    // Last line of defence: a handler threw instead of returning a response.
    log('error', `Unhandled error in serve handler: ${e instanceof Error ? e.message : String(e)}`, { error: e, url: request.url, method: request.method });
    outcome = new Response(`Internal Server Error: ${e instanceof Error ? e.message : String(e)}`, { status: 500 });
  }

  return addCorsHeaders(outcome);
});