Spaces:

Calmlo
/

fal-openai-proxy

Paused

App Files Files Community

Calmlo committed on Apr 12, 2025

Commit

5838dda

verified ·

1 Parent(s): e706cfb

Update server.js

Browse files

Files changed (1) hide show

server.js +171 -153

server.js CHANGED Viewed

@@ -118,7 +118,7 @@ app.get('/v1/models', (req, res) => {
 function convertMessagesToFalPrompt(messages) {
     let fixed_system_prompt_content = "";
     const conversation_message_blocks = [];
-    console.log(`Original messages count: ${messages.length}`);
     // 1. 分离 System 消息，格式化 User/Assistant 消息
     for (const message of messages) {
@@ -145,7 +145,6 @@ function convertMessagesToFalPrompt(messages) {
         fixed_system_prompt_content = fixed_system_prompt_content.substring(0, SYSTEM_PROMPT_LIMIT);
         console.warn(`Combined system messages truncated from ${originalLength} to ${SYSTEM_PROMPT_LIMIT}`);
     }
-    // 清理末尾可能多余的空白，以便后续判断和拼接
     fixed_system_prompt_content = fixed_system_prompt_content.trim();
@@ -155,7 +154,7 @@ function convertMessagesToFalPrompt(messages) {
          space_occupied_by_fixed_system = fixed_system_prompt_content.length + 4; // 预留 \n\n...\n\n 的长度
     }
      const remaining_system_limit = Math.max(0, SYSTEM_PROMPT_LIMIT - space_occupied_by_fixed_system);
-    console.log(`Trimmed fixed system prompt length: ${fixed_system_prompt_content.length}. Approx remaining system history limit: ${remaining_system_limit}`);
     // 4. 反向填充 User/Assistant 对话历史
@@ -166,13 +165,13 @@ function convertMessagesToFalPrompt(messages) {
     let promptFull = false;
     let systemHistoryFull = (remaining_system_limit <= 0);
-    console.log(`Processing ${conversation_message_blocks.length} user/assistant messages for recency filling.`);
     for (let i = conversation_message_blocks.length - 1; i >= 0; i--) {
         const message_block = conversation_message_blocks[i];
         const block_length = message_block.length;
         if (promptFull && systemHistoryFull) {
-            console.log(`Both prompt and system history slots full. Omitting older messages from index ${i}.`);
             break;
         }
@@ -184,7 +183,7 @@ function convertMessagesToFalPrompt(messages) {
                 continue;
             } else {
                 promptFull = true;
-                console.log(`Prompt limit (${PROMPT_LIMIT}) reached. Trying system history slot.`);
             }
         }
@@ -196,7 +195,7 @@ function convertMessagesToFalPrompt(messages) {
                  continue;
             } else {
                  systemHistoryFull = true;
-                 console.log(`System history limit (${remaining_system_limit}) reached.`);
             }
         }
     }
@@ -204,46 +203,36 @@ function convertMessagesToFalPrompt(messages) {
     // 5. *** 组合最终的 prompt 和 system_prompt (包含分隔符逻辑) ***
     const system_prompt_history_content = system_prompt_history_blocks.join('').trim();
     const final_prompt = prompt_history_blocks.join('').trim();
-    // 定义分隔符
     const SEPARATOR = "\n\n-------下面是比较早之前的对话内容-----\n\n";
     let final_system_prompt = "";
-    // 检查各部分是否有内容 (使用 trim 后的固定部分)
     const hasFixedSystem = fixed_system_prompt_content.length > 0;
     const hasSystemHistory = system_prompt_history_content.length > 0;
     if (hasFixedSystem && hasSystemHistory) {
-        // 两部分都有，用分隔符连接
         final_system_prompt = fixed_system_prompt_content + SEPARATOR + system_prompt_history_content;
-        console.log("Combining fixed system prompt and history with separator.");
     } else if (hasFixedSystem) {
-        // 只有固定部分
         final_system_prompt = fixed_system_prompt_content;
-        console.log("Using only fixed system prompt.");
     } else if (hasSystemHistory) {
-        // 只有历史部分 (固定部分为空)
         final_system_prompt = system_prompt_history_content;
-        console.log("Using only history in system prompt slot.");
     }
-    // 如果两部分都为空，final_system_prompt 保持空字符串 ""
-    // 6. 返回结果
     const result = {
-        system_prompt: final_system_prompt, // 最终结果不需要再 trim
-        prompt: final_prompt              // final_prompt 在组合前已 trim
     };
-    console.log(`Final system_prompt length (Sys+Separator+Hist): ${result.system_prompt.length}`);
-    console.log(`Final prompt length (Hist): ${result.prompt.length}`);
     return result;
 }
 // === convertMessagesToFalPrompt 函数结束 ===
-// POST /v1/chat/completions endpoint (带 Key 重试逻辑)
 app.post('/v1/chat/completions', async (req, res) => {
     const { model, messages, stream = false, reasoning = false, ...restOpenAIParams } = req.body;
     const requestId = `req-${Date.now()}`; // Unique ID for this incoming request
@@ -261,6 +250,20 @@ app.post('/v1/chat/completions', async (req, res) => {
     let lastError = null; // Store the last error encountered during key rotation
     let success = false; // Flag to indicate if any key succeeded
     // *** 重试循环：尝试最多 falKeys.length 次 ***
     for (let attempt = 0; attempt < falKeys.length; attempt++) {
         const keyIndexToTry = (currentFalKeyIndex + attempt) % falKeys.length;
@@ -273,31 +276,6 @@ app.post('/v1/chat/completions', async (req, res) => {
                 credentials: selectedFalKey,
             });
-            // 准备 Fal Input (只需要准备一次)
-            // 注意：如果 convertMessagesToFalPrompt 很耗时，可以移到循环外
-            const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);
-            const falInput = {
-                model: model,
-                prompt: prompt,
-                ...(system_prompt && { system_prompt: system_prompt }),
-                reasoning: !!reasoning,
-            };
-            // 打印一次 Fal Input 和 Prompt 信息
-            if (attempt === 0) {
-                console.log(`[${requestId}] Fal Input:`, JSON.stringify(falInput, null, 2));
-                console.log(`[${requestId}] Forwarding request to fal-ai with system-priority + separator + recency input:`);
-                console.log(`[${requestId}] System Prompt Length:`, system_prompt?.length || 0);
-                console.log(`[${requestId}] Prompt Length:`, prompt?.length || 0);
-                // 为了简洁，默认注释掉详细内容的打印
-                // console.log(`[${requestId}] --- System Prompt Start ---`);
-                // console.log(system_prompt);
-                // console.log(`[${requestId}] --- System Prompt End ---`);
-                // console.log(`[${requestId}] --- Prompt Start ---`);
-                // console.log(prompt);
-                // console.log(`[${requestId}] --- Prompt End ---`);
-            }
             // --- 执行 Fal AI 调用 ---
             if (stream) {
                 // --- 流式处理 ---
@@ -305,138 +283,183 @@ app.post('/v1/chat/completions', async (req, res) => {
                 res.setHeader('Cache-Control', 'no-cache');
                 res.setHeader('Connection', 'keep-alive');
                 res.setHeader('Access-Control-Allow-Origin', '*');
-                // 注意：Headers 只能发送一次
                 let previousOutput = '';
-                const falStream = await fal.stream("fal-ai/any-llm", { input: falInput });
-                // 标记成功，设置下一次请求的起始 Key 索引
-                success = true;
-                currentFalKeyIndex = (keyIndexToTry + 1) % falKeys.length;
-                console.log(`[${requestId}] Key at index ${keyIndexToTry} successful. Next request starts at index ${currentFalKeyIndex}.`);
                 // 处理流
                 try {
-                     if (!res.headersSent) { // 确保 header 只发送一次
-                         res.flushHeaders();
-                         console.log(`[${requestId}] Stream headers flushed.`);
-                     }
                     for await (const event of falStream) {
-                        // ... (流处理逻辑基本不变，添加 requestId 用于日志) ...
                         const currentOutput = (event && typeof event.output === 'string') ? event.output : '';
                         const isPartial = (event && typeof event.partial === 'boolean') ? event.partial : true;
                         const errorInfo = (event && event.error) ? event.error : null;
                         if (errorInfo) {
-                            console.error(`[${requestId}] Error received in fal stream event:`, errorInfo);
-                            const errorChunk = { id: `chatcmpl-${Date.now()}-error`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: {}, finish_reason: "error", message: { role: 'assistant', content: `Fal Stream Error: ${JSON.stringify(errorInfo)}` } }] };
-                            if (!res.writableEnded) {
-                                res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
                             }
-                            break; // Stop processing on error in stream event
                         }
-                        let deltaContent = '';
-                        if (currentOutput.startsWith(previousOutput)) {
-                            deltaContent = currentOutput.substring(previousOutput.length);
-                        } else if (currentOutput.length > 0) {
-                            console.warn(`[${requestId}] Fal stream output mismatch detected. Sending full current output as delta.`, { previousLength: previousOutput.length, currentLength: currentOutput.length });
-                            deltaContent = currentOutput;
-                            previousOutput = ''; // Reset previous if mismatch
                         }
-                        previousOutput = currentOutput;
-                        // Send chunk if there's content or if it's the final chunk (isPartial is false)
-                        if (deltaContent || !isPartial) {
-                            const openAIChunk = { id: `chatcmpl-${Date.now()}`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: { content: deltaContent }, finish_reason: isPartial === false ? "stop" : null }] };
-                             if (!res.writableEnded) {
-                                res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`);
-                             }
                         }
-                     }
-                     // After the loop, ensure the [DONE] signal is sent if the stream finished normally
-                     if (!res.writableEnded) {
-                        res.write(`data: [DONE]\n\n`);
-                        res.end();
-                        console.log(`[${requestId}] Stream finished and [DONE] sent.`);
-                     }
-                } catch (streamError) {
-                    console.error(`[${requestId}] Error during fal stream processing loop:`, streamError);
-                    lastError = streamError; // Store error from stream processing
-                    try {
-                        // Don't mark success=false here, the key worked but the stream itself failed.
-                        // The outer loop should break because the response has likely been ended.
-                        if (!res.writableEnded) { // Check if we can still write to the response
-                            const errorDetails = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
-                            res.write(`data: ${JSON.stringify({ error: { message: "Stream processing error", type: "proxy_error", details: errorDetails } })}\n\n`);
-                            res.write(`data: [DONE]\n\n`); // Send DONE even after error
                             res.end();
-                        } else {
-                             console.error(`[${requestId}] Stream already ended, cannot send error message.`);
                         }
-                    } catch (finalError) {
-                         console.error(`[${requestId}] Error sending stream error message to client:`, finalError);
-                        if (!res.writableEnded) { res.end(); }
                     }
-                     // Break the outer key retry loop as the stream failed mid-way
-                     break;
                 }
-                // 如果流成功处理完，直接跳出重试循环
-                break; // Exit the key retry loop
             } else {
-                // --- 非流式处理 ---
                 console.log(`[${requestId}] Executing non-stream request with key index ${keyIndexToTry}...`);
                 const result = await fal.subscribe("fal-ai/any-llm", { input: falInput, logs: true });
-                // 检查 Fal AI 返回的业务错误 (例如输入无效)，这种错误不应该因为换 Key 而解决
                 if (result && result.error) {
                      console.error(`[${requestId}] Fal-ai returned a business error with key index ${keyIndexToTry}:`, result.error);
-                     // 将此视为最终错误，不重试其他 key
                      lastError = new Error(`Fal-ai error: ${JSON.stringify(result.error)}`);
-                     lastError.status = 500; // Or map from Fal error if possible, default 500
                      lastError.type = "fal_ai_error";
-                     break; // Exit retry loop, no point trying other keys for bad input
                 }
                 console.log(`[${requestId}] Received non-stream result from fal-ai with key index ${keyIndexToTry}`);
-                // console.log("Full non-stream result:", JSON.stringify(result, null, 2)); // Uncomment for detailed logs
-                // 标记成功，设置下一次请求的起始 Key 索引
-                success = true;
-                currentFalKeyIndex = (keyIndexToTry + 1) % falKeys.length;
-                console.log(`[${requestId}] Key at index ${keyIndexToTry} successful. Next request starts at index ${currentFalKeyIndex}.`);
                 const openAIResponse = {
                     id: `chatcmpl-${result.requestId || Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: model,
                     choices: [{ index: 0, message: { role: "assistant", content: result.output || "" }, finish_reason: "stop" }],
-                    usage: { prompt_tokens: null, completion_tokens: null, total_tokens: null }, // Fal doesn't provide token usage
-                    system_fingerprint: null, // Fal doesn't provide system fingerprint
-                    ...(result.reasoning && { fal_reasoning: result.reasoning }), // Include reasoning if present
                 };
                 res.json(openAIResponse);
-                console.log(`[${requestId}] Returned non-stream response.`);
                 break; // 成功，跳出重试循环
             }
         } catch (error) {
-            lastError = error; // Store the error from this attempt
-            const status = error?.status; // Fal client errors should have status
-            const errorMessage = error?.body?.detail || error?.message || 'Unknown error'; // Get detailed message if possible
-            console.warn(`[${requestId}] Attempt ${attempt + 1} with key index ${keyIndexToTry} failed. Status: ${status || 'N/A'}, Message: ${errorMessage}`);
-            // 检查是否是与 Key 相关的错误 (401 Unauthorized, 403 Forbidden, 429 Rate Limit)
             if (status === 401 || status === 403 || status === 429) {
-                console.log(`[${requestId}] Key-related error (${status}). Trying next key...`);
-                // 继续循环尝试下一个 Key
             } else {
-                 // 如果是其他类型的错误 (如网络问题、Fal内部服务器错误5xx、请求参数错误400等)
-                 // 通常重试其他 Key 没有意义，直接中断重试
-                 console.error(`[${requestId}] Unrecoverable error encountered. Status: ${status || 'N/A'}. Stopping key rotation for this request.`);
-                 console.error("Error details:", error); // Log the full error object for debugging
-                 break; // 跳出重试循环
             }
         }
     } // --- 结束重试循环 ---
@@ -445,36 +468,31 @@ app.post('/v1/chat/completions', async (req, res) => {
     if (!success) {
         console.error(`[${requestId}] All Fal Key attempts failed or an unrecoverable error occurred.`);
         if (!res.headersSent) {
-            const statusCode = lastError?.status || 503; // Use status from last error if available, default to 503 Service Unavailable
             const errorMessage = (lastError instanceof Error) ? lastError.message : JSON.stringify(lastError);
-            // Try to extract a more specific message if available
-            const detailMessage = lastError?.body?.detail || errorMessage;
             const errorType = lastError?.type || (statusCode === 401 || statusCode === 403 || statusCode === 429 ? "key_error" : "proxy_error");
              console.error(`[${requestId}] Sending final error response. Status: ${statusCode}, Type: ${errorType}, Message: ${detailMessage}`);
-             // 返回一个标准的 OpenAI 错误格式
              res.status(statusCode).json({
                  object: "error",
                  message: `All Fal Key attempts failed or an unrecoverable error occurred. Last error: ${detailMessage}`,
                  type: errorType,
                  param: null,
-                 code: statusCode === 429 ? "rate_limit_exceeded" : (statusCode === 401 || statusCode === 403 ? "invalid_api_key" : null)
              });
         } else if (!res.writableEnded) {
-            console.error(`[${requestId}] Headers already sent, but request failed after stream started or during processing. Ending response with error chunk.`);
-            // 尝试在流式响应中发送错误（如果可能）
-            try {
-                const errorDetails = (lastError instanceof Error) ? lastError.message : JSON.stringify(lastError);
-                const detailMessage = lastError?.body?.detail || errorDetails;
-                const errorChunk = { id: `chatcmpl-${Date.now()}-final-error`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: {}, finish_reason: "error", message: { role: 'assistant', content: `Proxy Error: All key attempts failed or stream processing error. Last error: ${detailMessage}` } }] };
-                res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
-                res.write(`data: [DONE]\n\n`);
-                res.end();
-            } catch (e) {
-                console.error(`[${requestId}] Failed to write final error to stream:`, e);
-                if (!res.writableEnded) res.end(); // Force end if possible
-            }
         } else {
             console.error(`[${requestId}] Request failed, but response stream was already fully ended. Cannot send error.`);
         }
@@ -485,7 +503,7 @@ app.post('/v1/chat/completions', async (req, res) => {
 // 启动服务器 (更新启动信息)
 app.listen(PORT, () => {
     console.log(`===================================================`);
-    console.log(` Fal OpenAI Proxy Server (Key Rotation with Retry + System Top + Separator + Recency)`); // 更新描述
     console.log(` Listening on port: ${PORT}`);
     console.log(` Loaded ${falKeys.length} Fal AI Keys for rotation.`);
     console.log(` Using Limits: System Prompt=${SYSTEM_PROMPT_LIMIT}, Prompt=${PROMPT_LIMIT}`);
@@ -497,5 +515,5 @@ app.listen(PORT, () => {
 // 根路径响应 (更新信息)
 app.get('/', (req, res) => {
-    res.send('Fal OpenAI Proxy (Key Rotation with Retry + System Top + Separator + Recency Strategy) is running.'); // 更新描述
 });

 function convertMessagesToFalPrompt(messages) {
     let fixed_system_prompt_content = "";
     const conversation_message_blocks = [];
+    // console.log(`Original messages count: ${messages.length}`); // Reduced logging verbosity
     // 1. 分离 System 消息，格式化 User/Assistant 消息
     for (const message of messages) {
         fixed_system_prompt_content = fixed_system_prompt_content.substring(0, SYSTEM_PROMPT_LIMIT);
         console.warn(`Combined system messages truncated from ${originalLength} to ${SYSTEM_PROMPT_LIMIT}`);
     }
     fixed_system_prompt_content = fixed_system_prompt_content.trim();
          space_occupied_by_fixed_system = fixed_system_prompt_content.length + 4; // 预留 \n\n...\n\n 的长度
     }
      const remaining_system_limit = Math.max(0, SYSTEM_PROMPT_LIMIT - space_occupied_by_fixed_system);
+    // console.log(`Trimmed fixed system prompt length: ${fixed_system_prompt_content.length}. Approx remaining system history limit: ${remaining_system_limit}`);
     // 4. 反向填充 User/Assistant 对话历史
     let promptFull = false;
     let systemHistoryFull = (remaining_system_limit <= 0);
+    // console.log(`Processing ${conversation_message_blocks.length} user/assistant messages for recency filling.`);
     for (let i = conversation_message_blocks.length - 1; i >= 0; i--) {
         const message_block = conversation_message_blocks[i];
         const block_length = message_block.length;
         if (promptFull && systemHistoryFull) {
+            // console.log(`Both prompt and system history slots full. Omitting older messages from index ${i}.`);
             break;
         }
                 continue;
             } else {
                 promptFull = true;
+                // console.log(`Prompt limit (${PROMPT_LIMIT}) reached. Trying system history slot.`);
             }
         }
                  continue;
             } else {
                  systemHistoryFull = true;
+                 // console.log(`System history limit (${remaining_system_limit}) reached.`);
             }
         }
     }
     // 5. *** 组合最终的 prompt 和 system_prompt (包含分隔符逻辑) ***
     const system_prompt_history_content = system_prompt_history_blocks.join('').trim();
     const final_prompt = prompt_history_blocks.join('').trim();
     const SEPARATOR = "\n\n-------下面是比较早之前的对话内容-----\n\n";
     let final_system_prompt = "";
     const hasFixedSystem = fixed_system_prompt_content.length > 0;
     const hasSystemHistory = system_prompt_history_content.length > 0;
     if (hasFixedSystem && hasSystemHistory) {
         final_system_prompt = fixed_system_prompt_content + SEPARATOR + system_prompt_history_content;
+        // console.log("Combining fixed system prompt and history with separator.");
     } else if (hasFixedSystem) {
         final_system_prompt = fixed_system_prompt_content;
+        // console.log("Using only fixed system prompt.");
     } else if (hasSystemHistory) {
         final_system_prompt = system_prompt_history_content;
+        // console.log("Using only history in system prompt slot.");
     }
     const result = {
+        system_prompt: final_system_prompt,
+        prompt: final_prompt
     };
+    // console.log(`Final system_prompt length (Sys+Separator+Hist): ${result.system_prompt.length}`);
+    // console.log(`Final prompt length (Hist): ${result.prompt.length}`);
     return result;
 }
 // === convertMessagesToFalPrompt 函数结束 ===
+// POST /v1/chat/completions endpoint (带 Key 重试逻辑 - Stream 修正版)
 app.post('/v1/chat/completions', async (req, res) => {
     const { model, messages, stream = false, reasoning = false, ...restOpenAIParams } = req.body;
     const requestId = `req-${Date.now()}`; // Unique ID for this incoming request
     let lastError = null; // Store the last error encountered during key rotation
     let success = false; // Flag to indicate if any key succeeded
+    // 准备 Fal Input (只需要准备一次)
+    const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);
+    const falInput = {
+        model: model,
+        prompt: prompt,
+        ...(system_prompt && { system_prompt: system_prompt }),
+        reasoning: !!reasoning,
+    };
+    // 打印一次 Fal Input 和 Prompt 信息
+    console.log(`[${requestId}] Fal Input (prepared once):`, JSON.stringify(falInput, null, 2));
+    console.log(`[${requestId}] System Prompt Length:`, system_prompt?.length || 0);
+    console.log(`[${requestId}] Prompt Length:`, prompt?.length || 0);
     // *** 重试循环：尝试最多 falKeys.length 次 ***
     for (let attempt = 0; attempt < falKeys.length; attempt++) {
         const keyIndexToTry = (currentFalKeyIndex + attempt) % falKeys.length;
                 credentials: selectedFalKey,
             });
             // --- 执行 Fal AI 调用 ---
             if (stream) {
                 // --- 流式处理 ---
                 res.setHeader('Cache-Control', 'no-cache');
                 res.setHeader('Connection', 'keep-alive');
                 res.setHeader('Access-Control-Allow-Origin', '*');
+                // !! 不要在这里 flushHeaders !!
                 let previousOutput = '';
+                let firstEventProcessed = false;
+                let streamFailedMidway = false; // Flag for errors after successful start
+                let keyConfirmedWorking = false; // Flag if key actually produced data
+                const falStream = await fal.stream("fal-ai/any-llm", { input: falInput });
                 // 处理流
                 try {
                     for await (const event of falStream) {
                         const currentOutput = (event && typeof event.output === 'string') ? event.output : '';
                         const isPartial = (event && typeof event.partial === 'boolean') ? event.partial : true;
                         const errorInfo = (event && event.error) ? event.error : null;
+                        const eventStatus = errorInfo?.status; // Check status within error object if present
+                        // --- 检查事件错误 ---
                         if (errorInfo) {
+                            console.warn(`[${requestId}] Error received in fal stream event (Key Index ${keyIndexToTry}):`, errorInfo);
+                            lastError = errorInfo; // Store the error
+                            // 如果是第一次事件且是 Key 相关错误 (401/403/429)，则中断此 key 的尝试
+                            if (!firstEventProcessed && (eventStatus === 401 || eventStatus === 403 || eventStatus === 429)) {
+                                console.warn(`[${requestId}] Key-related error (${eventStatus}) on first stream event for key index ${keyIndexToTry}. Aborting this attempt.`);
+                                // 不需要发送响应，直接跳出内部循环，让外部循环尝试下一个 key
+                                break; // Exit the inner `for await...of` loop
+                            } else {
+                                // 如果是其他错误，或者非第一次事件的错误，则认为是流处理失败
+                                console.error(`[${requestId}] Unrecoverable stream error or error after stream start.`);
+                                streamFailedMidway = true; // Mark stream as failed after start
+                                if (!res.headersSent) {
+                                     // 如果还没发header，说明key可能一开始就返回错误，直接发送500
+                                     res.status(500).json({ object: "error", message: `Fal Stream Error: ${JSON.stringify(errorInfo)}`, type:"fal_stream_error"});
+                                     console.error(`[${requestId}] Headers not sent, responding with 500 JSON error.`);
+                                } else if (!res.writableEnded) {
+                                     // 如果已发header，发送错误chunk
+                                    const errorChunk = { id: `chatcmpl-${Date.now()}-error`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: {}, finish_reason: "error", message: { role: 'assistant', content: `Fal Stream Error: ${JSON.stringify(errorInfo)}` } }] };
+                                    res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
+                                    console.error(`[${requestId}] Headers sent, sending error chunk.`);
+                                }
+                                break; // Exit the inner `for await...of` loop
                             }
                         }
+                        // --- 成功接收到第一个非错误事件 ---
+                        if (!keyConfirmedWorking && !errorInfo) {
+                             success = true; // Mark overall success *for this request*
+                             keyConfirmedWorking = true; // Mark this specific key as working
+                             currentFalKeyIndex = (keyIndexToTry + 1) % falKeys.length; // Update global index for next request
+                             console.log(`[${requestId}] Key at index ${keyIndexToTry} confirmed working. Next request starts at index ${currentFalKeyIndex}.`);
+                             if (!res.headersSent) {
+                                 res.flushHeaders();
+                                 console.log(`[${requestId}] Stream headers flushed.`);
+                             }
+                             firstEventProcessed = true;
                         }
+                        // --- 处理有效数据 ---
+                        if (!errorInfo) {
+                            let deltaContent = '';
+                            if (currentOutput.startsWith(previousOutput)) {
+                                deltaContent = currentOutput.substring(previousOutput.length);
+                            } else if (currentOutput.length > 0) {
+                                console.warn(`[${requestId}] Fal stream output mismatch detected. Sending full current output as delta.`, { previousLength: previousOutput.length, currentLength: currentOutput.length });
+                                deltaContent = currentOutput;
+                                previousOutput = ''; // Reset previous if mismatch
+                            }
+                            previousOutput = currentOutput;
+                            if (deltaContent || !isPartial) {
+                                const openAIChunk = { id: `chatcmpl-${Date.now()}`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: { content: deltaContent }, finish_reason: isPartial === false ? "stop" : null }] };
+                                 if (!res.writableEnded) {
+                                    res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`);
+                                 }
+                            }
                         }
+                    } // End `for await...of` loop
+                    // --- 循环后处理 ---
+                    if (streamFailedMidway) {
+                         // 如果是因为流中途错误跳出的，确保响应结束
+                         if (!res.writableEnded) {
+                              res.write(`data: [DONE]\n\n`); // Send DONE even after error as per OpenAI spec
+                              res.end();
+                              console.log(`[${requestId}] Stream ended with [DONE] after mid-stream error.`);
+                         }
+                         break; // Exit the outer key retry loop because the stream failed *after* starting
+                    } else if (keyConfirmedWorking) {
+                         // 如果 Key 正常工作且循环正常结束 (没有 break)
+                         if (!res.writableEnded) {
+                            res.write(`data: [DONE]\n\n`);
                             res.end();
+                            console.log(`[${requestId}] Stream finished normally and [DONE] sent.`);
+                         }
+                         break; // Exit the outer key retry loop because we succeeded
+                    }
+                    // If loop finished without confirming the key worked and without mid-stream error (e.g., first event was key error)
+                    // let the outer loop continue to the next key.
+                } catch (streamProcessingError) {
+                    // This catches errors in the stream processing *logic* itself, less likely
+                    console.error(`[${requestId}] Error during fal stream processing loop logic:`, streamProcessingError);
+                    lastError = streamProcessingError;
+                    if (!res.headersSent) {
+                        res.status(500).json({ object: "error", message: `Proxy Stream Processing Error: ${streamProcessingError.message}`, type:"proxy_internal_error"});
+                        console.error(`[${requestId}] Headers not sent, responding with 500 JSON error for stream logic failure.`);
+                    } else if (!res.writableEnded) {
+                        try {
+                             res.write(`data: ${JSON.stringify({ error: { message: "Proxy Stream processing error", type: "proxy_internal_error", details: streamProcessingError.message } })}\n\n`);
+                             res.write(`data: [DONE]\n\n`);
+                             res.end();
+                             console.error(`[${requestId}] Headers sent, sending error chunk for stream logic failure.`);
+                        } catch (finalError) {
+                             console.error(`[${requestId}] Error sending final error message to client:`, finalError);
+                             if (!res.writableEnded) { res.end(); }
                         }
                     }
+                    break; // Exit the outer key retry loop
                 }
+                // If we reached here and `success` is true, it means the stream finished successfully.
+                if (success) {
+                    break; // Exit the outer key retry loop
+                }
+                // Otherwise, the stream ended because the first event was a key error, continue the outer loop.
             } else {
+                // --- 非流式处理 (基本不变) ---
                 console.log(`[${requestId}] Executing non-stream request with key index ${keyIndexToTry}...`);
                 const result = await fal.subscribe("fal-ai/any-llm", { input: falInput, logs: true });
                 if (result && result.error) {
                      console.error(`[${requestId}] Fal-ai returned a business error with key index ${keyIndexToTry}:`, result.error);
                      lastError = new Error(`Fal-ai error: ${JSON.stringify(result.error)}`);
+                     lastError.status = result.status || 500; // Use status from error if available
                      lastError.type = "fal_ai_error";
+                     // Business errors (e.g., bad input) usually shouldn't be retried with other keys
+                     break; // Exit retry loop
                 }
                 console.log(`[${requestId}] Received non-stream result from fal-ai with key index ${keyIndexToTry}`);
+                success = true; // Mark overall success
+                currentFalKeyIndex = (keyIndexToTry + 1) % falKeys.length; // Update global index
+                console.log(`[${requestId}] Key at index ${keyIndexToTry} successful (non-stream). Next request starts at index ${currentFalKeyIndex}.`);
                 const openAIResponse = {
                     id: `chatcmpl-${result.requestId || Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: model,
                     choices: [{ index: 0, message: { role: "assistant", content: result.output || "" }, finish_reason: "stop" }],
+                    usage: { prompt_tokens: null, completion_tokens: null, total_tokens: null },
+                    system_fingerprint: null,
+                    ...(result.reasoning && { fal_reasoning: result.reasoning }),
                 };
                 res.json(openAIResponse);
                 break; // 成功，跳出重试循环
             }
         } catch (error) {
+            // This outer catch handles errors from fal.config, fal.stream setup (before first event), fal.subscribe setup
+            lastError = error;
+            const status = error?.status;
+            const errorMessage = error?.body?.detail || error?.message || 'Unknown setup error';
+            console.warn(`[${requestId}] Attempt ${attempt + 1} with key index ${keyIndexToTry} failed during setup. Status: ${status || 'N/A'}, Message: ${errorMessage}`);
+            console.error("Setup Error details:", error); // Log full error
+            // Check for key-related errors during setup
             if (status === 401 || status === 403 || status === 429) {
+                console.log(`[${requestId}] Key-related setup error (${status}). Trying next key...`);
+                // Continue the outer loop
             } else {
+                 // Unrecoverable setup error (e.g., network, internal fal error)
+                 console.error(`[${requestId}] Unrecoverable setup error encountered. Status: ${status || 'N/A'}. Stopping key rotation.`);
+                 break; // Exit the outer key retry loop
             }
         }
     } // --- 结束重试循环 ---
     if (!success) {
         console.error(`[${requestId}] All Fal Key attempts failed or an unrecoverable error occurred.`);
         if (!res.headersSent) {
+            const statusCode = lastError?.status || 503; // Use status from last error (could be from setup or first stream event), default 503
             const errorMessage = (lastError instanceof Error) ? lastError.message : JSON.stringify(lastError);
+            const detailMessage = lastError?.body?.detail || errorMessage; // Prefer detailed message
             const errorType = lastError?.type || (statusCode === 401 || statusCode === 403 || statusCode === 429 ? "key_error" : "proxy_error");
              console.error(`[${requestId}] Sending final error response. Status: ${statusCode}, Type: ${errorType}, Message: ${detailMessage}`);
              res.status(statusCode).json({
                  object: "error",
                  message: `All Fal Key attempts failed or an unrecoverable error occurred. Last error: ${detailMessage}`,
                  type: errorType,
                  param: null,
+                 code: statusCode === 429 ? "rate_limit_exceeded" : (statusCode === 401 || statusCode === 403 ? "invalid_api_key" : "service_unavailable")
              });
         } else if (!res.writableEnded) {
+            // This case should be less likely now as stream errors are handled inside the loop
+            console.error(`[${requestId}] Headers potentially sent, but request failed. Attempting to end stream.`);
+             try {
+                 // Don't send another error chunk if one might have been sent already
+                 res.write(`data: [DONE]\n\n`);
+                 res.end();
+             } catch (e) {
+                 console.error(`[${requestId}] Failed to write final [DONE] to stream:`, e);
+                 if (!res.writableEnded) res.end();
+             }
         } else {
             console.error(`[${requestId}] Request failed, but response stream was already fully ended. Cannot send error.`);
         }
 // Start the server (startup banner text updated)
 app.listen(PORT, () => {
     console.log(`===================================================`);
+    console.log(` Fal OpenAI Proxy Server (Key Rotation with Retry v2 + System Top + Separator + Recency)`); // 更新描述
     console.log(` Listening on port: ${PORT}`);
     console.log(` Loaded ${falKeys.length} Fal AI Keys for rotation.`);
     console.log(` Using Limits: System Prompt=${SYSTEM_PROMPT_LIMIT}, Prompt=${PROMPT_LIMIT}`);
 // Root path handler (message updated): answers GET / with a fixed status string.
 app.get('/', (req, res) => {
+    res.send('Fal OpenAI Proxy (Key Rotation with Retry v2 + System Top + Separator + Recency Strategy) is running.'); // updated description
 });