Spaces:

Calmlo
/

fal-openai-proxy

Paused

App Files Files Community

Calmlo committed on Apr 12, 2025

Commit

e6d9a41

verified ·

1 Parent(s): 713f2f6

Update server.js

Browse files

Files changed (1) hide show

server.js +166 -283

server.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import express from 'express';
-import { fal } from '@fal-ai/client';
 // --- Key Management Setup ---
 // Read comma-separated keys from the SINGLE environment variable FAL_KEY
@@ -29,63 +30,91 @@ if (!API_KEY) {
 }
 let currentKeyIndex = 0;
-const invalidKeys = new Set(); // Keep track of keys that failed
 console.log(`Loaded ${falKeys.length} Fal AI Key(s) from the FAL_KEY environment variable.`);
-// Function to get the next valid key in a round-robin fashion
 function getNextValidKey() {
     if (invalidKeys.size >= falKeys.length) {
         console.error("All Fal AI keys are marked as invalid.");
         return null; // No valid keys left
     }
     const initialIndex = currentKeyIndex;
-    let attempts = 0;
     while (attempts < falKeys.length) {
         const keyIndex = currentKeyIndex % falKeys.length;
         const key = falKeys[keyIndex];
-        // Move to the next index for the *next* call
         currentKeyIndex = (keyIndex + 1) % falKeys.length;
         if (!invalidKeys.has(key)) {
             // Found a valid key
             console.log(`Using Fal Key index: ${keyIndex} (from FAL_KEY list)`);
-            return { key, index: keyIndex };
         }
         attempts++;
-        // Continue loop to check the next key
     }
-    // Should not be reached if invalidKeys.size check is correct, but as a safeguard
-    console.error("Could not find a valid Fal AI key after checking all.");
     return null;
 }
-// Function to check if an error is likely related to a bad key
 // NOTE: This is a heuristic. You might need to adjust based on actual errors from Fal AI.
 function isKeyRelatedError(error) {
-    const message = error?.message?.toLowerCase() || '';
-    const status = error?.status; // Check if the error object has a status code
     // Check for specific HTTP status codes indicative of auth/permission issues
-    if (status === 401 || status === 403) {
         console.warn(`Detected potential key-related error (HTTP Status: ${status}).`);
         return true;
     }
-    // Check for common error message patterns
     if (message.includes('invalid api key') ||
         message.includes('authentication failed') ||
         message.includes('permission denied') ||
-        message.includes('quota exceeded') || // Include quota errors as key-related for rotation
         message.includes('forbidden') ||
-        message.includes('unauthorized')) { // Add 'unauthorized'
-        console.warn(`Detected potential key-related error (message: ${message})`);
         return true;
     }
     // Add more specific checks based on observed Fal AI errors if needed
     return false;
 }
 // --- End Key Management Setup ---
@@ -128,247 +157,119 @@ const SYSTEM_PROMPT_LIMIT = 4800;
 // === 限制定义结束 ===
 // 定义 fal-ai/any-llm 支持的模型列表 (unchanged)
-const FAL_SUPPORTED_MODELS = [
-    "anthropic/claude-3.7-sonnet",
-    "anthropic/claude-3.5-sonnet",
-    "anthropic/claude-3-5-haiku",
-    "anthropic/claude-3-haiku",
-    "google/gemini-pro-1.5",
-    "google/gemini-flash-1.5",
-    "google/gemini-flash-1.5-8b",
-    "google/gemini-2.0-flash-001",
-    "meta-llama/llama-3.2-1b-instruct",
-    "meta-llama/llama-3.2-3b-instruct",
-    "meta-llama/llama-3.1-8b-instruct",
-    "meta-llama/llama-3.1-70b-instruct",
-    "openai/gpt-4o-mini",
-    "openai/gpt-4o",
-    "deepseek/deepseek-r1",
-    "meta-llama/llama-4-maverick",
-    "meta-llama/llama-4-scout"
-];
 // Helper function getOwner (unchanged)
-const getOwner = (modelId) => {
-    if (modelId && modelId.includes('/')) {
-        return modelId.split('/')[0];
-    }
-    return 'fal-ai';
-}
 // GET /v1/models endpoint (unchanged)
-app.get('/v1/models', (req, res) => {
-    console.log("Received request for GET /v1/models");
-    try {
-        const modelsData = FAL_SUPPORTED_MODELS.map(modelId => ({
-            id: modelId, object: "model", created: Math.floor(Date.now() / 1000), owned_by: getOwner(modelId) // Use current time for created
-        }));
-        res.json({ object: "list", data: modelsData });
-        console.log("Successfully returned model list.");
-    } catch (error) {
-        console.error("Error processing GET /v1/models:", error);
-        res.status(500).json({ error: "Failed to retrieve model list." });
-    }
-});
 // convertMessagesToFalPrompt 函数 (unchanged)
-function convertMessagesToFalPrompt(messages) {
-    // ... (keep existing conversion logic)
-    let fixed_system_prompt_content = "";
-    const conversation_message_blocks = [];
-    // console.log(`Original messages count: ${messages.length}`); // Less verbose logging
-    // 1. 分离 System 消息，格式化 User/Assistant 消息
-    for (const message of messages) {
-        let content = (message.content === null || message.content === undefined) ? "" : String(message.content);
-        switch (message.role) {
-            case 'system':
-                fixed_system_prompt_content += `System: ${content}\n\n`;
-                break;
-            case 'user':
-                conversation_message_blocks.push(`Human: ${content}\n\n`);
-                break;
-            case 'assistant':
-                conversation_message_blocks.push(`Assistant: ${content}\n\n`);
-                break;
-            default:
-                console.warn(`Unsupported role: ${message.role}`);
-                continue;
-        }
-    }
-    // 2. 截断合并后的 system 消息（如果超长）
-    if (fixed_system_prompt_content.length > SYSTEM_PROMPT_LIMIT) {
-        const originalLength = fixed_system_prompt_content.length;
-        fixed_system_prompt_content = fixed_system_prompt_content.substring(0, SYSTEM_PROMPT_LIMIT);
-        console.warn(`Combined system messages truncated from ${originalLength} to ${SYSTEM_PROMPT_LIMIT}`);
-    }
-    fixed_system_prompt_content = fixed_system_prompt_content.trim();
-    // 3. 计算 system_prompt 中留给对话历史的剩余空间
-    let space_occupied_by_fixed_system = 0;
-    if (fixed_system_prompt_content.length > 0) {
-         space_occupied_by_fixed_system = fixed_system_prompt_content.length + 4; // 预留 \n\n...\n\n 的长度
-    }
-     const remaining_system_limit = Math.max(0, SYSTEM_PROMPT_LIMIT - space_occupied_by_fixed_system);
-    // console.log(`Trimmed fixed system prompt length: ${fixed_system_prompt_content.length}. Approx remaining system history limit: ${remaining_system_limit}`);
-    // 4. 反向填充 User/Assistant 对话历史
-    const prompt_history_blocks = [];
-    const system_prompt_history_blocks = [];
-    let current_prompt_length = 0;
-    let current_system_history_length = 0;
-    let promptFull = false;
-    let systemHistoryFull = (remaining_system_limit <= 0);
-    // console.log(`Processing ${conversation_message_blocks.length} user/assistant messages for recency filling.`);
-    for (let i = conversation_message_blocks.length - 1; i >= 0; i--) {
-        const message_block = conversation_message_blocks[i];
-        const block_length = message_block.length;
-        if (promptFull && systemHistoryFull) {
-            // console.log(`Both prompt and system history slots full. Omitting older messages from index ${i}.`);
-            break;
-        }
-        if (!promptFull) {
-            if (current_prompt_length + block_length <= PROMPT_LIMIT) {
-                prompt_history_blocks.unshift(message_block);
-                current_prompt_length += block_length;
-                continue;
-            } else {
-                promptFull = true;
-                // console.log(`Prompt limit (${PROMPT_LIMIT}) reached. Trying system history slot.`);
-            }
-        }
-        if (!systemHistoryFull) {
-            if (current_system_history_length + block_length <= remaining_system_limit) {
-                 system_prompt_history_blocks.unshift(message_block);
-                 current_system_history_length += block_length;
-                 continue;
-            } else {
-                 systemHistoryFull = true;
-                 // console.log(`System history limit (${remaining_system_limit}) reached.`);
-            }
-        }
-    }
-    // 5. 组合最终的 prompt 和 system_prompt
-    const system_prompt_history_content = system_prompt_history_blocks.join('').trim();
-    const final_prompt = prompt_history_blocks.join('').trim();
-    const SEPARATOR = "\n\n-------下面是比较早之前的对话内容-----\n\n";
-    let final_system_prompt = "";
-    const hasFixedSystem = fixed_system_prompt_content.length > 0;
-    const hasSystemHistory = system_prompt_history_content.length > 0;
-    if (hasFixedSystem && hasSystemHistory) {
-        final_system_prompt = fixed_system_prompt_content + SEPARATOR + system_prompt_history_content;
-        // console.log("Combining fixed system prompt and history with separator.");
-    } else if (hasFixedSystem) {
-        final_system_prompt = fixed_system_prompt_content;
-        // console.log("Using only fixed system prompt.");
-    } else if (hasSystemHistory) {
-        final_system_prompt = system_prompt_history_content;
-        // console.log("Using only history in system prompt slot.");
-    }
-    const result = {
-        system_prompt: final_system_prompt,
-        prompt: final_prompt
-    };
-    // console.log(`Final system_prompt length (Sys+Separator+Hist): ${result.system_prompt.length}`);
-    // console.log(`Final prompt length (Hist): ${result.prompt.length}`);
-    return result;
-}
 // === convertMessagesToFalPrompt 函数结束 ===
-// --- Helper function to make Fal AI request with retries ---
 async function makeFalRequestWithRetry(falInput, stream = false) {
     let attempts = 0;
-    const maxAttempts = falKeys.length; // Try each key at most once per request
-    const attemptedKeysInThisRequest = new Set(); // Track keys tried for *this* specific request
     while (attempts < maxAttempts) {
-        const keyInfo = getNextValidKey();
         if (!keyInfo) {
-            // This happens if all keys are currently in the invalidKeys set
             throw new Error("No valid Fal AI keys available (all marked as invalid).");
         }
-        // Avoid retrying the *exact same key* within the *same request attempt cycle*
-        // This guards against potential infinite loops if getNextValidKey had issues
         if (attemptedKeysInThisRequest.has(keyInfo.key)) {
-             console.warn(`Key at index ${keyInfo.index} already attempted for this request cycle. Skipping.`);
-             // Don't increment attempts here, as we didn't actually *use* the key.
-             // Let the loop continue to find the next *different* valid key.
-             // If all keys are invalid, the check at the start of the loop handles it.
              continue;
         }
         attemptedKeysInThisRequest.add(keyInfo.key);
-        attempts++; // Count this as a distinct attempt with a key
         try {
             console.log(`Attempt ${attempts}/${maxAttempts}: Trying Fal Key index ${keyInfo.index}...`);
-            // *** CRITICAL: Reconfigure fal client with the selected key ***
-            console.warn("Concurrency Warning: Reconfiguring global fal client. Ensure sufficient instance isolation if under high load.");
-            fal.config({ credentials: keyInfo.key });
             if (stream) {
-                // Return the stream directly for the caller to handle
                 const falStream = await fal.stream("fal-ai/any-llm", { input: falInput });
                 console.log(`Successfully initiated stream with key index ${keyInfo.index}.`);
-                return falStream; // Success, let the caller handle iteration
             } else {
-                // For non-stream, wait for the result here
                 console.log(`Executing non-stream request with key index ${keyInfo.index}...`);
                 const result = await fal.subscribe("fal-ai/any-llm", { input: falInput, logs: true });
                 console.log(`Successfully received non-stream result with key index ${keyInfo.index}.`);
-                // Check for errors *within* the successful response structure
-                 if (result && result.error) {
-                     console.error(`Fal-ai returned an error in non-stream result (Key Index ${keyInfo.index}):`, result.error);
-                     // Treat this like a general Fal error, not necessarily a key error unless message indicates it
-                     // Convert it to a standard Error object to be caught below
-                     throw new Error(`Fal-ai error in result: ${JSON.stringify(result.error)}`);
                  }
-                return result; // Success
             }
         } catch (error) {
             console.error(`Error using Fal Key index ${keyInfo.index}:`, error.message || error);
             if (isKeyRelatedError(error)) {
-                console.warn(`Marking Fal Key index ${keyInfo.index} as invalid due to error.`);
                 invalidKeys.add(keyInfo.key);
-                // Continue to the next iteration to try another key
             } else {
-                // Not identified as a key-related error (e.g., network issue, bad input, internal Fal error)
-                // Fail the request immediately, don't retry with other keys for this type of error.
-                console.error("Error does not appear to be key-related. Failing request without further retries.");
-                throw error; // Re-throw the original error to be caught by the main handler
             }
         }
-    }
-    // If the loop finishes, it means all keys were tried and marked invalid *within this request cycle*
     throw new Error(`Request failed after trying ${attempts} unique Fal key(s). All failed with key-related errors or were already marked invalid.`);
 }
-// POST /v1/chat/completions endpoint (Modified to use retry logic)
 app.post('/v1/chat/completions', async (req, res) => {
-    const { model, messages, stream = false, reasoning = false, ...restOpenAIParams } = req.body;
-    // Basic logging for request entry
     console.log(`--> POST /v1/chat/completions | Model: ${model} | Stream: ${stream}`);
     if (!FAL_SUPPORTED_MODELS.includes(model)) {
-         console.warn(`Warning: Requested model '${model}' is not in the explicitly supported list. Proxy will still attempt.`);
     }
     if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
         console.error("Invalid request: Missing 'model' or 'messages' array.");
@@ -376,111 +277,105 @@ app.post('/v1/chat/completions', async (req, res) => {
     }
     try {
-        // --- Prepare Input ---
         const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);
         const falInput = {
             model: model,
             prompt: prompt,
             ...(system_prompt && { system_prompt: system_prompt }),
-            reasoning: !!reasoning, // Ensure boolean
         };
-        // console.log("Fal Input:", JSON.stringify(falInput, null, 2)); // Verbose logging
         console.log("Attempting Fal request with key rotation/retry...");
-        // --- Handle Stream vs Non-Stream ---
         if (stream) {
             res.setHeader('Content-Type', 'text/event-stream; charset=utf-8');
             res.setHeader('Cache-Control', 'no-cache');
             res.setHeader('Connection', 'keep-alive');
-            res.setHeader('Access-Control-Allow-Origin', '*'); // Consider restricting in production
             res.flushHeaders();
             let previousOutput = '';
-            let falStream;
             try {
-                 // Initiate stream using the retry logic
-                 falStream = await makeFalRequestWithRetry(falInput, true);
-                 // Process the stream events
                 for await (const event of falStream) {
                     const currentOutput = (event && typeof event.output === 'string') ? event.output : '';
                     const isPartial = (event && typeof event.partial === 'boolean') ? event.partial : true;
                     const errorInfo = (event && event.error) ? event.error : null;
-                     if (errorInfo) {
-                        // Log error from within the stream, but continue processing if possible
                         console.error("Error received *within* fal stream event:", errorInfo);
-                        // Send an error chunk to the client (optional, depends on desired behavior)
-                        const errorChunk = { id: `chatcmpl-${Date.now()}-error`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: {}, finish_reason: "error", message: { role: 'assistant', content: `Fal Stream Event Error: ${JSON.stringify(errorInfo)}` } }] };
-                        // Safety check before writing
                         if (!res.writableEnded) {
                              res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
-                        } else {
-                             console.warn("Stream already ended when trying to write stream event error.");
-                        }
-                        // Decide whether to break or continue based on error severity if needed
                     }
-                    // Calculate delta (same logic as before)
                     let deltaContent = '';
                     if (currentOutput.startsWith(previousOutput)) {
-                        deltaContent = currentOutput.substring(previousOutput.length);
                     } else if (currentOutput.length > 0) {
-                         // console.warn("Fal stream output mismatch. Sending full current output as delta."); // Less verbose
                          deltaContent = currentOutput;
-                         previousOutput = ''; // Reset previous output on mismatch
                     }
                     previousOutput = currentOutput;
-                    // Send OpenAI compatible chunk
-                    if (deltaContent || !isPartial) { // Send even if delta is empty when finishing
                         const openAIChunk = {
-                            id: `chatcmpl-${Date.now()}`, // Consider more unique ID if needed
                             object: "chat.completion.chunk",
                             created: Math.floor(Date.now() / 1000),
-                            model: model, // Echo back the requested model
-                            choices: [{
-                                index: 0,
-                                delta: { content: deltaContent },
-                                finish_reason: isPartial === false ? "stop" : null
-                             }]
                         };
-                        // Safety check before writing
                         if (!res.writableEnded) {
                             res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`);
-                        } else {
-                             console.warn("Stream already ended when trying to write data chunk.");
-                        }
                     }
-                 } // End for-await loop
-                 // Send the final [DONE] marker
-                 if (!res.writableEnded) {
-                     res.write(`data: [DONE]\n\n`);
-                     res.end();
-                     console.log("<-- Stream finished successfully.");
-                 } else {
-                      console.log("<-- Stream finished, but connection was already ended.");
-                 }
             } catch (streamError) {
-                // Catches errors from makeFalRequestWithRetry OR the stream iteration itself
                 console.error('Error during stream request processing:', streamError.message || streamError);
                  try {
                      if (!res.headersSent) {
                          // Error likely occurred in makeFalRequestWithRetry before stream started
-                         res.status(502).json({ // 502 Bad Gateway might be appropriate
                             error: 'Failed to initiate Fal stream',
-                            details: streamError.message || 'Underlying Fal request failed or timed out.'
                          });
                          console.log("<-- Stream initiation failed response sent.");
                      } else if (!res.writableEnded) {
                          // Stream started but failed during processing
                          const errorDetails = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
-                         // Send error details in the stream if possible
                          res.write(`data: ${JSON.stringify({ error: { message: "Stream processing error after initiation", type: "proxy_error", details: errorDetails } })}\n\n`);
-                         res.write(`data: [DONE]\n\n`); // Still send DONE after error for client handling
                          res.end();
                          console.log("<-- Stream error sent, stream ended.");
                      } else {
@@ -488,7 +383,6 @@ app.post('/v1/chat/completions', async (req, res) => {
                      }
                  } catch (finalError) {
                     console.error('Error sending stream error message to client:', finalError);
-                    // Ensure response is ended if possible
                     if (!res.writableEnded) { res.end(); }
                  }
             }
@@ -496,50 +390,38 @@ app.post('/v1/chat/completions', async (req, res) => {
         } else {
             // --- Non-Stream ---
             try {
-                // Get the result using the retry logic
                 const result = await makeFalRequestWithRetry(falInput, false);
-                // console.log("Received non-stream result via retry function:", JSON.stringify(result, null, 2)); // Verbose
-                // Construct OpenAI compatible response
                 const openAIResponse = {
                     id: `chatcmpl-${result.requestId || Date.now()}`,
                     object: "chat.completion",
                     created: Math.floor(Date.now() / 1000),
-                    model: model, // Echo back requested model
-                    choices: [{
-                        index: 0,
-                        message: {
-                            role: "assistant",
-                            content: result.output || "" // Ensure content is string
-                         },
-                        finish_reason: "stop" // Assume stop for non-stream success
-                    }],
-                    usage: { // Provide null usage as Fal doesn't return it
-                        prompt_tokens: null,
-                        completion_tokens: null,
-                        total_tokens: null
-                     },
-                    system_fingerprint: null, // Fal doesn't provide this
-                     ...(result.reasoning && { fal_reasoning: result.reasoning }), // Include Fal specific reasoning if present
                 };
                 res.json(openAIResponse);
                 console.log("<-- Non-stream response sent successfully.");
             } catch (error) {
-                 // Catches errors from makeFalRequestWithRetry (e.g., all keys failed or non-key error)
                 console.error('Error during non-stream request processing:', error.message || error);
                 if (!res.headersSent) {
                     const errorMessage = (error instanceof Error) ? error.message : JSON.stringify(error);
                     // Check if it was the "all keys failed" error
                     const finalMessage = errorMessage.includes("No valid Fal AI keys available") || errorMessage.includes("Request failed after trying")
-                        ? `Fal request failed after trying all available keys: ${errorMessage}`
                         : `Internal Server Error processing Fal request: ${errorMessage}`;
-                    // Use 502 Bad Gateway if it's likely an upstream (Fal) failure
-                    res.status(502).json({ error: 'Fal Request Failed', details: finalMessage });
                     console.log("<-- Non-stream error response sent.");
                 } else {
-                    // Should be rare for non-stream, but handle just in case
                     console.error("Headers already sent for non-stream error? This is unexpected.");
                     if (!res.writableEnded) { res.end(); }
                 }
@@ -568,9 +450,10 @@ app.listen(PORT, () => {
     console.log(` Listening on port : ${PORT}`);
     console.log(` Reading Fal Keys from : FAL_KEY environment variable (comma-separated)`);
     console.log(` Loaded Keys Count   : ${falKeys.length}`);
     console.log(` API Key Auth        : ${API_KEY ? 'Enabled (using API_KEY env var)' : 'Disabled'}`);
     console.log(` Input Limits        : System Prompt=${SYSTEM_PROMPT_LIMIT}, Prompt=${PROMPT_LIMIT}`);
-    console.log(` Concurrency Warning : Global Fal client reconfigured per request.`);
     console.log(`---------------------------------------------------------------------`);
     console.log(` Endpoints:`);
     console.log(`   POST http://localhost:${PORT}/v1/chat/completions`);
@@ -580,5 +463,5 @@ app.listen(PORT, () => {
 // 根路径响应 (Updated message)
 app.get('/', (req, res) => {
-    res.send(`Fal OpenAI Proxy (Multi-Key Rotation from FAL_KEY) is running. Loaded ${falKeys.length} key(s).`);
 });

 import express from 'express';
+// **CRITICAL: Import the 'fal' object directly for configuration**
+import { fal } from '@fal-ai/client'; // Make sure 'fal' is the object you configure
 // --- Key Management Setup ---
 // Read comma-separated keys from the SINGLE environment variable FAL_KEY
 }
 let currentKeyIndex = 0;
+// **NEW: Keep track of keys that failed persistently during runtime**
+const invalidKeys = new Set();
 console.log(`Loaded ${falKeys.length} Fal AI Key(s) from the FAL_KEY environment variable.`);
+// **MODIFIED: Function to get the next *valid* key**
 /**
  * Picks the next Fal AI key that has not been marked invalid, in round-robin order.
  *
  * Reads the module-level `falKeys` list and `invalidKeys` set, and advances the
  * module-level `currentKeyIndex` cursor past every slot it inspects (valid or
  * not), so the following call resumes with the next slot.
  *
  * @returns {{ key: string, index: number } | null} The selected key together
  *   with its position in `falKeys`, or `null` when no usable key exists.
  */
 function getNextValidKey() {
     const totalKeys = falKeys.length;

     // Fast path: nothing left to hand out.
     if (invalidKeys.size >= totalKeys) {
         console.error("All Fal AI keys are marked as invalid.");
         return null; // No valid keys left
     }

     // Inspect each slot at most once. The loop bound alone guarantees
     // termination, so no separate wrap-around check is needed.
     for (let checked = 0; checked < totalKeys; checked++) {
         const keyIndex = currentKeyIndex % totalKeys;
         const key = falKeys[keyIndex];
         // Move the cursor on for the *next* call, regardless of validity.
         currentKeyIndex = (keyIndex + 1) % totalKeys;

         if (!invalidKeys.has(key)) {
             console.log(`Using Fal Key index: ${keyIndex} (from FAL_KEY list)`);
             return { key, index: keyIndex };
         }
         console.log(`Skipping invalid Fal Key index: ${keyIndex}`);
     }

     // Reachable when `falKeys` holds duplicate entries: the set of invalid keys
     // is then smaller than the list even though every slot is invalid.
     console.error("Could not find a valid Fal AI key after checking all potentially available keys.");
     return null;
 }
+// **NEW/MODIFIED: Function to check if an error is likely related to a bad key**
 // NOTE: This is a heuristic. You might need to adjust based on actual errors from Fal AI.
 /**
  * Heuristically decides whether a failed Fal AI call looks like a problem with
  * the API key itself (authentication, permission, quota or rate limiting).
  *
  * @param {*} error - Value caught from a failed request: an Error, a plain
  *   object with `status`/`statusCode`/`message`, or a thrown string.
  * @returns {boolean} `true` when the status code or message text matches one of
  *   the known key-related patterns, otherwise `false`.
  */
 function isKeyRelatedError(error) {
     if (!error) return false;

     // A thrown string carries its text directly; otherwise read `message`.
     // String() keeps a non-string `message` (number, object, ...) from
     // crashing the check, which `message?.toLowerCase()` would do.
     const rawMessage = typeof error === 'string' ? error : error.message;
     const message = (rawMessage === null || rawMessage === undefined)
         ? ''
         : String(rawMessage).toLowerCase();

     // Compare numerically so a status supplied as a string ("401") still matches.
     const status = Number(error.status || error.statusCode);

     // 401: Unauthorized, 403: Forbidden, 429: Too Many Requests (often quota related)
     if (status === 401 || status === 403 || status === 429) {
         console.warn(`Detected potential key-related error (HTTP Status: ${status}).`);
         return true;
     }

     // Lower-case fragments matched against the lower-cased message.
     // Add more specific Fal AI error messages here as they are observed.
     const keyErrorPhrases = [
         'invalid api key',
         'authentication failed',
         'permission denied',
         'quota exceeded', // Treat quota errors as key-related for rotation
         'forbidden',
         'unauthorized',
         'rate limit', // Often linked to key limits
         'credentials', // Generic credential errors
     ];
     if (keyErrorPhrases.some((phrase) => message.includes(phrase))) {
         console.warn(`Detected potential key-related error (message: "${message}")`);
         return true;
     }

     return false;
 }
 // --- End Key Management Setup ---
 // === 限制定义结束 ===
 // 定义 fal-ai/any-llm 支持的模型列表 (unchanged)
// Model IDs this proxy advertises for fal-ai/any-llm. The list must be spelled
// out: an elided placeholder leaves the array empty, which empties the
// /v1/models response and makes every chat request log an "unsupported model"
// warning.
const FAL_SUPPORTED_MODELS = [
    "anthropic/claude-3.7-sonnet",
    "anthropic/claude-3.5-sonnet",
    "anthropic/claude-3-5-haiku",
    "anthropic/claude-3-haiku",
    "google/gemini-pro-1.5",
    "google/gemini-flash-1.5",
    "google/gemini-flash-1.5-8b",
    "google/gemini-2.0-flash-001",
    "meta-llama/llama-3.2-1b-instruct",
    "meta-llama/llama-3.2-3b-instruct",
    "meta-llama/llama-3.1-8b-instruct",
    "meta-llama/llama-3.1-70b-instruct",
    "openai/gpt-4o-mini",
    "openai/gpt-4o",
    "deepseek/deepseek-r1",
    "meta-llama/llama-4-maverick",
    "meta-llama/llama-4-scout",
];
 // Helper function getOwner (unchanged)
/**
 * Derives the `owned_by` value for a model entry from its ID.
 *
 * @param {string} modelId - Model identifier, e.g. "openai/gpt-4o".
 * @returns {string} The part before the first '/', or 'fal-ai' when the ID has
 *   no '/' (or is not a string).
 */
const getOwner = (modelId) => {
    if (typeof modelId === 'string' && modelId.includes('/')) {
        return modelId.split('/')[0];
    }
    return 'fal-ai';
};
 // GET /v1/models endpoint (unchanged)
// GET /v1/models: returns the supported model list as an OpenAI-style
// { object: "list", data: [...] } payload. The handler must always send a
// response; an empty body would leave the client waiting indefinitely.
app.get('/v1/models', (req, res) => {
    console.log("Received request for GET /v1/models");
    try {
        // No per-model creation date is available, so the current time is used.
        const created = Math.floor(Date.now() / 1000);
        const modelsData = FAL_SUPPORTED_MODELS.map((modelId) => ({
            id: modelId,
            object: "model",
            created,
            owned_by: getOwner(modelId),
        }));
        res.json({ object: "list", data: modelsData });
        console.log("Successfully returned model list.");
    } catch (error) {
        console.error("Error processing GET /v1/models:", error);
        res.status(500).json({ error: "Failed to retrieve model list." });
    }
});
 // convertMessagesToFalPrompt 函数 (unchanged)
/**
 * Converts an OpenAI-style `messages` array into the `{ system_prompt, prompt }`
 * pair sent to Fal.
 *
 * System messages are concatenated (each prefixed with "System: ") and cut to
 * SYSTEM_PROMPT_LIMIT characters. User/assistant messages are rendered as
 * "Human: ..." / "Assistant: ..." blocks and placed newest-first: the most
 * recent blocks fill `prompt` up to PROMPT_LIMIT characters, then older blocks
 * spill into whatever room is left in `system_prompt`. Anything older than that
 * is dropped.
 *
 * @param {Array<{ role: string, content: * }>} messages - Chat messages in
 *   chronological order. Messages with other roles are skipped with a warning;
 *   null/undefined content is treated as an empty string.
 * @returns {{ system_prompt: string, prompt: string }} Both fields are trimmed
 *   strings and may be empty.
 */
function convertMessagesToFalPrompt(messages) {
    let fixedSystemPrompt = "";
    const conversationBlocks = [];

    // 1. Separate system messages and format user/assistant messages.
    for (const message of messages) {
        const content = (message.content === null || message.content === undefined)
            ? ""
            : String(message.content);
        switch (message.role) {
            case 'system':
                fixedSystemPrompt += `System: ${content}\n\n`;
                break;
            case 'user':
                conversationBlocks.push(`Human: ${content}\n\n`);
                break;
            case 'assistant':
                conversationBlocks.push(`Assistant: ${content}\n\n`);
                break;
            default:
                console.warn(`Unsupported role: ${message.role}`);
                break;
        }
    }

    // 2. Truncate the combined system text if it is too long.
    if (fixedSystemPrompt.length > SYSTEM_PROMPT_LIMIT) {
        const originalLength = fixedSystemPrompt.length;
        fixedSystemPrompt = fixedSystemPrompt.substring(0, SYSTEM_PROMPT_LIMIT);
        console.warn(`Combined system messages truncated from ${originalLength} to ${SYSTEM_PROMPT_LIMIT}`);
    }
    fixedSystemPrompt = fixedSystemPrompt.trim();

    // 3. Work out how much of the system slot is left for conversation history.
    //    The extra 4 characters reserve room for the "\n\n" on either side of
    //    the history separator.
    const spaceUsedByFixedSystem = fixedSystemPrompt.length > 0
        ? fixedSystemPrompt.length + 4
        : 0;
    const remainingSystemLimit = Math.max(0, SYSTEM_PROMPT_LIMIT - spaceUsedByFixedSystem);

    // 4. Fill from the newest message backwards: prompt slot first, then the
    //    system-history slot. Once a slot rejects a block it stays closed, so
    //    each slot holds a contiguous run of the conversation.
    const promptBlocks = [];
    const systemHistoryBlocks = [];
    let promptLength = 0;
    let systemHistoryLength = 0;
    let promptFull = false;
    let systemHistoryFull = (remainingSystemLimit <= 0);

    for (let i = conversationBlocks.length - 1; i >= 0; i--) {
        if (promptFull && systemHistoryFull) {
            break; // Both slots are closed; older messages are omitted.
        }
        const block = conversationBlocks[i];
        const blockLength = block.length;

        if (!promptFull) {
            if (promptLength + blockLength <= PROMPT_LIMIT) {
                promptBlocks.unshift(block);
                promptLength += blockLength;
                continue;
            }
            promptFull = true;
        }
        if (!systemHistoryFull) {
            if (systemHistoryLength + blockLength <= remainingSystemLimit) {
                systemHistoryBlocks.unshift(block);
                systemHistoryLength += blockLength;
                continue;
            }
            systemHistoryFull = true;
        }
    }

    // 5. Assemble the final prompt and system_prompt.
    const systemHistoryContent = systemHistoryBlocks.join('').trim();
    const finalPrompt = promptBlocks.join('').trim();
    // Runtime text sent to the model; it reads "below is earlier conversation content".
    const SEPARATOR = "\n\n-------下面是比较早之前的对话内容-----\n\n";

    const hasFixedSystem = fixedSystemPrompt.length > 0;
    const hasSystemHistory = systemHistoryContent.length > 0;
    let finalSystemPrompt = "";
    if (hasFixedSystem && hasSystemHistory) {
        finalSystemPrompt = fixedSystemPrompt + SEPARATOR + systemHistoryContent;
    } else if (hasFixedSystem) {
        finalSystemPrompt = fixedSystemPrompt;
    } else if (hasSystemHistory) {
        finalSystemPrompt = systemHistoryContent;
    }

    return {
        system_prompt: finalSystemPrompt,
        prompt: finalPrompt,
    };
}
 // === convertMessagesToFalPrompt 函数结束 ===
+// --- NEW: Helper function to make Fal AI request with retries ---
 /**
  * Sends one request to the "fal-ai/any-llm" endpoint, rotating through the
  * configured Fal keys until one of them works.
  *
  * Every iteration asks getNextValidKey() for a key, configures the shared
  * `fal` client with it and issues the call. A failure that isKeyRelatedError()
  * classifies as key-related adds the key to `invalidKeys` and moves on to the
  * next key; any other failure is re-thrown immediately without further retries.
  *
  * @param {object} falInput - Payload passed as `input` to the Fal endpoint.
  * @param {boolean} [stream=false] - When true the call goes through `fal.stream`
  *   and the value it resolves to is returned; otherwise `fal.subscribe` is used
  *   and its result is returned.
  * @returns {Promise<*>} The stream (stream mode) or the result object (non-stream mode).
  * @throws {Error} When getNextValidKey() yields no key, when a non key-related
  *   error occurs (the original error is re-thrown), or when every key tried by
  *   this call failed with a key-related error.
  */
 async function makeFalRequestWithRetry(falInput, stream = false) {
     // At most one real attempt per configured key.
     const maxAttempts = falKeys.length;
     // Keys already used by this call, so no key is tried twice for one request.
     const attemptedKeysInThisRequest = new Set();
     let attempts = 0;
     // Number of selections in a row that handed back an already-attempted key.
     let consecutiveSkips = 0;

     while (attempts < maxAttempts) {
         const keyInfo = getNextValidKey(); // { key, index } or null
         if (!keyInfo) {
             console.error("makeFalRequestWithRetry: No valid Fal AI keys remaining.");
             throw new Error("No valid Fal AI keys available (all marked as invalid).");
         }

         if (attemptedKeysInThisRequest.has(keyInfo.key)) {
             console.warn(`Key at index ${keyInfo.index} was already attempted for this request. Skipping to find next.`);
             // A skip does not count as an attempt, so it needs its own bound:
             // without one, a selector that keeps returning used keys would spin
             // this loop forever. Once as many selections in a row as there are
             // configured keys produced nothing new, give up.
             consecutiveSkips++;
             if (consecutiveSkips >= maxAttempts) {
                 break;
             }
             continue;
         }

         consecutiveSkips = 0;
         attemptedKeysInThisRequest.add(keyInfo.key);
         attempts++; // a distinct key is about to be used

         try {
             console.log(`Attempt ${attempts}/${maxAttempts}: Trying Fal Key index ${keyInfo.index}...`);

             // The credentials are set on the GLOBAL fal client right before the call.
             // NOTE(review): concurrent requests share this client; whether that is
             // safe depends on how the library reads its configuration - confirm.
             console.warn(`Configuring GLOBAL fal client with key index ${keyInfo.index}. Ensure this is safe for your concurrency model.`);
             fal.config({ credentials: keyInfo.key });

             if (stream) {
                 const falStream = await fal.stream("fal-ai/any-llm", { input: falInput });
                 console.log(`Successfully initiated stream with key index ${keyInfo.index}.`);
                 return falStream;
             }

             console.log(`Executing non-stream request with key index ${keyInfo.index}...`);
             const result = await fal.subscribe("fal-ai/any-llm", { input: falInput, logs: true });
             console.log(`Successfully received non-stream result with key index ${keyInfo.index}.`);

             // A resolved call may still carry an error inside its payload.
             if (result && result.error) {
                 console.error(`Fal-ai returned an error in the result payload (Key Index ${keyInfo.index}):`, result.error);
                 if (isKeyRelatedError(result.error)) {
                     console.warn(`Marking Fal Key index ${keyInfo.index} as invalid due to error in response payload.`);
                     invalidKeys.add(keyInfo.key);
                     continue; // try the next key
                 }
                 // This throw is caught by the catch block below, which classifies
                 // the new Error (by its own message) once more before re-throwing.
                 throw new Error(`Fal-ai error in result payload: ${JSON.stringify(result.error)}`);
             }

             return result;
         } catch (error) {
             // Optional chaining keeps the log line itself from throwing when a
             // nullish value was thrown.
             console.error(`Error using Fal Key index ${keyInfo.index}:`, error?.message || error);

             if (!isKeyRelatedError(error)) {
                 // Not a key problem: another key would not help, so fail right away.
                 console.error("Error does not appear to be key-related. Failing request without further key retries.");
                 throw error;
             }

             console.warn(`Marking Fal Key index ${keyInfo.index} as invalid due to caught error.`);
             invalidKeys.add(keyInfo.key);
             // Fall through to the next loop iteration to try another key.
         }
     }

     // Reached when every attempt ended in a key-related failure or no fresh key could be found.
     throw new Error(`Request failed after trying ${attempts} unique Fal key(s). All failed with key-related errors or were already marked invalid.`);
 }
+// POST /v1/chat/completions endpoint (MODIFIED to use retry logic)
 app.post('/v1/chat/completions', async (req, res) => {
+    const { model, messages, stream = false, reasoning = false, /* ...restOpenAIParams */ } = req.body;
     console.log(`--> POST /v1/chat/completions | Model: ${model} | Stream: ${stream}`);
+    // Basic Validation (unchanged)
     if (!FAL_SUPPORTED_MODELS.includes(model)) {
+        console.warn(`Warning: Requested model '${model}' is not in the explicitly supported list. Proxy will still attempt.`);
     }
     if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
         console.error("Invalid request: Missing 'model' or 'messages' array.");
     }
     try {
+        // --- Prepare Input (unchanged) ---
         const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);
         const falInput = {
             model: model,
             prompt: prompt,
             ...(system_prompt && { system_prompt: system_prompt }),
+            reasoning: !!reasoning,
         };
         console.log("Attempting Fal request with key rotation/retry...");
+        // --- Handle Stream vs Non-Stream using the retry helper ---
         if (stream) {
             res.setHeader('Content-Type', 'text/event-stream; charset=utf-8');
             res.setHeader('Cache-Control', 'no-cache');
             res.setHeader('Connection', 'keep-alive');
+            res.setHeader('Access-Control-Allow-Origin', '*');
             res.flushHeaders();
             let previousOutput = '';
+            let falStream; // Declare falStream here
             try {
+                // **MODIFIED: Initiate stream using the retry logic helper**
+                falStream = await makeFalRequestWithRetry(falInput, true);
+                // Process the stream events (logic mostly unchanged)
                 for await (const event of falStream) {
                     const currentOutput = (event && typeof event.output === 'string') ? event.output : '';
                     const isPartial = (event && typeof event.partial === 'boolean') ? event.partial : true;
                     const errorInfo = (event && event.error) ? event.error : null;
+                    // Handle errors *within* the stream payload
+                    if (errorInfo) {
                         console.error("Error received *within* fal stream event:", errorInfo);
+                        // Optionally send an error chunk (check if needed)
+                        const errorChunk = { /* ... error chunk details ... */ };
                         if (!res.writableEnded) {
                              res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
+                        } else { console.warn("Stream ended before writing event error."); }
+                         // Decide if this stream error should also invalidate the key
+                         // Note: The key might have already been marked invalid if the error happened during initial connection
+                         // You might need more context to decide if an *in-stream* error means the key is bad.
+                         // For now, we just log it. If it causes the stream to terminate prematurely,
+                         // the outer catch might handle it, but key invalidation might not occur unless the
+                         // error object passed to the catch block triggers isKeyRelatedError.
                     }
+                    // Calculate delta (logic unchanged)
                     let deltaContent = '';
                     if (currentOutput.startsWith(previousOutput)) {
+                         deltaContent = currentOutput.substring(previousOutput.length);
                     } else if (currentOutput.length > 0) {
                          deltaContent = currentOutput;
+                         previousOutput = '';
                     }
                     previousOutput = currentOutput;
+                    // Send OpenAI compatible chunk (logic unchanged)
+                    if (deltaContent || !isPartial) {
                         const openAIChunk = {
+                            id: `chatcmpl-${Date.now()}`,
                             object: "chat.completion.chunk",
                             created: Math.floor(Date.now() / 1000),
+                            model: model,
+                            choices: [{ index: 0, delta: { content: deltaContent }, finish_reason: isPartial === false ? "stop" : null }]
                         };
                         if (!res.writableEnded) {
                             res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`);
+                        } else { console.warn("Stream ended before writing data chunk."); }
                     }
+                } // End for-await loop
+                // Send the final [DONE] marker (logic unchanged)
+                if (!res.writableEnded) {
+                    res.write(`data: [DONE]\n\n`);
+                    res.end();
+                    console.log("<-- Stream finished successfully.");
+                } else {
+                    console.log("<-- Stream finished, but connection was already ended.");
+                }
             } catch (streamError) {
+                // **MODIFIED: Catches errors from makeFalRequestWithRetry OR stream iteration**
+                // This error could be a non-key-related error, or the "all keys failed" error.
                 console.error('Error during stream request processing:', streamError.message || streamError);
                  try {
                      if (!res.headersSent) {
                          // Error likely occurred in makeFalRequestWithRetry before stream started
+                         const errorMessage = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
+                         res.status(502).json({ // 502 Bad Gateway is appropriate for upstream failure
                             error: 'Failed to initiate Fal stream',
+                            details: errorMessage
                          });
                          console.log("<-- Stream initiation failed response sent.");
                      } else if (!res.writableEnded) {
                          // Stream started but failed during processing
                          const errorDetails = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
                          res.write(`data: ${JSON.stringify({ error: { message: "Stream processing error after initiation", type: "proxy_error", details: errorDetails } })}\n\n`);
+                         res.write(`data: [DONE]\n\n`); // Still send DONE for client handling
                          res.end();
                          console.log("<-- Stream error sent, stream ended.");
                      } else {
                      }
                  } catch (finalError) {
                     console.error('Error sending stream error message to client:', finalError);
                     if (!res.writableEnded) { res.end(); }
                  }
             }
         } else {
             // --- Non-Stream ---
             try {
+                // **MODIFIED: Get the result using the retry logic helper**
                 const result = await makeFalRequestWithRetry(falInput, false);
+                // console.log("Received non-stream result via retry function:", JSON.stringify(result, null, 2));
+                // Construct OpenAI compatible response (logic unchanged)
                 const openAIResponse = {
                     id: `chatcmpl-${result.requestId || Date.now()}`,
                     object: "chat.completion",
                     created: Math.floor(Date.now() / 1000),
+                    model: model,
+                    choices: [{ index: 0, message: { role: "assistant", content: result.output || "" }, finish_reason: "stop" }],
+                    usage: { prompt_tokens: null, completion_tokens: null, total_tokens: null },
+                    system_fingerprint: null,
+                     ...(result.reasoning && { fal_reasoning: result.reasoning }),
                 };
                 res.json(openAIResponse);
                 console.log("<-- Non-stream response sent successfully.");
             } catch (error) {
+                 // **MODIFIED: Catches errors from makeFalRequestWithRetry**
+                 // This error could be a non-key-related error, or the "all keys failed" error.
                 console.error('Error during non-stream request processing:', error.message || error);
                 if (!res.headersSent) {
                     const errorMessage = (error instanceof Error) ? error.message : JSON.stringify(error);
                     // Check if it was the "all keys failed" error
                     const finalMessage = errorMessage.includes("No valid Fal AI keys available") || errorMessage.includes("Request failed after trying")
+                        ? `Fal request failed: ${errorMessage}` // Simplified message
                         : `Internal Server Error processing Fal request: ${errorMessage}`;
+                    res.status(502).json({ error: 'Fal Request Failed', details: finalMessage }); // 502 Bad Gateway
                     console.log("<-- Non-stream error response sent.");
                 } else {
                     console.error("Headers already sent for non-stream error? This is unexpected.");
                     if (!res.writableEnded) { res.end(); }
                 }
     console.log(` Listening on port : ${PORT}`);
     console.log(` Reading Fal Keys from : FAL_KEY environment variable (comma-separated)`);
     console.log(` Loaded Keys Count   : ${falKeys.length}`);
+    console.log(` Invalid Keys Set    : Initialized (size: ${invalidKeys.size})`); // Show invalid set size
     console.log(` API Key Auth        : ${API_KEY ? 'Enabled (using API_KEY env var)' : 'Disabled'}`);
     console.log(` Input Limits        : System Prompt=${SYSTEM_PROMPT_LIMIT}, Prompt=${PROMPT_LIMIT}`);
+    console.log(` Concurrency Warning : Global Fal client reconfigured per request attempt!`);
     console.log(`---------------------------------------------------------------------`);
     console.log(` Endpoints:`);
     console.log(`   POST http://localhost:${PORT}/v1/chat/completions`);
 // Root path response (Updated message)
 // Status page for the root path: reports how many Fal keys were loaded and
 // how many of them are currently flagged as invalid.
 app.get('/', (_req, res) => {
     const loadedCount = falKeys.length;
     const invalidCount = invalidKeys.size;
     const statusLine = `Fal OpenAI Proxy (Multi-Key Rotation from FAL_KEY) is running. Loaded ${loadedCount} key(s). Currently ${invalidCount} key(s) marked as invalid.`;
     res.send(statusLine);
 });