Spaces:

xt8
/

g

Running

App Files Files Community

xt8 committed on Jul 8, 2025

Commit

b5254fd

verified ·

1 Parent(s): c895a8e

Update main.ts

Browse files

Files changed (1) hide show

main.ts +222 -232

main.ts CHANGED Viewed

@@ -1,16 +1,21 @@
 import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
-// [修改] 引入具体的 Encoder 类，并使用 npm 导入方式以获得更好的 Deno 兼容性
-import { Encoder } from "npm:wav@1.0.2";
-// [新增] 引入 MP3 解码器
-import { MpegDecoder } from "https://esm.sh/mpg123-decoder@0.6.5";
 // --- Constants ---
 const MAX_DOCUMENT_SIZE_MB = 20; // Maximum accepted document size, in MB
 const MAX_DOCUMENT_SIZE_BYTES = MAX_DOCUMENT_SIZE_MB * 1024 * 1024;
 const MODELS_CACHE_DURATION = 60000; // Model-list cache lifetime in ms (1 minute)
-// --- 接口定义 ---
 interface OpenAIMessage {
   role: "system" | "user" | "assistant";
   content: string | Array<{
@@ -29,15 +34,18 @@ interface OpenAIRequest {
   stream?: boolean;
 }
-// [New] Request body for the OpenAI-compatible TTS endpoint
-interface OpenAITTSRequest {
-    model: string; // e.g., 'tts-1', 'tts-1-hd'
-    input: string; // The text to synthesize
-    voice: 'Puck' | 'Charon' | 'Kore' | 'Fenrir' | 'Leda' | 'Aoede';
-    response_format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav'; // added the 'wav' option
-    speed?: number; // Not directly supported by Gemini TTS, will be ignored
 }
 class GoogleAIService {
   public apiKeys: string[];
@@ -66,95 +74,7 @@ class GoogleAIService {
     this.currentKeyIndex = (this.currentKeyIndex + 1) % this.apiKeys.length;
     return key;
   }
-  // --- [New] TTS support ---
-  /**
-   * Maps the voice name from an OpenAI-style request to a Google Gemini TTS prebuilt voice name.
-   * Names listed in the table map to themselves; any other value falls back to 'Puck'.
-   * Reference: https://ai.google.dev/gemini-api/docs/text-to-speech#supported_voices
-   *
-   * @param openAIVoice - Voice name taken from the request.
-   * @returns The prebuilt voice name to send to Google.
-   */
-  private getGoogleVoice(openAIVoice: string): string {
-    const voiceMap: { [key: string]: string } = {
-        'Puck': 'Puck',      // A good default, versatile voice
-        'Charon': 'Charon',        // Another male voice option
-        'Kore': 'Kore',     // Female, narrative style
-        'Fenrir': 'Fenrir',       // Deep, male voice
-        'Leda': 'Leda',    // Energetic female voice
-        'Aoede': 'Aoede',    // Gentle female voice
-        // Fallback to a default if the voice is not in the map
-        'default': 'Puck'
-    };
-    return voiceMap[openAIVoice] || voiceMap['default'];
-  }
-  /**
-   * [New] Calls the Google Gemini TTS API to generate speech.
-   * @param input - Text to convert to speech.
-   * @param model - Model named in the request; not used for the call, which always targets the hard-coded TTS model.
-   * @param voice - OpenAI-style voice name, mapped through getGoogleVoice.
-   * @returns ArrayBuffer holding the base64-decoded bytes of the first candidate's inlineData.
-   *          NOTE(review): the original comment described this as MP3 data; nothing here checks the format - confirm.
-   * @throws Error when the HTTP response is not OK or when no audio data is present in the reply.
-   */
-  async generateSpeech(input: string, model: string, voice: string): Promise<ArrayBuffer> {
-    const apiKey = this.getNextApiKey();
-    const googleVoice = this.getGoogleVoice(voice);
-    // Google Gemini TTS currently uses a fixed model name
-    const ttsModel = "gemini-2.5-flash-preview-tts";
-    console.log(`Generating speech with model: ${ttsModel}, voice: ${googleVoice} (mapped from OpenAI's '${voice}')`);
-    const requestBody = {
-        "contents": [{
-            "parts":[{
-                "text": input
-            }]
-        }],
-        "generationConfig": {
-            "responseModalities": ["AUDIO"],
-            "speechConfig": {
-                "voiceConfig": {
-                    "prebuiltVoiceConfig": {
-                        "voiceName": googleVoice
-                    }
-                }
-            }
-        },
-        "model": ttsModel,
-    };
-    const response = await fetch(
-        `https://generativelanguage.googleapis.com/v1beta/models/${ttsModel}:generateContent?key=${apiKey}`,
-        {
-            method: "POST",
-            headers: { "Content-Type": "application/json" },
-            body: JSON.stringify(requestBody),
-        }
-    );
-    if (!response.ok) {
-        const errorBody = await response.json().catch(() => response.text()); // NOTE(review): json() consumes the body, so the text() fallback may itself reject - confirm
-        const errorMessage = errorBody?.error?.message || JSON.stringify(errorBody);
-        console.error(`Google TTS API Error: ${response.status} - ${errorMessage}`);
-        throw new Error(`Google TTS API request failed with status ${response.status}: ${errorMessage}`);
-    }
-    const data = await response.json();
-    const audioContentBase64 = data.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
-    if (!audioContentBase64) {
-        throw new Error("No audio data returned from Google API. The response might be blocked or empty.");
-    }
-    const binaryString = atob(audioContentBase64); // base64 -> binary string, copied byte-by-byte below
-    const len = binaryString.length;
-    const bytes = new Uint8Array(len);
-    for (let i = 0; i < len; i++) {
-        bytes[i] = binaryString.charCodeAt(i);
-    }
-    return bytes.buffer;
-  }
-  // --- 现有代码保持不变 (折叠以保持简洁) ---
   async fetchOfficialModels(): Promise<any[]> {
     const now = Date.now();
     if (this.cachedModels.length > 0 && (now - this.modelsLastFetch) < MODELS_CACHE_DURATION) {
@@ -182,6 +102,7 @@ class GoogleAIService {
         console.log(`Fetched ${this.cachedModels.length} models from Google AI`);
         return this.cachedModels;
       }
       return this.getFallbackModels();
     } catch (error) {
       console.warn("Error fetching models from Google AI:", error.message, ". Using fallback models.");
@@ -193,9 +114,7 @@ class GoogleAIService {
     return [
       { name: "models/gemini-1.5-pro", displayName: "Gemini 1.5 Pro", description: "Mid-size multimodal model that supports up to 1 million tokens, images, and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
       { name: "models/gemini-1.5-flash", displayName: "Gemini 1.5 Flash", description: "Fast and versatile multimodal model for diverse tasks, supports images and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
-      { name: "models/gemini-2.0-flash-preview-image-generation", displayName: "Gemini 2.0 Flash Image Generation", description: "Advanced model for generating and editing high-quality images with text and image outputs", supportedGenerationMethods: ["generateContent"], maxTokens: 100000, capabilities: ["text", "image_generation", "image_editing"] },
-      // [新增] 在模型列表中添加TTS模型，使其在 /v1/models 接口可见
-      { name: "models/gemini-2.5-flash-preview-tts", displayName: "Gemini 2.5 Flash TTS", description: "Text-to-speech model for generating high-quality audio.", supportedGenerationMethods: ["generateContent"], id: "gemini-2.5-flash-preview-tts" }
     ];
   }
@@ -214,13 +133,17 @@ class GoogleAIService {
     return 'unknown';
   }
   private extractDocumentData(documentUrl: string): { mimeType: string; data: string; text?: string; docType: string } {
     const docType = this.getDocumentType(documentUrl);
     if (!documentUrl.startsWith("data:")) {
       if (documentUrl.startsWith("http")) {
         throw new Error("Document URL downloads are not supported. Please provide base64 encoded data URLs.");
       }
       throw new Error("Document must be provided as a standard base64 data URL (e.g., 'data:application/pdf;base64,...').");
     }
@@ -228,15 +151,16 @@ class GoogleAIService {
     if (parts.length !== 2) {
         throw new Error("Invalid data URL format for document. Expected 'data:[mime];base64,[data]'.");
     }
-    const [mimeInfo, base64Data] = parts;
     const approxSizeInBytes = base64Data.length * 0.75;
     if (approxSizeInBytes > MAX_DOCUMENT_SIZE_BYTES) {
         throw new Error(`Document size (${(approxSizeInBytes / 1024 / 1024).toFixed(2)}MB) exceeds the ${MAX_DOCUMENT_SIZE_MB}MB limit.`);
     }
     const mimeType = mimeInfo.split(":")[1]?.split(";")[0] || 'application/octet-stream';
     if (docType === 'txt' || docType === 'md') {
       try {
         const textContent = atob(base64Data);
@@ -246,11 +170,12 @@ class GoogleAIService {
         throw new Error(`Invalid base64 encoding for ${docType} document.`);
       }
     }
     const finalMimeType = docType === 'pdf' ? 'application/pdf' : mimeType;
     return { mimeType: finalMimeType, data: base64Data, docType };
   }
   private extractImageData(imageUrl: string): { mimeType: string; data: string } {
     if (imageUrl.startsWith("data:image/")) {
       const [mimeInfo, base64Data] = imageUrl.split(",");
@@ -267,7 +192,6 @@ class GoogleAIService {
     const apiKey = this.getNextApiKey();
     const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
     const documentModel = this.isDocumentModel(fullModelName) ? fullModelName : 'models/gemini-1.5-pro-latest';
     console.log(`Processing document with model: ${documentModel}`);
     let contents;
@@ -279,27 +203,28 @@ class GoogleAIService {
         const messageParts = msg.content.map(part => {
           if (part.type === "text") return { text: part.text };
           if (part.type === "image_url" && part.image_url) {
             const { mimeType, data } = this.extractImageData(part.image_url.url);
             return { inlineData: { mimeType, data } };
           }
           if (part.type === "document" && part.document) {
             const docData = this.extractDocumentData(part.document.url);
             console.log(`Processing document: ${docData.docType}, mime: ${docData.mimeType}, size: ${(docData.data.length * 0.75 / 1024).toFixed(2)} KB`);
             if (docData.docType === 'txt' || docData.docType === 'md') {
-              const prefix = docData.docType === 'md' ? 'Markdown document content:\n' : 'Text document content:\n';
               return { text: `${prefix}${docData.text}` };
             }
             if (docData.docType === 'pdf') {
               return { inlineData: { mimeType: docData.mimeType, data: docData.data } };
             }
             return { text: `[Document type '${docData.docType}' is not supported for direct processing. Please convert to PDF, TXT, or MD.]` };
           }
           return { text: "" };
         });
         return { role: msg.role === "assistant" ? "model" : "user", parts: messageParts.filter(p => p.text || p.inlineData) };
       });
     } catch (error) {
@@ -343,13 +268,67 @@ class GoogleAIService {
     if (candidate.finishReason === "SAFETY") {
         throw new Error("Response blocked due to safety filters. Check content for sensitive topics.");
     }
     if (candidate.finishReason === "RECITATION") {
         throw new Error("Response blocked due to recitation policy. The model's output was too similar to a copyrighted source.");
     }
     return candidate.content?.parts[0]?.text || "Document processed, but no text response was generated.";
   }
   async generateContent(messages: OpenAIMessage[], modelName: string, enableSearch: boolean = false): Promise<string> {
     const hasDocument = messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "document"));
     if (hasDocument) {
@@ -358,7 +337,6 @@ class GoogleAIService {
     const apiKey = this.getNextApiKey();
     const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
     const contents = messages.map(msg => {
       if (typeof msg.content === "string") {
         return { role: msg.role === "assistant" ? "model" : "user", parts: [{ text: msg.content }] };
@@ -385,6 +363,7 @@ class GoogleAIService {
       contents,
       generationConfig: { temperature: 0.7, maxOutputTokens: 4096 }
     };
     if (enableSearch) {
       requestBody.tools = [{ googleSearchRetrieval: {} }];
     }
@@ -398,14 +377,17 @@ class GoogleAIService {
       const errorText = await response.text();
       throw new Error(`Google AI API error: ${response.status} - ${errorText}`);
     }
     const data = await response.json();
     if (!data.candidates || data.candidates.length === 0) {
       throw new Error("No response generated from Google AI");
     }
     const candidate = data.candidates[0];
     if (candidate.finishReason === "SAFETY") {
       throw new Error("Response blocked due to safety filters");
     }
     return candidate.content?.parts[0]?.text || "No response generated";
   }
@@ -435,6 +417,7 @@ class GoogleAIService {
       const errorText = await response.text();
       throw new Error(`Image ${inputImage ? 'editing' : 'generation'} failed: ${response.status} - ${errorText}`);
     }
     const data = await response.json();
     if (!data.candidates || data.candidates.length === 0) {
       throw new Error(`No ${inputImage ? 'edited' : 'generated'} image returned`);
@@ -461,9 +444,10 @@ class GoogleAIService {
       result.imageBase64 = imageBase64;
       result.imageUrl = `data:image/png;base64,${imageBase64}`;
     }
     return result;
   }
   async generateContentWithGrounding(messages: OpenAIMessage[], modelName: string): Promise<string> {
     const apiKey = this.getNextApiKey();
     const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
@@ -494,6 +478,7 @@ class GoogleAIService {
     if (candidate.finishReason === "SAFETY") {
       throw new Error("Response blocked due to safety filters");
     }
     return candidate.content?.parts[0]?.text || "No response generated";
   }
@@ -549,91 +534,96 @@ class OpenAICompatibleServer {
            lowerUrl.includes('.md') || lowerUrl.startsWith('data:text/markdown');
   }
-  /**
-   * [New] Transcodes MP3 audio data to WAV.
-   * @param mp3Buffer - ArrayBuffer containing the MP3 data.
-   * @returns Promise<Uint8Array> containing the WAV data.
-   */
-  private async _transcodeMp3ToWav(mp3Buffer: ArrayBuffer): Promise<Uint8Array> {
-    console.log("Transcoding MP3 to WAV...");
-    const decoder = new MpegDecoder();
-    // Make sure the decoder's resources are released after use
     try {
-        await decoder.ready;
-        const mp3Data = new Uint8Array(mp3Buffer);
-        const { data, channels, sampleRate } = decoder.decode(mp3Data);
-        console.log(`Decoded MP3: ${sampleRate}Hz, ${channels} channels, ${data.length} samples.`);
-        // Use the 'wav' library to encode the raw PCM data as WAV
-        const wavEncoder = new Encoder(channels, { sampleRate });
-        wavEncoder.write(data);
-        const wavDataStream = wavEncoder.end();
-        // Collect the WAV data stream into a single Uint8Array
-        const chunks: Uint8Array[] = [];
-        for await (const chunk of wavDataStream) {
-            chunks.push(chunk);
-        }
-        // Concatenate all chunks
-        const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
-        const wavResult = new Uint8Array(totalLength);
-        let offset = 0;
-        for (const chunk of chunks) {
-            wavResult.set(chunk, offset);
-            offset += chunk.length;
-        }
-        console.log(`Successfully transcoded to WAV (${(wavResult.length / 1024).toFixed(2)} KB).`);
-        return wavResult;
-    } finally {
-        decoder.free(); // Release the memory held by the wasm decoder
-    }
-  }
-  /**
-   * [Changed] Handles an OpenAI-compatible TTS request and converts the result to WAV.
-   *
-   * Responds with status 400 when input, voice, or model is missing, with the WAV bytes
-   * on success, and with a status 500 JSON error body when anything in the pipeline throws.
-   */
-  private async handleAudioSpeech(request: Request): Promise<Response> {
-    try {
-        const body: OpenAITTSRequest = await request.json();
-        if (!body.input || !body.voice || !body.model) {
-            return new Response(JSON.stringify({ error: { message: "Missing required fields: input, voice, and model.", type: "invalid_request_error" } }), { status: 400 });
         }
-        // 1. Fetch the audio from Google (handled as MP3 by the next step)
-        const mp3AudioBuffer = await this.googleAI.generateSpeech(body.input, body.model, body.voice);
-        // 2. Transcode the MP3 to WAV
-        const wavAudioBuffer = await this._transcodeMp3ToWav(mp3AudioBuffer);
-        // 3. Return the audio in WAV format
-        return new Response(wavAudioBuffer, {
-            headers: {
-                // [Changed] Content-Type is now WAV
-                "Content-Type": "audio/wav",
-                "Access-Control-Allow-Origin": "*",
-            }
-        });
     } catch (error) {
-        console.error("Error in audio speech generation:", error.message);
-        return new Response(
-            JSON.stringify({
-                error: {
-                    message: error.message,
-                    type: "api_error",
-                    code: null
-                }
-            }),
-            { status: 500, headers: { "Content-Type": "application/json" } }
-        );
     }
   }
   private async handleChatCompletions(request: Request): Promise<Response> {
     try {
       const body: OpenAIRequest = await request.json();
@@ -652,7 +642,7 @@ class OpenAICompatibleServer {
       );
       const hasImages = body.messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "image_url"));
       let inputImages: any[] = [];
       if (hasImages) {
         body.messages.forEach(msg => {
@@ -663,9 +653,9 @@ class OpenAICompatibleServer {
           }
         });
       }
-      let responseText: string;
       if (hasDocument) {
         responseText = await this.googleAI.generateContentWithDocument(body.messages, requestedModel);
       } else if (this.googleAI.isImageEditingModel(requestedModel) && hasImages) {
@@ -708,7 +698,7 @@ class OpenAICompatibleServer {
       );
     }
   }
   private async streamStringAsOpenAIResponse(content: string, modelName: string): Promise<ReadableStream<Uint8Array>> {
     const encoder = new TextEncoder();
     const streamId = `chatcmpl-${Date.now()}`;
@@ -718,62 +708,56 @@ class OpenAICompatibleServer {
     return new ReadableStream({
         start(controller) {
             const initialChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { role: 'assistant', content: '' }, finish_reason: null }] };
-            controller.enqueue(encoder.encode(`data: ${JSON.stringify(initialChunk)}\n\n`));
         },
         pull(controller) {
             if (contentQueue.length === 0) {
                 const finalChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] };
-                controller.enqueue(encoder.encode(`data: ${JSON.stringify(finalChunk)}\n\n`));
-                controller.enqueue(encoder.encode('data: [DONE]\n\n'));
                 controller.close();
                 return;
             }
             const char = contentQueue.shift();
             const chunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { content: char }, finish_reason: null }] };
-            controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
         }
     });
   }
   private async handleModels(): Promise<Response> { // Builds the JSON model list served for GET /v1/models
     try {
       const googleModels = await this.googleAI.fetchOfficialModels();
-      const fallbackModels = this.googleAI['getFallbackModels'](); // Access private method for a complete list
-      const allModels = [...googleModels, ...fallbackModels]; // fetched models first, so they win over fallbacks with the same id
-      const uniqueModelMap = new Map();
-      allModels.forEach(model => {
-          const modelId = model.id || model.name.replace('models/', ''); // explicit id if present, else the name without its "models/" prefix
-          if (!uniqueModelMap.has(modelId)) { // keep only the first entry seen for each id
-              uniqueModelMap.set(modelId, {
-                  id: modelId,
-                  object: "model",
-                  created: Math.floor(Date.now() / 1000), // current time in seconds, computed per request
-                  owned_by: "google",
-                  description: model.description || model.displayName,
-                  maxTokens: model.inputTokenLimit || model.maxTokens
-              });
-          }
-      });
       const models = {
         object: "list",
-        data: Array.from(uniqueModelMap.values()),
       };
       return new Response(JSON.stringify(models), { headers: { "Content-Type": "application/json" } });
     } catch (error) {
       console.error("Error fetching models:", error);
       return new Response(JSON.stringify({ error: { message: "Failed to fetch models." } }), { status: 500 }); // generic message; the underlying error is only logged
     }
   }
   private async handleStatus(): Promise<Response> { // Status payload served on /health and /status
       const status = {
-          status: "healthy", timestamp: new Date().toISOString(), version: "2.5.0",
           api_keys_loaded: this.googleAI.apiKeys.length,
           models_in_cache: this.googleAI.cachedModels.length,
-          models_last_fetched: this.googleAI.modelsLastFetch > 0 ? new Date(this.googleAI.modelsLastFetch).toISOString() : "never" // "never" while modelsLastFetch is not a positive timestamp
       };
       return new Response(JSON.stringify(status), { headers: { "Content-Type": "application/json" } });
   }
@@ -792,20 +776,24 @@ class OpenAICompatibleServer {
     const url = new URL(request.url);
     let response: Response;
     if (url.pathname === "/health" || url.pathname === "/status") {
       response = await this.handleStatus();
     } else if (!this.authenticate(request)) {
       response = new Response(JSON.stringify({ error: { message: "Unauthorized" } }), { status: 401 });
-    } else if (url.pathname === "/v1/audio/speech" && request.method === "POST") {
-      response = await this.handleAudioSpeech(request);
     } else if (url.pathname === "/v1/chat/completions" && request.method === "POST") {
       response = await this.handleChatCompletions(request);
     } else if (url.pathname === "/v1/models" && request.method === "GET") {
       response = await this.handleModels();
     } else {
       response = new Response("Not Found", { status: 404 });
     }
     const finalHeaders = new Headers(response.headers);
     for (const [key, value] of Object.entries(corsHeaders)) {
       finalHeaders.set(key, value);
@@ -817,26 +805,28 @@ class OpenAICompatibleServer {
 // --- Server startup ---
 const server = new OpenAICompatibleServer();
-console.log("🚀 OpenAI Compatible Server with Google AI starting on port 8000..."); // NOTE(review): message says 8000 but serve() below binds 7860
 console.log(`✅ Loaded ${server.googleAI.apiKeys.length} API key(s).`);
 console.log(`📄 Max document size set to ${MAX_DOCUMENT_SIZE_MB}MB.`);
 server.googleAI.fetchOfficialModels().then(models => { // pre-fetch the model list; a failure is only logged as a warning
   console.log(`✅ Successfully fetched ${models.length} models from Google AI.`);
 }).catch(error => {
   console.warn(`⚠️ Could not pre-fetch models: ${error.message}. Will use fallbacks or fetch on first request.`);
 });
-console.log("\n🔗 Endpoints:");
 console.log("   POST /v1/chat/completions");
-// [Changed] Log updated to reflect WAV output
-console.log("   POST /v1/audio/speech      <-- [NEW] OpenAI TTS compatible endpoint (outputs WAV)");
 console.log("   GET  /v1/models");
 console.log("   GET  /status");
 await serve(
   (request: Request) => server.handleRequest(request),
-  // [Note] The original code used port 7860; kept consistent here
-  { port: 7860 }
-);

 import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
 // --- Constants ---
 const MAX_DOCUMENT_SIZE_MB = 20; // Maximum accepted document size, in MB
 const MAX_DOCUMENT_SIZE_BYTES = MAX_DOCUMENT_SIZE_MB * 1024 * 1024;
 const MODELS_CACHE_DURATION = 60000; // Model-list cache lifetime in ms (1 minute)
+/*
+ * Voices offered for TTS requests. Every entry carries the same language ("en-US")
+ * and gender ("neutral") values; only the name differs.
+ */
+const GEMINI_VOICES = [
+  { name: "Puck", language: "en-US", gender: "neutral" },
+  { name: "Charon", language: "en-US", gender: "neutral" },
+  { name: "Kore", language: "en-US", gender: "neutral" },
+  { name: "Fenrir", language: "en-US", gender: "neutral" },
+  { name: "Aoede", language: "en-US", gender: "neutral" },
+] as const;
+type VoiceName = typeof GEMINI_VOICES[number]["name"]; // union of the name literals listed above
 interface OpenAIMessage {
   role: "system" | "user" | "assistant";
   content: string | Array<{
   stream?: boolean;
 }
+// TTS-related interfaces: JSON body of a TTS request
+interface TTSRequest {
+  model: string;
+  input: string; // text to synthesize
+  voice?: VoiceName; // one of the GEMINI_VOICES names
+  response_format?: "mp3" | "opus" | "aac" | "flac"; // TODO confirm whether the handler honors this
+  speed?: number; // TODO confirm whether the handler honors this
 }
+interface TTSResponse {
+  audio: string; // base64-encoded audio data
+}
 class GoogleAIService {
   public apiKeys: string[];
     this.currentKeyIndex = (this.currentKeyIndex + 1) % this.apiKeys.length;
     return key;
   }
   async fetchOfficialModels(): Promise<any[]> {
     const now = Date.now();
     if (this.cachedModels.length > 0 && (now - this.modelsLastFetch) < MODELS_CACHE_DURATION) {
         console.log(`Fetched ${this.cachedModels.length} models from Google AI`);
         return this.cachedModels;
       }
       return this.getFallbackModels();
     } catch (error) {
       console.warn("Error fetching models from Google AI:", error.message, ". Using fallback models.");
     return [
       { name: "models/gemini-1.5-pro", displayName: "Gemini 1.5 Pro", description: "Mid-size multimodal model that supports up to 1 million tokens, images, and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
       { name: "models/gemini-1.5-flash", displayName: "Gemini 1.5 Flash", description: "Fast and versatile multimodal model for diverse tasks, supports images and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
+      { name: "models/gemini-2.0-flash-preview-image-generation", displayName: "Gemini 2.0 Flash Image Generation", description: "Advanced model for generating and editing high-quality images with text and image outputs", supportedGenerationMethods: ["generateContent"], maxTokens: 100000, capabilities: ["text", "image_generation", "image_editing"] }
     ];
   }
     return 'unknown';
   }
+  /**
+   * [关键改进] 提取并验证文档数据，增加大小检查和更稳健的解析
+   */
   private extractDocumentData(documentUrl: string): { mimeType: string; data: string; text?: string; docType: string } {
     const docType = this.getDocumentType(documentUrl);
     if (!documentUrl.startsWith("data:")) {
       if (documentUrl.startsWith("http")) {
         throw new Error("Document URL downloads are not supported. Please provide base64 encoded data URLs.");
       }
+      // 如果不是data url或http url，则假定为纯base64数据，但这是一种不推荐的格式
+      // 为了健壮性，我们强制要求使用标准的 data URL
       throw new Error("Document must be provided as a standard base64 data URL (e.g., 'data:application/pdf;base64,...').");
     }
     if (parts.length !== 2) {
         throw new Error("Invalid data URL format for document. Expected 'data:[mime];base64,[data]'.");
     }
+    const [mimeInfo, base64Data] = parts;
+    // **改进1: 检查文件大小**
+    // Base64 字符串的长度约是原始数据的 4/3。
     const approxSizeInBytes = base64Data.length * 0.75;
     if (approxSizeInBytes > MAX_DOCUMENT_SIZE_BYTES) {
         throw new Error(`Document size (${(approxSizeInBytes / 1024 / 1024).toFixed(2)}MB) exceeds the ${MAX_DOCUMENT_SIZE_MB}MB limit.`);
     }
     const mimeType = mimeInfo.split(":")[1]?.split(";")[0] || 'application/octet-stream';
     if (docType === 'txt' || docType === 'md') {
       try {
         const textContent = atob(base64Data);
         throw new Error(`Invalid base64 encoding for ${docType} document.`);
       }
     }
+    // 自动识别PDF的MIME类型
     const finalMimeType = docType === 'pdf' ? 'application/pdf' : mimeType;
     return { mimeType: finalMimeType, data: base64Data, docType };
   }
   private extractImageData(imageUrl: string): { mimeType: string; data: string } {
     if (imageUrl.startsWith("data:image/")) {
       const [mimeInfo, base64Data] = imageUrl.split(",");
     const apiKey = this.getNextApiKey();
     const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
     const documentModel = this.isDocumentModel(fullModelName) ? fullModelName : 'models/gemini-1.5-pro-latest';
     console.log(`Processing document with model: ${documentModel}`);
     let contents;
         const messageParts = msg.content.map(part => {
           if (part.type === "text") return { text: part.text };
           if (part.type === "image_url" && part.image_url) {
             const { mimeType, data } = this.extractImageData(part.image_url.url);
             return { inlineData: { mimeType, data } };
           }
           if (part.type === "document" && part.document) {
             const docData = this.extractDocumentData(part.document.url);
             console.log(`Processing document: ${docData.docType}, mime: ${docData.mimeType}, size: ${(docData.data.length * 0.75 / 1024).toFixed(2)} KB`);
             if (docData.docType === 'txt' || docData.docType === 'md') {
+              const prefix = docData.docType === 'md' ? 'Markdown document content:\\n' : 'Text document content:\\n';
               return { text: `${prefix}${docData.text}` };
             }
             if (docData.docType === 'pdf') {
               return { inlineData: { mimeType: docData.mimeType, data: docData.data } };
             }
             return { text: `[Document type '${docData.docType}' is not supported for direct processing. Please convert to PDF, TXT, or MD.]` };
           }
           return { text: "" };
         });
         return { role: msg.role === "assistant" ? "model" : "user", parts: messageParts.filter(p => p.text || p.inlineData) };
       });
     } catch (error) {
     if (candidate.finishReason === "SAFETY") {
         throw new Error("Response blocked due to safety filters. Check content for sensitive topics.");
     }
     if (candidate.finishReason === "RECITATION") {
         throw new Error("Response blocked due to recitation policy. The model's output was too similar to a copyrighted source.");
     }
     return candidate.content?.parts[0]?.text || "Document processed, but no text response was generated.";
   }
+  /**
+   * TTS: posts `text` to the Gemini endpoint below and returns the `audioContent` field of the JSON reply.
+   *
+   * @param text - Text to synthesize.
+   * @param voice - Voice name sent as `voice.name`; defaults to "Puck".
+   * @returns The response's `audioContent` value (base64-encoded audio, per the inline comment at the return).
+   * @throws Error when the HTTP status is not OK or `audioContent` is missing; any failure is logged and rethrown.
+   *
+   * NOTE(review): confirm that `gemini-1.5-flash:generateSpeech` and this input/voice/audioConfig body
+   * are accepted by the Gemini API. The implementation removed by this commit instead called
+   * `gemini-2.5-flash-preview-tts:generateContent` with `responseModalities: ["AUDIO"]`.
+   */
+  async generateSpeech(text: string, voice: VoiceName = "Puck"): Promise<string> {
+    const apiKey = this.getNextApiKey();
+    const requestBody = {
+      input: {
+        text: text
+      },
+      voice: {
+        name: voice,
+        languageCode: "en-US" // fixed for every voice
+      },
+      audioConfig: {
+        audioEncoding: "MP3",
+        speakingRate: 1.0, // fixed; this method takes no speed argument
+        pitch: 0.0,
+        volumeGainDb: 0.0
+      }
+    };
+    try {
+      const response = await fetch(
+        `https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateSpeech?key=${apiKey}`,
+        {
+          method: "POST",
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify(requestBody)
+        }
+      );
+      if (!response.ok) {
+        const errorText = await response.text();
+        throw new Error(`Gemini TTS API error: ${response.status} - ${errorText}`);
+      }
+      const data = await response.json();
+      if (!data.audioContent) {
+        throw new Error("No audio content returned from Gemini TTS");
+      }
+      return data.audioContent; // base64-encoded audio data
+    } catch (error) {
+      console.error("Error generating speech:", error); // log, then let the caller handle it
+      throw error;
+    }
+  }
+  // Returns the list of available voices: the module-level GEMINI_VOICES array itself, not a copy
+  getAvailableVoices(): typeof GEMINI_VOICES {
+    return GEMINI_VOICES;
+  }
   async generateContent(messages: OpenAIMessage[], modelName: string, enableSearch: boolean = false): Promise<string> {
     const hasDocument = messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "document"));
     if (hasDocument) {
     const apiKey = this.getNextApiKey();
     const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
     const contents = messages.map(msg => {
       if (typeof msg.content === "string") {
         return { role: msg.role === "assistant" ? "model" : "user", parts: [{ text: msg.content }] };
       contents,
       generationConfig: { temperature: 0.7, maxOutputTokens: 4096 }
     };
     if (enableSearch) {
       requestBody.tools = [{ googleSearchRetrieval: {} }];
     }
       const errorText = await response.text();
       throw new Error(`Google AI API error: ${response.status} - ${errorText}`);
     }
     const data = await response.json();
     if (!data.candidates || data.candidates.length === 0) {
       throw new Error("No response generated from Google AI");
     }
     const candidate = data.candidates[0];
     if (candidate.finishReason === "SAFETY") {
       throw new Error("Response blocked due to safety filters");
     }
     return candidate.content?.parts[0]?.text || "No response generated";
   }
       const errorText = await response.text();
       throw new Error(`Image ${inputImage ? 'editing' : 'generation'} failed: ${response.status} - ${errorText}`);
     }
     const data = await response.json();
     if (!data.candidates || data.candidates.length === 0) {
       throw new Error(`No ${inputImage ? 'edited' : 'generated'} image returned`);
       result.imageBase64 = imageBase64;
       result.imageUrl = `data:image/png;base64,${imageBase64}`;
     }
     return result;
   }
   async generateContentWithGrounding(messages: OpenAIMessage[], modelName: string): Promise<string> {
     const apiKey = this.getNextApiKey();
     const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
     if (candidate.finishReason === "SAFETY") {
       throw new Error("Response blocked due to safety filters");
     }
     return candidate.content?.parts[0]?.text || "No response generated";
   }
            lowerUrl.includes('.md') || lowerUrl.startsWith('data:text/markdown');
   }
+  /**
+   * Handles `POST /v1/audio/speech`: validates the request, synthesizes speech
+   * through `googleAI.generateSpeech`, and returns the decoded audio bytes.
+   *
+   * Responds with 400 (`invalid_request_error`) when the body is not valid JSON,
+   * when `input` is missing, blank, or not a string, or when `voice` is not one
+   * of the names returned by `googleAI.getAvailableVoices()`. Any other failure
+   * is reported as a 500 (`api_error`).
+   *
+   * @param request Incoming HTTP request whose JSON body follows `TTSRequest`;
+   *   `voice` defaults to "Puck" when omitted.
+   * @returns The audio payload on success, otherwise an OpenAI-style JSON error.
+   */
+  private async handleTTS(request: Request): Promise<Response> {
+    // Builds the OpenAI-style error envelope shared by every failure path below.
+    const errorResponse = (message: string, type: string, status: number): Response =>
+      new Response(
+        JSON.stringify({ error: { message, type, code: null } }),
+        { status, headers: { "Content-Type": "application/json" } }
+      );
     try {
+      let payload: Partial<TTSRequest>;
+      try {
+        // `?? {}` routes a literal JSON `null` body to the validation below
+        // instead of letting the destructuring throw.
+        payload = (await request.json()) ?? {};
+      } catch {
+        // A malformed body is the caller's mistake, not a server failure.
+        return errorResponse("Request body must be valid JSON", "invalid_request_error", 400);
+      }
+      const { input, voice = "Puck" } = payload;
+      // Validate the input; the typeof guard keeps non-string values from
+      // throwing on `.trim()` and surfacing as a 500.
+      if (typeof input !== "string" || input.trim().length === 0) {
+        return errorResponse("Input text is required", "invalid_request_error", 400);
+      }
+      // Validate the voice against the supported list.
+      const availableVoices = this.googleAI.getAvailableVoices();
+      if (!availableVoices.some(v => v.name === voice)) {
+        return errorResponse(
+          `Invalid voice "${voice}". Available voices: ${availableVoices.map(v => v.name).join(", ")}`,
+          "invalid_request_error",
+          400
+        );
+      }
+      console.log(`TTS request: voice=${voice}, text length=${input.length}`);
+      // Generate the speech; the result is base64-encoded audio.
+      const audioBase64 = await this.googleAI.generateSpeech(input, voice);
+      // Decode the base64 string into raw bytes for the response body.
+      const audioData = Uint8Array.from(atob(audioBase64), c => c.charCodeAt(0));
+      return new Response(audioData, {
+        headers: {
+          "Content-Type": "audio/mpeg",
+          "Content-Length": audioData.length.toString()
         }
+      });
+    } catch (error) {
+      // The caught value is `unknown`; narrow it before reading `.message`.
+      const message = error instanceof Error ? error.message : String(error);
+      console.error("Error in TTS:", message);
+      return errorResponse(message, "api_error", 500);
+    }
+  }
+  /**
+   * Handles `GET /v1/voices`: serializes the voices reported by
+   * `googleAI.getAvailableVoices()` as an OpenAI-style `list` object.
+   *
+   * @returns A JSON response with one `{ id, name, language, gender }` entry per
+   *   voice (`id` mirrors `name`), or a 500 JSON error if building it throws.
+   */
+  private async handleVoices(): Promise<Response> {
+    const jsonHeaders = { "Content-Type": "application/json" };
+    try {
+      const data = this.googleAI.getAvailableVoices().map(({ name, language, gender }) => ({
+        id: name,
+        name,
+        language,
+        gender
+      }));
+      const payload = JSON.stringify({ object: "list", data });
+      return new Response(payload, { headers: jsonHeaders });
     } catch (error) {
+      console.error("Error fetching voices:", error);
+      const failure = JSON.stringify({ error: { message: "Failed to fetch voices." } });
+      return new Response(failure, { status: 500, headers: jsonHeaders });
     }
   }
   private async handleChatCompletions(request: Request): Promise<Response> {
     try {
       const body: OpenAIRequest = await request.json();
       );
       const hasImages = body.messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "image_url"));
       let inputImages: any[] = [];
       if (hasImages) {
         body.messages.forEach(msg => {
           }
         });
       }
+      let responseText: string;
+      // Routing logic based on keywords and content types
       if (hasDocument) {
         responseText = await this.googleAI.generateContentWithDocument(body.messages, requestedModel);
       } else if (this.googleAI.isImageEditingModel(requestedModel) && hasImages) {
       );
     }
   }
   private async streamStringAsOpenAIResponse(content: string, modelName: string): Promise<ReadableStream<Uint8Array>> {
     const encoder = new TextEncoder();
     const streamId = `chatcmpl-${Date.now()}`;
     return new ReadableStream({
         start(controller) {
             const initialChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { role: 'assistant', content: '' }, finish_reason: null }] };
+            controller.enqueue(encoder.encode(`data: ${JSON.stringify(initialChunk)}\\n\\n`));
         },
         pull(controller) {
             if (contentQueue.length === 0) {
                 const finalChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] };
+                controller.enqueue(encoder.encode(`data: ${JSON.stringify(finalChunk)}\\n\\n`));
+                controller.enqueue(encoder.encode('data: [DONE]\\n\\n'));
                 controller.close();
                 return;
             }
             const char = contentQueue.shift();
             const chunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { content: char }, finish_reason: null }] };
+            controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\\n\\n`));
         }
     });
   }
   private async handleModels(): Promise<Response> {
     try {
       const googleModels = await this.googleAI.fetchOfficialModels();
       const models = {
         object: "list",
+        data: googleModels.map(model => {
+          const modelId = model.name.replace('models/', '');
+          return {
+            id: modelId, object: "model", created: Math.floor(Date.now() / 1000), owned_by: "google",
+            description: model.description || model.displayName, maxTokens: model.inputTokenLimit || model.maxTokens
+          };
+        })
       };
       return new Response(JSON.stringify(models), { headers: { "Content-Type": "application/json" } });
     } catch (error) {
       console.error("Error fetching models:", error);
       return new Response(JSON.stringify({ error: { message: "Failed to fetch models." } }), { status: 500 });
     }
   }
   private async handleStatus(): Promise<Response> {
       const status = {
+          status: "healthy", timestamp: new Date().toISOString(), version: "2.6.0",
           api_keys_loaded: this.googleAI.apiKeys.length,
           models_in_cache: this.googleAI.cachedModels.length,
+          models_last_fetched: this.googleAI.modelsLastFetch > 0 ? new Date(this.googleAI.modelsLastFetch).toISOString() : "never",
+          features: {
+            chat_completions: true,
+            image_generation: true,
+            document_processing: true,
+            text_to_speech: true,
+            voice_list: true
+          }
       };
       return new Response(JSON.stringify(status), { headers: { "Content-Type": "application/json" } });
   }
     const url = new URL(request.url);
     let response: Response;
+    // Handle routes
     if (url.pathname === "/health" || url.pathname === "/status") {
       response = await this.handleStatus();
     } else if (!this.authenticate(request)) {
       response = new Response(JSON.stringify({ error: { message: "Unauthorized" } }), { status: 401 });
     } else if (url.pathname === "/v1/chat/completions" && request.method === "POST") {
       response = await this.handleChatCompletions(request);
     } else if (url.pathname === "/v1/models" && request.method === "GET") {
       response = await this.handleModels();
+    } else if (url.pathname === "/v1/audio/speech" && request.method === "POST") {
+      response = await this.handleTTS(request);
+    } else if (url.pathname === "/v1/voices" && request.method === "GET") {
+      response = await this.handleVoices();
     } else {
       response = new Response("Not Found", { status: 404 });
     }
+    // Add CORS headers to all responses
     const finalHeaders = new Headers(response.headers);
     for (const [key, value] of Object.entries(corsHeaders)) {
       finalHeaders.set(key, value);
 // --- Server startup ---
 const server = new OpenAICompatibleServer();
+console.log("🚀 OpenAI Compatible Server with Google AI and TTS starting on port 7860...");
 console.log(`✅ Loaded ${server.googleAI.apiKeys.length} API key(s).`);
 console.log(`📄 Max document size set to ${MAX_DOCUMENT_SIZE_MB}MB.`);
+// Pre-fetch models at startup; a failure is only a warning because the model
+// list can still be fetched on the first request.
 server.googleAI.fetchOfficialModels().then(models => {
   console.log(`✅ Successfully fetched ${models.length} models from Google AI.`);
 }).catch(error => {
+  // The rejection value is not guaranteed to be an Error; narrow before reading it.
+  const reason = error instanceof Error ? error.message : String(error);
+  console.warn(`⚠️ Could not pre-fetch models: ${reason}. Will use fallbacks or fetch on first request.`);
 });
+// "\n" (not "\\n") so the banner starts with a blank line rather than a literal backslash-n.
+console.log("\n🔗 Endpoints:");
 console.log("   POST /v1/chat/completions");
 console.log("   GET  /v1/models");
+console.log("   POST /v1/audio/speech (TTS)");
+console.log("   GET  /v1/voices");
 console.log("   GET  /status");
+const voices = server.googleAI.getAvailableVoices();
+console.log(`\n🎤 Available TTS voices: ${voices.map(v => v.name).join(", ")}`);
 await serve(
   (request: Request) => server.handleRequest(request),
+  { port: 7860 }
+);