Spaces:

xt8
/

g

Running

App Files Files Community

xt8 committed on Jul 8, 2025

Commit

4fad59b

verified ·

1 Parent(s): 7ee1829

Update main.ts

Browse files

Files changed (1) hide show

main.ts +76 -43

main.ts CHANGED Viewed

@@ -1,5 +1,9 @@
 import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
-import wav from 'wav'
 // --- 常量定义 ---
 const MAX_DOCUMENT_SIZE_MB = 20; // 设置最大文档大小限制（单位：MB）
@@ -30,7 +34,7 @@ interface OpenAITTSRequest {
     model: string; // e.g., 'tts-1', 'tts-1-hd'
     input: string; // The text to synthesize
     voice: 'Puck' | 'Charon' | 'Kore' | 'Fenrir' | 'Leda' | 'Aoede';
-    response_format?: 'mp3' | 'opus' | 'aac' | 'flac'; // Google TTS returns MP3, so we'll ignore others for now
     speed?: number; // Not directly supported by Gemini TTS, will be ignored
 }
@@ -62,28 +66,6 @@ class GoogleAIService {
     this.currentKeyIndex = (this.currentKeyIndex + 1) % this.apiKeys.length;
     return key;
   }
-  async function saveWaveFile(
-    filename,
-    pcmData,
-    channels = 1,
-    rate = 24000,
-    sampleWidth = 2,
-    ) {
-    return new Promise((resolve, reject) => {
-        const writer = new wav.FileWriter(filename, {
-                channels,
-                sampleRate: rate,
-                bitDepth: sampleWidth * 8,
-        });
-        writer.on('finish', resolve);
-        writer.on('error', reject);
-        writer.write(pcmData);
-        writer.end();
-    });
-    }
   // --- [新增] TTS 功能 ---
@@ -110,12 +92,12 @@ class GoogleAIService {
    * @param input - 要转换为语音的文本。
    * @param model - 请求的模型（在Google端，我们硬编码为TTS模型）。
    * @param voice - OpenAI 格式的语音名称。
-   * @returns 返回包含音频数据的 ArrayBuffer。
    */
   async generateSpeech(input: string, model: string, voice: string): Promise<ArrayBuffer> {
     const apiKey = this.getNextApiKey();
     const googleVoice = this.getGoogleVoice(voice);
-    // 根据 curl 命令，模型是固定的 TTS 模型
     const ttsModel = "gemini-2.5-flash-preview-tts";
     console.log(`Generating speech with model: ${ttsModel}, voice: ${googleVoice} (mapped from OpenAI's '${voice}')`);
@@ -157,17 +139,21 @@ class GoogleAIService {
     const data = await response.json();
-    // 提取 base64 编码的音频数据
     const audioContentBase64 = data.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
     if (!audioContentBase64) {
         throw new Error("No audio data returned from Google API. The response might be blocked or empty.");
-    }
-    const audioBuffer = Buffer.from(audioContentBase64, 'base64');
-    const fileName = 'out.wav';
-    return await saveWaveFile(fileName, audioBuffer);
   }
-  // --- 现有代码保持不变 ---
   async fetchOfficialModels(): Promise<any[]> {
     const now = Date.now();
@@ -564,7 +550,52 @@ class OpenAICompatibleServer {
   }
   /**
-   * [新增] 处理 OpenAI 兼容的 TTS 请求
    */
   private async handleAudioSpeech(request: Request): Promise<Response> {
     try {
@@ -574,11 +605,16 @@ class OpenAICompatibleServer {
             return new Response(JSON.stringify({ error: { message: "Missing required fields: input, voice, and model.", type: "invalid_request_error" } }), { status: 400 });
         }
-        const audioFile = await this.googleAI.generateSpeech(body.input, body.model, body.voice);
-        // Google Gemini TTS API 生成的是 MP3 格式的音频。
-        return new Response(audioBuffer, {
             headers: {
                 "Content-Type": "audio/wav",
                 "Access-Control-Allow-Origin": "*",
             }
@@ -756,12 +792,10 @@ class OpenAICompatibleServer {
     const url = new URL(request.url);
     let response: Response;
-    // Handle routes
     if (url.pathname === "/health" || url.pathname === "/status") {
       response = await this.handleStatus();
     } else if (!this.authenticate(request)) {
       response = new Response(JSON.stringify({ error: { message: "Unauthorized" } }), { status: 401 });
-    // [修改] 添加 TTS 路由
     } else if (url.pathname === "/v1/audio/speech" && request.method === "POST") {
       response = await this.handleAudioSpeech(request);
     } else if (url.pathname === "/v1/chat/completions" && request.method === "POST") {
@@ -772,7 +806,6 @@ class OpenAICompatibleServer {
       response = new Response("Not Found", { status: 404 });
     }
-    // Add CORS headers to all responses
     const finalHeaders = new Headers(response.headers);
     for (const [key, value] of Object.entries(corsHeaders)) {
       finalHeaders.set(key, value);
@@ -789,7 +822,6 @@ console.log("🚀 OpenAI Compatible Server with Google AI starting on port 8000.
 console.log(`✅ Loaded ${server.googleAI.apiKeys.length} API key(s).`);
 console.log(`📄 Max document size set to ${MAX_DOCUMENT_SIZE_MB}MB.`);
-// Pre-fetch models at startup
 server.googleAI.fetchOfficialModels().then(models => {
   console.log(`✅ Successfully fetched ${models.length} models from Google AI.`);
 }).catch(error => {
@@ -798,12 +830,13 @@ server.googleAI.fetchOfficialModels().then(models => {
 console.log("\n🔗 Endpoints:");
 console.log("   POST /v1/chat/completions");
-console.log("   POST /v1/audio/speech      <-- [NEW] OpenAI TTS compatible endpoint"); // [修改] 更新启动日志
 console.log("   GET  /v1/models");
 console.log("   GET  /status");
-// [修改] 端口从 7860 改为 8000，与日志一致。您可以根据需要改回 7860。
 await serve(
   (request: Request) => server.handleRequest(request),
-  { port: 7860 }
 );

 import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
+// [修改] 引入具体的 Encoder 类，并使用 npm 导入方式以获得更好的 Deno 兼容性
+import { Encoder } from "npm:wav@1.0.2";
+// [新增] 引入 MP3 解码器
+import { MpegDecoder } from "npm:mpg123-decoder@0.6.5";
 // --- 常量定义 ---
 const MAX_DOCUMENT_SIZE_MB = 20; // 设置最大文档大小限制（单位：MB）
     model: string; // e.g., 'tts-1', 'tts-1-hd'
     input: string; // The text to synthesize
     voice: 'Puck' | 'Charon' | 'Kore' | 'Fenrir' | 'Leda' | 'Aoede';
+    response_format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav'; // 增加了 wav 选项
     speed?: number; // Not directly supported by Gemini TTS, will be ignored
 }
     this.currentKeyIndex = (this.currentKeyIndex + 1) % this.apiKeys.length;
     return key;
   }
   // --- [新增] TTS 功能 ---
    * @param input - 要转换为语音的文本。
    * @param model - 请求的模型（在Google端，我们硬编码为TTS模型）。
    * @param voice - OpenAI 格式的语音名称。
+   * @returns 返回包含 MP3 音频数据的 ArrayBuffer。
    */
   async generateSpeech(input: string, model: string, voice: string): Promise<ArrayBuffer> {
     const apiKey = this.getNextApiKey();
     const googleVoice = this.getGoogleVoice(voice);
+    // Google Gemini TTS 目前使用固定的模型名称
     const ttsModel = "gemini-2.5-flash-preview-tts";
     console.log(`Generating speech with model: ${ttsModel}, voice: ${googleVoice} (mapped from OpenAI's '${voice}')`);
     const data = await response.json();
     const audioContentBase64 = data.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
     if (!audioContentBase64) {
         throw new Error("No audio data returned from Google API. The response might be blocked or empty.");
+    }
+    const binaryString = atob(audioContentBase64);
+    const len = binaryString.length;
+    const bytes = new Uint8Array(len);
+    for (let i = 0; i < len; i++) {
+        bytes[i] = binaryString.charCodeAt(i);
+    }
+    return bytes.buffer;
   }
+  // --- 现有代码保持不变 (折叠以保持简洁) ---
   async fetchOfficialModels(): Promise<any[]> {
     const now = Date.now();
   }
   /**
+   * [Added] Transcodes MP3 audio data into WAV format.
+   *
+   * Steps, as written: wait for the decoder to become ready, decode the input
+   * bytes, feed the decoded data into a WAV encoder, collect the encoder's
+   * output chunks, and concatenate them into one Uint8Array. The decoder is
+   * always freed in the `finally` clause, even when decoding or encoding throws.
+   *
+   * NOTE(review): the helper this commit removes (`saveWaveFile`) wrapped the
+   * same Gemini payload directly as raw PCM (1 channel, 24000 Hz, 16-bit).
+   * If the API really returns PCM rather than MP3, decoding it as MP3 here is
+   * presumably wrong -- confirm the payload's actual mime type before relying
+   * on this path.
+   * NOTE(review): confirm that `npm:wav` exports `Encoder` with a
+   * `(channels, { sampleRate })` constructor and an async-iterable `end()`
+   * result, and that `npm:mpg123-decoder` exports `MpegDecoder` whose
+   * `decode()` returns `{ data, channels, sampleRate }`. The published docs
+   * for those packages appear to describe different names and shapes -- verify.
+   *
+   * @param mp3Buffer ArrayBuffer containing the (assumed) MP3 data.
+   * @returns A promise resolving to a Uint8Array containing the WAV data.
+   */
+  private async _transcodeMp3ToWav(mp3Buffer: ArrayBuffer): Promise<Uint8Array> {
+    console.log("Transcoding MP3 to WAV...");
+    const decoder = new MpegDecoder();
+    // Ensure the decoder's resources are released after use
+    try {
+        await decoder.ready;
+        const mp3Data = new Uint8Array(mp3Buffer);
+        const { data, channels, sampleRate } = decoder.decode(mp3Data);
+        console.log(`Decoded MP3: ${sampleRate}Hz, ${channels} channels, ${data.length} samples.`);
+        // Encode the raw PCM data as WAV using the 'wav' library
+        const wavEncoder = new Encoder(channels, { sampleRate });
+        wavEncoder.write(data);
+        const wavDataStream = wavEncoder.end();
+        // Collect the WAV data stream into a single Uint8Array
+        const chunks: Uint8Array[] = [];
+        for await (const chunk of wavDataStream) {
+            chunks.push(chunk);
+        }
+        // Merge all chunks into one contiguous buffer
+        const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
+        const wavResult = new Uint8Array(totalLength);
+        let offset = 0;
+        for (const chunk of chunks) {
+            wavResult.set(chunk, offset);
+            offset += chunk.length;
+        }
+        console.log(`Successfully transcoded to WAV (${(wavResult.length / 1024).toFixed(2)} KB).`);
+        return wavResult;
+    } finally {
+        decoder.free(); // Release the memory held by the wasm decoder
+    }
+  }
+  /**
+   * [修改] 处理 OpenAI 兼容的 TTS 请求, 并将结果转为 WAV 格式
    */
   private async handleAudioSpeech(request: Request): Promise<Response> {
     try {
             return new Response(JSON.stringify({ error: { message: "Missing required fields: input, voice, and model.", type: "invalid_request_error" } }), { status: 400 });
         }
+        // 1. 从 Google 获取 MP3 格式的音频
+        const mp3AudioBuffer = await this.googleAI.generateSpeech(body.input, body.model, body.voice);
+        // 2. 将 MP3 转码为 WAV
+        const wavAudioBuffer = await this._transcodeMp3ToWav(mp3AudioBuffer);
+        // 3. 返回 WAV 格式的音频
+        return new Response(wavAudioBuffer, {
             headers: {
+                // [修改] Content-Type 已更改为 WAV
                 "Content-Type": "audio/wav",
                 "Access-Control-Allow-Origin": "*",
             }
     const url = new URL(request.url);
     let response: Response;
     if (url.pathname === "/health" || url.pathname === "/status") {
       response = await this.handleStatus();
     } else if (!this.authenticate(request)) {
       response = new Response(JSON.stringify({ error: { message: "Unauthorized" } }), { status: 401 });
     } else if (url.pathname === "/v1/audio/speech" && request.method === "POST") {
       response = await this.handleAudioSpeech(request);
     } else if (url.pathname === "/v1/chat/completions" && request.method === "POST") {
       response = new Response("Not Found", { status: 404 });
     }
     const finalHeaders = new Headers(response.headers);
     for (const [key, value] of Object.entries(corsHeaders)) {
       finalHeaders.set(key, value);
 console.log(`✅ Loaded ${server.googleAI.apiKeys.length} API key(s).`);
 console.log(`📄 Max document size set to ${MAX_DOCUMENT_SIZE_MB}MB.`);
 server.googleAI.fetchOfficialModels().then(models => {
   console.log(`✅ Successfully fetched ${models.length} models from Google AI.`);
 }).catch(error => {
 console.log("\n🔗 Endpoints:");
 console.log("   POST /v1/chat/completions");
+// [Changed] Updated the endpoint log line to reflect WAV output
+console.log("   POST /v1/audio/speech      <-- [NEW] OpenAI TTS compatible endpoint (outputs WAV)");
 console.log("   GET  /v1/models");
 console.log("   GET  /status");
 await serve(
   (request: Request) => server.handleRequest(request),
+  // [Note] The original code listened on port 7860, so that is kept here. NOTE(review): the startup banner earlier in the file appears to announce port 8000 -- confirm and align the log message with this value.
+  { port: 7860 }
 );