xt8 committed on
Commit
4fad59b
·
verified ·
1 Parent(s): 7ee1829

Update main.ts

Browse files
Files changed (1) hide show
  1. main.ts +76 -43
main.ts CHANGED
@@ -1,5 +1,9 @@
1
  import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
2
- import wav from 'wav'
 
 
 
 
3
 
4
  // --- 常量定义 ---
5
  const MAX_DOCUMENT_SIZE_MB = 20; // 设置最大文档大小限制(单位:MB)
@@ -30,7 +34,7 @@ interface OpenAITTSRequest {
30
  model: string; // e.g., 'tts-1', 'tts-1-hd'
31
  input: string; // The text to synthesize
32
  voice: 'Puck' | 'Charon' | 'Kore' | 'Fenrir' | 'Leda' | 'Aoede';
33
- response_format?: 'mp3' | 'opus' | 'aac' | 'flac'; // Google TTS returns MP3, so we'll ignore others for now
34
  speed?: number; // Not directly supported by Gemini TTS, will be ignored
35
  }
36
 
@@ -62,28 +66,6 @@ class GoogleAIService {
62
  this.currentKeyIndex = (this.currentKeyIndex + 1) % this.apiKeys.length;
63
  return key;
64
  }
65
-
66
- async function saveWaveFile(
67
- filename,
68
- pcmData,
69
- channels = 1,
70
- rate = 24000,
71
- sampleWidth = 2,
72
- ) {
73
- return new Promise((resolve, reject) => {
74
- const writer = new wav.FileWriter(filename, {
75
- channels,
76
- sampleRate: rate,
77
- bitDepth: sampleWidth * 8,
78
- });
79
-
80
- writer.on('finish', resolve);
81
- writer.on('error', reject);
82
-
83
- writer.write(pcmData);
84
- writer.end();
85
- });
86
- }
87
 
88
  // --- [新增] TTS 功能 ---
89
 
@@ -110,12 +92,12 @@ class GoogleAIService {
110
  * @param input - 要转换为语音的文本。
111
  * @param model - 请求的模型(在Google端,我们硬编码为TTS模型)。
112
  * @param voice - OpenAI 格式的语音名称。
113
- * @returns 返回包含音频数据的 ArrayBuffer。
114
  */
115
  async generateSpeech(input: string, model: string, voice: string): Promise<ArrayBuffer> {
116
  const apiKey = this.getNextApiKey();
117
  const googleVoice = this.getGoogleVoice(voice);
118
- // 根据 curl 命令,模型是固定的 TTS 模型
119
  const ttsModel = "gemini-2.5-flash-preview-tts";
120
 
121
  console.log(`Generating speech with model: ${ttsModel}, voice: ${googleVoice} (mapped from OpenAI's '${voice}')`);
@@ -157,17 +139,21 @@ class GoogleAIService {
157
 
158
  const data = await response.json();
159
 
160
- // 提取 base64 编码的音频数据
161
  const audioContentBase64 = data.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
162
  if (!audioContentBase64) {
163
  throw new Error("No audio data returned from Google API. The response might be blocked or empty.");
164
- }
165
- const audioBuffer = Buffer.from(audioContentBase64, 'base64');
166
- const fileName = 'out.wav';
167
- return await saveWaveFile(fileName, audioBuffer);
 
 
 
 
 
168
  }
169
 
170
- // --- 现有代码保持不变 ---
171
 
172
  async fetchOfficialModels(): Promise<any[]> {
173
  const now = Date.now();
@@ -564,7 +550,52 @@ class OpenAICompatibleServer {
564
  }
565
 
566
  /**
567
- * [新增] 处理 OpenAI 兼容的 TTS 请求
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
568
  */
569
  private async handleAudioSpeech(request: Request): Promise<Response> {
570
  try {
@@ -574,11 +605,16 @@ class OpenAICompatibleServer {
574
  return new Response(JSON.stringify({ error: { message: "Missing required fields: input, voice, and model.", type: "invalid_request_error" } }), { status: 400 });
575
  }
576
 
577
- const audioFile = await this.googleAI.generateSpeech(body.input, body.model, body.voice);
 
 
 
 
578
 
579
- // Google Gemini TTS API 生成的是 MP3 格式的音频
580
- return new Response(audioBuffer, {
581
  headers: {
 
582
  "Content-Type": "audio/wav",
583
  "Access-Control-Allow-Origin": "*",
584
  }
@@ -756,12 +792,10 @@ class OpenAICompatibleServer {
756
  const url = new URL(request.url);
757
  let response: Response;
758
 
759
- // Handle routes
760
  if (url.pathname === "/health" || url.pathname === "/status") {
761
  response = await this.handleStatus();
762
  } else if (!this.authenticate(request)) {
763
  response = new Response(JSON.stringify({ error: { message: "Unauthorized" } }), { status: 401 });
764
- // [修改] 添加 TTS 路由
765
  } else if (url.pathname === "/v1/audio/speech" && request.method === "POST") {
766
  response = await this.handleAudioSpeech(request);
767
  } else if (url.pathname === "/v1/chat/completions" && request.method === "POST") {
@@ -772,7 +806,6 @@ class OpenAICompatibleServer {
772
  response = new Response("Not Found", { status: 404 });
773
  }
774
 
775
- // Add CORS headers to all responses
776
  const finalHeaders = new Headers(response.headers);
777
  for (const [key, value] of Object.entries(corsHeaders)) {
778
  finalHeaders.set(key, value);
@@ -789,7 +822,6 @@ console.log("🚀 OpenAI Compatible Server with Google AI starting on port 8000.
789
  console.log(`✅ Loaded ${server.googleAI.apiKeys.length} API key(s).`);
790
  console.log(`📄 Max document size set to ${MAX_DOCUMENT_SIZE_MB}MB.`);
791
 
792
- // Pre-fetch models at startup
793
  server.googleAI.fetchOfficialModels().then(models => {
794
  console.log(`✅ Successfully fetched ${models.length} models from Google AI.`);
795
  }).catch(error => {
@@ -798,12 +830,13 @@ server.googleAI.fetchOfficialModels().then(models => {
798
 
799
  console.log("\n🔗 Endpoints:");
800
  console.log(" POST /v1/chat/completions");
801
- console.log(" POST /v1/audio/speech <-- [NEW] OpenAI TTS compatible endpoint"); // [修改] 更新启动日志
 
802
  console.log(" GET /v1/models");
803
  console.log(" GET /status");
804
 
805
- // [修改] 端口从 7860 改为 8000,与日志一致。您可以根据需要改回 7860。
806
  await serve(
807
  (request: Request) => server.handleRequest(request),
808
- { port: 7860 }
 
809
  );
 
1
  import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
2
+ // [修改] 引入具体的 Encoder 类,并使用 npm 导入方式以获得更好的 Deno 兼容性
3
+ import { Encoder } from "npm:wav@1.0.2";
4
+ // [新增] 引入 MP3 解码器
5
+ import { MpegDecoder } from "npm:mpg123-decoder@0.6.5";
6
+
7
 
8
  // --- 常量定义 ---
9
  const MAX_DOCUMENT_SIZE_MB = 20; // 设置最大文档大小限制(单位:MB)
 
34
  model: string; // e.g., 'tts-1', 'tts-1-hd'
35
  input: string; // The text to synthesize
36
  voice: 'Puck' | 'Charon' | 'Kore' | 'Fenrir' | 'Leda' | 'Aoede';
37
+ response_format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav'; // 增加了 wav 选项
38
  speed?: number; // Not directly supported by Gemini TTS, will be ignored
39
  }
40
 
 
66
  this.currentKeyIndex = (this.currentKeyIndex + 1) % this.apiKeys.length;
67
  return key;
68
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  // --- [新增] TTS 功能 ---
71
 
 
92
  * @param input - 要转换为语音的文本。
93
  * @param model - 请求的模型(在Google端,我们硬编码为TTS模型)。
94
  * @param voice - OpenAI 格式的语音名称。
95
+ * @returns 返回包含 MP3 音频数据的 ArrayBuffer。
96
  */
97
  async generateSpeech(input: string, model: string, voice: string): Promise<ArrayBuffer> {
98
  const apiKey = this.getNextApiKey();
99
  const googleVoice = this.getGoogleVoice(voice);
100
+ // Google Gemini TTS 目前使用固定的模型名称
101
  const ttsModel = "gemini-2.5-flash-preview-tts";
102
 
103
  console.log(`Generating speech with model: ${ttsModel}, voice: ${googleVoice} (mapped from OpenAI's '${voice}')`);
 
139
 
140
  const data = await response.json();
141
 
 
142
  const audioContentBase64 = data.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
143
  if (!audioContentBase64) {
144
  throw new Error("No audio data returned from Google API. The response might be blocked or empty.");
145
+ }
146
+
147
+ const binaryString = atob(audioContentBase64);
148
+ const len = binaryString.length;
149
+ const bytes = new Uint8Array(len);
150
+ for (let i = 0; i < len; i++) {
151
+ bytes[i] = binaryString.charCodeAt(i);
152
+ }
153
+ return bytes.buffer;
154
  }
155
 
156
+ // --- 现有代码保持不变 (折叠以保持简洁) ---
157
 
158
  async fetchOfficialModels(): Promise<any[]> {
159
  const now = Date.now();
 
550
  }
551
 
552
  /**
553
+ * [新增] 将MP3音频数据转码为WAV格式。
554
+ * @param mp3Buffer 包含MP3数据的ArrayBuffer。
555
+ * @returns 返回一个包含WAV数据的Promise<Uint8Array>。
556
+ */
557
+ private async _transcodeMp3ToWav(mp3Buffer: ArrayBuffer): Promise<Uint8Array> {
558
+ console.log("Transcoding MP3 to WAV...");
559
+ const decoder = new MpegDecoder();
560
+
561
+ // 确保解码器资源在使用后被释放
562
+ try {
563
+ await decoder.ready;
564
+ const mp3Data = new Uint8Array(mp3Buffer);
565
+ const { data, channels, sampleRate } = decoder.decode(mp3Data);
566
+
567
+ console.log(`Decoded MP3: ${sampleRate}Hz, ${channels} channels, ${data.length} samples.`);
568
+
569
+ // 使用 'wav' 库将原始 PCM 数据编码为 WAV
570
+ const wavEncoder = new Encoder(channels, { sampleRate });
571
+ wavEncoder.write(data);
572
+ const wavDataStream = wavEncoder.end();
573
+
574
+ // 将WAV数据流收集到一个 Uint8Array 中
575
+ const chunks: Uint8Array[] = [];
576
+ for await (const chunk of wavDataStream) {
577
+ chunks.push(chunk);
578
+ }
579
+
580
+ // 合并所有块
581
+ const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
582
+ const wavResult = new Uint8Array(totalLength);
583
+ let offset = 0;
584
+ for (const chunk of chunks) {
585
+ wavResult.set(chunk, offset);
586
+ offset += chunk.length;
587
+ }
588
+
589
+ console.log(`Successfully transcoded to WAV (${(wavResult.length / 1024).toFixed(2)} KB).`);
590
+ return wavResult;
591
+ } finally {
592
+ decoder.free(); // 释放 wasm 解码器占用的内存
593
+ }
594
+ }
595
+
596
+
597
+ /**
598
+ * [修改] 处理 OpenAI 兼容的 TTS 请求, 并将结果转为 WAV 格式
599
  */
600
  private async handleAudioSpeech(request: Request): Promise<Response> {
601
  try {
 
605
  return new Response(JSON.stringify({ error: { message: "Missing required fields: input, voice, and model.", type: "invalid_request_error" } }), { status: 400 });
606
  }
607
 
608
+ // 1. Google 获取 MP3 格式的音频
609
+ const mp3AudioBuffer = await this.googleAI.generateSpeech(body.input, body.model, body.voice);
610
+
611
+ // 2. 将 MP3 转码为 WAV
612
+ const wavAudioBuffer = await this._transcodeMp3ToWav(mp3AudioBuffer);
613
 
614
+ // 3. 返回 WAV 格式的音频
615
+ return new Response(wavAudioBuffer, {
616
  headers: {
617
+ // [修改] Content-Type 已更改为 WAV
618
  "Content-Type": "audio/wav",
619
  "Access-Control-Allow-Origin": "*",
620
  }
 
792
  const url = new URL(request.url);
793
  let response: Response;
794
 
 
795
  if (url.pathname === "/health" || url.pathname === "/status") {
796
  response = await this.handleStatus();
797
  } else if (!this.authenticate(request)) {
798
  response = new Response(JSON.stringify({ error: { message: "Unauthorized" } }), { status: 401 });
 
799
  } else if (url.pathname === "/v1/audio/speech" && request.method === "POST") {
800
  response = await this.handleAudioSpeech(request);
801
  } else if (url.pathname === "/v1/chat/completions" && request.method === "POST") {
 
806
  response = new Response("Not Found", { status: 404 });
807
  }
808
 
 
809
  const finalHeaders = new Headers(response.headers);
810
  for (const [key, value] of Object.entries(corsHeaders)) {
811
  finalHeaders.set(key, value);
 
822
  console.log(`✅ Loaded ${server.googleAI.apiKeys.length} API key(s).`);
823
  console.log(`📄 Max document size set to ${MAX_DOCUMENT_SIZE_MB}MB.`);
824
 
 
825
  server.googleAI.fetchOfficialModels().then(models => {
826
  console.log(`✅ Successfully fetched ${models.length} models from Google AI.`);
827
  }).catch(error => {
 
830
 
831
  console.log("\n🔗 Endpoints:");
832
  console.log(" POST /v1/chat/completions");
833
+ // [修改] 更新日志以反映 WAV 输出
834
+ console.log(" POST /v1/audio/speech <-- [NEW] OpenAI TTS compatible endpoint (outputs WAV)");
835
  console.log(" GET /v1/models");
836
  console.log(" GET /status");
837
 
 
838
  await serve(
839
  (request: Request) => server.handleRequest(request),
840
+ // [注意] 您的原始代码使用了 7860 端口,这里保持一致
841
+ { port: 7860 }
842
  );