Update main.ts
Browse files
main.ts
CHANGED
|
@@ -1,5 +1,9 @@
|
|
| 1 |
import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
// --- 常量定义 ---
|
| 5 |
const MAX_DOCUMENT_SIZE_MB = 20; // 设置最大文档大小限制(单位:MB)
|
|
@@ -30,7 +34,7 @@ interface OpenAITTSRequest {
|
|
| 30 |
model: string; // e.g., 'tts-1', 'tts-1-hd'
|
| 31 |
input: string; // The text to synthesize
|
| 32 |
voice: 'Puck' | 'Charon' | 'Kore' | 'Fenrir' | 'Leda' | 'Aoede';
|
| 33 |
-
response_format?: 'mp3' | 'opus' | 'aac' | 'flac'; //
|
| 34 |
speed?: number; // Not directly supported by Gemini TTS, will be ignored
|
| 35 |
}
|
| 36 |
|
|
@@ -62,28 +66,6 @@ class GoogleAIService {
|
|
| 62 |
this.currentKeyIndex = (this.currentKeyIndex + 1) % this.apiKeys.length;
|
| 63 |
return key;
|
| 64 |
}
|
| 65 |
-
|
| 66 |
-
async function saveWaveFile(
|
| 67 |
-
filename,
|
| 68 |
-
pcmData,
|
| 69 |
-
channels = 1,
|
| 70 |
-
rate = 24000,
|
| 71 |
-
sampleWidth = 2,
|
| 72 |
-
) {
|
| 73 |
-
return new Promise((resolve, reject) => {
|
| 74 |
-
const writer = new wav.FileWriter(filename, {
|
| 75 |
-
channels,
|
| 76 |
-
sampleRate: rate,
|
| 77 |
-
bitDepth: sampleWidth * 8,
|
| 78 |
-
});
|
| 79 |
-
|
| 80 |
-
writer.on('finish', resolve);
|
| 81 |
-
writer.on('error', reject);
|
| 82 |
-
|
| 83 |
-
writer.write(pcmData);
|
| 84 |
-
writer.end();
|
| 85 |
-
});
|
| 86 |
-
}
|
| 87 |
|
| 88 |
// --- [新增] TTS 功能 ---
|
| 89 |
|
|
@@ -110,12 +92,12 @@ class GoogleAIService {
|
|
| 110 |
* @param input - 要转换为语音的文本。
|
| 111 |
* @param model - 请求的模型(在Google端,我们硬编码为TTS模型)。
|
| 112 |
* @param voice - OpenAI 格式的语音名称。
|
| 113 |
-
* @returns 返回包含音频数据的 ArrayBuffer。
|
| 114 |
*/
|
| 115 |
async generateSpeech(input: string, model: string, voice: string): Promise<ArrayBuffer> {
|
| 116 |
const apiKey = this.getNextApiKey();
|
| 117 |
const googleVoice = this.getGoogleVoice(voice);
|
| 118 |
-
//
|
| 119 |
const ttsModel = "gemini-2.5-flash-preview-tts";
|
| 120 |
|
| 121 |
console.log(`Generating speech with model: ${ttsModel}, voice: ${googleVoice} (mapped from OpenAI's '${voice}')`);
|
|
@@ -157,17 +139,21 @@ class GoogleAIService {
|
|
| 157 |
|
| 158 |
const data = await response.json();
|
| 159 |
|
| 160 |
-
// 提取 base64 编码的音频数据
|
| 161 |
const audioContentBase64 = data.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
|
| 162 |
if (!audioContentBase64) {
|
| 163 |
throw new Error("No audio data returned from Google API. The response might be blocked or empty.");
|
| 164 |
-
}
|
| 165 |
-
|
| 166 |
-
const
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
}
|
| 169 |
|
| 170 |
-
// --- 现有代码保持不变 ---
|
| 171 |
|
| 172 |
async fetchOfficialModels(): Promise<any[]> {
|
| 173 |
const now = Date.now();
|
|
@@ -564,7 +550,52 @@ class OpenAICompatibleServer {
|
|
| 564 |
}
|
| 565 |
|
| 566 |
/**
|
| 567 |
-
* [新增]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 568 |
*/
|
| 569 |
private async handleAudioSpeech(request: Request): Promise<Response> {
|
| 570 |
try {
|
|
@@ -574,11 +605,16 @@ class OpenAICompatibleServer {
|
|
| 574 |
return new Response(JSON.stringify({ error: { message: "Missing required fields: input, voice, and model.", type: "invalid_request_error" } }), { status: 400 });
|
| 575 |
}
|
| 576 |
|
| 577 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 578 |
|
| 579 |
-
//
|
| 580 |
-
return new Response(
|
| 581 |
headers: {
|
|
|
|
| 582 |
"Content-Type": "audio/wav",
|
| 583 |
"Access-Control-Allow-Origin": "*",
|
| 584 |
}
|
|
@@ -756,12 +792,10 @@ class OpenAICompatibleServer {
|
|
| 756 |
const url = new URL(request.url);
|
| 757 |
let response: Response;
|
| 758 |
|
| 759 |
-
// Handle routes
|
| 760 |
if (url.pathname === "/health" || url.pathname === "/status") {
|
| 761 |
response = await this.handleStatus();
|
| 762 |
} else if (!this.authenticate(request)) {
|
| 763 |
response = new Response(JSON.stringify({ error: { message: "Unauthorized" } }), { status: 401 });
|
| 764 |
-
// [修改] 添加 TTS 路由
|
| 765 |
} else if (url.pathname === "/v1/audio/speech" && request.method === "POST") {
|
| 766 |
response = await this.handleAudioSpeech(request);
|
| 767 |
} else if (url.pathname === "/v1/chat/completions" && request.method === "POST") {
|
|
@@ -772,7 +806,6 @@ class OpenAICompatibleServer {
|
|
| 772 |
response = new Response("Not Found", { status: 404 });
|
| 773 |
}
|
| 774 |
|
| 775 |
-
// Add CORS headers to all responses
|
| 776 |
const finalHeaders = new Headers(response.headers);
|
| 777 |
for (const [key, value] of Object.entries(corsHeaders)) {
|
| 778 |
finalHeaders.set(key, value);
|
|
@@ -789,7 +822,6 @@ console.log("🚀 OpenAI Compatible Server with Google AI starting on port 8000.
|
|
| 789 |
console.log(`✅ Loaded ${server.googleAI.apiKeys.length} API key(s).`);
|
| 790 |
console.log(`📄 Max document size set to ${MAX_DOCUMENT_SIZE_MB}MB.`);
|
| 791 |
|
| 792 |
-
// Pre-fetch models at startup
|
| 793 |
server.googleAI.fetchOfficialModels().then(models => {
|
| 794 |
console.log(`✅ Successfully fetched ${models.length} models from Google AI.`);
|
| 795 |
}).catch(error => {
|
|
@@ -798,12 +830,13 @@ server.googleAI.fetchOfficialModels().then(models => {
|
|
| 798 |
|
| 799 |
console.log("\n🔗 Endpoints:");
|
| 800 |
console.log(" POST /v1/chat/completions");
|
| 801 |
-
|
|
|
|
| 802 |
console.log(" GET /v1/models");
|
| 803 |
console.log(" GET /status");
|
| 804 |
|
| 805 |
-
// [修改] 端口从 7860 改为 8000,与日志一致。您可以根据需要改回 7860。
|
| 806 |
await serve(
|
| 807 |
(request: Request) => server.handleRequest(request),
|
| 808 |
-
|
|
|
|
| 809 |
);
|
|
|
|
| 1 |
import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
|
| 2 |
+
// [修改] 引入具体的 Encoder 类,并使用 npm 导入方式以获得更好的 Deno 兼容性
|
| 3 |
+
import { Encoder } from "npm:wav@1.0.2";
|
| 4 |
+
// [新增] 引入 MP3 解码器
|
| 5 |
+
import { MpegDecoder } from "npm:mpg123-decoder@0.6.5";
|
| 6 |
+
|
| 7 |
|
| 8 |
// --- 常量定义 ---
|
| 9 |
const MAX_DOCUMENT_SIZE_MB = 20; // 设置最大文档大小限制(单位:MB)
|
|
|
|
| 34 |
model: string; // e.g., 'tts-1', 'tts-1-hd'
|
| 35 |
input: string; // The text to synthesize
|
| 36 |
voice: 'Puck' | 'Charon' | 'Kore' | 'Fenrir' | 'Leda' | 'Aoede';
|
| 37 |
+
response_format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav'; // 增加了 wav 选项
|
| 38 |
speed?: number; // Not directly supported by Gemini TTS, will be ignored
|
| 39 |
}
|
| 40 |
|
|
|
|
| 66 |
this.currentKeyIndex = (this.currentKeyIndex + 1) % this.apiKeys.length;
|
| 67 |
return key;
|
| 68 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
// --- [新增] TTS 功能 ---
|
| 71 |
|
|
|
|
| 92 |
* @param input - 要转换为语音的文本。
|
| 93 |
* @param model - 请求的模型(在Google端,我们硬编码为TTS模型)。
|
| 94 |
* @param voice - OpenAI 格式的语音名称。
|
| 95 |
+
* @returns 返回包含 MP3 音频数据的 ArrayBuffer。
|
| 96 |
*/
|
| 97 |
async generateSpeech(input: string, model: string, voice: string): Promise<ArrayBuffer> {
|
| 98 |
const apiKey = this.getNextApiKey();
|
| 99 |
const googleVoice = this.getGoogleVoice(voice);
|
| 100 |
+
// Google Gemini TTS 目前使用固定的模型名称
|
| 101 |
const ttsModel = "gemini-2.5-flash-preview-tts";
|
| 102 |
|
| 103 |
console.log(`Generating speech with model: ${ttsModel}, voice: ${googleVoice} (mapped from OpenAI's '${voice}')`);
|
|
|
|
| 139 |
|
| 140 |
const data = await response.json();
|
| 141 |
|
|
|
|
| 142 |
const audioContentBase64 = data.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
|
| 143 |
if (!audioContentBase64) {
|
| 144 |
throw new Error("No audio data returned from Google API. The response might be blocked or empty.");
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
const binaryString = atob(audioContentBase64);
|
| 148 |
+
const len = binaryString.length;
|
| 149 |
+
const bytes = new Uint8Array(len);
|
| 150 |
+
for (let i = 0; i < len; i++) {
|
| 151 |
+
bytes[i] = binaryString.charCodeAt(i);
|
| 152 |
+
}
|
| 153 |
+
return bytes.buffer;
|
| 154 |
}
|
| 155 |
|
| 156 |
+
// --- 现有代码保持不变 (折叠以保持简洁) ---
|
| 157 |
|
| 158 |
async fetchOfficialModels(): Promise<any[]> {
|
| 159 |
const now = Date.now();
|
|
|
|
| 550 |
}
|
| 551 |
|
| 552 |
/**
 * Transcodes MP3 audio into an in-memory 16-bit PCM WAV file.
 *
 * The MP3 is decoded to planar float samples, which are then clamped,
 * converted to signed 16-bit integers, interleaved, and prefixed with a
 * canonical 44-byte RIFF/WAVE header. The header is written directly so no
 * stream-based WAV encoder is required.
 *
 * @param mp3Buffer - ArrayBuffer containing the MP3 data.
 * @returns The complete WAV file (header followed by interleaved PCM samples).
 * @throws Error if the decoder yields no channels or no samples.
 */
private async _transcodeMp3ToWav(mp3Buffer: ArrayBuffer): Promise<Uint8Array> {
  console.log("Transcoding MP3 to WAV...");
  // NOTE(review): mpg123-decoder documents its export as `MPEGDecoder`;
  // confirm that the file-level import uses the correct name.
  const decoder = new MpegDecoder();

  // Make sure the decoder's resources are released even if decoding fails.
  try {
    await decoder.ready;

    // decode() returns planar samples: one Float32Array per channel, plus the
    // number of samples decoded per channel and the sample rate.
    const { channelData, samplesDecoded, sampleRate } = decoder.decode(new Uint8Array(mp3Buffer));
    const channels: number = channelData.length;

    // NOTE(review): confirm the upstream payload really is MP3; if it is raw
    // PCM, the decoder will find no frames and this guard will fire.
    if (channels === 0 || samplesDecoded === 0) {
      throw new Error("MP3 decoding produced no audio samples.");
    }

    console.log(`Decoded MP3: ${sampleRate}Hz, ${channels} channels, ${samplesDecoded} samples.`);

    const headerSize = 44;
    const bytesPerSample = 2; // 16-bit PCM
    const blockAlign = channels * bytesPerSample;
    const dataSize = samplesDecoded * blockAlign;

    const wavResult = new Uint8Array(headerSize + dataSize);
    const view = new DataView(wavResult.buffer);
    const writeTag = (at: number, tag: string): void => {
      for (let i = 0; i < tag.length; i++) {
        view.setUint8(at + i, tag.charCodeAt(i));
      }
    };

    // RIFF/WAVE header; all multi-byte fields are little-endian.
    writeTag(0, "RIFF");
    view.setUint32(4, 36 + dataSize, true); // file size minus the first 8 bytes
    writeTag(8, "WAVE");
    writeTag(12, "fmt ");
    view.setUint32(16, 16, true); // size of the PCM fmt chunk
    view.setUint16(20, 1, true); // audio format 1 = linear PCM
    view.setUint16(22, channels, true);
    view.setUint32(24, sampleRate, true);
    view.setUint32(28, sampleRate * blockAlign, true); // byte rate
    view.setUint16(32, blockAlign, true);
    view.setUint16(34, bytesPerSample * 8, true); // bits per sample
    writeTag(36, "data");
    view.setUint32(40, dataSize, true);

    // Interleave the planar channels: frame i, channel ch lands at
    // headerSize + (i * channels + ch) * bytesPerSample.
    for (let ch = 0; ch < channels; ch++) {
      const plane = channelData[ch];
      for (let i = 0; i < samplesDecoded; i++) {
        // Clamp to [-1, 1] before scaling so out-of-range floats cannot wrap.
        const sample = Math.max(-1, Math.min(1, plane[i] ?? 0));
        const pcm = Math.round(sample < 0 ? sample * 0x8000 : sample * 0x7fff);
        view.setInt16(headerSize + (i * channels + ch) * bytesPerSample, pcm, true);
      }
    }

    console.log(`Successfully transcoded to WAV (${(wavResult.length / 1024).toFixed(2)} KB).`);
    return wavResult;
  } finally {
    decoder.free(); // release the memory held by the wasm decoder
  }
}
|
| 595 |
+
|
| 596 |
+
|
| 597 |
+
/**
|
| 598 |
+
* [修改] 处理 OpenAI 兼容的 TTS 请求, 并将结果转为 WAV 格式
|
| 599 |
*/
|
| 600 |
private async handleAudioSpeech(request: Request): Promise<Response> {
|
| 601 |
try {
|
|
|
|
| 605 |
return new Response(JSON.stringify({ error: { message: "Missing required fields: input, voice, and model.", type: "invalid_request_error" } }), { status: 400 });
|
| 606 |
}
|
| 607 |
|
| 608 |
+
// 1. 从 Google 获取 MP3 格式的音频
|
| 609 |
+
const mp3AudioBuffer = await this.googleAI.generateSpeech(body.input, body.model, body.voice);
|
| 610 |
+
|
| 611 |
+
// 2. 将 MP3 转码为 WAV
|
| 612 |
+
const wavAudioBuffer = await this._transcodeMp3ToWav(mp3AudioBuffer);
|
| 613 |
|
| 614 |
+
// 3. 返回 WAV 格式的音频
|
| 615 |
+
return new Response(wavAudioBuffer, {
|
| 616 |
headers: {
|
| 617 |
+
// [修改] Content-Type 已更改为 WAV
|
| 618 |
"Content-Type": "audio/wav",
|
| 619 |
"Access-Control-Allow-Origin": "*",
|
| 620 |
}
|
|
|
|
| 792 |
const url = new URL(request.url);
|
| 793 |
let response: Response;
|
| 794 |
|
|
|
|
| 795 |
if (url.pathname === "/health" || url.pathname === "/status") {
|
| 796 |
response = await this.handleStatus();
|
| 797 |
} else if (!this.authenticate(request)) {
|
| 798 |
response = new Response(JSON.stringify({ error: { message: "Unauthorized" } }), { status: 401 });
|
|
|
|
| 799 |
} else if (url.pathname === "/v1/audio/speech" && request.method === "POST") {
|
| 800 |
response = await this.handleAudioSpeech(request);
|
| 801 |
} else if (url.pathname === "/v1/chat/completions" && request.method === "POST") {
|
|
|
|
| 806 |
response = new Response("Not Found", { status: 404 });
|
| 807 |
}
|
| 808 |
|
|
|
|
| 809 |
const finalHeaders = new Headers(response.headers);
|
| 810 |
for (const [key, value] of Object.entries(corsHeaders)) {
|
| 811 |
finalHeaders.set(key, value);
|
|
|
|
| 822 |
console.log(`✅ Loaded ${server.googleAI.apiKeys.length} API key(s).`);
|
| 823 |
console.log(`📄 Max document size set to ${MAX_DOCUMENT_SIZE_MB}MB.`);
|
| 824 |
|
|
|
|
| 825 |
server.googleAI.fetchOfficialModels().then(models => {
|
| 826 |
console.log(`✅ Successfully fetched ${models.length} models from Google AI.`);
|
| 827 |
}).catch(error => {
|
|
|
|
| 830 |
|
| 831 |
console.log("\n🔗 Endpoints:");
|
| 832 |
console.log(" POST /v1/chat/completions");
|
| 833 |
+
// [修改] 更新日志以反映 WAV 输出
|
| 834 |
+
console.log(" POST /v1/audio/speech <-- [NEW] OpenAI TTS compatible endpoint (outputs WAV)");
|
| 835 |
console.log(" GET /v1/models");
|
| 836 |
console.log(" GET /status");
|
| 837 |
|
|
|
|
| 838 |
await serve(
|
| 839 |
(request: Request) => server.handleRequest(request),
|
| 840 |
+
// [注意] 您的原始代码使用了 7860 端口,这里保持一致
|
| 841 |
+
{ port: 7860 }
|
| 842 |
);
|