xt8 committed on
Commit
b5254fd
·
verified ·
1 Parent(s): c895a8e

Update main.ts

Browse files
Files changed (1) hide show
  1. main.ts +222 -232
main.ts CHANGED
@@ -1,16 +1,21 @@
1
  import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
2
- // [修改] 引入具体的 Encoder 类,并使用 npm 导入方式以获得更好的 Deno 兼容性
3
- import { Encoder } from "npm:wav@1.0.2";
4
- // [新增] 引入 MP3 解码器
5
- import { MpegDecoder } from "https://esm.sh/mpg123-decoder@0.6.5";
6
-
7
 
8
  // --- 常量定义 ---
9
  const MAX_DOCUMENT_SIZE_MB = 20; // 设置最大文档大小限制(单位:MB)
10
  const MAX_DOCUMENT_SIZE_BYTES = MAX_DOCUMENT_SIZE_MB * 1024 * 1024;
11
  const MODELS_CACHE_DURATION = 60000; // 1分钟模型缓存
12
 
13
- // --- 接口定义 ---
 
 
 
 
 
 
 
 
 
 
14
  interface OpenAIMessage {
15
  role: "system" | "user" | "assistant";
16
  content: string | Array<{
@@ -29,15 +34,18 @@ interface OpenAIRequest {
29
  stream?: boolean;
30
  }
31
 
32
- // [新增] OpenAI TTS 请求接口定义
33
- interface OpenAITTSRequest {
34
- model: string; // e.g., 'tts-1', 'tts-1-hd'
35
- input: string; // The text to synthesize
36
- voice: 'Puck' | 'Charon' | 'Kore' | 'Fenrir' | 'Leda' | 'Aoede';
37
- response_format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav'; // 增加了 wav 选项
38
- speed?: number; // Not directly supported by Gemini TTS, will be ignored
39
  }
40
 
 
 
 
41
 
42
  class GoogleAIService {
43
  public apiKeys: string[];
@@ -66,95 +74,7 @@ class GoogleAIService {
66
  this.currentKeyIndex = (this.currentKeyIndex + 1) % this.apiKeys.length;
67
  return key;
68
  }
69
-
70
- // --- [新增] TTS 功能 ---
71
-
72
- /**
73
- * 映射 OpenAI 的语音名称到 Google Gemini TTS 的预置语音名称。
74
- * 参考: https://ai.google.dev/gemini-api/docs/text-to-speech#supported_voices
75
- */
76
- private getGoogleVoice(openAIVoice: string): string {
77
- const voiceMap: { [key: string]: string } = {
78
- 'Puck': 'Puck', // A good default, versatile voice'Puck' | 'Charon' | 'Kore' | 'Fenrir' | 'Leda' | 'Aoede'
79
- 'Charon': 'Charon', // Another male voice option
80
- 'Kore': 'Kore', // Female, narrative style
81
- 'Fenrir': 'Fenrir', // Deep, male voice
82
- 'Leda': 'Leda', // Energetic female voice
83
- 'Aoede': 'Aoede', // Gentle female voice
84
- // Fallback to a default if the voice is not in the map
85
- 'default': 'Puck'
86
- };
87
- return voiceMap[openAIVoice] || voiceMap['default'];
88
- }
89
-
90
- /**
91
- * [新增] 调用 Google Gemini TTS API 生成语音。
92
- * @param input - 要转换为语音的文本。
93
- * @param model - 请求的模型(在Google端,我们硬编码为TTS模型)。
94
- * @param voice - OpenAI 格式的语音名称。
95
- * @returns 返回包含 MP3 音频数据的 ArrayBuffer。
96
- */
97
- async generateSpeech(input: string, model: string, voice: string): Promise<ArrayBuffer> {
98
- const apiKey = this.getNextApiKey();
99
- const googleVoice = this.getGoogleVoice(voice);
100
- // Google Gemini TTS 目前使用固定的模型名称
101
- const ttsModel = "gemini-2.5-flash-preview-tts";
102
 
103
- console.log(`Generating speech with model: ${ttsModel}, voice: ${googleVoice} (mapped from OpenAI's '${voice}')`);
104
-
105
- const requestBody = {
106
- "contents": [{
107
- "parts":[{
108
- "text": input
109
- }]
110
- }],
111
- "generationConfig": {
112
- "responseModalities": ["AUDIO"],
113
- "speechConfig": {
114
- "voiceConfig": {
115
- "prebuiltVoiceConfig": {
116
- "voiceName": googleVoice
117
- }
118
- }
119
- }
120
- },
121
- "model": ttsModel,
122
- };
123
-
124
- const response = await fetch(
125
- `https://generativelanguage.googleapis.com/v1beta/models/${ttsModel}:generateContent?key=${apiKey}`,
126
- {
127
- method: "POST",
128
- headers: { "Content-Type": "application/json" },
129
- body: JSON.stringify(requestBody),
130
- }
131
- );
132
-
133
- if (!response.ok) {
134
- const errorBody = await response.json().catch(() => response.text());
135
- const errorMessage = errorBody?.error?.message || JSON.stringify(errorBody);
136
- console.error(`Google TTS API Error: ${response.status} - ${errorMessage}`);
137
- throw new Error(`Google TTS API request failed with status ${response.status}: ${errorMessage}`);
138
- }
139
-
140
- const data = await response.json();
141
-
142
- const audioContentBase64 = data.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
143
- if (!audioContentBase64) {
144
- throw new Error("No audio data returned from Google API. The response might be blocked or empty.");
145
- }
146
-
147
- const binaryString = atob(audioContentBase64);
148
- const len = binaryString.length;
149
- const bytes = new Uint8Array(len);
150
- for (let i = 0; i < len; i++) {
151
- bytes[i] = binaryString.charCodeAt(i);
152
- }
153
- return bytes.buffer;
154
- }
155
-
156
- // --- 现有代码保持不变 (折叠以保持简洁) ---
157
-
158
  async fetchOfficialModels(): Promise<any[]> {
159
  const now = Date.now();
160
  if (this.cachedModels.length > 0 && (now - this.modelsLastFetch) < MODELS_CACHE_DURATION) {
@@ -182,6 +102,7 @@ class GoogleAIService {
182
  console.log(`Fetched ${this.cachedModels.length} models from Google AI`);
183
  return this.cachedModels;
184
  }
 
185
  return this.getFallbackModels();
186
  } catch (error) {
187
  console.warn("Error fetching models from Google AI:", error.message, ". Using fallback models.");
@@ -193,9 +114,7 @@ class GoogleAIService {
193
  return [
194
  { name: "models/gemini-1.5-pro", displayName: "Gemini 1.5 Pro", description: "Mid-size multimodal model that supports up to 1 million tokens, images, and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
195
  { name: "models/gemini-1.5-flash", displayName: "Gemini 1.5 Flash", description: "Fast and versatile multimodal model for diverse tasks, supports images and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
196
- { name: "models/gemini-2.0-flash-preview-image-generation", displayName: "Gemini 2.0 Flash Image Generation", description: "Advanced model for generating and editing high-quality images with text and image outputs", supportedGenerationMethods: ["generateContent"], maxTokens: 100000, capabilities: ["text", "image_generation", "image_editing"] },
197
- // [新增] 在模型列表中添加TTS模型,使其在 /v1/models 接口可见
198
- { name: "models/gemini-2.5-flash-preview-tts", displayName: "Gemini 2.5 Flash TTS", description: "Text-to-speech model for generating high-quality audio.", supportedGenerationMethods: ["generateContent"], id: "gemini-2.5-flash-preview-tts" }
199
  ];
200
  }
201
 
@@ -214,13 +133,17 @@ class GoogleAIService {
214
  return 'unknown';
215
  }
216
 
 
 
 
217
  private extractDocumentData(documentUrl: string): { mimeType: string; data: string; text?: string; docType: string } {
218
  const docType = this.getDocumentType(documentUrl);
219
-
220
  if (!documentUrl.startsWith("data:")) {
221
  if (documentUrl.startsWith("http")) {
222
  throw new Error("Document URL downloads are not supported. Please provide base64 encoded data URLs.");
223
  }
 
 
224
  throw new Error("Document must be provided as a standard base64 data URL (e.g., 'data:application/pdf;base64,...').");
225
  }
226
 
@@ -228,15 +151,16 @@ class GoogleAIService {
228
  if (parts.length !== 2) {
229
  throw new Error("Invalid data URL format for document. Expected 'data:[mime];base64,[data]'.");
230
  }
231
- const [mimeInfo, base64Data] = parts;
232
 
 
 
 
233
  const approxSizeInBytes = base64Data.length * 0.75;
234
  if (approxSizeInBytes > MAX_DOCUMENT_SIZE_BYTES) {
235
  throw new Error(`Document size (${(approxSizeInBytes / 1024 / 1024).toFixed(2)}MB) exceeds the ${MAX_DOCUMENT_SIZE_MB}MB limit.`);
236
  }
237
 
238
  const mimeType = mimeInfo.split(":")[1]?.split(";")[0] || 'application/octet-stream';
239
-
240
  if (docType === 'txt' || docType === 'md') {
241
  try {
242
  const textContent = atob(base64Data);
@@ -246,11 +170,12 @@ class GoogleAIService {
246
  throw new Error(`Invalid base64 encoding for ${docType} document.`);
247
  }
248
  }
249
-
 
250
  const finalMimeType = docType === 'pdf' ? 'application/pdf' : mimeType;
251
  return { mimeType: finalMimeType, data: base64Data, docType };
252
  }
253
-
254
  private extractImageData(imageUrl: string): { mimeType: string; data: string } {
255
  if (imageUrl.startsWith("data:image/")) {
256
  const [mimeInfo, base64Data] = imageUrl.split(",");
@@ -267,7 +192,6 @@ class GoogleAIService {
267
  const apiKey = this.getNextApiKey();
268
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
269
  const documentModel = this.isDocumentModel(fullModelName) ? fullModelName : 'models/gemini-1.5-pro-latest';
270
-
271
  console.log(`Processing document with model: ${documentModel}`);
272
 
273
  let contents;
@@ -279,27 +203,28 @@ class GoogleAIService {
279
 
280
  const messageParts = msg.content.map(part => {
281
  if (part.type === "text") return { text: part.text };
282
-
283
  if (part.type === "image_url" && part.image_url) {
284
  const { mimeType, data } = this.extractImageData(part.image_url.url);
285
  return { inlineData: { mimeType, data } };
286
  }
287
-
288
  if (part.type === "document" && part.document) {
289
  const docData = this.extractDocumentData(part.document.url);
290
  console.log(`Processing document: ${docData.docType}, mime: ${docData.mimeType}, size: ${(docData.data.length * 0.75 / 1024).toFixed(2)} KB`);
291
 
292
  if (docData.docType === 'txt' || docData.docType === 'md') {
293
- const prefix = docData.docType === 'md' ? 'Markdown document content:\n' : 'Text document content:\n';
294
  return { text: `${prefix}${docData.text}` };
295
  }
 
296
  if (docData.docType === 'pdf') {
297
  return { inlineData: { mimeType: docData.mimeType, data: docData.data } };
298
  }
 
299
  return { text: `[Document type '${docData.docType}' is not supported for direct processing. Please convert to PDF, TXT, or MD.]` };
300
  }
301
  return { text: "" };
302
  });
 
303
  return { role: msg.role === "assistant" ? "model" : "user", parts: messageParts.filter(p => p.text || p.inlineData) };
304
  });
305
  } catch (error) {
@@ -343,13 +268,67 @@ class GoogleAIService {
343
  if (candidate.finishReason === "SAFETY") {
344
  throw new Error("Response blocked due to safety filters. Check content for sensitive topics.");
345
  }
 
346
  if (candidate.finishReason === "RECITATION") {
347
  throw new Error("Response blocked due to recitation policy. The model's output was too similar to a copyrighted source.");
348
  }
349
 
350
  return candidate.content?.parts[0]?.text || "Document processed, but no text response was generated.";
351
  }
352
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  async generateContent(messages: OpenAIMessage[], modelName: string, enableSearch: boolean = false): Promise<string> {
354
  const hasDocument = messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "document"));
355
  if (hasDocument) {
@@ -358,7 +337,6 @@ class GoogleAIService {
358
 
359
  const apiKey = this.getNextApiKey();
360
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
361
-
362
  const contents = messages.map(msg => {
363
  if (typeof msg.content === "string") {
364
  return { role: msg.role === "assistant" ? "model" : "user", parts: [{ text: msg.content }] };
@@ -385,6 +363,7 @@ class GoogleAIService {
385
  contents,
386
  generationConfig: { temperature: 0.7, maxOutputTokens: 4096 }
387
  };
 
388
  if (enableSearch) {
389
  requestBody.tools = [{ googleSearchRetrieval: {} }];
390
  }
@@ -398,14 +377,17 @@ class GoogleAIService {
398
  const errorText = await response.text();
399
  throw new Error(`Google AI API error: ${response.status} - ${errorText}`);
400
  }
 
401
  const data = await response.json();
402
  if (!data.candidates || data.candidates.length === 0) {
403
  throw new Error("No response generated from Google AI");
404
  }
 
405
  const candidate = data.candidates[0];
406
  if (candidate.finishReason === "SAFETY") {
407
  throw new Error("Response blocked due to safety filters");
408
  }
 
409
  return candidate.content?.parts[0]?.text || "No response generated";
410
  }
411
 
@@ -435,6 +417,7 @@ class GoogleAIService {
435
  const errorText = await response.text();
436
  throw new Error(`Image ${inputImage ? 'editing' : 'generation'} failed: ${response.status} - ${errorText}`);
437
  }
 
438
  const data = await response.json();
439
  if (!data.candidates || data.candidates.length === 0) {
440
  throw new Error(`No ${inputImage ? 'edited' : 'generated'} image returned`);
@@ -461,9 +444,10 @@ class GoogleAIService {
461
  result.imageBase64 = imageBase64;
462
  result.imageUrl = `data:image/png;base64,${imageBase64}`;
463
  }
 
464
  return result;
465
  }
466
-
467
  async generateContentWithGrounding(messages: OpenAIMessage[], modelName: string): Promise<string> {
468
  const apiKey = this.getNextApiKey();
469
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
@@ -494,6 +478,7 @@ class GoogleAIService {
494
  if (candidate.finishReason === "SAFETY") {
495
  throw new Error("Response blocked due to safety filters");
496
  }
 
497
  return candidate.content?.parts[0]?.text || "No response generated";
498
  }
499
 
@@ -549,91 +534,96 @@ class OpenAICompatibleServer {
549
  lowerUrl.includes('.md') || lowerUrl.startsWith('data:text/markdown');
550
  }
551
 
552
- /**
553
- * [新增] 将MP3音频数据转码为WAV格式。
554
- * @param mp3Buffer 包含MP3数据的ArrayBuffer。
555
- * @returns 返回一个包含WAV数据的Promise<Uint8Array>。
556
- */
557
- private async _transcodeMp3ToWav(mp3Buffer: ArrayBuffer): Promise<Uint8Array> {
558
- console.log("Transcoding MP3 to WAV...");
559
- const decoder = new MpegDecoder();
560
-
561
- // 确保解码器资源在使用后被释放
562
  try {
563
- await decoder.ready;
564
- const mp3Data = new Uint8Array(mp3Buffer);
565
- const { data, channels, sampleRate } = decoder.decode(mp3Data);
566
-
567
- console.log(`Decoded MP3: ${sampleRate}Hz, ${channels} channels, ${data.length} samples.`);
568
-
569
- // 使用 'wav' 库将原始 PCM 数据编码为 WAV
570
- const wavEncoder = new Encoder(channels, { sampleRate });
571
- wavEncoder.write(data);
572
- const wavDataStream = wavEncoder.end();
573
-
574
- // 将WAV数据流收集到一个 Uint8Array 中
575
- const chunks: Uint8Array[] = [];
576
- for await (const chunk of wavDataStream) {
577
- chunks.push(chunk);
578
- }
579
 
580
- // 合并所有块
581
- const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
582
- const wavResult = new Uint8Array(totalLength);
583
- let offset = 0;
584
- for (const chunk of chunks) {
585
- wavResult.set(chunk, offset);
586
- offset += chunk.length;
587
- }
588
-
589
- console.log(`Successfully transcoded to WAV (${(wavResult.length / 1024).toFixed(2)} KB).`);
590
- return wavResult;
591
- } finally {
592
- decoder.free(); // 释放 wasm 解码器占用的内存
593
- }
594
- }
595
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
596
 
597
- /**
598
- * [修改] 处理 OpenAI 兼容的 TTS 请求, 并将结果转为 WAV 格式
599
- */
600
- private async handleAudioSpeech(request: Request): Promise<Response> {
601
- try {
602
- const body: OpenAITTSRequest = await request.json();
603
-
604
- if (!body.input || !body.voice || !body.model) {
605
- return new Response(JSON.stringify({ error: { message: "Missing required fields: input, voice, and model.", type: "invalid_request_error" } }), { status: 400 });
 
 
 
606
  }
 
607
 
608
- // 1. Google 获取 MP3 格式的音频
609
- const mp3AudioBuffer = await this.googleAI.generateSpeech(body.input, body.model, body.voice);
 
 
 
 
 
 
 
 
 
 
 
 
610
 
611
- // 2. MP3 转码为 WAV
612
- const wavAudioBuffer = await this._transcodeMp3ToWav(mp3AudioBuffer);
 
 
 
 
 
 
 
 
 
 
613
 
614
- // 3. 返回 WAV 格式的音频
615
- return new Response(wavAudioBuffer, {
616
- headers: {
617
- // [修改] Content-Type 已更改为 WAV
618
- "Content-Type": "audio/wav",
619
- "Access-Control-Allow-Origin": "*",
620
- }
621
- });
622
  } catch (error) {
623
- console.error("Error in audio speech generation:", error.message);
624
- return new Response(
625
- JSON.stringify({
626
- error: {
627
- message: error.message,
628
- type: "api_error",
629
- code: null
630
- }
631
- }),
632
- { status: 500, headers: { "Content-Type": "application/json" } }
633
- );
634
  }
635
  }
636
-
637
  private async handleChatCompletions(request: Request): Promise<Response> {
638
  try {
639
  const body: OpenAIRequest = await request.json();
@@ -652,7 +642,7 @@ class OpenAICompatibleServer {
652
  );
653
 
654
  const hasImages = body.messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "image_url"));
655
-
656
  let inputImages: any[] = [];
657
  if (hasImages) {
658
  body.messages.forEach(msg => {
@@ -663,9 +653,9 @@ class OpenAICompatibleServer {
663
  }
664
  });
665
  }
666
-
667
- let responseText: string;
668
 
 
 
669
  if (hasDocument) {
670
  responseText = await this.googleAI.generateContentWithDocument(body.messages, requestedModel);
671
  } else if (this.googleAI.isImageEditingModel(requestedModel) && hasImages) {
@@ -708,7 +698,7 @@ class OpenAICompatibleServer {
708
  );
709
  }
710
  }
711
-
712
  private async streamStringAsOpenAIResponse(content: string, modelName: string): Promise<ReadableStream<Uint8Array>> {
713
  const encoder = new TextEncoder();
714
  const streamId = `chatcmpl-${Date.now()}`;
@@ -718,62 +708,56 @@ class OpenAICompatibleServer {
718
  return new ReadableStream({
719
  start(controller) {
720
  const initialChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { role: 'assistant', content: '' }, finish_reason: null }] };
721
- controller.enqueue(encoder.encode(`data: ${JSON.stringify(initialChunk)}\n\n`));
722
  },
723
  pull(controller) {
724
  if (contentQueue.length === 0) {
725
  const finalChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] };
726
- controller.enqueue(encoder.encode(`data: ${JSON.stringify(finalChunk)}\n\n`));
727
- controller.enqueue(encoder.encode('data: [DONE]\n\n'));
728
  controller.close();
729
  return;
730
  }
731
  const char = contentQueue.shift();
732
  const chunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { content: char }, finish_reason: null }] };
733
- controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
734
  }
735
  });
736
  }
737
-
738
  private async handleModels(): Promise<Response> {
739
  try {
740
  const googleModels = await this.googleAI.fetchOfficialModels();
741
- const fallbackModels = this.googleAI['getFallbackModels'](); // Access private method for a complete list
742
-
743
- const allModels = [...googleModels, ...fallbackModels];
744
- const uniqueModelMap = new Map();
745
- allModels.forEach(model => {
746
- const modelId = model.id || model.name.replace('models/', '');
747
- if (!uniqueModelMap.has(modelId)) {
748
- uniqueModelMap.set(modelId, {
749
- id: modelId,
750
- object: "model",
751
- created: Math.floor(Date.now() / 1000),
752
- owned_by: "google",
753
- description: model.description || model.displayName,
754
- maxTokens: model.inputTokenLimit || model.maxTokens
755
- });
756
- }
757
- });
758
-
759
  const models = {
760
  object: "list",
761
- data: Array.from(uniqueModelMap.values()),
 
 
 
 
 
 
762
  };
763
-
764
  return new Response(JSON.stringify(models), { headers: { "Content-Type": "application/json" } });
765
  } catch (error) {
766
  console.error("Error fetching models:", error);
767
  return new Response(JSON.stringify({ error: { message: "Failed to fetch models." } }), { status: 500 });
768
  }
769
  }
770
-
771
  private async handleStatus(): Promise<Response> {
772
  const status = {
773
- status: "healthy", timestamp: new Date().toISOString(), version: "2.5.0",
774
  api_keys_loaded: this.googleAI.apiKeys.length,
775
  models_in_cache: this.googleAI.cachedModels.length,
776
- models_last_fetched: this.googleAI.modelsLastFetch > 0 ? new Date(this.googleAI.modelsLastFetch).toISOString() : "never"
 
 
 
 
 
 
 
777
  };
778
  return new Response(JSON.stringify(status), { headers: { "Content-Type": "application/json" } });
779
  }
@@ -792,20 +776,24 @@ class OpenAICompatibleServer {
792
  const url = new URL(request.url);
793
  let response: Response;
794
 
 
795
  if (url.pathname === "/health" || url.pathname === "/status") {
796
  response = await this.handleStatus();
797
  } else if (!this.authenticate(request)) {
798
  response = new Response(JSON.stringify({ error: { message: "Unauthorized" } }), { status: 401 });
799
- } else if (url.pathname === "/v1/audio/speech" && request.method === "POST") {
800
- response = await this.handleAudioSpeech(request);
801
  } else if (url.pathname === "/v1/chat/completions" && request.method === "POST") {
802
  response = await this.handleChatCompletions(request);
803
  } else if (url.pathname === "/v1/models" && request.method === "GET") {
804
  response = await this.handleModels();
 
 
 
 
805
  } else {
806
  response = new Response("Not Found", { status: 404 });
807
  }
808
 
 
809
  const finalHeaders = new Headers(response.headers);
810
  for (const [key, value] of Object.entries(corsHeaders)) {
811
  finalHeaders.set(key, value);
@@ -817,26 +805,28 @@ class OpenAICompatibleServer {
817
 
818
  // --- 服务器启动 ---
819
  const server = new OpenAICompatibleServer();
820
-
821
- console.log("🚀 OpenAI Compatible Server with Google AI starting on port 8000...");
822
  console.log(`✅ Loaded ${server.googleAI.apiKeys.length} API key(s).`);
823
  console.log(`📄 Max document size set to ${MAX_DOCUMENT_SIZE_MB}MB.`);
824
 
 
825
  server.googleAI.fetchOfficialModels().then(models => {
826
  console.log(`✅ Successfully fetched ${models.length} models from Google AI.`);
827
  }).catch(error => {
828
  console.warn(`⚠️ Could not pre-fetch models: ${error.message}. Will use fallbacks or fetch on first request.`);
829
  });
830
 
831
- console.log("\n🔗 Endpoints:");
832
  console.log(" POST /v1/chat/completions");
833
- // [修改] 更新日志以反映 WAV 输出
834
- console.log(" POST /v1/audio/speech <-- [NEW] OpenAI TTS compatible endpoint (outputs WAV)");
835
  console.log(" GET /v1/models");
 
 
836
  console.log(" GET /status");
837
 
 
 
 
838
  await serve(
839
  (request: Request) => server.handleRequest(request),
840
- // [注意] 您的原始代码使用了 7860 端口,这里保持一致
841
- { port: 7860 }
842
- );
 
1
  import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
 
 
 
 
 
2
 
3
  // --- 常量定义 ---
4
  const MAX_DOCUMENT_SIZE_MB = 20; // 设置最大文档大小限制(单位:MB)
5
  const MAX_DOCUMENT_SIZE_BYTES = MAX_DOCUMENT_SIZE_MB * 1024 * 1024;
6
  const MODELS_CACHE_DURATION = 60000; // 1分钟模型缓存
7
 
8
+ // Gemini 支持的声音列表
9
+ const GEMINI_VOICES = [
10
+ { name: "Puck", language: "en-US", gender: "neutral" },
11
+ { name: "Charon", language: "en-US", gender: "neutral" },
12
+ { name: "Kore", language: "en-US", gender: "neutral" },
13
+ { name: "Fenrir", language: "en-US", gender: "neutral" },
14
+ { name: "Aoede", language: "en-US", gender: "neutral" },
15
+ ] as const;
16
+
17
+ type VoiceName = typeof GEMINI_VOICES[number]["name"];
18
+
19
  interface OpenAIMessage {
20
  role: "system" | "user" | "assistant";
21
  content: string | Array<{
 
34
  stream?: boolean;
35
  }
36
 
37
+ // TTS 相关接口
38
+ interface TTSRequest {
39
+ model: string;
40
+ input: string;
41
+ voice?: VoiceName;
42
+ response_format?: "mp3" | "opus" | "aac" | "flac";
43
+ speed?: number;
44
  }
45
 
46
+ interface TTSResponse {
47
+ audio: string; // base64 编码的音频数据
48
+ }
49
 
50
  class GoogleAIService {
51
  public apiKeys: string[];
 
74
  this.currentKeyIndex = (this.currentKeyIndex + 1) % this.apiKeys.length;
75
  return key;
76
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  async fetchOfficialModels(): Promise<any[]> {
79
  const now = Date.now();
80
  if (this.cachedModels.length > 0 && (now - this.modelsLastFetch) < MODELS_CACHE_DURATION) {
 
102
  console.log(`Fetched ${this.cachedModels.length} models from Google AI`);
103
  return this.cachedModels;
104
  }
105
+
106
  return this.getFallbackModels();
107
  } catch (error) {
108
  console.warn("Error fetching models from Google AI:", error.message, ". Using fallback models.");
 
114
  return [
115
  { name: "models/gemini-1.5-pro", displayName: "Gemini 1.5 Pro", description: "Mid-size multimodal model that supports up to 1 million tokens, images, and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
116
  { name: "models/gemini-1.5-flash", displayName: "Gemini 1.5 Flash", description: "Fast and versatile multimodal model for diverse tasks, supports images and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
117
+ { name: "models/gemini-2.0-flash-preview-image-generation", displayName: "Gemini 2.0 Flash Image Generation", description: "Advanced model for generating and editing high-quality images with text and image outputs", supportedGenerationMethods: ["generateContent"], maxTokens: 100000, capabilities: ["text", "image_generation", "image_editing"] }
 
 
118
  ];
119
  }
120
 
 
133
  return 'unknown';
134
  }
135
 
136
+ /**
137
+ * [关键改进] 提取并验证文档数据,增加大小检查和更稳健的解析
138
+ */
139
  private extractDocumentData(documentUrl: string): { mimeType: string; data: string; text?: string; docType: string } {
140
  const docType = this.getDocumentType(documentUrl);
 
141
  if (!documentUrl.startsWith("data:")) {
142
  if (documentUrl.startsWith("http")) {
143
  throw new Error("Document URL downloads are not supported. Please provide base64 encoded data URLs.");
144
  }
145
+ // 如果不是data url或http url,则假定为纯base64数据,但这是一种不推荐的格式
146
+ // 为了健壮性,我们强制要求使用标准的 data URL
147
  throw new Error("Document must be provided as a standard base64 data URL (e.g., 'data:application/pdf;base64,...').");
148
  }
149
 
 
151
  if (parts.length !== 2) {
152
  throw new Error("Invalid data URL format for document. Expected 'data:[mime];base64,[data]'.");
153
  }
 
154
 
155
+ const [mimeInfo, base64Data] = parts;
156
+ // **改进1: 检查文件大小**
157
+ // Base64 字符串的长度约是原始数据的 4/3。
158
  const approxSizeInBytes = base64Data.length * 0.75;
159
  if (approxSizeInBytes > MAX_DOCUMENT_SIZE_BYTES) {
160
  throw new Error(`Document size (${(approxSizeInBytes / 1024 / 1024).toFixed(2)}MB) exceeds the ${MAX_DOCUMENT_SIZE_MB}MB limit.`);
161
  }
162
 
163
  const mimeType = mimeInfo.split(":")[1]?.split(";")[0] || 'application/octet-stream';
 
164
  if (docType === 'txt' || docType === 'md') {
165
  try {
166
  const textContent = atob(base64Data);
 
170
  throw new Error(`Invalid base64 encoding for ${docType} document.`);
171
  }
172
  }
173
+
174
+ // 自动识别PDF的MIME类型
175
  const finalMimeType = docType === 'pdf' ? 'application/pdf' : mimeType;
176
  return { mimeType: finalMimeType, data: base64Data, docType };
177
  }
178
+
179
  private extractImageData(imageUrl: string): { mimeType: string; data: string } {
180
  if (imageUrl.startsWith("data:image/")) {
181
  const [mimeInfo, base64Data] = imageUrl.split(",");
 
192
  const apiKey = this.getNextApiKey();
193
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
194
  const documentModel = this.isDocumentModel(fullModelName) ? fullModelName : 'models/gemini-1.5-pro-latest';
 
195
  console.log(`Processing document with model: ${documentModel}`);
196
 
197
  let contents;
 
203
 
204
  const messageParts = msg.content.map(part => {
205
  if (part.type === "text") return { text: part.text };
 
206
  if (part.type === "image_url" && part.image_url) {
207
  const { mimeType, data } = this.extractImageData(part.image_url.url);
208
  return { inlineData: { mimeType, data } };
209
  }
 
210
  if (part.type === "document" && part.document) {
211
  const docData = this.extractDocumentData(part.document.url);
212
  console.log(`Processing document: ${docData.docType}, mime: ${docData.mimeType}, size: ${(docData.data.length * 0.75 / 1024).toFixed(2)} KB`);
213
 
214
  if (docData.docType === 'txt' || docData.docType === 'md') {
215
+ const prefix = docData.docType === 'md' ? 'Markdown document content:\\n' : 'Text document content:\\n';
216
  return { text: `${prefix}${docData.text}` };
217
  }
218
+
219
  if (docData.docType === 'pdf') {
220
  return { inlineData: { mimeType: docData.mimeType, data: docData.data } };
221
  }
222
+
223
  return { text: `[Document type '${docData.docType}' is not supported for direct processing. Please convert to PDF, TXT, or MD.]` };
224
  }
225
  return { text: "" };
226
  });
227
+
228
  return { role: msg.role === "assistant" ? "model" : "user", parts: messageParts.filter(p => p.text || p.inlineData) };
229
  });
230
  } catch (error) {
 
268
  if (candidate.finishReason === "SAFETY") {
269
  throw new Error("Response blocked due to safety filters. Check content for sensitive topics.");
270
  }
271
+
272
  if (candidate.finishReason === "RECITATION") {
273
  throw new Error("Response blocked due to recitation policy. The model's output was too similar to a copyrighted source.");
274
  }
275
 
276
  return candidate.content?.parts[0]?.text || "Document processed, but no text response was generated.";
277
  }
278
+
279
+ // TTS 功能
280
+ async generateSpeech(text: string, voice: VoiceName = "Puck"): Promise<string> {
281
+ const apiKey = this.getNextApiKey();
282
+
283
+ const requestBody = {
284
+ input: {
285
+ text: text
286
+ },
287
+ voice: {
288
+ name: voice,
289
+ languageCode: "en-US"
290
+ },
291
+ audioConfig: {
292
+ audioEncoding: "MP3",
293
+ speakingRate: 1.0,
294
+ pitch: 0.0,
295
+ volumeGainDb: 0.0
296
+ }
297
+ };
298
+
299
+ try {
300
+ const response = await fetch(
301
+ `https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateSpeech?key=${apiKey}`,
302
+ {
303
+ method: "POST",
304
+ headers: { "Content-Type": "application/json" },
305
+ body: JSON.stringify(requestBody)
306
+ }
307
+ );
308
+
309
+ if (!response.ok) {
310
+ const errorText = await response.text();
311
+ throw new Error(`Gemini TTS API error: ${response.status} - ${errorText}`);
312
+ }
313
+
314
+ const data = await response.json();
315
+
316
+ if (!data.audioContent) {
317
+ throw new Error("No audio content returned from Gemini TTS");
318
+ }
319
+
320
+ return data.audioContent; // 返回 base64 编码的音频数据
321
+ } catch (error) {
322
+ console.error("Error generating speech:", error);
323
+ throw error;
324
+ }
325
+ }
326
+
327
+ // 获取可用的声音列表
328
+ getAvailableVoices(): typeof GEMINI_VOICES {
329
+ return GEMINI_VOICES;
330
+ }
331
+
332
  async generateContent(messages: OpenAIMessage[], modelName: string, enableSearch: boolean = false): Promise<string> {
333
  const hasDocument = messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "document"));
334
  if (hasDocument) {
 
337
 
338
  const apiKey = this.getNextApiKey();
339
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
 
340
  const contents = messages.map(msg => {
341
  if (typeof msg.content === "string") {
342
  return { role: msg.role === "assistant" ? "model" : "user", parts: [{ text: msg.content }] };
 
363
  contents,
364
  generationConfig: { temperature: 0.7, maxOutputTokens: 4096 }
365
  };
366
+
367
  if (enableSearch) {
368
  requestBody.tools = [{ googleSearchRetrieval: {} }];
369
  }
 
377
  const errorText = await response.text();
378
  throw new Error(`Google AI API error: ${response.status} - ${errorText}`);
379
  }
380
+
381
  const data = await response.json();
382
  if (!data.candidates || data.candidates.length === 0) {
383
  throw new Error("No response generated from Google AI");
384
  }
385
+
386
  const candidate = data.candidates[0];
387
  if (candidate.finishReason === "SAFETY") {
388
  throw new Error("Response blocked due to safety filters");
389
  }
390
+
391
  return candidate.content?.parts[0]?.text || "No response generated";
392
  }
393
 
 
417
  const errorText = await response.text();
418
  throw new Error(`Image ${inputImage ? 'editing' : 'generation'} failed: ${response.status} - ${errorText}`);
419
  }
420
+
421
  const data = await response.json();
422
  if (!data.candidates || data.candidates.length === 0) {
423
  throw new Error(`No ${inputImage ? 'edited' : 'generated'} image returned`);
 
444
  result.imageBase64 = imageBase64;
445
  result.imageUrl = `data:image/png;base64,${imageBase64}`;
446
  }
447
+
448
  return result;
449
  }
450
+
451
  async generateContentWithGrounding(messages: OpenAIMessage[], modelName: string): Promise<string> {
452
  const apiKey = this.getNextApiKey();
453
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
 
478
  if (candidate.finishReason === "SAFETY") {
479
  throw new Error("Response blocked due to safety filters");
480
  }
481
+
482
  return candidate.content?.parts[0]?.text || "No response generated";
483
  }
484
 
 
534
  lowerUrl.includes('.md') || lowerUrl.startsWith('data:text/markdown');
535
  }
536
 
537
/**
 * Handles `POST /v1/audio/speech`: validates the request, synthesizes speech
 * via `this.googleAI.generateSpeech`, and returns the decoded audio bytes.
 *
 * Request body fields read: `input` (required text) and `voice` (optional,
 * defaults to "Puck").
 *
 * @param request Incoming HTTP request whose body is expected to be JSON.
 * @returns
 *  - 400 `invalid_request_error` when the body is not valid JSON, when
 *    `input` is missing, not a string, or blank, or when `voice` is not in
 *    the list returned by `getAvailableVoices()`;
 *  - 500 `api_error` when synthesis or base64 decoding throws;
 *  - otherwise a 200 response with `Content-Type: audio/mpeg`.
 */
private async handleTTS(request: Request): Promise<Response> {
  const jsonHeaders = { "Content-Type": "application/json" };

  // Single place that builds the error envelope used by every failure path.
  const errorResponse = (status: number, type: string, message: string): Response =>
    new Response(
      JSON.stringify({ error: { message, type, code: null } }),
      { status, headers: jsonHeaders }
    );

  try {
    // A malformed body is a client error, so it is reported as 400 instead
    // of falling through to the generic 500 handler.
    let body: TTSRequest | null;
    try {
      body = await request.json();
    } catch {
      return errorResponse(400, "invalid_request_error", "Request body must be valid JSON");
    }

    // Optional chaining keeps a JSON `null` / primitive body from throwing.
    const input = body?.input;
    const voice = body?.voice ?? "Puck";

    // Validate input: must be a non-blank string (a non-string value would
    // otherwise crash on `.trim()` and be misreported as a server error).
    if (typeof input !== "string" || input.trim().length === 0) {
      return errorResponse(400, "invalid_request_error", "Input text is required");
    }

    // Validate voice against the supported list.
    const availableVoices = this.googleAI.getAvailableVoices();
    const isValidVoice = availableVoices.some(v => v.name === voice);
    if (!isValidVoice) {
      return errorResponse(
        400,
        "invalid_request_error",
        `Invalid voice "${voice}". Available voices: ${availableVoices.map(v => v.name).join(", ")}`
      );
    }

    console.log(`TTS request: voice=${voice}, text length=${input.length}`);

    // Synthesize speech; the service returns base64-encoded audio.
    const audioBase64 = await this.googleAI.generateSpeech(input, voice);

    // Decode base64 into raw bytes.
    const audioData = Uint8Array.from(atob(audioBase64), c => c.charCodeAt(0));

    // NOTE(review): the payload is labelled audio/mpeg unconditionally;
    // confirm this matches the format the upstream API actually returns.
    return new Response(audioData, {
      headers: {
        "Content-Type": "audio/mpeg",
        "Content-Length": audioData.length.toString()
      }
    });
  } catch (error: unknown) {
    // Catch variables are `unknown` under strict mode; narrow before use.
    const message = error instanceof Error ? error.message : String(error);
    console.error("Error in TTS:", message);
    return errorResponse(500, "api_error", message);
  }
}
601
 
602
/**
 * Handles `GET /v1/voices`: lists the voices reported by
 * `this.googleAI.getAvailableVoices()` as `{ object: "list", data: [...] }`,
 * where each entry exposes `id`, `name`, `language` and `gender`.
 *
 * @returns A JSON response; status 500 with a generic error message if
 *          building the list throws.
 */
private async handleVoices(): Promise<Response> {
  const jsonHeaders = { "Content-Type": "application/json" };

  try {
    const data = this.googleAI.getAvailableVoices().map(({ name, language, gender }) => ({
      id: name,
      name,
      language,
      gender
    }));
    const payload = JSON.stringify({ object: "list", data });
    return new Response(payload, { headers: jsonHeaders });
  } catch (error) {
    console.error("Error fetching voices:", error);
    const failure = JSON.stringify({ error: { message: "Failed to fetch voices." } });
    return new Response(failure, { status: 500, headers: jsonHeaders });
  }
}
626
+
627
  private async handleChatCompletions(request: Request): Promise<Response> {
628
  try {
629
  const body: OpenAIRequest = await request.json();
 
642
  );
643
 
644
  const hasImages = body.messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "image_url"));
645
+
646
  let inputImages: any[] = [];
647
  if (hasImages) {
648
  body.messages.forEach(msg => {
 
653
  }
654
  });
655
  }
 
 
656
 
657
+ let responseText: string;
658
+ // Routing logic based on keywords and content types
659
  if (hasDocument) {
660
  responseText = await this.googleAI.generateContentWithDocument(body.messages, requestedModel);
661
  } else if (this.googleAI.isImageEditingModel(requestedModel) && hasImages) {
 
698
  );
699
  }
700
  }
701
+
702
  private async streamStringAsOpenAIResponse(content: string, modelName: string): Promise<ReadableStream<Uint8Array>> {
703
  const encoder = new TextEncoder();
704
  const streamId = `chatcmpl-${Date.now()}`;
 
708
  return new ReadableStream({
709
  start(controller) {
710
  const initialChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { role: 'assistant', content: '' }, finish_reason: null }] };
711
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(initialChunk)}\\n\\n`));
712
  },
713
  pull(controller) {
714
  if (contentQueue.length === 0) {
715
  const finalChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] };
716
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(finalChunk)}\\n\\n`));
717
+ controller.enqueue(encoder.encode('data: [DONE]\\n\\n'));
718
  controller.close();
719
  return;
720
  }
721
  const char = contentQueue.shift();
722
  const chunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { content: char }, finish_reason: null }] };
723
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\\n\\n`));
724
  }
725
  });
726
  }
727
+
728
  private async handleModels(): Promise<Response> {
729
  try {
730
  const googleModels = await this.googleAI.fetchOfficialModels();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
731
  const models = {
732
  object: "list",
733
+ data: googleModels.map(model => {
734
+ const modelId = model.name.replace('models/', '');
735
+ return {
736
+ id: modelId, object: "model", created: Math.floor(Date.now() / 1000), owned_by: "google",
737
+ description: model.description || model.displayName, maxTokens: model.inputTokenLimit || model.maxTokens
738
+ };
739
+ })
740
  };
 
741
  return new Response(JSON.stringify(models), { headers: { "Content-Type": "application/json" } });
742
  } catch (error) {
743
  console.error("Error fetching models:", error);
744
  return new Response(JSON.stringify({ error: { message: "Failed to fetch models." } }), { status: 500 });
745
  }
746
  }
747
+
748
  private async handleStatus(): Promise<Response> {
749
  const status = {
750
+ status: "healthy", timestamp: new Date().toISOString(), version: "2.6.0",
751
  api_keys_loaded: this.googleAI.apiKeys.length,
752
  models_in_cache: this.googleAI.cachedModels.length,
753
+ models_last_fetched: this.googleAI.modelsLastFetch > 0 ? new Date(this.googleAI.modelsLastFetch).toISOString() : "never",
754
+ features: {
755
+ chat_completions: true,
756
+ image_generation: true,
757
+ document_processing: true,
758
+ text_to_speech: true,
759
+ voice_list: true
760
+ }
761
  };
762
  return new Response(JSON.stringify(status), { headers: { "Content-Type": "application/json" } });
763
  }
 
776
  const url = new URL(request.url);
777
  let response: Response;
778
 
779
+ // Handle routes
780
  if (url.pathname === "/health" || url.pathname === "/status") {
781
  response = await this.handleStatus();
782
  } else if (!this.authenticate(request)) {
783
  response = new Response(JSON.stringify({ error: { message: "Unauthorized" } }), { status: 401 });
 
 
784
  } else if (url.pathname === "/v1/chat/completions" && request.method === "POST") {
785
  response = await this.handleChatCompletions(request);
786
  } else if (url.pathname === "/v1/models" && request.method === "GET") {
787
  response = await this.handleModels();
788
+ } else if (url.pathname === "/v1/audio/speech" && request.method === "POST") {
789
+ response = await this.handleTTS(request);
790
+ } else if (url.pathname === "/v1/voices" && request.method === "GET") {
791
+ response = await this.handleVoices();
792
  } else {
793
  response = new Response("Not Found", { status: 404 });
794
  }
795
 
796
+ // Add CORS headers to all responses
797
  const finalHeaders = new Headers(response.headers);
798
  for (const [key, value] of Object.entries(corsHeaders)) {
799
  finalHeaders.set(key, value);
 
805
 
806
  // --- 服务器启动 ---
807
  const server = new OpenAICompatibleServer();
808
+ console.log("🚀 OpenAI Compatible Server with Google AI and TTS starting on port 7860...");
 
809
  console.log(`✅ Loaded ${server.googleAI.apiKeys.length} API key(s).`);
810
  console.log(`📄 Max document size set to ${MAX_DOCUMENT_SIZE_MB}MB.`);
811
 
812
+ // Pre-fetch models at startup
813
  server.googleAI.fetchOfficialModels().then(models => {
814
  console.log(`✅ Successfully fetched ${models.length} models from Google AI.`);
815
  }).catch(error => {
816
  console.warn(`⚠️ Could not pre-fetch models: ${error.message}. Will use fallbacks or fetch on first request.`);
817
  });
818
 
819
+ console.log("\\n🔗 Endpoints:");
820
  console.log(" POST /v1/chat/completions");
 
 
821
  console.log(" GET /v1/models");
822
+ console.log(" POST /v1/audio/speech (TTS)");
823
+ console.log(" GET /v1/voices");
824
  console.log(" GET /status");
825
 
826
+ const voices = server.googleAI.getAvailableVoices();
827
+ console.log(`\\n🎤 Available TTS voices: ${voices.map(v => v.name).join(", ")}`);
828
+
829
  await serve(
830
  (request: Request) => server.handleRequest(request),
831
+ { port: 7860 }
832
+ );