xt8 committed on
Commit
7ee1829
·
verified ·
1 Parent(s): 2f11958

Update main.ts

Browse files
Files changed (1) hide show
  1. main.ts +572 -80
main.ts CHANGED
@@ -1,10 +1,10 @@
1
  import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
2
- import { decode } from "https://deno.land/std@0.208.0/encoding/base64.ts";
3
 
4
  // --- 常量定义 ---
5
- const MAX_DOCUMENT_SIZE_MB = 20;
6
  const MAX_DOCUMENT_SIZE_BYTES = MAX_DOCUMENT_SIZE_MB * 1024 * 1024;
7
- const MODELS_CACHE_DURATION = 60000;
8
 
9
  // --- 接口定义 ---
10
  interface OpenAIMessage {
@@ -12,8 +12,8 @@ interface OpenAIMessage {
12
  content: string | Array<{
13
  type: string;
14
  text?: string;
15
- image_url?: { url:string };
16
- document?: { url: string; type: string };
17
  }>;
18
  }
19
 
@@ -25,14 +25,16 @@ interface OpenAIRequest {
25
  stream?: boolean;
26
  }
27
 
 
28
  interface OpenAITTSRequest {
29
- model: 'tts-1' | 'tts-1-hd';
30
- input: string;
31
- voice: string;
32
- response_format?: 'mp3' | 'opus' | 'aac' | 'flac';
33
- speed?: number;
34
  }
35
 
 
36
  class GoogleAIService {
37
  public apiKeys: string[];
38
  public currentKeyIndex = 0;
@@ -49,6 +51,7 @@ class GoogleAIService {
49
  this.apiKeys.push(key);
50
  i++;
51
  }
 
52
  if (this.apiKeys.length === 0) {
53
  throw new Error("No Google AI API keys found in environment variables (e.g., GOOGLE_AI_KEY_1, GOOGLE_AI_KEY)");
54
  }
@@ -60,35 +63,112 @@ class GoogleAIService {
60
  return key;
61
  }
62
 
63
- async synthesizeSpeech(input: string, voiceName: string): Promise<Uint8Array> {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  const apiKey = this.getNextApiKey();
65
- console.log(`Synthesizing speech with voice: ${voiceName}`);
 
 
 
 
 
66
  const requestBody = {
67
- "input": { "text": input },
68
- "voice": { "name": voiceName },
69
- "audioConfig": { "audioEncoding": "MP3" }
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  };
 
71
  const response = await fetch(
72
- `https://texttospeech.googleapis.com/v1beta/text:synthesize?key=${apiKey}`,
73
- {
74
- method: "POST",
75
- headers: { "Content-Type": "application/json" },
76
- body: JSON.stringify(requestBody),
77
- }
78
  );
 
79
  if (!response.ok) {
80
- const errorBody = await response.json().catch(() => response.text());
81
- const errorMessage = errorBody?.error?.message || JSON.stringify(errorBody);
82
- throw new Error(`Google TTS API request failed: ${response.status}: ${errorMessage}`);
 
83
  }
 
84
  const data = await response.json();
85
- if (!data.audioContent) {
86
- throw new Error("TTS synthesis failed, no audio content in response.");
87
- }
88
- return decode(data.audioContent);
 
 
 
 
 
89
  }
90
 
91
- // 省略其他 GoogleAIService 方法,它们与之前相同...
 
92
  async fetchOfficialModels(): Promise<any[]> {
93
  const now = Date.now();
94
  if (this.cachedModels.length > 0 && (now - this.modelsLastFetch) < MODELS_CACHE_DURATION) {
@@ -127,7 +207,9 @@ class GoogleAIService {
127
  return [
128
  { name: "models/gemini-1.5-pro", displayName: "Gemini 1.5 Pro", description: "Mid-size multimodal model that supports up to 1 million tokens, images, and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
129
  { name: "models/gemini-1.5-flash", displayName: "Gemini 1.5 Flash", description: "Fast and versatile multimodal model for diverse tasks, supports images and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
130
- { name: "models/gemini-2.0-flash-preview-image-generation", displayName: "Gemini 2.0 Flash Image Generation", description: "Advanced model for generating and editing high-quality images with text and image outputs", supportedGenerationMethods: ["generateContent"], maxTokens: 100000, capabilities: ["text", "image_generation", "image_editing"] }
 
 
131
  ];
132
  }
133
 
@@ -148,19 +230,37 @@ class GoogleAIService {
148
 
149
  private extractDocumentData(documentUrl: string): { mimeType: string; data: string; text?: string; docType: string } {
150
  const docType = this.getDocumentType(documentUrl);
151
- if (!documentUrl.startsWith("data:")) throw new Error("Document must be provided as a standard base64 data URL (e.g., 'data:application/pdf;base64,...').");
 
 
 
 
 
 
 
152
  const parts = documentUrl.split(",");
153
- if (parts.length !== 2) throw new Error("Invalid data URL format for document.");
 
 
154
  const [mimeInfo, base64Data] = parts;
 
155
  const approxSizeInBytes = base64Data.length * 0.75;
156
- if (approxSizeInBytes > MAX_DOCUMENT_SIZE_BYTES) throw new Error(`Document size exceeds the ${MAX_DOCUMENT_SIZE_MB}MB limit.`);
 
 
 
157
  const mimeType = mimeInfo.split(":")[1]?.split(";")[0] || 'application/octet-stream';
 
158
  if (docType === 'txt' || docType === 'md') {
159
  try {
160
  const textContent = atob(base64Data);
161
  return { mimeType, data: base64Data, text: textContent, docType };
162
- } catch (error) { throw new Error(`Invalid base64 encoding for ${docType} document.`); }
 
 
 
163
  }
 
164
  const finalMimeType = docType === 'pdf' ? 'application/pdf' : mimeType;
165
  return { mimeType: finalMimeType, data: base64Data, docType };
166
  }
@@ -178,13 +278,265 @@ class GoogleAIService {
178
  }
179
 
180
  async generateContentWithDocument(messages: OpenAIMessage[], modelName: string): Promise<string> {
181
- // ... code from previous answer ...
182
- return "Not implemented for brevity";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  }
184
 
185
  async generateContent(messages: OpenAIMessage[], modelName: string, enableSearch: boolean = false): Promise<string> {
186
- // ... code from previous answer ...
187
- return "Not implemented for brevity";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  }
189
  }
190
 
@@ -211,59 +563,184 @@ class OpenAICompatibleServer {
211
  lowerUrl.includes('.md') || lowerUrl.startsWith('data:text/markdown');
212
  }
213
 
 
 
 
214
  private async handleAudioSpeech(request: Request): Promise<Response> {
215
  try {
216
- // 增加一个 Content-Type 检查,这是良好的实践
217
- if (request.headers.get("Content-Type") !== "application/json") {
218
- return new Response(JSON.stringify({ error: { message: "Content-Type must be application/json" } }), { status: 415, headers: { "Content-Type": "application/json" } });
219
- }
220
-
221
- const body: OpenAITTSRequest = await request.json();
222
-
223
- if (!body.input || !body.voice) {
224
- throw new Error("Missing required parameters: 'input' and 'voice' are required.");
225
- }
226
-
227
- const audioData = await this.googleAI.synthesizeSpeech(body.input, body.voice);
228
-
229
- return new Response(audioData, {
230
- status: 200,
231
- headers: { "Content-Type": "audio/mpeg" },
232
- });
233
  } catch (error) {
234
- console.error("Error in /v1/audio/speech:", error.message);
235
- const status = error.message.includes("required parameter") ? 400 : 500;
236
- return new Response(JSON.stringify({ error: { message: error.message, type: "api_error" } }), { status, headers: { "Content-Type": "application/json" } });
 
 
 
 
 
 
 
 
237
  }
238
  }
239
-
240
  private async handleChatCompletions(request: Request): Promise<Response> {
241
- // 省略此处的实现细节,与之前版本相同
242
  try {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
  const responsePayload = {
244
- id: `chatcmpl-${Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: "gemini-pro",
245
- choices: [{ index: 0, message: { role: "assistant", content: "Chat completions logic is correct." }, finish_reason: "stop" }],
246
  usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
247
  };
248
  return new Response(JSON.stringify(responsePayload), { headers: { "Content-Type": "application/json" } });
249
- } catch(error) {
250
- return new Response(JSON.stringify({ error: { message: error.message } }), { status: 500 });
 
 
 
 
 
 
 
 
 
 
 
 
251
  }
252
  }
253
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  private async handleModels(): Promise<Response> {
255
- // 省略此处的实现细节,与之前版本相同
256
  try {
257
- const models = { object: "list", data: [
258
- { id: "tts-1", object: "model", owned_by: "google" },
259
- { id: "tts-1-hd", object: "model", owned_by: "google" },
260
- { id: "gemini-1.5-pro", object: "model", owned_by: "google" }
261
- ]};
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
  return new Response(JSON.stringify(models), { headers: { "Content-Type": "application/json" } });
263
  } catch (error) {
264
- return new Response(JSON.stringify({ error: { message: "Failed to fetch models." } }), { status: 500 });
 
265
  }
266
  }
 
 
 
 
 
 
 
 
 
 
267
 
268
  async handleRequest(request: Request): Promise<Response> {
269
  const corsHeaders = {
@@ -279,38 +756,53 @@ class OpenAICompatibleServer {
279
  const url = new URL(request.url);
280
  let response: Response;
281
 
282
- if (!this.authenticate(request)) {
 
 
 
283
  response = new Response(JSON.stringify({ error: { message: "Unauthorized" } }), { status: 401 });
 
 
 
284
  } else if (url.pathname === "/v1/chat/completions" && request.method === "POST") {
285
  response = await this.handleChatCompletions(request);
286
  } else if (url.pathname === "/v1/models" && request.method === "GET") {
287
  response = await this.handleModels();
288
- } else if (url.pathname === "/v1/audio/speech" && request.method === "POST") {
289
- response = await this.handleAudioSpeech(request);
290
  } else {
291
  response = new Response("Not Found", { status: 404 });
292
  }
293
 
294
- // --- [ 这是关键的修正 ] ---
295
- // 直接修改返回的 Response 对象的 headers,而不是创建一个新的 Response。
296
  for (const [key, value] of Object.entries(corsHeaders)) {
297
- response.headers.set(key, value);
298
  }
299
 
300
- return response; // 返回被修改过的原始 response 对象
301
  }
302
  }
303
 
304
  // --- 服务器启动 ---
305
  const server = new OpenAICompatibleServer();
306
 
307
- console.log("🚀 OpenAI Compatible Server starting on port 7860...");
308
  console.log(`✅ Loaded ${server.googleAI.apiKeys.length} API key(s).`);
 
 
 
 
 
 
 
 
 
309
  console.log("\n🔗 Endpoints:");
310
  console.log(" POST /v1/chat/completions");
311
- console.log(" POST /v1/audio/speech");
312
  console.log(" GET /v1/models");
 
313
 
 
314
  await serve(
315
  (request: Request) => server.handleRequest(request),
316
  { port: 7860 }
 
1
  import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
2
+ import wav from 'wav'
3
 
4
  // --- 常量定义 ---
5
+ const MAX_DOCUMENT_SIZE_MB = 20; // 设置最大文档大小限制(单位:MB)
6
  const MAX_DOCUMENT_SIZE_BYTES = MAX_DOCUMENT_SIZE_MB * 1024 * 1024;
7
+ const MODELS_CACHE_DURATION = 60000; // 1分钟模型缓存
8
 
9
  // --- 接口定义 ---
10
  interface OpenAIMessage {
 
12
  content: string | Array<{
13
  type: string;
14
  text?: string;
15
+ image_url?: { url: string };
16
+ document?: { url: string; type: string }; // 支持多种文档类型
17
  }>;
18
  }
19
 
 
25
  stream?: boolean;
26
  }
27
 
28
+ // [New] Request body shape accepted by the OpenAI-compatible TTS endpoint
29
  interface OpenAITTSRequest {
30
+ model: string; // Requested model id, e.g. 'tts-1' or 'tts-1-hd'
31
+ input: string; // The text to synthesize
32
+ voice: 'Puck' | 'Charon' | 'Kore' | 'Fenrir' | 'Leda' | 'Aoede';
33
+ response_format?: 'mp3' | 'opus' | 'aac' | 'flac'; // Not passed to generateSpeech(); the speech handler always answers with audio/wav
34
+ speed?: number; // Not passed to generateSpeech(); currently ignored
35
  }
36
 
37
+
38
  class GoogleAIService {
39
  public apiKeys: string[];
40
  public currentKeyIndex = 0;
 
51
  this.apiKeys.push(key);
52
  i++;
53
  }
54
+
55
  if (this.apiKeys.length === 0) {
56
  throw new Error("No Google AI API keys found in environment variables (e.g., GOOGLE_AI_KEY_1, GOOGLE_AI_KEY)");
57
  }
 
63
  return key;
64
  }
65
 
66
/**
 * Writes raw PCM samples to `filename` as a WAV file via the `wav` package.
 *
 * Declared as a class method (a `function` declaration is not valid inside a
 * class body). generateSpeech() does not call it: the HTTP handler needs the
 * audio bytes in memory, not a file on disk.
 *
 * @param filename - Destination path of the WAV file.
 * @param pcmData - Raw PCM sample bytes.
 * @param channels - Number of audio channels.
 * @param rate - Sample rate in Hz.
 * @param sampleWidth - Bytes per sample.
 * @returns A promise that settles when the writer finishes or errors.
 */
async saveWaveFile(
  filename: string,
  pcmData: Uint8Array,
  channels = 1,
  rate = 24000,
  sampleWidth = 2,
): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const writer = new wav.FileWriter(filename, {
      channels,
      sampleRate: rate,
      bitDepth: sampleWidth * 8,
    });

    writer.on('finish', () => resolve());
    writer.on('error', reject);

    writer.write(pcmData);
    writer.end();
  });
}

/**
 * Wraps raw PCM bytes in a 44-byte RIFF/WAVE header, entirely in memory.
 *
 * @param pcm - Raw PCM sample bytes.
 * @param channels - Number of audio channels.
 * @param rate - Sample rate in Hz.
 * @param sampleWidth - Bytes per sample.
 * @returns An ArrayBuffer holding a complete WAV file.
 */
private pcmToWav(pcm: Uint8Array, channels = 1, rate = 24000, sampleWidth = 2): ArrayBuffer {
  const headerSize = 44;
  const wavBuffer = new ArrayBuffer(headerSize + pcm.length);
  const view = new DataView(wavBuffer);
  const writeAscii = (offset: number, text: string): void => {
    for (let i = 0; i < text.length; i++) {
      view.setUint8(offset + i, text.charCodeAt(i));
    }
  };

  writeAscii(0, "RIFF");
  view.setUint32(4, 36 + pcm.length, true); // RIFF chunk size = file size - 8
  writeAscii(8, "WAVE");
  writeAscii(12, "fmt ");
  view.setUint32(16, 16, true); // fmt chunk size for PCM
  view.setUint16(20, 1, true); // audio format 1 = uncompressed PCM
  view.setUint16(22, channels, true);
  view.setUint32(24, rate, true);
  view.setUint32(28, rate * channels * sampleWidth, true); // byte rate
  view.setUint16(32, channels * sampleWidth, true); // block align
  view.setUint16(34, sampleWidth * 8, true); // bits per sample
  writeAscii(36, "data");
  view.setUint32(40, pcm.length, true);
  new Uint8Array(wavBuffer, headerSize).set(pcm);

  return wavBuffer;
}

// --- TTS support ---

/**
 * Resolves the requested voice name to a Gemini prebuilt voice.
 * Names outside the supported list fall back to 'Puck'.
 * Reference: https://ai.google.dev/gemini-api/docs/text-to-speech#supported_voices
 *
 * @param openAIVoice - Voice name taken from the incoming request.
 * @returns A voice name from the supported list.
 */
private getGoogleVoice(openAIVoice: string): string {
  // An explicit list (instead of a plain-object lookup) keeps inherited keys
  // such as "constructor" from being treated as valid voices.
  const supportedVoices = ['Puck', 'Charon', 'Kore', 'Fenrir', 'Leda', 'Aoede'];
  return supportedVoices.includes(openAIVoice) ? openAIVoice : 'Puck';
}

/**
 * Calls the Gemini TTS model and returns the synthesized audio as WAV bytes.
 *
 * @param input - Text to convert to speech.
 * @param model - Model requested by the client; unused because the Gemini TTS model is fixed.
 * @param voice - Requested voice name (see getGoogleVoice).
 * @returns An ArrayBuffer containing a complete WAV file.
 * @throws Error when the API call fails or the response carries no audio.
 */
async generateSpeech(input: string, model: string, voice: string): Promise<ArrayBuffer> {
  const apiKey = this.getNextApiKey();
  const googleVoice = this.getGoogleVoice(voice);
  // The TTS model is fixed regardless of the `model` argument.
  const ttsModel = "gemini-2.5-flash-preview-tts";

  console.log(`Generating speech with model: ${ttsModel}, voice: ${googleVoice} (mapped from OpenAI's '${voice}')`);

  const requestBody = {
    "contents": [{
      "parts": [{
        "text": input
      }]
    }],
    "generationConfig": {
      "responseModalities": ["AUDIO"],
      "speechConfig": {
        "voiceConfig": {
          "prebuiltVoiceConfig": {
            "voiceName": googleVoice
          }
        }
      }
    },
    "model": ttsModel,
  };

  const response = await fetch(
    `https://generativelanguage.googleapis.com/v1beta/models/${ttsModel}:generateContent?key=${apiKey}`,
    {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(requestBody),
    }
  );

  if (!response.ok) {
    // Read the body exactly once; a second read after a failed json() would throw.
    const rawBody = await response.text();
    let errorMessage = rawBody;
    try {
      const parsed = JSON.parse(rawBody);
      errorMessage = parsed?.error?.message || JSON.stringify(parsed);
    } catch {
      // Body was not JSON; keep the raw text as the message.
    }
    console.error(`Google TTS API Error: ${response.status} - ${errorMessage}`);
    throw new Error(`Google TTS API request failed with status ${response.status}: ${errorMessage}`);
  }

  const data = await response.json();

  // Take the first part that actually carries inline (base64) audio data.
  const responseParts: Array<{ inlineData?: { data?: string } }> = data.candidates?.[0]?.content?.parts ?? [];
  const audioContentBase64 = responseParts.find((part) => part.inlineData?.data)?.inlineData?.data;
  if (!audioContentBase64) {
    throw new Error("No audio data returned from Google API. The response might be blocked or empty.");
  }

  // Decode with atob (Buffer is a Node API) and wrap in a WAV header using the
  // same mono / 24 kHz / 16-bit defaults the original WAV writer assumed.
  const pcmBytes = Uint8Array.from(atob(audioContentBase64), (ch) => ch.charCodeAt(0));
  return this.pcmToWav(pcmBytes);
}
169
 
170
+ // --- Existing code below is unchanged ---
171
+
172
  async fetchOfficialModels(): Promise<any[]> {
173
  const now = Date.now();
174
  if (this.cachedModels.length > 0 && (now - this.modelsLastFetch) < MODELS_CACHE_DURATION) {
 
207
  return [
208
  { name: "models/gemini-1.5-pro", displayName: "Gemini 1.5 Pro", description: "Mid-size multimodal model that supports up to 1 million tokens, images, and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
209
  { name: "models/gemini-1.5-flash", displayName: "Gemini 1.5 Flash", description: "Fast and versatile multimodal model for diverse tasks, supports images and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
210
+ { name: "models/gemini-2.0-flash-preview-image-generation", displayName: "Gemini 2.0 Flash Image Generation", description: "Advanced model for generating and editing high-quality images with text and image outputs", supportedGenerationMethods: ["generateContent"], maxTokens: 100000, capabilities: ["text", "image_generation", "image_editing"] },
211
+ // [新增] 在模型列表中添加TTS模型,使其在 /v1/models 接口可见
212
+ { name: "models/gemini-2.5-flash-preview-tts", displayName: "Gemini 2.5 Flash TTS", description: "Text-to-speech model for generating high-quality audio.", supportedGenerationMethods: ["generateContent"], id: "gemini-2.5-flash-preview-tts" }
213
  ];
214
  }
215
 
 
230
 
231
  private extractDocumentData(documentUrl: string): { mimeType: string; data: string; text?: string; docType: string } {
232
  const docType = this.getDocumentType(documentUrl);
233
+
234
+ if (!documentUrl.startsWith("data:")) {
235
+ if (documentUrl.startsWith("http")) {
236
+ throw new Error("Document URL downloads are not supported. Please provide base64 encoded data URLs.");
237
+ }
238
+ throw new Error("Document must be provided as a standard base64 data URL (e.g., 'data:application/pdf;base64,...').");
239
+ }
240
+
241
  const parts = documentUrl.split(",");
242
+ if (parts.length !== 2) {
243
+ throw new Error("Invalid data URL format for document. Expected 'data:[mime];base64,[data]'.");
244
+ }
245
  const [mimeInfo, base64Data] = parts;
246
+
247
  const approxSizeInBytes = base64Data.length * 0.75;
248
+ if (approxSizeInBytes > MAX_DOCUMENT_SIZE_BYTES) {
249
+ throw new Error(`Document size (${(approxSizeInBytes / 1024 / 1024).toFixed(2)}MB) exceeds the ${MAX_DOCUMENT_SIZE_MB}MB limit.`);
250
+ }
251
+
252
  const mimeType = mimeInfo.split(":")[1]?.split(";")[0] || 'application/octet-stream';
253
+
254
  if (docType === 'txt' || docType === 'md') {
255
  try {
256
  const textContent = atob(base64Data);
257
  return { mimeType, data: base64Data, text: textContent, docType };
258
+ } catch (error) {
259
+ console.error(`Failed to decode base64 content for ${docType}:`, error);
260
+ throw new Error(`Invalid base64 encoding for ${docType} document.`);
261
+ }
262
  }
263
+
264
  const finalMimeType = docType === 'pdf' ? 'application/pdf' : mimeType;
265
  return { mimeType: finalMimeType, data: base64Data, docType };
266
  }
 
278
  }
279
 
280
  async generateContentWithDocument(messages: OpenAIMessage[], modelName: string): Promise<string> {
281
+ const apiKey = this.getNextApiKey();
282
+ const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
283
+ const documentModel = this.isDocumentModel(fullModelName) ? fullModelName : 'models/gemini-1.5-pro-latest';
284
+
285
+ console.log(`Processing document with model: ${documentModel}`);
286
+
287
+ let contents;
288
+ try {
289
+ contents = messages.map(msg => {
290
+ if (typeof msg.content === "string") {
291
+ return { role: msg.role === "assistant" ? "model" : "user", parts: [{ text: msg.content }] };
292
+ }
293
+
294
+ const messageParts = msg.content.map(part => {
295
+ if (part.type === "text") return { text: part.text };
296
+
297
+ if (part.type === "image_url" && part.image_url) {
298
+ const { mimeType, data } = this.extractImageData(part.image_url.url);
299
+ return { inlineData: { mimeType, data } };
300
+ }
301
+
302
+ if (part.type === "document" && part.document) {
303
+ const docData = this.extractDocumentData(part.document.url);
304
+ console.log(`Processing document: ${docData.docType}, mime: ${docData.mimeType}, size: ${(docData.data.length * 0.75 / 1024).toFixed(2)} KB`);
305
+
306
+ if (docData.docType === 'txt' || docData.docType === 'md') {
307
+ const prefix = docData.docType === 'md' ? 'Markdown document content:\n' : 'Text document content:\n';
308
+ return { text: `${prefix}${docData.text}` };
309
+ }
310
+ if (docData.docType === 'pdf') {
311
+ return { inlineData: { mimeType: docData.mimeType, data: docData.data } };
312
+ }
313
+ return { text: `[Document type '${docData.docType}' is not supported for direct processing. Please convert to PDF, TXT, or MD.]` };
314
+ }
315
+ return { text: "" };
316
+ });
317
+ return { role: msg.role === "assistant" ? "model" : "user", parts: messageParts.filter(p => p.text || p.inlineData) };
318
+ });
319
+ } catch (error) {
320
+ throw error;
321
+ }
322
+
323
+ const requestBody = {
324
+ contents,
325
+ generationConfig: { temperature: 0.7, maxOutputTokens: 8192 }
326
+ };
327
+
328
+ const response = await fetch(
329
+ `https://generativelanguage.googleapis.com/v1beta/${documentModel}:generateContent?key=${apiKey}`,
330
+ {
331
+ method: "POST",
332
+ headers: { "Content-Type": "application/json" },
333
+ body: JSON.stringify(requestBody),
334
+ }
335
+ );
336
+
337
+ if (!response.ok) {
338
+ const errorBody = await response.json().catch(() => response.text());
339
+ const errorMessage = errorBody?.error?.message || JSON.stringify(errorBody);
340
+ console.error(`Google API Error: ${response.status} - ${errorMessage}`);
341
+ throw new Error(`Google API request failed with status ${response.status}: ${errorMessage}`);
342
+ }
343
+
344
+ const data = await response.json();
345
+ const promptFeedback = data.promptFeedback;
346
+ if (promptFeedback && promptFeedback.blockReason) {
347
+ const reason = promptFeedback.blockReason;
348
+ const safetyRatings = promptFeedback.safetyRatings?.map((r: any) => `${r.category}: ${r.probability}`).join(', ') || 'N/A';
349
+ throw new Error(`Request blocked by Google API. Reason: ${reason}. Safety Ratings: [${safetyRatings}]`);
350
+ }
351
+
352
+ if (!data.candidates || data.candidates.length === 0) {
353
+ throw new Error("No response generated for document content. The content might be empty or unreadable.");
354
+ }
355
+
356
+ const candidate = data.candidates[0];
357
+ if (candidate.finishReason === "SAFETY") {
358
+ throw new Error("Response blocked due to safety filters. Check content for sensitive topics.");
359
+ }
360
+ if (candidate.finishReason === "RECITATION") {
361
+ throw new Error("Response blocked due to recitation policy. The model's output was too similar to a copyrighted source.");
362
+ }
363
+
364
+ return candidate.content?.parts[0]?.text || "Document processed, but no text response was generated.";
365
  }
366
 
367
  async generateContent(messages: OpenAIMessage[], modelName: string, enableSearch: boolean = false): Promise<string> {
368
+ const hasDocument = messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "document"));
369
+ if (hasDocument) {
370
+ return await this.generateContentWithDocument(messages, modelName);
371
+ }
372
+
373
+ const apiKey = this.getNextApiKey();
374
+ const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
375
+
376
+ const contents = messages.map(msg => {
377
+ if (typeof msg.content === "string") {
378
+ return { role: msg.role === "assistant" ? "model" : "user", parts: [{ text: msg.content }] };
379
+ } else {
380
+ const messageParts = msg.content.map(part => {
381
+ if (part.type === "text") {
382
+ return { text: part.text };
383
+ } else if (part.type === "image_url" && part.image_url) {
384
+ const imageData = part.image_url.url;
385
+ if (imageData.startsWith("data:image/")) {
386
+ const { mimeType, data } = this.extractImageData(imageData);
387
+ return { inlineData: { mimeType, data } };
388
+ } else {
389
+ return { fileData: { mimeType: "image/jpeg", fileUri: imageData } };
390
+ }
391
+ }
392
+ return { text: "" };
393
+ });
394
+ return { role: msg.role === "assistant" ? "model" : "user", parts: messageParts };
395
+ }
396
+ });
397
+
398
+ const requestBody: any = {
399
+ contents,
400
+ generationConfig: { temperature: 0.7, maxOutputTokens: 4096 }
401
+ };
402
+ if (enableSearch) {
403
+ requestBody.tools = [{ googleSearchRetrieval: {} }];
404
+ }
405
+
406
+ const response = await fetch(
407
+ `https://generativelanguage.googleapis.com/v1beta/${fullModelName}:generateContent?key=${apiKey}`,
408
+ { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody) }
409
+ );
410
+
411
+ if (!response.ok) {
412
+ const errorText = await response.text();
413
+ throw new Error(`Google AI API error: ${response.status} - ${errorText}`);
414
+ }
415
+ const data = await response.json();
416
+ if (!data.candidates || data.candidates.length === 0) {
417
+ throw new Error("No response generated from Google AI");
418
+ }
419
+ const candidate = data.candidates[0];
420
+ if (candidate.finishReason === "SAFETY") {
421
+ throw new Error("Response blocked due to safety filters");
422
+ }
423
+ return candidate.content?.parts[0]?.text || "No response generated";
424
+ }
425
+
426
/**
 * Generates a new image, or edits `inputImage` when one is supplied, with a Gemini image model.
 *
 * @param prompt - Text instruction for the model.
 * @param modelName - Model to call, with or without the 'models/' prefix.
 * @param inputImage - Optional base64 image to edit.
 * @returns Any text the model produced plus the last returned image as base64 and as a data URL.
 * @throws Error on HTTP failure, an empty candidate list, or a safety-blocked response.
 */
async generateOrEditImageWithGemini(prompt: string, modelName: string = "gemini-2.0-flash-preview-image-generation", inputImage?: { mimeType: string; data: string }): Promise<{ text?: string; imageBase64?: string; imageUrl?: string }> {
  type ResponsePart = {
    text?: string;
    inlineData?: { mimeType?: string; data?: string };
    inline_data?: { mime_type?: string; data?: string };
  };

  const apiKey = this.getNextApiKey();
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
  const requestParts: Array<Record<string, unknown>> = [{ text: prompt }];

  if (inputImage) {
    requestParts.push({ inline_data: { mime_type: inputImage.mimeType, data: inputImage.data } });
    console.log(`Editing image with model: ${fullModelName}`);
  } else {
    console.log(`Generating image with model: ${fullModelName}`);
  }

  const requestBody = {
    contents: [{ parts: requestParts }],
    generationConfig: { responseModalities: ["TEXT", "IMAGE"], temperature: 0.7 }
  };

  const response = await fetch(
    `https://generativelanguage.googleapis.com/v1beta/${fullModelName}:generateContent?key=${apiKey}`,
    { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody) }
  );

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`Image ${inputImage ? 'editing' : 'generation'} failed: ${response.status} - ${errorText}`);
  }
  const data = await response.json();
  if (!data.candidates || data.candidates.length === 0) {
    throw new Error(`No ${inputImage ? 'edited' : 'generated'} image returned`);
  }

  const candidate = data.candidates[0];
  if (candidate.finishReason === "SAFETY") {
    throw new Error(`Image ${inputImage ? 'editing' : 'generation'} blocked due to safety filters`);
  }

  const responseParts: ResponsePart[] = candidate.content?.parts || [];
  let textResponse = "";
  let imageBase64 = "";
  // Used for the data URL; stays PNG unless the response names another type.
  let imageMimeType = "image/png";

  for (const part of responseParts) {
    if (part.text) textResponse += part.text;
    // Accept both camelCase and snake_case spellings of the inline payload.
    if (part.inlineData?.data) {
      imageBase64 = part.inlineData.data;
      imageMimeType = part.inlineData.mimeType || "image/png";
    }
    if (part.inline_data?.data) {
      imageBase64 = part.inline_data.data;
      imageMimeType = part.inline_data.mime_type || "image/png";
    }
  }

  const result: { text?: string; imageBase64?: string; imageUrl?: string } = {};
  if (textResponse) result.text = textResponse;
  if (imageBase64) {
    result.imageBase64 = imageBase64;
    result.imageUrl = `data:${imageMimeType};base64,${imageBase64}`;
  }
  return result;
}
480
+
481
/**
 * Generates a reply with the Google Search tool attached.
 *
 * Falls back to generateContentWithSearchPrompt() when the HTTP call fails or
 * no candidates come back.
 *
 * @param messages - Conversation in OpenAI message format.
 * @param modelName - Requested model, with or without the 'models/' prefix.
 * @returns The text of the first part of the first candidate, or "No response generated".
 * @throws Error when the response was blocked by safety filters.
 */
async generateContentWithGrounding(messages: OpenAIMessage[], modelName: string): Promise<string> {
  const apiKey = this.getNextApiKey();
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;

  const contents = messages.map((msg) => ({
    role: msg.role === 'assistant' ? 'model' : 'user',
    parts: [{
      // Array-form content keeps its text parts instead of becoming an empty string.
      text: typeof msg.content === 'string'
        ? msg.content
        : msg.content.map((part) => part.text || "").join(" "),
    }],
  }));

  const requestBody = {
    contents,
    tools: [{ googleSearch: {} }],
    generationConfig: { temperature: 0.7, maxOutputTokens: 4096 }
  };

  const response = await fetch(
    `https://generativelanguage.googleapis.com/v1beta/${fullModelName}:generateContent?key=${apiKey}`,
    { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody) }
  );

  if (!response.ok) {
    console.warn(`Google Search API failed: ${response.status}, trying alternative.`);
    return await this.generateContentWithSearchPrompt(messages, modelName);
  }

  const data = await response.json();
  if (!data.candidates || data.candidates.length === 0) {
    return await this.generateContentWithSearchPrompt(messages, modelName);
  }

  const candidate = data.candidates[0];
  if (candidate.finishReason === "SAFETY") {
    throw new Error("Response blocked due to safety filters");
  }

  // `parts` may be absent even when `content` exists, so guard the index too.
  return candidate.content?.parts?.[0]?.text || "No response generated";
}
513
+
514
/**
 * Fallback for search requests: rewrites the last string message into a
 * "most current information" prompt and sends it through generateContent().
 *
 * The caller's messages are left untouched; the rewritten message is a copy.
 * An empty `messages` array is passed through unchanged.
 *
 * @param messages - Conversation in OpenAI message format.
 * @param modelName - Requested model name.
 * @returns The text returned by generateContent().
 */
async generateContentWithSearchPrompt(messages: OpenAIMessage[], modelName: string): Promise<string> {
  const lastIndex = messages.length - 1;
  const enhancedMessages = messages.map((msg, index) =>
    index === lastIndex && typeof msg.content === "string"
      ? { ...msg, content: `Please provide the most current and accurate information available about: ${msg.content}.` }
      : msg
  );
  return await this.generateContent(enhancedMessages, modelName, false);
}
522
+
523
/**
 * Generates or edits an image and renders the outcome as one text reply.
 *
 * Only the first entry of `inputImages` is used. Failures are reported in the
 * returned string rather than thrown.
 *
 * @param prompt - Text instruction for the model.
 * @param modelName - Requested model name.
 * @param inputImages - Optional images to edit; each entry is read through its `url` property.
 * @returns Model text and/or the image data URL, or a message describing why nothing was produced.
 */
async generateOrEditImage(prompt: string, modelName: string, inputImages?: any[]): Promise<string> {
  if (!this.isImageGenerationModel(modelName)) {
    return `Model ${modelName} does not support image generation. Use a model like gemini-2.0-flash-preview-image-generation.`;
  }

  try {
    let inputImage: { mimeType: string; data: string } | undefined;
    if (inputImages && inputImages.length > 0) {
      inputImage = this.extractImageData(inputImages[0].url);
    }

    const result = await this.generateOrEditImageWithGemini(prompt, modelName, inputImage);

    // Real newlines: the previous over-escaped literals put backslashes in the reply.
    let response = "";
    if (result.text) response += result.text + "\n\n";
    if (result.imageUrl) response += `${inputImage ? 'Edited' : 'Generated'} image:\n${result.imageUrl}`;
    return response || `Image processing complete.`;
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    return `Image processing failed: ${reason}`;
  }
}
541
  }
542
 
 
563
  lowerUrl.includes('.md') || lowerUrl.startsWith('data:text/markdown');
564
  }
565
 
566
/**
 * Handles an OpenAI-compatible text-to-speech request.
 *
 * Expects a JSON body with `input`, `voice` and `model`, and answers with the
 * audio returned by GoogleAIService.generateSpeech() as `audio/wav`.
 *
 * @param request - Incoming HTTP request.
 * @returns 200 with audio bytes, 400 when required fields are missing, or 500 on any other failure.
 */
private async handleAudioSpeech(request: Request): Promise<Response> {
  try {
    const body: OpenAITTSRequest = await request.json();

    if (!body.input || !body.voice || !body.model) {
      return new Response(
        JSON.stringify({ error: { message: "Missing required fields: input, voice, and model.", type: "invalid_request_error" } }),
        { status: 400, headers: { "Content-Type": "application/json" } }
      );
    }

    const audioFile = await this.googleAI.generateSpeech(body.input, body.model, body.voice);

    // Never send an empty 200: treat a missing or empty buffer as a failure.
    if (!audioFile || audioFile.byteLength === 0) {
      throw new Error("Speech generation returned no audio data.");
    }

    return new Response(audioFile, {
      headers: {
        "Content-Type": "audio/wav",
        "Access-Control-Allow-Origin": "*",
      }
    });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    console.error("Error in audio speech generation:", message);
    return new Response(
      JSON.stringify({
        error: {
          message,
          type: "api_error",
          code: null
        }
      }),
      { status: 500, headers: { "Content-Type": "application/json" } }
    );
  }
}
600
+
601
  private async handleChatCompletions(request: Request): Promise<Response> {
 
602
  try {
603
+ const body: OpenAIRequest = await request.json();
604
+ const requestedModel = body.model || "gemini-1.5-pro";
605
+ const stream = body.stream || false;
606
+ console.log(`Request for model: ${requestedModel}, stream: ${stream}`);
607
+
608
+ const lastMessage = body.messages[body.messages.length - 1];
609
+ const content = typeof lastMessage.content === "string"
610
+ ? lastMessage.content
611
+ : (Array.isArray(lastMessage.content) ? lastMessage.content.map(p => p.text || "").join(" ") : "");
612
+
613
+ const hasDocument = body.messages.some(msg =>
614
+ Array.isArray(msg.content) &&
615
+ msg.content.some(part => part.type === "document" || this.isDocumentContent(part.document?.url))
616
+ );
617
+
618
+ const hasImages = body.messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "image_url"));
619
+
620
+ let inputImages: any[] = [];
621
+ if (hasImages) {
622
+ body.messages.forEach(msg => {
623
+ if (Array.isArray(msg.content)) {
624
+ msg.content.forEach(part => {
625
+ if (part.type === "image_url" && part.image_url) inputImages.push({ url: part.image_url.url });
626
+ });
627
+ }
628
+ });
629
+ }
630
+
631
+ let responseText: string;
632
+
633
+ if (hasDocument) {
634
+ responseText = await this.googleAI.generateContentWithDocument(body.messages, requestedModel);
635
+ } else if (this.googleAI.isImageEditingModel(requestedModel) && hasImages) {
636
+ responseText = await this.googleAI.generateOrEditImage(content, requestedModel, inputImages);
637
+ } else if (this.googleAI.isImageGenerationModel(requestedModel)) {
638
+ responseText = await this.googleAI.generateOrEditImage(content, requestedModel);
639
+ } else if (content.toLowerCase().startsWith("/search:")) {
640
+ const query = content.substring(8).trim();
641
+ const searchMessages = [{ ...lastMessage, content: query }];
642
+ responseText = await this.googleAI.generateContentWithGrounding(searchMessages, requestedModel);
643
+ } else {
644
+ responseText = await this.googleAI.generateContent(body.messages, requestedModel, false);
645
+ }
646
+
647
+ if (stream) {
648
+ const streamResponse = await this.streamStringAsOpenAIResponse(responseText, requestedModel);
649
+ return new Response(streamResponse, {
650
+ headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive", "Access-Control-Allow-Origin": "*" }
651
+ });
652
+ } else {
653
  const responsePayload = {
654
+ id: `chatcmpl-${Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: requestedModel,
655
+ choices: [{ index: 0, message: { role: "assistant", content: responseText }, finish_reason: "stop" }],
656
  usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
657
  };
658
  return new Response(JSON.stringify(responsePayload), { headers: { "Content-Type": "application/json" } });
659
+ }
660
+ } catch (error) {
661
+ console.error("Error in chat completions:", error.message);
662
+ const status = error.message.includes("exceeds the limit") || error.message.includes("Invalid") ? 400 : 500;
663
+ return new Response(
664
+ JSON.stringify({
665
+ error: {
666
+ message: error.message,
667
+ type: status === 400 ? "invalid_request_error" : "api_error",
668
+ code: null
669
+ }
670
+ }),
671
+ { status, headers: { "Content-Type": "application/json" } }
672
+ );
673
  }
674
  }
675
 
676
+ private async streamStringAsOpenAIResponse(content: string, modelName: string): Promise<ReadableStream<Uint8Array>> {
677
+ const encoder = new TextEncoder();
678
+ const streamId = `chatcmpl-${Date.now()}`;
679
+ const creationTime = Math.floor(Date.now() / 1000);
680
+ let contentQueue = content.split('');
681
+
682
+ return new ReadableStream({
683
+ start(controller) {
684
+ const initialChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { role: 'assistant', content: '' }, finish_reason: null }] };
685
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(initialChunk)}\n\n`));
686
+ },
687
+ pull(controller) {
688
+ if (contentQueue.length === 0) {
689
+ const finalChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] };
690
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(finalChunk)}\n\n`));
691
+ controller.enqueue(encoder.encode('data: [DONE]\n\n'));
692
+ controller.close();
693
+ return;
694
+ }
695
+ const char = contentQueue.shift();
696
+ const chunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { content: char }, finish_reason: null }] };
697
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
698
+ }
699
+ });
700
+ }
701
+
702
  private async handleModels(): Promise<Response> {
 
703
  try {
704
+ const googleModels = await this.googleAI.fetchOfficialModels();
705
+ const fallbackModels = this.googleAI['getFallbackModels'](); // Access private method for a complete list
706
+
707
+ const allModels = [...googleModels, ...fallbackModels];
708
+ const uniqueModelMap = new Map();
709
+ allModels.forEach(model => {
710
+ const modelId = model.id || model.name.replace('models/', '');
711
+ if (!uniqueModelMap.has(modelId)) {
712
+ uniqueModelMap.set(modelId, {
713
+ id: modelId,
714
+ object: "model",
715
+ created: Math.floor(Date.now() / 1000),
716
+ owned_by: "google",
717
+ description: model.description || model.displayName,
718
+ maxTokens: model.inputTokenLimit || model.maxTokens
719
+ });
720
+ }
721
+ });
722
+
723
+ const models = {
724
+ object: "list",
725
+ data: Array.from(uniqueModelMap.values()),
726
+ };
727
+
728
  return new Response(JSON.stringify(models), { headers: { "Content-Type": "application/json" } });
729
  } catch (error) {
730
+ console.error("Error fetching models:", error);
731
+ return new Response(JSON.stringify({ error: { message: "Failed to fetch models." } }), { status: 500 });
732
  }
733
  }
734
+
735
+ private async handleStatus(): Promise<Response> {
736
+ const status = {
737
+ status: "healthy", timestamp: new Date().toISOString(), version: "2.5.0",
738
+ api_keys_loaded: this.googleAI.apiKeys.length,
739
+ models_in_cache: this.googleAI.cachedModels.length,
740
+ models_last_fetched: this.googleAI.modelsLastFetch > 0 ? new Date(this.googleAI.modelsLastFetch).toISOString() : "never"
741
+ };
742
+ return new Response(JSON.stringify(status), { headers: { "Content-Type": "application/json" } });
743
+ }
744
 
745
  async handleRequest(request: Request): Promise<Response> {
746
  const corsHeaders = {
 
756
  const url = new URL(request.url);
757
  let response: Response;
758
 
759
+ // Handle routes
760
+ if (url.pathname === "/health" || url.pathname === "/status") {
761
+ response = await this.handleStatus();
762
+ } else if (!this.authenticate(request)) {
763
  response = new Response(JSON.stringify({ error: { message: "Unauthorized" } }), { status: 401 });
764
+ // [Modified] Route for the OpenAI-compatible TTS endpoint
765
+ } else if (url.pathname === "/v1/audio/speech" && request.method === "POST") {
766
+ response = await this.handleAudioSpeech(request);
767
  } else if (url.pathname === "/v1/chat/completions" && request.method === "POST") {
768
  response = await this.handleChatCompletions(request);
769
  } else if (url.pathname === "/v1/models" && request.method === "GET") {
770
  response = await this.handleModels();
 
 
771
  } else {
772
  response = new Response("Not Found", { status: 404 });
773
  }
774
 
775
+ // Add CORS headers to all responses
776
+ const finalHeaders = new Headers(response.headers);
777
  for (const [key, value] of Object.entries(corsHeaders)) {
778
+ finalHeaders.set(key, value);
779
  }
780
 
781
+ return new Response(response.body, { status: response.status, headers: finalHeaders });
782
  }
783
  }
784
 
785
  // --- 服务器启动 ---
786
  const server = new OpenAICompatibleServer();
787
 
788
+ console.log("🚀 OpenAI Compatible Server with Google AI starting on port 8000...");
789
  console.log(`✅ Loaded ${server.googleAI.apiKeys.length} API key(s).`);
790
+ console.log(`📄 Max document size set to ${MAX_DOCUMENT_SIZE_MB}MB.`);
791
+
792
+ // Pre-fetch models at startup
793
+ server.googleAI.fetchOfficialModels().then(models => {
794
+ console.log(`✅ Successfully fetched ${models.length} models from Google AI.`);
795
+ }).catch(error => {
796
+ console.warn(`⚠️ Could not pre-fetch models: ${error.message}. Will use fallbacks or fetch on first request.`);
797
+ });
798
+
799
  console.log("\n🔗 Endpoints:");
800
  console.log(" POST /v1/chat/completions");
801
+ console.log(" POST /v1/audio/speech <-- [NEW] OpenAI TTS compatible endpoint"); // [修改] 更新启动日志
802
  console.log(" GET /v1/models");
803
+ console.log(" GET /status");
804
 
805
+ // [修改] 端口从 7860 改为 8000,与日志一致。您可以根据需要改回 7860。
806
  await serve(
807
  (request: Request) => server.handleRequest(request),
808
  { port: 7860 }