xt8 committed on
Commit
ef708c0
·
verified ·
1 Parent(s): 15bd9de

Update main.ts

Browse files
Files changed (1) hide show
  1. main.ts +519 -239
main.ts CHANGED
@@ -1,4 +1,3 @@
1
- // main.ts
2
  import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
3
 
4
  // --- 常量定义 ---
@@ -13,7 +12,7 @@ interface OpenAIMessage {
13
  type: string;
14
  text?: string;
15
  image_url?: { url: string };
16
- document?: { url: string; type: string };
17
  }>;
18
  }
19
 
@@ -25,12 +24,13 @@ interface OpenAIRequest {
25
  stream?: boolean;
26
  }
27
 
 
28
  interface OpenAITTSRequest {
29
- model: string;
30
- input: string;
31
  voice: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
32
- response_format?: 'mp3' | 'opus' | 'aac' | 'flac';
33
- speed?: number;
34
  }
35
 
36
 
@@ -62,57 +62,121 @@ class GoogleAIService {
62
  return key;
63
  }
64
 
 
 
 
 
 
 
65
  private getGoogleVoice(openAIVoice: string): string {
66
  const voiceMap: { [key: string]: string } = {
67
- 'alloy': 'Kore', 'echo': 'Sal', 'fable': 'Polly', 'onyx': 'Onyx',
68
- 'nova': 'Sparkle', 'shimmer': 'Luna', 'default': 'Kore'
 
 
 
 
 
 
69
  };
70
  return voiceMap[openAIVoice] || voiceMap['default'];
71
  }
72
 
 
 
 
 
 
 
 
73
  async generateSpeech(input: string, model: string, voice: string): Promise<ArrayBuffer> {
74
  const apiKey = this.getNextApiKey();
75
  const googleVoice = this.getGoogleVoice(voice);
 
76
  const ttsModel = "gemini-2.5-flash-preview-tts";
77
 
78
  console.log(`Generating speech with model: ${ttsModel}, voice: ${googleVoice} (mapped from OpenAI's '${voice}')`);
 
79
  const requestBody = {
80
- "contents": [{"parts":[{"text": input}]}],
81
- "generationConfig": {"responseModalities": ["AUDIO"],"speechConfig": {"voiceConfig": {"prebuiltVoiceConfig": {"voiceName": googleVoice}}}},
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  "model": ttsModel,
83
  };
84
- const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${ttsModel}:generateContent?key=${apiKey}`,
85
- { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody) });
 
 
 
 
 
 
 
 
86
  if (!response.ok) {
87
  const errorBody = await response.json().catch(() => response.text());
88
  const errorMessage = errorBody?.error?.message || JSON.stringify(errorBody);
 
89
  throw new Error(`Google TTS API request failed with status ${response.status}: ${errorMessage}`);
90
  }
 
91
  const data = await response.json();
 
 
92
  const audioContentBase64 = data.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
93
- if (!audioContentBase64) throw new Error("No audio data returned from Google API.");
 
 
 
 
94
  const binaryString = atob(audioContentBase64);
95
- const bytes = new Uint8Array(binaryString.length);
96
- for (let i = 0; i < binaryString.length; i++) {
 
97
  bytes[i] = binaryString.charCodeAt(i);
98
  }
99
  return bytes.buffer;
100
  }
101
 
 
 
102
  async fetchOfficialModels(): Promise<any[]> {
103
  const now = Date.now();
104
- if (this.cachedModels.length > 0 && (now - this.modelsLastFetch) < MODELS_CACHE_DURATION) return this.cachedModels;
 
 
 
105
  const apiKey = this.getNextApiKey();
106
  try {
107
- const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models?key=${apiKey}`);
 
 
 
 
108
  if (!response.ok) {
109
  console.warn(`Failed to fetch models from Google AI: ${response.status}. Using fallback models.`);
110
  return this.getFallbackModels();
111
  }
 
112
  const data = await response.json();
113
  if (data.models && Array.isArray(data.models)) {
114
- this.cachedModels = data.models.filter((model: any) => model.supportedGenerationMethods?.includes('generateContent'));
 
 
115
  this.modelsLastFetch = now;
 
116
  return this.cachedModels;
117
  }
118
  return this.getFallbackModels();
@@ -124,188 +188,338 @@ class GoogleAIService {
124
 
125
  private getFallbackModels(): any[] {
126
  return [
127
- { name: "models/gemini-1.5-pro", displayName: "Gemini 1.5 Pro", description: "Mid-size multimodal model that supports up to 1 million tokens.", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000 },
128
- { name: "models/gemini-1.5-flash", displayName: "Gemini 1.5 Flash", description: "Fast and versatile multimodal model.", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000 },
129
- { name: "models/gemini-2.0-flash-preview-image-generation", displayName: "Gemini 2.0 Flash Image Generation", description: "Advanced model for generating images.", supportedGenerationMethods: ["generateContent"], maxTokens: 100000 },
130
- { name: "models/gemini-2.5-flash-preview-tts", displayName: "Gemini 2.5 Flash TTS", description: "Text-to-speech model.", id: "gemini-2.5-flash-preview-tts" }
 
131
  ];
132
  }
133
 
134
  public isVisionModel = (modelName: string): boolean => modelName.toLowerCase().includes('vision') || modelName.toLowerCase().includes('pro');
135
- public isImageGenerationModel = (modelName: string): boolean => modelName.includes('image-generation');
136
- public isImageEditingModel = (modelName: string): boolean => modelName.includes('image-generation');
137
- public isDocumentModel = (modelName: string): boolean => modelName.toLowerCase().includes('gemini-1.5');
138
 
139
  private getDocumentType(url: string): string {
140
  const lowerUrl = url.toLowerCase();
141
  if (lowerUrl.startsWith('data:application/pdf') || lowerUrl.includes('.pdf')) return 'pdf';
142
  if (lowerUrl.startsWith('data:text/plain') || lowerUrl.includes('.txt')) return 'txt';
143
  if (lowerUrl.startsWith('data:text/markdown') || lowerUrl.includes('.md')) return 'md';
 
 
144
  return 'unknown';
145
  }
146
 
147
  private extractDocumentData(documentUrl: string): { mimeType: string; data: string; text?: string; docType: string } {
148
  const docType = this.getDocumentType(documentUrl);
149
- if (!documentUrl.startsWith("data:")) throw new Error("Document must be a base64 data URL.");
150
- const [mimeInfo, base64Data] = documentUrl.split(",");
151
- if (base64Data.length * 0.75 > MAX_DOCUMENT_SIZE_BYTES) throw new Error(`Document size exceeds ${MAX_DOCUMENT_SIZE_MB}MB.`);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  const mimeType = mimeInfo.split(":")[1]?.split(";")[0] || 'application/octet-stream';
 
153
  if (docType === 'txt' || docType === 'md') {
154
- return { mimeType, data: base64Data, text: atob(base64Data), docType };
 
 
 
 
 
 
155
  }
156
- return { mimeType: docType === 'pdf' ? 'application/pdf' : mimeType, data: base64Data, docType };
 
 
157
  }
158
 
159
  private extractImageData(imageUrl: string): { mimeType: string; data: string } {
160
  if (imageUrl.startsWith("data:image/")) {
161
  const [mimeInfo, base64Data] = imageUrl.split(",");
162
- return { mimeType: mimeInfo.split(":")[1].split(";")[0], data: base64Data };
 
163
  } else if (imageUrl.startsWith("http")) {
164
- throw new Error("URL images are not supported. Please use base64 data URLs.");
 
 
165
  }
166
- return { mimeType: "image/jpeg", data: imageUrl };
167
  }
168
 
169
- private buildGoogleContent(messages: OpenAIMessage[]) {
170
- return messages.map(msg => {
171
- const role = msg.role === "assistant" ? "model" : "user";
172
- if (typeof msg.content === "string") {
173
- return { role, parts: [{ text: msg.content }] };
174
- }
175
- const parts = msg.content.map(part => {
176
- if (part.type === "text") return { text: part.text };
177
- if (part.type === "image_url" && part.image_url) {
178
- const { mimeType, data } = this.extractImageData(part.image_url.url);
179
- return { inlineData: { mimeType, data } };
 
180
  }
181
- if (part.type === "document" && part.document) {
182
- const docData = this.extractDocumentData(part.document.url);
183
- if (docData.docType === 'txt' || docData.docType === 'md') {
184
- return { text: `${docData.docType === 'md' ? 'Markdown' : 'Text'} document content:\n${docData.text}` };
 
 
 
185
  }
186
- if (docData.docType === 'pdf') {
187
- return { inlineData: { mimeType: docData.mimeType, data: docData.data } };
 
 
 
 
 
 
 
 
 
 
 
188
  }
189
- }
190
- return { text: "" };
 
191
  });
192
- return { role, parts: parts.filter(p => p && (p.text || p.inlineData)) };
193
- });
194
- }
195
 
196
- async generateContent(messages: OpenAIMessage[], modelName: string): Promise<string> {
197
- const apiKey = this.getNextApiKey();
198
- const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
199
- const contents = this.buildGoogleContent(messages);
200
- const requestBody = { contents, generationConfig: { temperature: 0.7, maxOutputTokens: 8192 } };
201
 
202
- const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/${fullModelName}:generateContent?key=${apiKey}`,
203
- { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody) });
 
 
 
 
 
 
204
 
205
  if (!response.ok) {
206
- const errorText = await response.text();
207
- throw new Error(`Google AI API error: ${response.status} - ${errorText}`);
 
 
208
  }
 
209
  const data = await response.json();
210
- if (data.promptFeedback?.blockReason) {
211
- throw new Error(`Request blocked by Google. Reason: ${data.promptFeedback.blockReason}`);
 
 
 
212
  }
213
- const candidate = data.candidates?.[0];
214
- if (!candidate) throw new Error("No response generated from Google AI.");
215
- if (candidate.finishReason === "SAFETY") throw new Error("Response blocked for safety reasons.");
216
- return candidate.content?.parts?.[0]?.text || "No text response generated.";
 
 
 
 
 
 
 
 
 
 
217
  }
 
 
 
 
 
 
218
 
219
- /**
220
- * [新增] 真正的流式内容生成函数
221
- * 使用 Google 的 streamGenerateContent 端点进行流式响应处理
222
- */
223
- async * streamGenerateContent(messages: OpenAIMessage[], modelName: string): AsyncGenerator<string> {
224
  const apiKey = this.getNextApiKey();
225
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
226
- const contents = this.buildGoogleContent(messages);
227
- const requestBody = { contents, generationConfig: { temperature: 0.7, maxOutputTokens: 8192 } };
228
 
229
- // [关键] 使用 :streamGenerateContent 端点
230
- const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/${fullModelName}:streamGenerateContent?key=${apiKey}`,
231
- { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody) });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
 
233
- if (!response.ok || !response.body) {
234
- const errorText = await response.text();
235
- throw new Error(`Google AI streaming API error: ${response.status} - ${errorText}`);
 
 
 
236
  }
237
-
238
- // [关键] 读取并解析流
239
- const reader = response.body.getReader();
240
- const decoder = new TextDecoder();
241
- let buffer = "";
242
 
243
- while (true) {
244
- const { done, value } = await reader.read();
245
- if (done) break;
246
-
247
- buffer += decoder.decode(value, { stream: true });
248
-
249
- // Google 的流式响应可能在一个数据包里包含多个以 "data: " 开头的 JSON 对象
250
- const lines = buffer.split('\n');
251
- buffer = lines.pop() || ''; // Keep the last, possibly incomplete, line in buffer
252
-
253
- for (const line of lines) {
254
- if (line.startsWith('data: ')) {
255
- try {
256
- const jsonStr = line.substring(6);
257
- const chunk = JSON.parse(jsonStr);
258
-
259
- if (chunk.error) throw new Error(`Google API stream error: ${chunk.error.message}`);
260
-
261
- const text = chunk.candidates?.[0]?.content?.parts?.[0]?.text;
262
- if (text) {
263
- yield text; // 产生一个文本块
264
- }
265
- } catch (e) {
266
- console.warn("Could not parse stream chunk:", line, e.message);
267
- }
268
- }
269
- }
270
  }
 
 
 
 
 
 
 
 
 
271
  }
272
 
273
- async generateOrEditImage(prompt: string, modelName: string, inputImages?: any[]): Promise<string> {
274
  const apiKey = this.getNextApiKey();
275
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
276
  const requestParts: any[] = [{ text: prompt }];
277
- let inputImage;
278
- if (inputImages && inputImages.length > 0) {
279
- inputImage = this.extractImageData(inputImages[0].url);
280
- requestParts.push({ inline_data: { mime_type: inputImage.mimeType, data: inputImage.data } });
 
 
281
  }
282
 
283
  const requestBody = {
284
  contents: [{ parts: requestParts }],
285
  generationConfig: { responseModalities: ["TEXT", "IMAGE"], temperature: 0.7 }
286
  };
287
- const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/${fullModelName}:generateContent?key=${apiKey}`,
288
- { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody) });
289
 
290
- if (!response.ok) throw new Error(`Image processing failed: ${response.status} - ${await response.text()}`);
 
 
 
 
 
 
 
 
291
  const data = await response.json();
292
- const candidate = data.candidates?.[0];
293
- if (!candidate) throw new Error("No image response from Google.");
294
- if (candidate.finishReason === "SAFETY") throw new Error("Image request blocked for safety reasons.");
295
 
 
 
 
 
 
 
296
  let textResponse = "";
297
  let imageBase64 = "";
298
- (candidate.content?.parts || []).forEach((part: any) => {
299
- if (part.text) textResponse += part.text;
300
- if (part.inlineData?.data || part.inline_data?.data) {
301
- imageBase64 = part.inlineData?.data || part.inline_data.data;
302
- }
303
- });
304
 
305
- let result = "";
306
- if (textResponse) result += textResponse + "\\\\n\\\\n";
307
- if (imageBase64) result += `${inputImage ? 'Edited' : 'Generated'} image:\\\\n"data:image/png;base64,${imageBase64}"`;
308
- return result || "Image processing complete.";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  }
310
  }
311
 
@@ -320,7 +534,8 @@ class OpenAICompatibleServer {
320
 
321
  private authenticate(request: Request): boolean {
322
  if (!this.authKey) return true;
323
- return request.headers.get("Authorization")?.replace("Bearer ", "") === this.authKey;
 
324
  }
325
 
326
  private isDocumentContent(url?: string): boolean {
@@ -331,114 +546,178 @@ class OpenAICompatibleServer {
331
  lowerUrl.includes('.md') || lowerUrl.startsWith('data:text/markdown');
332
  }
333
 
 
 
 
334
  private async handleAudioSpeech(request: Request): Promise<Response> {
335
- const body: OpenAITTSRequest = await request.json();
336
- if (!body.input || !body.voice || !body.model) {
337
- return new Response(JSON.stringify({ error: "Missing required fields: input, voice, model." }), { status: 400 });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  }
339
- const audioBuffer = await this.googleAI.generateSpeech(body.input, body.model, body.voice);
340
- return new Response(audioBuffer, { headers: { "Content-Type": "audio/mp3" } });
341
  }
342
 
343
  private async handleChatCompletions(request: Request): Promise<Response> {
344
  try {
345
  const body: OpenAIRequest = await request.json();
346
  const requestedModel = body.model || "gemini-1.5-pro";
 
 
347
 
348
- // [修改] 流式请求处理
349
- if (body.stream) {
350
- const googleStream = this.googleAI.streamGenerateContent(body.messages, requestedModel);
351
- const openAIStream = this.streamGoogleResponseAsOpenAI(googleStream, requestedModel);
352
- return new Response(openAIStream, {
353
- headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive" }
354
- });
355
- }
356
-
357
- // [保持不变] 非流式请求处理
358
  const lastMessage = body.messages[body.messages.length - 1];
359
- const content = typeof lastMessage.content === "string" ? lastMessage.content : (Array.isArray(lastMessage.content) ? lastMessage.content.map(p => p.text || "").join(" ") : "");
360
- const hasDocument = body.messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "document" || this.isDocumentContent(part.document?.url)));
361
- const hasImages = body.messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "image_url"));
 
 
 
 
 
362
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  let responseText: string;
364
- if (this.googleAI.isImageGenerationModel(requestedModel)) {
365
- const inputImages = hasImages ? body.messages.flatMap(msg => Array.isArray(msg.content) ? msg.content.filter(p => p.type === 'image_url' && p.image_url).map(p => ({ url: p.image_url!.url })) : []) : undefined;
 
 
366
  responseText = await this.googleAI.generateOrEditImage(content, requestedModel, inputImages);
367
- } else if(hasDocument || hasImages) { // Vision/Document models
368
- responseText = await this.googleAI.generateContent(body.messages, requestedModel);
369
- } else { // Standard text
370
- responseText = await this.googleAI.generateContent(body.messages, requestedModel);
 
 
 
 
371
  }
372
 
373
- const responsePayload = {
374
- id: `chatcmpl-${Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: requestedModel,
375
- choices: [{ index: 0, message: { role: "assistant", content: responseText }, finish_reason: "stop" }],
376
- usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
377
- };
378
- return new Response(JSON.stringify(responsePayload), { headers: { "Content-Type": "application/json" } });
379
-
 
 
 
 
 
 
380
  } catch (error) {
381
  console.error("Error in chat completions:", error.message);
382
- const status = error.message.includes("exceeds") || error.message.includes("Invalid") ? 400 : 500;
383
- return new Response(JSON.stringify({ error: { message: error.message, type: "api_error" } }),
384
- { status, headers: { "Content-Type": "application/json" } });
 
 
 
 
 
 
 
 
385
  }
386
  }
387
 
388
- /**
389
- * [新增] 真正的流式响应转换函数
390
- * 将 Google 的数据流转换为 OpenAI 兼容的 Server-Sent Events 流
391
- */
392
- private streamGoogleResponseAsOpenAI(googleStream: AsyncGenerator<string>, modelName: string): ReadableStream<Uint8Array> {
393
  const encoder = new TextEncoder();
394
  const streamId = `chatcmpl-${Date.now()}`;
395
  const creationTime = Math.floor(Date.now() / 1000);
 
396
 
397
  return new ReadableStream({
398
- async start(controller) {
399
- // 发送第一个包含角色的数据块
400
- const initialChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { role: 'assistant', content: '' }, finish_reason: null }] };
401
- controller.enqueue(encoder.encode(`data: ${JSON.stringify(initialChunk)}\n\n`));
402
-
403
- // 迭代从 Google API 收到的文本块并转发
404
- for await (const textChunk of googleStream) {
405
- if (textChunk) {
406
- const chunk = {
407
- id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName,
408
- choices: [{ index: 0, delta: { content: textChunk }, finish_reason: null }]
409
- };
410
- controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
411
  }
 
 
 
412
  }
413
-
414
- // 发送结束信号
415
- const finalChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] };
416
- controller.enqueue(encoder.encode(`data: ${JSON.stringify(finalChunk)}\n\n`));
417
- controller.enqueue(encoder.encode('data: [DONE]\n\n'));
418
- controller.close();
419
- }
420
  });
421
  }
422
 
423
  private async handleModels(): Promise<Response> {
424
- const googleModels = await this.googleAI.fetchOfficialModels();
425
- const fallbackModels = this.googleAI['getFallbackModels']();
426
-
427
- const uniqueModelMap = new Map();
428
- [...googleModels, ...fallbackModels].forEach(model => {
429
- const modelId = model.id || model.name.replace('models/', '');
430
- if (!uniqueModelMap.has(modelId)) {
431
- uniqueModelMap.set(modelId, {
432
- id: modelId, object: "model", created: Math.floor(Date.now() / 1000), owned_by: "google"
433
- });
434
- }
435
- });
436
- return new Response(JSON.stringify({ object: "list", data: Array.from(uniqueModelMap.values()) }), { headers: { "Content-Type": "application/json" } });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
  }
438
 
439
  private async handleStatus(): Promise<Response> {
440
  const status = {
441
- status: "healthy", timestamp: new Date().toISOString(), version: "2.5.1",
442
  api_keys_loaded: this.googleAI.apiKeys.length,
443
  models_in_cache: this.googleAI.cachedModels.length,
444
  models_last_fetched: this.googleAI.modelsLastFetch > 0 ? new Date(this.googleAI.modelsLastFetch).toISOString() : "never"
@@ -460,52 +739,53 @@ class OpenAICompatibleServer {
460
  const url = new URL(request.url);
461
  let response: Response;
462
 
463
- try {
464
- if (url.pathname === "/health" || url.pathname === "/status") {
465
- response = await this.handleStatus();
466
- } else if (!this.authenticate(request)) {
467
- response = new Response(JSON.stringify({ error: { message: "Unauthorized" } }), { status: 401 });
468
- } else if (url.pathname === "/v1/audio/speech" && request.method === "POST") {
469
- response = await this.handleAudioSpeech(request);
470
- } else if (url.pathname === "/v1/chat/completions" && request.method === "POST") {
471
- response = await this.handleChatCompletions(request);
472
- } else if (url.pathname === "/v1/models" && request.method === "GET") {
473
- response = await this.handleModels();
474
- } else {
475
- response = new Response("Not Found", { status: 404 });
476
- }
477
- } catch (error) {
478
- console.error("Unhandled error:", error);
479
- response = new Response(JSON.stringify({ error: { message: error.message || "An internal server error occurred." } }), { status: 500 });
480
  }
481
 
482
- // 为所有响应添加CORS
483
  const finalHeaders = new Headers(response.headers);
484
  for (const [key, value] of Object.entries(corsHeaders)) {
485
  finalHeaders.set(key, value);
486
  }
487
- return new Response(response.body, { status: response.status, statusText: response.statusText, headers: finalHeaders });
 
488
  }
489
  }
490
 
491
  // --- 服务器启动 ---
492
  const server = new OpenAICompatibleServer();
493
 
494
- console.log("🚀 OpenAI Compatible Server with Google AI starting...");
495
- console.log(`✅ Loaded ${server['googleAI'].apiKeys.length} API key(s).`);
 
496
 
497
- server['googleAI'].fetchOfficialModels().catch(error => {
498
- console.warn(`⚠️ Could not pre-fetch models: ${error.message}. Will use fallbacks.`);
 
 
 
499
  });
500
 
501
- const port = 7860;
502
- console.log(`Server listening on http://localhost:${port}`);
503
  console.log("\n🔗 Endpoints:");
504
- console.log(` POST /v1/chat/completions`);
505
- console.log(` POST /v1/audio/speech`);
506
- console.log(` GET /v1/models`);
507
- console.log(` GET /status`);
508
 
 
509
  await serve(
510
  (request: Request) => server.handleRequest(request),
511
  { port: 7860 }
 
 
1
  import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
2
 
3
  // --- 常量定义 ---
 
12
  type: string;
13
  text?: string;
14
  image_url?: { url: string };
15
+ document?: { url: string; type: string }; // 支持多种文档类型
16
  }>;
17
  }
18
 
 
24
  stream?: boolean;
25
  }
26
 
27
  /**
   * [Added] Shape of an OpenAI-style text-to-speech request body.
   */
  interface OpenAITTSRequest {
    // OpenAI model name, e.g. 'tts-1' or 'tts-1-hd'.
    model: string;
    // The text to synthesize.
    input: string;
    // OpenAI voice name; translated to a Google voice before the upstream call.
    voice: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
    // Requested container format. generateSpeech takes no format argument, so this is not forwarded.
    // NOTE(review): the original comment said Google TTS returns MP3 — confirm the actual output format.
    response_format?: 'mp3' | 'opus' | 'aac' | 'flac';
    // Playback speed. generateSpeech takes no speed argument, so this is ignored.
    speed?: number;
  }
35
 
36
 
 
62
  return key;
63
  }
64
 
65
+ // --- [新增] TTS 功能 ---
66
+
67
+ /**
68
+ * 映射 OpenAI 的语音名称到 Google Gemini TTS 的预置语音名称。
69
+ * 参考: https://ai.google.dev/gemini-api/docs/text-to-speech#supported_voices
70
+ */
71
  private getGoogleVoice(openAIVoice: string): string {
72
  const voiceMap: { [key: string]: string } = {
73
+ 'alloy': 'Kore', // A good default, versatile voice
74
+ 'echo': 'Sal', // Another male voice option
75
+ 'fable': 'Polly', // Female, narrative style
76
+ 'onyx': 'Onyx', // Deep, male voice
77
+ 'nova': 'Sparkle', // Energetic female voice
78
+ 'shimmer': 'Luna', // Gentle female voice
79
+ // Fallback to a default if the voice is not in the map
80
+ 'default': 'Kore'
81
  };
82
  return voiceMap[openAIVoice] || voiceMap['default'];
83
  }
84
 
85
+ /**
86
+ * [新增] 调用 Google Gemini TTS API 生成语音。
87
+ * @param input - 要转换为语音的文本。
88
+ * @param model - 请求的模型(在Google端,我们硬编码为TTS模型)。
89
+ * @param voice - OpenAI 格式的语音名称。
90
+ * @returns 返回包含音频数据的 ArrayBuffer。
91
+ */
92
  async generateSpeech(input: string, model: string, voice: string): Promise<ArrayBuffer> {
93
  const apiKey = this.getNextApiKey();
94
  const googleVoice = this.getGoogleVoice(voice);
95
+ // 根据 curl 命令,模型是固定的 TTS 模型
96
  const ttsModel = "gemini-2.5-flash-preview-tts";
97
 
98
  console.log(`Generating speech with model: ${ttsModel}, voice: ${googleVoice} (mapped from OpenAI's '${voice}')`);
99
+
100
  const requestBody = {
101
+ "contents": [{
102
+ "parts":[{
103
+ "text": input
104
+ }]
105
+ }],
106
+ "generationConfig": {
107
+ "responseModalities": ["AUDIO"],
108
+ "speechConfig": {
109
+ "voiceConfig": {
110
+ "prebuiltVoiceConfig": {
111
+ "voiceName": googleVoice
112
+ }
113
+ }
114
+ }
115
+ },
116
  "model": ttsModel,
117
  };
118
+
119
+ const response = await fetch(
120
+ `https://generativelanguage.googleapis.com/v1beta/models/${ttsModel}:generateContent?key=${apiKey}`,
121
+ {
122
+ method: "POST",
123
+ headers: { "Content-Type": "application/json" },
124
+ body: JSON.stringify(requestBody),
125
+ }
126
+ );
127
+
128
  if (!response.ok) {
129
  const errorBody = await response.json().catch(() => response.text());
130
  const errorMessage = errorBody?.error?.message || JSON.stringify(errorBody);
131
+ console.error(`Google TTS API Error: ${response.status} - ${errorMessage}`);
132
  throw new Error(`Google TTS API request failed with status ${response.status}: ${errorMessage}`);
133
  }
134
+
135
  const data = await response.json();
136
+
137
+ // 提取 base64 编码的音频数据
138
  const audioContentBase64 = data.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
139
+ if (!audioContentBase64) {
140
+ throw new Error("No audio data returned from Google API. The response might be blocked or empty.");
141
+ }
142
+
143
+ // 将 base64 字符串解码为二进制数据 (ArrayBuffer)
144
  const binaryString = atob(audioContentBase64);
145
+ const len = binaryString.length;
146
+ const bytes = new Uint8Array(len);
147
+ for (let i = 0; i < len; i++) {
148
  bytes[i] = binaryString.charCodeAt(i);
149
  }
150
  return bytes.buffer;
151
  }
152
 
153
+ // --- 现有代码保持不变 ---
154
+
155
  async fetchOfficialModels(): Promise<any[]> {
156
  const now = Date.now();
157
+ if (this.cachedModels.length > 0 && (now - this.modelsLastFetch) < MODELS_CACHE_DURATION) {
158
+ return this.cachedModels;
159
+ }
160
+
161
  const apiKey = this.getNextApiKey();
162
  try {
163
+ const response = await fetch(
164
+ `https://generativelanguage.googleapis.com/v1beta/models?key=${apiKey}`,
165
+ { method: "GET", headers: { "Content-Type": "application/json" } }
166
+ );
167
+
168
  if (!response.ok) {
169
  console.warn(`Failed to fetch models from Google AI: ${response.status}. Using fallback models.`);
170
  return this.getFallbackModels();
171
  }
172
+
173
  const data = await response.json();
174
  if (data.models && Array.isArray(data.models)) {
175
+ this.cachedModels = data.models.filter((model: any) =>
176
+ model.supportedGenerationMethods?.includes('generateContent')
177
+ );
178
  this.modelsLastFetch = now;
179
+ console.log(`Fetched ${this.cachedModels.length} models from Google AI`);
180
  return this.cachedModels;
181
  }
182
  return this.getFallbackModels();
 
188
 
189
  private getFallbackModels(): any[] {
190
  return [
191
+ { name: "models/gemini-1.5-pro", displayName: "Gemini 1.5 Pro", description: "Mid-size multimodal model that supports up to 1 million tokens, images, and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
192
+ { name: "models/gemini-1.5-flash", displayName: "Gemini 1.5 Flash", description: "Fast and versatile multimodal model for diverse tasks, supports images and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
193
+ { name: "models/gemini-2.0-flash-preview-image-generation", displayName: "Gemini 2.0 Flash Image Generation", description: "Advanced model for generating and editing high-quality images with text and image outputs", supportedGenerationMethods: ["generateContent"], maxTokens: 100000, capabilities: ["text", "image_generation", "image_editing"] },
194
+ // [新增] 在模型列表中添加TTS模型,使其在 /v1/models 接口可见
195
+ { name: "models/gemini-2.5-flash-preview-tts", displayName: "Gemini 2.5 Flash TTS", description: "Text-to-speech model for generating high-quality audio.", supportedGenerationMethods: ["generateContent"], id: "gemini-2.5-flash-preview-tts" }
196
  ];
197
  }
198
 
199
  public isVisionModel = (modelName: string): boolean => modelName.toLowerCase().includes('vision') || modelName.toLowerCase().includes('pro');
200
+ public isImageGenerationModel = (modelName: string): boolean => modelName.includes('image-generation') || modelName === 'gemini-2.0-flash-preview-image-generation';
201
+ public isImageEditingModel = (modelName: string): boolean => modelName.includes('image-generation') || modelName === 'gemini-2.0-flash-preview-image-generation';
202
+ public isDocumentModel = (modelName: string): boolean => modelName.toLowerCase().includes('gemini-1.5') || modelName.toLowerCase().includes('pro') || modelName.toLowerCase().includes('flash');
203
 
204
  private getDocumentType(url: string): string {
205
  const lowerUrl = url.toLowerCase();
206
  if (lowerUrl.startsWith('data:application/pdf') || lowerUrl.includes('.pdf')) return 'pdf';
207
  if (lowerUrl.startsWith('data:text/plain') || lowerUrl.includes('.txt')) return 'txt';
208
  if (lowerUrl.startsWith('data:text/markdown') || lowerUrl.includes('.md')) return 'md';
209
+ if (lowerUrl.startsWith('data:application/msword') || lowerUrl.includes('.doc')) return 'doc';
210
+ if (lowerUrl.startsWith('data:application/vnd.openxmlformats-officedocument.wordprocessingml.document') || lowerUrl.includes('.docx')) return 'docx';
211
  return 'unknown';
212
  }
213
 
214
  private extractDocumentData(documentUrl: string): { mimeType: string; data: string; text?: string; docType: string } {
215
  const docType = this.getDocumentType(documentUrl);
216
+
217
+ if (!documentUrl.startsWith("data:")) {
218
+ if (documentUrl.startsWith("http")) {
219
+ throw new Error("Document URL downloads are not supported. Please provide base64 encoded data URLs.");
220
+ }
221
+ throw new Error("Document must be provided as a standard base64 data URL (e.g., 'data:application/pdf;base64,...').");
222
+ }
223
+
224
+ const parts = documentUrl.split(",");
225
+ if (parts.length !== 2) {
226
+ throw new Error("Invalid data URL format for document. Expected 'data:[mime];base64,[data]'.");
227
+ }
228
+ const [mimeInfo, base64Data] = parts;
229
+
230
+ const approxSizeInBytes = base64Data.length * 0.75;
231
+ if (approxSizeInBytes > MAX_DOCUMENT_SIZE_BYTES) {
232
+ throw new Error(`Document size (${(approxSizeInBytes / 1024 / 1024).toFixed(2)}MB) exceeds the ${MAX_DOCUMENT_SIZE_MB}MB limit.`);
233
+ }
234
+
235
  const mimeType = mimeInfo.split(":")[1]?.split(";")[0] || 'application/octet-stream';
236
+
237
  if (docType === 'txt' || docType === 'md') {
238
+ try {
239
+ const textContent = atob(base64Data);
240
+ return { mimeType, data: base64Data, text: textContent, docType };
241
+ } catch (error) {
242
+ console.error(`Failed to decode base64 content for ${docType}:`, error);
243
+ throw new Error(`Invalid base64 encoding for ${docType} document.`);
244
+ }
245
  }
246
+
247
+ const finalMimeType = docType === 'pdf' ? 'application/pdf' : mimeType;
248
+ return { mimeType: finalMimeType, data: base64Data, docType };
249
  }
250
 
251
  private extractImageData(imageUrl: string): { mimeType: string; data: string } {
252
  if (imageUrl.startsWith("data:image/")) {
253
  const [mimeInfo, base64Data] = imageUrl.split(",");
254
+ const mimeType = mimeInfo.split(":")[1].split(";")[0];
255
+ return { mimeType, data: base64Data };
256
  } else if (imageUrl.startsWith("http")) {
257
+ throw new Error("URL images are not supported yet. Please provide base64 encoded images.");
258
+ } else {
259
+ return { mimeType: "image/jpeg", data: imageUrl };
260
  }
 
261
  }
262
 
263
+ async generateContentWithDocument(messages: OpenAIMessage[], modelName: string): Promise<string> {
264
+ const apiKey = this.getNextApiKey();
265
+ const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
266
+ const documentModel = this.isDocumentModel(fullModelName) ? fullModelName : 'models/gemini-1.5-pro-latest';
267
+
268
+ console.log(`Processing document with model: ${documentModel}`);
269
+
270
+ let contents;
271
+ try {
272
+ contents = messages.map(msg => {
273
+ if (typeof msg.content === "string") {
274
+ return { role: msg.role === "assistant" ? "model" : "user", parts: [{ text: msg.content }] };
275
  }
276
+
277
+ const messageParts = msg.content.map(part => {
278
+ if (part.type === "text") return { text: part.text };
279
+
280
+ if (part.type === "image_url" && part.image_url) {
281
+ const { mimeType, data } = this.extractImageData(part.image_url.url);
282
+ return { inlineData: { mimeType, data } };
283
  }
284
+
285
+ if (part.type === "document" && part.document) {
286
+ const docData = this.extractDocumentData(part.document.url);
287
+ console.log(`Processing document: ${docData.docType}, mime: ${docData.mimeType}, size: ${(docData.data.length * 0.75 / 1024).toFixed(2)} KB`);
288
+
289
+ if (docData.docType === 'txt' || docData.docType === 'md') {
290
+ const prefix = docData.docType === 'md' ? 'Markdown document content:\n' : 'Text document content:\n';
291
+ return { text: `${prefix}${docData.text}` };
292
+ }
293
+ if (docData.docType === 'pdf') {
294
+ return { inlineData: { mimeType: docData.mimeType, data: docData.data } };
295
+ }
296
+ return { text: `[Document type '${docData.docType}' is not supported for direct processing. Please convert to PDF, TXT, or MD.]` };
297
  }
298
+ return { text: "" };
299
+ });
300
+ return { role: msg.role === "assistant" ? "model" : "user", parts: messageParts.filter(p => p.text || p.inlineData) };
301
  });
302
+ } catch (error) {
303
+ throw error;
304
+ }
305
 
306
+ const requestBody = {
307
+ contents,
308
+ generationConfig: { temperature: 0.7, maxOutputTokens: 8192 }
309
+ };
 
310
 
311
+ const response = await fetch(
312
+ `https://generativelanguage.googleapis.com/v1beta/${documentModel}:generateContent?key=${apiKey}`,
313
+ {
314
+ method: "POST",
315
+ headers: { "Content-Type": "application/json" },
316
+ body: JSON.stringify(requestBody),
317
+ }
318
+ );
319
 
320
  if (!response.ok) {
321
+ const errorBody = await response.json().catch(() => response.text());
322
+ const errorMessage = errorBody?.error?.message || JSON.stringify(errorBody);
323
+ console.error(`Google API Error: ${response.status} - ${errorMessage}`);
324
+ throw new Error(`Google API request failed with status ${response.status}: ${errorMessage}`);
325
  }
326
+
327
  const data = await response.json();
328
+ const promptFeedback = data.promptFeedback;
329
+ if (promptFeedback && promptFeedback.blockReason) {
330
+ const reason = promptFeedback.blockReason;
331
+ const safetyRatings = promptFeedback.safetyRatings?.map((r: any) => `${r.category}: ${r.probability}`).join(', ') || 'N/A';
332
+ throw new Error(`Request blocked by Google API. Reason: ${reason}. Safety Ratings: [${safetyRatings}]`);
333
  }
334
+
335
+ if (!data.candidates || data.candidates.length === 0) {
336
+ throw new Error("No response generated for document content. The content might be empty or unreadable.");
337
+ }
338
+
339
+ const candidate = data.candidates[0];
340
+ if (candidate.finishReason === "SAFETY") {
341
+ throw new Error("Response blocked due to safety filters. Check content for sensitive topics.");
342
+ }
343
+ if (candidate.finishReason === "RECITATION") {
344
+ throw new Error("Response blocked due to recitation policy. The model's output was too similar to a copyrighted source.");
345
+ }
346
+
347
+ return candidate.content?.parts[0]?.text || "Document processed, but no text response was generated.";
348
  }
349
+
350
+ async generateContent(messages: OpenAIMessage[], modelName: string, enableSearch: boolean = false): Promise<string> {
351
+ const hasDocument = messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "document"));
352
+ if (hasDocument) {
353
+ return await this.generateContentWithDocument(messages, modelName);
354
+ }
355
 
 
 
 
 
 
356
  const apiKey = this.getNextApiKey();
357
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
 
 
358
 
359
+ const contents = messages.map(msg => {
360
+ if (typeof msg.content === "string") {
361
+ return { role: msg.role === "assistant" ? "model" : "user", parts: [{ text: msg.content }] };
362
+ } else {
363
+ const messageParts = msg.content.map(part => {
364
+ if (part.type === "text") {
365
+ return { text: part.text };
366
+ } else if (part.type === "image_url" && part.image_url) {
367
+ const imageData = part.image_url.url;
368
+ if (imageData.startsWith("data:image/")) {
369
+ const { mimeType, data } = this.extractImageData(imageData);
370
+ return { inlineData: { mimeType, data } };
371
+ } else {
372
+ return { fileData: { mimeType: "image/jpeg", fileUri: imageData } };
373
+ }
374
+ }
375
+ return { text: "" };
376
+ });
377
+ return { role: msg.role === "assistant" ? "model" : "user", parts: messageParts };
378
+ }
379
+ });
380
 
381
+ const requestBody: any = {
382
+ contents,
383
+ generationConfig: { temperature: 0.7, maxOutputTokens: 4096 }
384
+ };
385
+ if (enableSearch) {
386
+ requestBody.tools = [{ googleSearchRetrieval: {} }];
387
  }
 
 
 
 
 
388
 
389
+ const response = await fetch(
390
+ `https://generativelanguage.googleapis.com/v1beta/${fullModelName}:generateContent?key=${apiKey}`,
391
+ { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody) }
392
+ );
393
+
394
+ if (!response.ok) {
395
+ const errorText = await response.text();
396
+ throw new Error(`Google AI API error: ${response.status} - ${errorText}`);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
  }
398
+ const data = await response.json();
399
+ if (!data.candidates || data.candidates.length === 0) {
400
+ throw new Error("No response generated from Google AI");
401
+ }
402
+ const candidate = data.candidates[0];
403
+ if (candidate.finishReason === "SAFETY") {
404
+ throw new Error("Response blocked due to safety filters");
405
+ }
406
+ return candidate.content?.parts[0]?.text || "No response generated";
407
  }
408
 
409
+ async generateOrEditImageWithGemini(prompt: string, modelName: string = "gemini-2.0-flash-preview-image-generation", inputImage?: { mimeType: string; data: string }): Promise<{ text?: string; imageBase64?: string; imageUrl?: string }> {
410
  const apiKey = this.getNextApiKey();
411
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
412
  const requestParts: any[] = [{ text: prompt }];
413
+
414
+ if (inputImage) {
415
+ requestParts.push({ inline_data: { mime_type: inputImage.mimeType, data: inputImage.data } });
416
+ console.log(`Editing image with model: ${fullModelName}`);
417
+ } else {
418
+ console.log(`Generating image with model: ${fullModelName}`);
419
  }
420
 
421
  const requestBody = {
422
  contents: [{ parts: requestParts }],
423
  generationConfig: { responseModalities: ["TEXT", "IMAGE"], temperature: 0.7 }
424
  };
 
 
425
 
426
+ const response = await fetch(
427
+ `https://generativelanguage.googleapis.com/v1beta/${fullModelName}:generateContent?key=${apiKey}`,
428
+ { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody) }
429
+ );
430
+
431
+ if (!response.ok) {
432
+ const errorText = await response.text();
433
+ throw new Error(`Image ${inputImage ? 'editing' : 'generation'} failed: ${response.status} - ${errorText}`);
434
+ }
435
  const data = await response.json();
436
+ if (!data.candidates || data.candidates.length === 0) {
437
+ throw new Error(`No ${inputImage ? 'edited' : 'generated'} image returned`);
438
+ }
439
 
440
+ const candidate = data.candidates[0];
441
+ if (candidate.finishReason === "SAFETY") {
442
+ throw new Error(`Image ${inputImage ? 'editing' : 'generation'} blocked due to safety filters`);
443
+ }
444
+
445
+ const responseParts = candidate.content?.parts || [];
446
  let textResponse = "";
447
  let imageBase64 = "";
 
 
 
 
 
 
448
 
449
+ for (const part of responseParts) {
450
+ if (part.text) textResponse += part.text;
451
+ if (part.inlineData?.data) imageBase64 = part.inlineData.data;
452
+ if (part.inline_data?.data) imageBase64 = part.inline_data.data;
453
+ }
454
+
455
+ const result: { text?: string; imageBase64?: string; imageUrl?: string } = {};
456
+ if (textResponse) result.text = textResponse;
457
+ if (imageBase64) {
458
+ result.imageBase64 = imageBase64;
459
+ result.imageUrl = `data:image/png;base64,${imageBase64}`;
460
+ }
461
+ return result;
462
+ }
463
+
464
+ async generateContentWithGrounding(messages: OpenAIMessage[], modelName: string): Promise<string> {
465
+ const apiKey = this.getNextApiKey();
466
+ const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
467
+ const contents = messages.map(msg => ({ role: msg.role === 'assistant' ? 'model' : 'user', parts: [{ text: typeof msg.content === 'string' ? msg.content : '' }] }));
468
+
469
+ const requestBody = {
470
+ contents,
471
+ tools: [{ googleSearch: {} }],
472
+ generationConfig: { temperature: 0.7, maxOutputTokens: 4096 }
473
+ };
474
+
475
+ const response = await fetch(
476
+ `https://generativelanguage.googleapis.com/v1beta/${fullModelName}:generateContent?key=${apiKey}`,
477
+ { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody) }
478
+ );
479
+
480
+ if (!response.ok) {
481
+ console.warn(`Google Search API failed: ${response.status}, trying alternative.`);
482
+ return await this.generateContentWithSearchPrompt(messages, modelName);
483
+ }
484
+
485
+ const data = await response.json();
486
+ if (!data.candidates || data.candidates.length === 0) {
487
+ return await this.generateContentWithSearchPrompt(messages, modelName);
488
+ }
489
+
490
+ const candidate = data.candidates[0];
491
+ if (candidate.finishReason === "SAFETY") {
492
+ throw new Error("Response blocked due to safety filters");
493
+ }
494
+ return candidate.content?.parts[0]?.text || "No response generated";
495
+ }
496
+
497
+ async generateContentWithSearchPrompt(messages: OpenAIMessage[], modelName: string): Promise<string> {
498
+ const enhancedMessages = [...messages];
499
+ const lastMessage = enhancedMessages[enhancedMessages.length - 1];
500
+ if (typeof lastMessage.content === "string") {
501
+ lastMessage.content = `Please provide the most current and accurate information available about: ${lastMessage.content}.`;
502
+ }
503
+ return await this.generateContent(enhancedMessages, modelName, false);
504
+ }
505
+
506
+ async generateOrEditImage(prompt: string, modelName: string, inputImages?: any[]): Promise<string> {
507
+ if (this.isImageGenerationModel(modelName)) {
508
+ try {
509
+ let inputImage: { mimeType: string; data: string } | undefined;
510
+ if (inputImages && inputImages.length > 0) {
511
+ inputImage = this.extractImageData(inputImages[0].url);
512
+ }
513
+ const result = await this.generateOrEditImageWithGemini(prompt, modelName, inputImage);
514
+ let response = "";
515
+ if (result.text) response += result.text + "\\\\n\\\\n";
516
+ if (result.imageUrl) response += `${inputImage ? 'Edited' : 'Generated'} image:\\\\n${result.imageUrl}`;
517
+ return response || `Image processing complete.`;
518
+ } catch (error) {
519
+ return `Image processing failed: ${error.message}`;
520
+ }
521
+ }
522
+ return `Model ${modelName} does not support image generation. Use a model like gemini-2.0-flash-preview-image-generation.`;
523
  }
524
  }
525
 
 
534
 
535
  private authenticate(request: Request): boolean {
536
  if (!this.authKey) return true;
537
+ const authHeader = request.headers.get("Authorization");
538
+ return authHeader ? authHeader.replace("Bearer ", "") === this.authKey : false;
539
  }
540
 
541
  private isDocumentContent(url?: string): boolean {
 
546
  lowerUrl.includes('.md') || lowerUrl.startsWith('data:text/markdown');
547
  }
548
 
549
+ /**
550
+ * [新增] 处理 OpenAI 兼容的 TTS 请求
551
+ */
552
  private async handleAudioSpeech(request: Request): Promise<Response> {
553
+ try {
554
+ const body: OpenAITTSRequest = await request.json();
555
+
556
+ if (!body.input || !body.voice || !body.model) {
557
+ return new Response(JSON.stringify({ error: { message: "Missing required fields: input, voice, and model.", type: "invalid_request_error" } }), { status: 400 });
558
+ }
559
+
560
+ const audioBuffer = await this.googleAI.generateSpeech(body.input, body.model, body.voice);
561
+
562
+ // Google Gemini TTS API 生成的是 MP3 格式的音频。
563
+ return new Response(audioBuffer, {
564
+ headers: {
565
+ "Content-Type": "audio/mp3",
566
+ "Access-Control-Allow-Origin": "*",
567
+ }
568
+ });
569
+ } catch (error) {
570
+ console.error("Error in audio speech generation:", error.message);
571
+ return new Response(
572
+ JSON.stringify({
573
+ error: {
574
+ message: error.message,
575
+ type: "api_error",
576
+ code: null
577
+ }
578
+ }),
579
+ { status: 500, headers: { "Content-Type": "application/json" } }
580
+ );
581
  }
 
 
582
  }
583
 
584
  private async handleChatCompletions(request: Request): Promise<Response> {
585
  try {
586
  const body: OpenAIRequest = await request.json();
587
  const requestedModel = body.model || "gemini-1.5-pro";
588
+ const stream = body.stream || false;
589
+ console.log(`Request for model: ${requestedModel}, stream: ${stream}`);
590
 
 
 
 
 
 
 
 
 
 
 
591
  const lastMessage = body.messages[body.messages.length - 1];
592
+ const content = typeof lastMessage.content === "string"
593
+ ? lastMessage.content
594
+ : (Array.isArray(lastMessage.content) ? lastMessage.content.map(p => p.text || "").join(" ") : "");
595
+
596
+ const hasDocument = body.messages.some(msg =>
597
+ Array.isArray(msg.content) &&
598
+ msg.content.some(part => part.type === "document" || this.isDocumentContent(part.document?.url))
599
+ );
600
 
601
+ const hasImages = body.messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "image_url"));
602
+
603
+ let inputImages: any[] = [];
604
+ if (hasImages) {
605
+ body.messages.forEach(msg => {
606
+ if (Array.isArray(msg.content)) {
607
+ msg.content.forEach(part => {
608
+ if (part.type === "image_url" && part.image_url) inputImages.push({ url: part.image_url.url });
609
+ });
610
+ }
611
+ });
612
+ }
613
+
614
  let responseText: string;
615
+
616
+ if (hasDocument) {
617
+ responseText = await this.googleAI.generateContentWithDocument(body.messages, requestedModel);
618
+ } else if (this.googleAI.isImageEditingModel(requestedModel) && hasImages) {
619
  responseText = await this.googleAI.generateOrEditImage(content, requestedModel, inputImages);
620
+ } else if (this.googleAI.isImageGenerationModel(requestedModel)) {
621
+ responseText = await this.googleAI.generateOrEditImage(content, requestedModel);
622
+ } else if (content.toLowerCase().startsWith("/search:")) {
623
+ const query = content.substring(8).trim();
624
+ const searchMessages = [{ ...lastMessage, content: query }];
625
+ responseText = await this.googleAI.generateContentWithGrounding(searchMessages, requestedModel);
626
+ } else {
627
+ responseText = await this.googleAI.generateContent(body.messages, requestedModel, false);
628
  }
629
 
630
+ if (stream) {
631
+ const streamResponse = await this.streamStringAsOpenAIResponse(responseText, requestedModel);
632
+ return new Response(streamResponse, {
633
+ headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive", "Access-Control-Allow-Origin": "*" }
634
+ });
635
+ } else {
636
+ const responsePayload = {
637
+ id: `chatcmpl-${Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: requestedModel,
638
+ choices: [{ index: 0, message: { role: "assistant", content: responseText }, finish_reason: "stop" }],
639
+ usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
640
+ };
641
+ return new Response(JSON.stringify(responsePayload), { headers: { "Content-Type": "application/json" } });
642
+ }
643
  } catch (error) {
644
  console.error("Error in chat completions:", error.message);
645
+ const status = error.message.includes("exceeds the limit") || error.message.includes("Invalid") ? 400 : 500;
646
+ return new Response(
647
+ JSON.stringify({
648
+ error: {
649
+ message: error.message,
650
+ type: status === 400 ? "invalid_request_error" : "api_error",
651
+ code: null
652
+ }
653
+ }),
654
+ { status, headers: { "Content-Type": "application/json" } }
655
+ );
656
  }
657
  }
658
 
659
+ private async streamStringAsOpenAIResponse(content: string, modelName: string): Promise<ReadableStream<Uint8Array>> {
 
 
 
 
660
  const encoder = new TextEncoder();
661
  const streamId = `chatcmpl-${Date.now()}`;
662
  const creationTime = Math.floor(Date.now() / 1000);
663
+ let contentQueue = content.split('');
664
 
665
  return new ReadableStream({
666
+ start(controller) {
667
+ const initialChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { role: 'assistant', content: '' }, finish_reason: null }] };
668
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(initialChunk)}\n\n`));
669
+ },
670
+ pull(controller) {
671
+ if (contentQueue.length === 0) {
672
+ const finalChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] };
673
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(finalChunk)}\n\n`));
674
+ controller.enqueue(encoder.encode('data: [DONE]\n\n'));
675
+ controller.close();
676
+ return;
 
 
677
  }
678
+ const char = contentQueue.shift();
679
+ const chunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { content: char }, finish_reason: null }] };
680
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
681
  }
 
 
 
 
 
 
 
682
  });
683
  }
684
 
685
  private async handleModels(): Promise<Response> {
686
+ try {
687
+ const googleModels = await this.googleAI.fetchOfficialModels();
688
+ const fallbackModels = this.googleAI['getFallbackModels'](); // Access private method for a complete list
689
+
690
+ const allModels = [...googleModels, ...fallbackModels];
691
+ const uniqueModelMap = new Map();
692
+ allModels.forEach(model => {
693
+ const modelId = model.id || model.name.replace('models/', '');
694
+ if (!uniqueModelMap.has(modelId)) {
695
+ uniqueModelMap.set(modelId, {
696
+ id: modelId,
697
+ object: "model",
698
+ created: Math.floor(Date.now() / 1000),
699
+ owned_by: "google",
700
+ description: model.description || model.displayName,
701
+ maxTokens: model.inputTokenLimit || model.maxTokens
702
+ });
703
+ }
704
+ });
705
+
706
+ const models = {
707
+ object: "list",
708
+ data: Array.from(uniqueModelMap.values()),
709
+ };
710
+
711
+ return new Response(JSON.stringify(models), { headers: { "Content-Type": "application/json" } });
712
+ } catch (error) {
713
+ console.error("Error fetching models:", error);
714
+ return new Response(JSON.stringify({ error: { message: "Failed to fetch models." } }), { status: 500 });
715
+ }
716
  }
717
 
718
  private async handleStatus(): Promise<Response> {
719
  const status = {
720
+ status: "healthy", timestamp: new Date().toISOString(), version: "2.5.0",
721
  api_keys_loaded: this.googleAI.apiKeys.length,
722
  models_in_cache: this.googleAI.cachedModels.length,
723
  models_last_fetched: this.googleAI.modelsLastFetch > 0 ? new Date(this.googleAI.modelsLastFetch).toISOString() : "never"
 
739
  const url = new URL(request.url);
740
  let response: Response;
741
 
742
+ // Handle routes
743
+ if (url.pathname === "/health" || url.pathname === "/status") {
744
+ response = await this.handleStatus();
745
+ } else if (!this.authenticate(request)) {
746
+ response = new Response(JSON.stringify({ error: { message: "Unauthorized" } }), { status: 401 });
747
+ // [修改] 添加 TTS 路由
748
+ } else if (url.pathname === "/v1/audio/speech" && request.method === "POST") {
749
+ response = await this.handleAudioSpeech(request);
750
+ } else if (url.pathname === "/v1/chat/completions" && request.method === "POST") {
751
+ response = await this.handleChatCompletions(request);
752
+ } else if (url.pathname === "/v1/models" && request.method === "GET") {
753
+ response = await this.handleModels();
754
+ } else {
755
+ response = new Response("Not Found", { status: 404 });
 
 
 
756
  }
757
 
758
+ // Add CORS headers to all responses
759
  const finalHeaders = new Headers(response.headers);
760
  for (const [key, value] of Object.entries(corsHeaders)) {
761
  finalHeaders.set(key, value);
762
  }
763
+
764
+ return new Response(response.body, { status: response.status, headers: finalHeaders });
765
  }
766
  }
767
 
768
  // --- 服务器启动 ---
769
  const server = new OpenAICompatibleServer();
770
 
771
+ console.log("🚀 OpenAI Compatible Server with Google AI starting on port 8000...");
772
+ console.log(`✅ Loaded ${server.googleAI.apiKeys.length} API key(s).`);
773
+ console.log(`📄 Max document size set to ${MAX_DOCUMENT_SIZE_MB}MB.`);
774
 
775
+ // Pre-fetch models at startup
776
+ server.googleAI.fetchOfficialModels().then(models => {
777
+ console.log(`✅ Successfully fetched ${models.length} models from Google AI.`);
778
+ }).catch(error => {
779
+ console.warn(`⚠️ Could not pre-fetch models: ${error.message}. Will use fallbacks or fetch on first request.`);
780
  });
781
 
 
 
782
  console.log("\n🔗 Endpoints:");
783
+ console.log(" POST /v1/chat/completions");
784
+ console.log(" POST /v1/audio/speech <-- [NEW] OpenAI TTS compatible endpoint"); // [修改] 更新启动日志
785
+ console.log(" GET /v1/models");
786
+ console.log(" GET /status");
787
 
788
  // NOTE: the server listens on port 7860 (the `port` option passed to serve() below); keep the startup log message in sync if you change it.
789
  await serve(
790
  (request: Request) => server.handleRequest(request),
791
  { port: 7860 }