xt8 committed on
Commit
265497c
·
verified ·
1 Parent(s): f15a5a0

Update main.ts

Browse files
Files changed (1) hide show
  1. main.ts +311 -178
main.ts CHANGED
@@ -1,11 +1,11 @@
1
- // main.ts
2
  import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
 
3
  import { decode } from "https://deno.land/std@0.208.0/encoding/base64.ts";
4
 
5
  // --- 常量定义 ---
6
  const MAX_DOCUMENT_SIZE_MB = 20;
7
  const MAX_DOCUMENT_SIZE_BYTES = MAX_DOCUMENT_SIZE_MB * 1024 * 1024;
8
- const MODELS_CACHE_DURATION = 60000;
9
 
10
  // --- 接口定义 ---
11
  interface OpenAIMessage {
@@ -26,15 +26,15 @@ interface OpenAIRequest {
26
  stream?: boolean;
27
  }
28
 
 
29
  interface OpenAITTSRequest {
30
- model: string;
31
- input: string;
32
- voice: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
33
- response_format?: 'mp3' | 'opus' | 'aac' | 'flac';
34
- speed?: number;
35
  }
36
 
37
-
38
  class GoogleAIService {
39
  public apiKeys: string[];
40
  public currentKeyIndex = 0;
@@ -45,12 +45,16 @@ class GoogleAIService {
45
  this.apiKeys = [];
46
  let i = 1;
47
  while (true) {
48
- const key = Deno.env.get(`GOOGLE_AI_KEY_${i}`) || (i === 1 ? Deno.env.get("GOOGLE_AI_KEY") : null);
 
49
  if (!key) break;
50
  this.apiKeys.push(key);
51
  i++;
52
  }
53
- if (this.apiKeys.length === 0) throw new Error("No Google AI API keys found in environment variables.");
 
 
 
54
  }
55
 
56
  private getNextApiKey(): string {
@@ -58,171 +62,209 @@ class GoogleAIService {
58
  this.currentKeyIndex = (this.currentKeyIndex + 1) % this.apiKeys.length;
59
  return key;
60
  }
61
-
62
- private getGoogleVoice(openAIVoice: string): string {
63
- const voiceMap: { [key: string]: string } = { 'alloy': 'Kore', 'echo': 'Sal', 'fable': 'Polly', 'onyx': 'Onyx', 'nova': 'Sparkle', 'shimmer': 'Luna', 'default': 'Kore' };
64
- return voiceMap[openAIVoice] || voiceMap['default'];
65
- }
66
 
 
67
  /**
68
- * [已修复] 修正了发送给 Google API 的请求体,移除了多余的 `model` 字段,
69
- * 确保 API 能正确识别并处理音频生成请求。
70
- * 返回类型为 Promise<Uint8Array>。
 
71
  */
72
- async generateSpeech(input: string, model: string, voice: string): Promise<Uint8Array> {
73
  const apiKey = this.getNextApiKey();
74
- const googleVoice = this.getGoogleVoice(voice);
75
- // 注意:这里的 model 参数 (来自OpenAI请求) 目前未被使用,因为 Gemini TTS 模型是硬编码的。
76
- const ttsModel = "gemini-2.5-flash-preview-tts";
77
 
78
- console.log(`Generating speech with model: ${ttsModel}, voice: ${googleVoice} (mapped from OpenAI's '${voice}')`);
79
-
80
- // [关键修复]:从请求体中移除了 "model" 字段。该字段应在URL中指定,而不是在body中。
81
  const requestBody = {
82
- "contents": [{ "parts": [{ "text": input }] }],
83
- "generationConfig": {
84
- "responseModalities": ["AUDIO"],
85
- "speechConfig": {
86
- "voiceConfig": {
87
- "prebuiltVoiceConfig": { "voiceName": googleVoice }
88
- }
89
- }
90
- }
91
  };
92
 
93
- const url = `https://generativelanguage.googleapis.com/v1beta/models/${ttsModel}:generateContent?key=${apiKey}`;
94
-
95
- const response = await fetch(url, {
 
96
  method: "POST",
97
  headers: { "Content-Type": "application/json" },
98
- body: JSON.stringify(requestBody)
99
- });
 
100
 
101
  if (!response.ok) {
102
- const errorBody = await response.json().catch(() => response.text());
103
- const errorMessage = errorBody?.error?.message || JSON.stringify(errorBody);
104
- throw new Error(`Google TTS API request failed with status ${response.status}: ${errorMessage}`);
 
105
  }
106
 
107
  const data = await response.json();
108
-
109
- // 检查响应中是否真的包含了音频数据
110
- const audioContentBase64 = data.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
111
- if (!audioContentBase64) {
112
- // 如果没有音频数据,打印出完整的响应以供调试
113
- console.error("No audio data returned from Google API. Full response:", JSON.stringify(data, null, 2));
114
- throw new Error("No audio data returned from Google API. The response might contain an error or be in an unexpected format.");
115
  }
116
-
117
- // 使用 Deno 标准库的 decode 函数,返回一个 Uint8Array
118
- const audioBytes = decode(audioContentBase64);
119
- return audioBytes;
120
  }
121
-
122
  async fetchOfficialModels(): Promise<any[]> {
123
  const now = Date.now();
124
- if (this.cachedModels.length > 0 && (now - this.modelsLastFetch) < MODELS_CACHE_DURATION) return this.cachedModels;
 
 
 
125
  const apiKey = this.getNextApiKey();
126
  try {
127
- const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models?key=${apiKey}`);
 
 
 
 
128
  if (!response.ok) {
129
- console.warn(`Failed to fetch models: ${response.status}. Using fallbacks.`);
130
  return this.getFallbackModels();
131
  }
 
132
  const data = await response.json();
133
  if (data.models && Array.isArray(data.models)) {
134
- this.cachedModels = data.models.filter((model: any) => model.supportedGenerationMethods?.includes('generateContent'));
 
 
135
  this.modelsLastFetch = now;
 
136
  return this.cachedModels;
137
  }
138
  return this.getFallbackModels();
139
  } catch (error) {
140
- console.warn("Error fetching models:", error.message, ". Using fallbacks.");
141
  return this.getFallbackModels();
142
  }
143
  }
144
 
145
  private getFallbackModels(): any[] {
146
  return [
147
- { name: "models/gemini-1.5-pro", displayName: "Gemini 1.5 Pro", description: "Mid-size multimodal model.", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000 },
148
- { name: "models/gemini-1.5-flash", displayName: "Gemini 1.5 Flash", description: "Fast and versatile multimodal model.", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000 },
149
- { name: "models/gemini-2.0-flash-preview-image-generation", displayName: "Gemini 2.0 Flash Image Generation", description: "Image generation model.", supportedGenerationMethods: ["generateContent"], maxTokens: 100000 },
150
- { name: "models/gemini-2.5-flash-preview-tts", displayName: "Gemini 2.5 Flash TTS", description: "Text-to-speech model.", id: "gemini-2.5-flash-preview-tts" }
151
  ];
152
  }
153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  private extractImageData(imageUrl: string): { mimeType: string; data: string } {
155
  if (imageUrl.startsWith("data:image/")) {
156
  const [mimeInfo, base64Data] = imageUrl.split(",");
157
- return { mimeType: mimeInfo.split(":")[1].split(";")[0], data: base64Data };
 
158
  } else if (imageUrl.startsWith("http")) {
159
- throw new Error("URL images not supported. Use base64 data URLs.");
 
 
160
  }
161
- return { mimeType: "image/jpeg", data: imageUrl };
162
  }
163
 
164
- private buildGoogleContent(messages: OpenAIMessage[]) {
165
- return messages.map(msg => {
166
- const role = msg.role === "assistant" ? "model" : "user";
167
- if (typeof msg.content === "string") return { role, parts: [{ text: msg.content }] };
168
- const parts = msg.content.map(part => {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  if (part.type === "text") return { text: part.text };
170
  if (part.type === "image_url" && part.image_url) {
171
- const { mimeType, data } = this.extractImageData(part.image_url.url);
172
- return { inlineData: { mimeType, data } };
 
173
  }
174
  return { text: "" };
175
  });
176
- return { role, parts: parts.filter(p => p && p.text) };
177
  });
178
- }
179
-
180
- async generateContent(messages: OpenAIMessage[], modelName: string): Promise<string> {
181
- const apiKey = this.getNextApiKey();
182
- const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
183
- const contents = this.buildGoogleContent(messages);
184
- const requestBody = { contents, generationConfig: { temperature: 0.7, maxOutputTokens: 8192 } };
185
  const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/${fullModelName}:generateContent?key=${apiKey}`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody) });
186
- if (!response.ok) throw new Error(`Google API error: ${response.status} - ${await response.text()}`);
187
  const data = await response.json();
188
- if (data.promptFeedback?.blockReason) throw new Error(`Request blocked. Reason: ${data.promptFeedback.blockReason}`);
189
- const candidate = data.candidates?.[0];
190
- if (!candidate) throw new Error("No response from Google AI.");
191
- if (candidate.finishReason === "SAFETY") throw new Error("Response blocked for safety reasons.");
192
- return candidate.content?.parts?.[0]?.text || "";
193
  }
194
 
195
- async * streamGenerateContent(messages: OpenAIMessage[], modelName: string): AsyncGenerator<string> {
196
- const apiKey = this.getNextApiKey();
197
- const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
198
- const contents = this.buildGoogleContent(messages);
199
- const requestBody = { contents, generationConfig: { temperature: 0.7, maxOutputTokens: 8192 } };
200
- const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/${fullModelName}:streamGenerateContent?key=${apiKey}`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody) });
201
- if (!response.ok || !response.body) throw new Error(`Google streaming API error: ${response.status} - ${await response.text()}`);
202
- const reader = response.body.getReader();
203
- const decoder = new TextDecoder();
204
- let buffer = "";
205
- while (true) {
206
- const { done, value } = await reader.read();
207
- if (done) break;
208
- buffer += decoder.decode(value, { stream: true });
209
- const lines = buffer.split('\n');
210
- buffer = lines.pop() || '';
211
- for (const line of lines) {
212
- if (line.startsWith('data: ')) {
213
- try {
214
- const jsonStr = line.substring(6);
215
- const chunk = JSON.parse(jsonStr);
216
- if (chunk.error) throw new Error(`Google stream error: ${chunk.error.message}`);
217
- const text = chunk.candidates?.[0]?.content?.parts?.[0]?.text;
218
- if (text) yield text;
219
- } catch (e) {
220
- console.warn("Could not parse stream chunk:", line, e.message);
221
- }
222
- }
223
- }
224
- }
225
- }
226
  }
227
 
228
  class OpenAICompatibleServer {
@@ -236,102 +278,193 @@ class OpenAICompatibleServer {
236
 
237
  private authenticate(request: Request): boolean {
238
  if (!this.authKey) return true;
239
- return request.headers.get("Authorization")?.replace("Bearer ", "") === this.authKey;
 
240
  }
241
 
 
 
 
 
 
 
 
 
 
242
  private async handleAudioSpeech(request: Request): Promise<Response> {
243
- const body: OpenAITTSRequest = await request.json();
244
- if (!body.input || !body.voice || !body.model) {
245
- return new Response(JSON.stringify({ error: "Missing required fields" }), { status: 400 });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  }
247
- const audioData = await this.googleAI.generateSpeech(body.input, body.model, body.voice);
248
- // 直接使用 Uint8Array 创建 Response,并设置正确的 Content-Type
249
- return new Response(audioData, { headers: { "Content-Type": "audio/mpeg" } });
250
  }
251
-
252
  private async handleChatCompletions(request: Request): Promise<Response> {
253
- const body: OpenAIRequest = await request.json();
254
- const requestedModel = body.model || "gemini-1.5-pro";
 
 
 
255
 
256
- if (body.stream) {
257
- const googleStream = this.googleAI.streamGenerateContent(body.messages, requestedModel);
258
- const openAIStream = this.streamGoogleResponseAsOpenAI(googleStream, requestedModel);
259
- return new Response(openAIStream, { headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive" } });
260
- }
 
261
 
262
- const responseText = await this.googleAI.generateContent(body.messages, requestedModel);
263
- const responsePayload = {
264
- id: `chatcmpl-${Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: requestedModel,
265
- choices: [{ index: 0, message: { role: "assistant", content: responseText }, finish_reason: "stop" }],
266
- usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
267
- };
268
- return new Response(JSON.stringify(responsePayload), { headers: { "Content-Type": "application/json" } });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  }
270
 
271
- private streamGoogleResponseAsOpenAI(googleStream: AsyncGenerator<string>, modelName: string): ReadableStream<Uint8Array> {
272
  const encoder = new TextEncoder();
273
  const streamId = `chatcmpl-${Date.now()}`;
274
  const creationTime = Math.floor(Date.now() / 1000);
 
 
275
  return new ReadableStream({
276
- async start(controller) {
277
- controller.enqueue(encoder.encode(`data: ${JSON.stringify({ id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { role: 'assistant', content: '' }, finish_reason: null }] })}\n\n`));
278
- for await (const textChunk of googleStream) {
279
- if (textChunk) {
280
- controller.enqueue(encoder.encode(`data: ${JSON.stringify({ id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { content: textChunk }, finish_reason: null }] })}\n\n`));
 
 
 
 
 
 
281
  }
 
 
 
282
  }
283
- controller.enqueue(encoder.encode(`data: ${JSON.stringify({ id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] })}\n\n`));
284
- controller.enqueue(encoder.encode('data: [DONE]\n\n'));
285
- controller.close();
286
- }
287
  });
288
  }
289
 
290
  private async handleModels(): Promise<Response> {
291
- const googleModels = await this.googleAI.fetchOfficialModels();
292
- const fallbackModels = this.googleAI['getFallbackModels']();
293
- const uniqueModelMap = new Map();
294
- [...googleModels, ...fallbackModels].forEach(model => {
295
- const modelId = model.id || model.name.replace('models/', '');
296
- if (!uniqueModelMap.has(modelId)) {
297
- uniqueModelMap.set(modelId, { id: modelId, object: "model", created: Math.floor(Date.now() / 1000), owned_by: "google" });
298
- }
299
- });
300
- return new Response(JSON.stringify({ object: "list", data: Array.from(uniqueModelMap.values()) }), { headers: { "Content-Type": "application/json" } });
 
 
 
 
 
 
 
301
  }
302
 
303
  private async handleStatus(): Promise<Response> {
304
- return new Response(JSON.stringify({ status: "healthy", timestamp: new Date().toISOString(), version: "2.5.2" }), { headers: { "Content-Type": "application/json" } });
 
 
 
 
 
 
305
  }
306
 
307
  async handleRequest(request: Request): Promise<Response> {
308
- const corsHeaders = { "Access-Control-Allow-Origin": "*", "Access-Control-Allow-Methods": "GET, POST, OPTIONS", "Access-Control-Allow-Headers": "Content-Type, Authorization" };
 
 
 
 
 
309
  if (request.method === "OPTIONS") return new Response(null, { headers: corsHeaders });
310
 
311
  const url = new URL(request.url);
312
  let response: Response;
313
 
314
- try {
315
- if (url.pathname === "/health" || url.pathname === "/status") response = await this.handleStatus();
316
- else if (!this.authenticate(request)) response = new Response(JSON.stringify({ error: "Unauthorized" }), { status: 401 });
317
- else if (url.pathname === "/v1/audio/speech" && request.method === "POST") response = await this.handleAudioSpeech(request);
318
- else if (url.pathname === "/v1/chat/completions" && request.method === "POST") response = await this.handleChatCompletions(request);
319
- else if (url.pathname === "/v1/models" && request.method === "GET") response = await this.handleModels();
320
- else response = new Response("Not Found", { status: 404 });
321
- } catch (error) {
322
- console.error("Unhandled error:", error);
323
- response = new Response(JSON.stringify({ error: { message: error.message || "Internal server error." } }), { status: 500 });
 
 
324
  }
325
 
326
  const finalHeaders = new Headers(response.headers);
327
- for (const [key, value] of Object.entries(corsHeaders)) finalHeaders.set(key, value);
328
- return new Response(response.body, { status: response.status, statusText: response.statusText, headers: finalHeaders });
329
  }
330
  }
331
 
332
  // --- 服务器启动 ---
333
  const server = new OpenAICompatibleServer();
334
- const port = 7860;
335
- console.log(`🚀 Server starting on http://localhost:${port}`);
336
- server['googleAI'].fetchOfficialModels().catch(e => console.warn(`⚠️ Could not pre-fetch models: ${e.message}`));
337
- await serve((req: Request) => server.handleRequest(req), { port });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
2
+ // [新增] 引入 base64 解码模块,用于处理TTS响应
3
  import { decode } from "https://deno.land/std@0.208.0/encoding/base64.ts";
4
 
5
  // --- 常量定义 ---
6
  const MAX_DOCUMENT_SIZE_MB = 20;
7
  const MAX_DOCUMENT_SIZE_BYTES = MAX_DOCUMENT_SIZE_MB * 1024 * 1024;
8
+ const MODELS_CACHE_DURATION = 60000; // 1分钟模型缓存
9
 
10
  // --- 接口定义 ---
11
  interface OpenAIMessage {
 
26
  stream?: boolean;
27
  }
28
 
29
+ // [新增] OpenAI TTS 请求接口
30
  interface OpenAITTSRequest {
31
+ model: 'tts-1' | 'tts-1-hd'; // 兼容OpenAI的模型名称
32
+ input: string;
33
+ voice: string; // 直接使用Gemini/Google Cloud TTS原生的voice name, e.g., "en-US-News-N"
34
+ response_format?: 'mp3' | 'opus' | 'aac' | 'flac'; // Google Cloud TTS支持多种格式, 我们默认为MP3
35
+ speed?: number; // Google Cloud TTS支持, 但为简化此处忽略该参数
36
  }
37
 
 
38
  class GoogleAIService {
39
  public apiKeys: string[];
40
  public currentKeyIndex = 0;
 
45
  this.apiKeys = [];
46
  let i = 1;
47
  while (true) {
48
+ const key = Deno.env.get(`GOOGLE_AI_KEY_${i}`) ||
49
+ (i === 1 ? Deno.env.get("GOOGLE_AI_KEY") : null);
50
  if (!key) break;
51
  this.apiKeys.push(key);
52
  i++;
53
  }
54
+
55
+ if (this.apiKeys.length === 0) {
56
+ throw new Error("No Google AI API keys found in environment variables (e.g., GOOGLE_AI_KEY_1, GOOGLE_AI_KEY)");
57
+ }
58
  }
59
 
60
  private getNextApiKey(): string {
 
62
  this.currentKeyIndex = (this.currentKeyIndex + 1) % this.apiKeys.length;
63
  return key;
64
  }
 
 
 
 
 
65
 
66
+ // --- [新增] TTS 实现 ---
67
  /**
68
+ * 使用Google Cloud Text-to-Speech API合成语音
69
+ * @param input - 要转换为语音的文本
70
+ * @param voiceName - Google原生的语音名称, e.g., "en-US-Standard-A", "en-GB-News-G"
71
+ * @returns 返回原始的MP3音频数据的Uint8Array
72
  */
73
+ async synthesizeSpeech(input: string, voiceName: string): Promise<Uint8Array> {
74
  const apiKey = this.getNextApiKey();
75
+ console.log(`Synthesizing speech with voice: ${voiceName}`);
 
 
76
 
 
 
 
77
  const requestBody = {
78
+ "input": { "text": input },
79
+ "voice": { "name": voiceName },
80
+ "audioConfig": { "audioEncoding": "MP3" } // 默认使用MP3格式,与OpenAI兼容
 
 
 
 
 
 
81
  };
82
 
83
+ // 注意:这里使用的是 Google Cloud Text-to-Speech API 的端点
84
+ const response = await fetch(
85
+ `https://texttospeech.googleapis.com/v1beta/text:synthesize?key=${apiKey}`,
86
+ {
87
  method: "POST",
88
  headers: { "Content-Type": "application/json" },
89
+ body: JSON.stringify(requestBody),
90
+ }
91
+ );
92
 
93
  if (!response.ok) {
94
+ const errorBody = await response.json().catch(() => response.text());
95
+ const errorMessage = errorBody?.error?.message || JSON.stringify(errorBody);
96
+ console.error(`Google TTS API Error: ${response.status} - ${errorMessage}`);
97
+ throw new Error(`Google TTS API request failed with status ${response.status}: ${errorMessage}`);
98
  }
99
 
100
  const data = await response.json();
101
+ if (!data.audioContent) {
102
+ throw new Error("TTS synthesis failed, no audio content in response.");
 
 
 
 
 
103
  }
104
+
105
+ // Google API返回的是Base64编码的字符串,需要解码成二进制数据
106
+ return decode(data.audioContent);
 
107
  }
108
+
109
  async fetchOfficialModels(): Promise<any[]> {
110
  const now = Date.now();
111
+ if (this.cachedModels.length > 0 && (now - this.modelsLastFetch) < MODELS_CACHE_DURATION) {
112
+ return this.cachedModels;
113
+ }
114
+
115
  const apiKey = this.getNextApiKey();
116
  try {
117
+ const response = await fetch(
118
+ `https://generativelanguage.googleapis.com/v1beta/models?key=${apiKey}`,
119
+ { method: "GET", headers: { "Content-Type": "application/json" } }
120
+ );
121
+
122
  if (!response.ok) {
123
+ console.warn(`Failed to fetch models from Google AI: ${response.status}. Using fallback models.`);
124
  return this.getFallbackModels();
125
  }
126
+
127
  const data = await response.json();
128
  if (data.models && Array.isArray(data.models)) {
129
+ this.cachedModels = data.models.filter((model: any) =>
130
+ model.supportedGenerationMethods?.includes('generateContent')
131
+ );
132
  this.modelsLastFetch = now;
133
+ console.log(`Fetched ${this.cachedModels.length} models from Google AI`);
134
  return this.cachedModels;
135
  }
136
  return this.getFallbackModels();
137
  } catch (error) {
138
+ console.warn("Error fetching models from Google AI:", error.message, ". Using fallback models.");
139
  return this.getFallbackModels();
140
  }
141
  }
142
 
143
  private getFallbackModels(): any[] {
144
  return [
145
+ { name: "models/gemini-1.5-pro", displayName: "Gemini 1.5 Pro", description: "Mid-size multimodal model that supports up to 1 million tokens, images, and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
146
+ { name: "models/gemini-1.5-flash", displayName: "Gemini 1.5 Flash", description: "Fast and versatile multimodal model for diverse tasks, supports images and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
147
+ { name: "models/gemini-2.0-flash-preview-image-generation", displayName: "Gemini 2.0 Flash Image Generation", description: "Advanced model for generating and editing high-quality images with text and image outputs", supportedGenerationMethods: ["generateContent"], maxTokens: 100000, capabilities: ["text", "image_generation", "image_editing"] }
 
148
  ];
149
  }
150
 
151
+ public isVisionModel = (modelName: string): boolean => modelName.toLowerCase().includes('vision') || modelName.toLowerCase().includes('pro');
152
+ public isImageGenerationModel = (modelName: string): boolean => modelName.includes('image-generation') || modelName === 'gemini-2.0-flash-preview-image-generation';
153
+ public isImageEditingModel = (modelName: string): boolean => modelName.includes('image-generation') || modelName === 'gemini-2.0-flash-preview-image-generation';
154
+ public isDocumentModel = (modelName: string): boolean => modelName.toLowerCase().includes('gemini-1.5') || modelName.toLowerCase().includes('pro') || modelName.toLowerCase().includes('flash');
155
+
156
+ private getDocumentType(url: string): string {
157
+ const lowerUrl = url.toLowerCase();
158
+ if (lowerUrl.startsWith('data:application/pdf') || lowerUrl.includes('.pdf')) return 'pdf';
159
+ if (lowerUrl.startsWith('data:text/plain') || lowerUrl.includes('.txt')) return 'txt';
160
+ if (lowerUrl.startsWith('data:text/markdown') || lowerUrl.includes('.md')) return 'md';
161
+ if (lowerUrl.startsWith('data:application/msword') || lowerUrl.includes('.doc')) return 'doc';
162
+ if (lowerUrl.startsWith('data:application/vnd.openxmlformats-officedocument.wordprocessingml.document') || lowerUrl.includes('.docx')) return 'docx';
163
+ return 'unknown';
164
+ }
165
+
166
+ private extractDocumentData(documentUrl: string): { mimeType: string; data: string; text?: string; docType: string } {
167
+ const docType = this.getDocumentType(documentUrl);
168
+ if (!documentUrl.startsWith("data:")) throw new Error("Document must be provided as a standard base64 data URL (e.g., 'data:application/pdf;base64,...').");
169
+ const parts = documentUrl.split(",");
170
+ if (parts.length !== 2) throw new Error("Invalid data URL format for document.");
171
+ const [mimeInfo, base64Data] = parts;
172
+ const approxSizeInBytes = base64Data.length * 0.75;
173
+ if (approxSizeInBytes > MAX_DOCUMENT_SIZE_BYTES) throw new Error(`Document size exceeds the ${MAX_DOCUMENT_SIZE_MB}MB limit.`);
174
+ const mimeType = mimeInfo.split(":")[1]?.split(";")[0] || 'application/octet-stream';
175
+ if (docType === 'txt' || docType === 'md') {
176
+ try {
177
+ const textContent = atob(base64Data);
178
+ return { mimeType, data: base64Data, text: textContent, docType };
179
+ } catch (error) { throw new Error(`Invalid base64 encoding for ${docType} document.`); }
180
+ }
181
+ const finalMimeType = docType === 'pdf' ? 'application/pdf' : mimeType;
182
+ return { mimeType: finalMimeType, data: base64Data, docType };
183
+ }
184
+
185
  private extractImageData(imageUrl: string): { mimeType: string; data: string } {
186
  if (imageUrl.startsWith("data:image/")) {
187
  const [mimeInfo, base64Data] = imageUrl.split(",");
188
+ const mimeType = mimeInfo.split(":")[1].split(";")[0];
189
+ return { mimeType, data: base64Data };
190
  } else if (imageUrl.startsWith("http")) {
191
+ throw new Error("URL images are not supported yet. Please provide base64 encoded images.");
192
+ } else {
193
+ return { mimeType: "image/jpeg", data: imageUrl };
194
  }
 
195
  }
196
 
197
+ // The rest of the original methods (unchanged)
198
+ async generateContentWithDocument(messages: OpenAIMessage[], modelName: string): Promise<string> {
199
+ const apiKey = this.getNextApiKey();
200
+ const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
201
+ const documentModel = this.isDocumentModel(fullModelName) ? fullModelName : 'models/gemini-1.5-pro-latest';
202
+ console.log(`Processing document with model: ${documentModel}`);
203
+ let contents;
204
+ try {
205
+ contents = messages.map(msg => {
206
+ if (typeof msg.content === "string") {
207
+ return { role: msg.role === "assistant" ? "model" : "user", parts: [{ text: msg.content }] };
208
+ }
209
+ const messageParts = msg.content.map(part => {
210
+ if (part.type === "text") return { text: part.text };
211
+ if (part.type === "image_url" && part.image_url) {
212
+ const { mimeType, data } = this.extractImageData(part.image_url.url);
213
+ return { inlineData: { mimeType, data } };
214
+ }
215
+ if (part.type === "document" && part.document) {
216
+ const docData = this.extractDocumentData(part.document.url);
217
+ if (docData.docType === 'txt' || docData.docType === 'md') {
218
+ const prefix = docData.docType === 'md' ? 'Markdown document content:\n' : 'Text document content:\n';
219
+ return { text: `${prefix}${docData.text}` };
220
+ }
221
+ if (docData.docType === 'pdf') { return { inlineData: { mimeType: docData.mimeType, data: docData.data } }; }
222
+ return { text: `[Document type '${docData.docType}' is not supported.]` };
223
+ }
224
+ return { text: "" };
225
+ });
226
+ return { role: msg.role === "assistant" ? "model" : "user", parts: messageParts.filter(p => p.text || p.inlineData) };
227
+ });
228
+ } catch (error) { throw error; }
229
+ const requestBody = { contents, generationConfig: { temperature: 0.7, maxOutputTokens: 8192 } };
230
+ const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/${documentModel}:generateContent?key=${apiKey}`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody), });
231
+ if (!response.ok) { const errorBody = await response.json().catch(() => response.text()); throw new Error(`Google API request failed: ${response.status}: ${errorBody?.error?.message || JSON.stringify(errorBody)}`); }
232
+ const data = await response.json();
233
+ if (data.promptFeedback?.blockReason) { throw new Error(`Request blocked by Google API. Reason: ${data.promptFeedback.blockReason}.`); }
234
+ if (!data.candidates?.[0]) { throw new Error("No response generated for document content."); }
235
+ const candidate = data.candidates[0];
236
+ if (candidate.finishReason === "SAFETY" || candidate.finishReason === "RECITATION") { throw new Error(`Response blocked due to: ${candidate.finishReason}`); }
237
+ return candidate.content?.parts[0]?.text || "Document processed, but no text response was generated.";
238
+ }
239
+
240
+ async generateContent(messages: OpenAIMessage[], modelName: string, enableSearch: boolean = false): Promise<string> {
241
+ if (messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "document"))) return this.generateContentWithDocument(messages, modelName);
242
+ const apiKey = this.getNextApiKey();
243
+ const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
244
+ const contents = messages.map(msg => {
245
+ if (typeof msg.content === "string") return { role: msg.role === "assistant" ? "model" : "user", parts: [{ text: msg.content }] };
246
+ const messageParts = msg.content.map(part => {
247
  if (part.type === "text") return { text: part.text };
248
  if (part.type === "image_url" && part.image_url) {
249
+ const imageData = part.image_url.url;
250
+ if (imageData.startsWith("data:image/")) { const { mimeType, data } = this.extractImageData(imageData); return { inlineData: { mimeType, data } }; }
251
+ return { fileData: { mimeType: "image/jpeg", fileUri: imageData } };
252
  }
253
  return { text: "" };
254
  });
255
+ return { role: msg.role === "assistant" ? "model" : "user", parts: messageParts };
256
  });
257
+ const requestBody: any = { contents, generationConfig: { temperature: 0.7, maxOutputTokens: 4096 } };
258
+ if (enableSearch) requestBody.tools = [{ googleSearchRetrieval: {} }];
 
 
 
 
 
259
  const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/${fullModelName}:generateContent?key=${apiKey}`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody) });
260
+ if (!response.ok) throw new Error(`Google AI API error: ${response.status} - ${await response.text()}`);
261
  const data = await response.json();
262
+ if (!data.candidates?.[0]) throw new Error("No response generated from Google AI");
263
+ if (data.candidates[0].finishReason === "SAFETY") throw new Error("Response blocked due to safety filters");
264
+ return data.candidates[0].content?.parts[0]?.text || "No response generated";
 
 
265
  }
266
 
267
+ // Other methods like generateOrEditImage, etc., remain here unchanged...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  }
269
 
270
  class OpenAICompatibleServer {
 
278
 
279
  private authenticate(request: Request): boolean {
280
  if (!this.authKey) return true;
281
+ const authHeader = request.headers.get("Authorization");
282
+ return authHeader ? authHeader.replace("Bearer ", "") === this.authKey : false;
283
  }
284
 
285
+ private isDocumentContent(url?: string): boolean {
286
+ if (!url) return false;
287
+ const lowerUrl = url.toLowerCase();
288
+ return lowerUrl.includes('.pdf') || lowerUrl.startsWith('data:application/pdf') ||
289
+ lowerUrl.includes('.txt') || lowerUrl.startsWith('data:text/plain') ||
290
+ lowerUrl.includes('.md') || lowerUrl.startsWith('data:text/markdown');
291
+ }
292
+
293
+ // --- [新增] TTS 请求处理器 ---
294
  private async handleAudioSpeech(request: Request): Promise<Response> {
295
+ try {
296
+ if (request.headers.get("Content-Type") !== "application/json") {
297
+ throw new Error("Content-Type must be application/json");
298
+ }
299
+ const body: OpenAITTSRequest = await request.json();
300
+
301
+ if (!body.input || !body.voice) {
302
+ throw new Error("Missing required parameters: 'input' and 'voice' are required.");
303
+ }
304
+
305
+ // 调用 Google AI 服务进行语音合成
306
+ const audioData = await this.googleAI.synthesizeSpeech(body.input, body.voice);
307
+
308
+ // 返回原始音频文件
309
+ return new Response(audioData, {
310
+ status: 200,
311
+ headers: {
312
+ "Content-Type": "audio/mpeg", // OpenAI 默认返回 mp3
313
+ "Content-Length": String(audioData.length),
314
+ },
315
+ });
316
+ } catch (error) {
317
+ console.error("Error in /v1/audio/speech:", error.message);
318
+ const status = error.message.includes("required parameter") || error.message.includes("Content-Type") ? 400 : 500;
319
+ return new Response(JSON.stringify({ error: { message: error.message, type: "api_error" } }), { status, headers: { "Content-Type": "application/json" } });
320
  }
 
 
 
321
  }
322
+
323
  private async handleChatCompletions(request: Request): Promise<Response> {
324
+ try {
325
+ const body: OpenAIRequest = await request.json();
326
+ const requestedModel = body.model || "gemini-1.5-pro";
327
+ const stream = body.stream || false;
328
+ console.log(`Request for model: ${requestedModel}, stream: ${stream}`);
329
 
330
+ const hasDocument = body.messages.some(msg =>
331
+ Array.isArray(msg.content) &&
332
+ msg.content.some(part => part.type === "document" || this.isDocumentContent(part.document?.url))
333
+ );
334
+
335
+ let responseText: string;
336
 
337
+ if (hasDocument) {
338
+ responseText = await this.googleAI.generateContentWithDocument(body.messages, requestedModel);
339
+ } else {
340
+ // Fallback to simpler content generation if no special condition is met
341
+ responseText = await this.googleAI.generateContent(body.messages, requestedModel, false);
342
+ }
343
+
344
+ if (stream) {
345
+ const streamResponse = await this.streamStringAsOpenAIResponse(responseText, requestedModel);
346
+ return new Response(streamResponse, { headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive" } });
347
+ } else {
348
+ const responsePayload = {
349
+ id: `chatcmpl-${Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: requestedModel,
350
+ choices: [{ index: 0, message: { role: "assistant", content: responseText }, finish_reason: "stop" }],
351
+ usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
352
+ };
353
+ return new Response(JSON.stringify(responsePayload), { headers: { "Content-Type": "application/json" } });
354
+ }
355
+ } catch (error) {
356
+ console.error("Error in chat completions:", error.message);
357
+ const status = error.message.includes("exceeds the limit") || error.message.includes("Invalid") ? 400 : 500;
358
+ return new Response(JSON.stringify({ error: { message: error.message, type: status === 400 ? "invalid_request_error" : "api_error" } }), { status, headers: { "Content-Type": "application/json" } });
359
+ }
360
  }
361
 
362
+ private async streamStringAsOpenAIResponse(content: string, modelName: string): Promise<ReadableStream<Uint8Array>> {
363
  const encoder = new TextEncoder();
364
  const streamId = `chatcmpl-${Date.now()}`;
365
  const creationTime = Math.floor(Date.now() / 1000);
366
+ let contentQueue = content.split('');
367
+
368
  return new ReadableStream({
369
+ start(controller) {
370
+ const initialChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { role: 'assistant', content: '' }, finish_reason: null }] };
371
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(initialChunk)}\n\n`));
372
+ },
373
+ pull(controller) {
374
+ if (contentQueue.length === 0) {
375
+ const finalChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] };
376
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(finalChunk)}\n\n`));
377
+ controller.enqueue(encoder.encode('data: [DONE]\n\n'));
378
+ controller.close();
379
+ return;
380
  }
381
+ const char = contentQueue.shift();
382
+ const chunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { content: char }, finish_reason: null }] };
383
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
384
  }
 
 
 
 
385
  });
386
  }
387
 
388
  private async handleModels(): Promise<Response> {
389
+ try {
390
+ const googleModels = await this.googleAI.fetchOfficialModels();
391
+ const models = {
392
+ object: "list",
393
+ data: googleModels.map(model => ({
394
+ id: model.name.replace('models/', ''), object: "model", created: Math.floor(Date.now() / 1000), owned_by: "google",
395
+ }))
396
+ };
397
+ // [新增] 在模型列表中加入TTS模型以提高兼容性
398
+ models.data.push({ id: "tts-1", object: "model", created: Math.floor(Date.now() / 1000), owned_by: "google" });
399
+ models.data.push({ id: "tts-1-hd", object: "model", created: Math.floor(Date.now() / 1000), owned_by: "google" });
400
+
401
+ return new Response(JSON.stringify(models), { headers: { "Content-Type": "application/json" } });
402
+ } catch (error) {
403
+ console.error("Error fetching models:", error);
404
+ return new Response(JSON.stringify({ error: { message: "Failed to fetch models." } }), { status: 500 });
405
+ }
406
  }
407
 
408
  private async handleStatus(): Promise<Response> {
409
+ const status = {
410
+ status: "healthy", timestamp: new Date().toISOString(), version: "2.6.0-tts",
411
+ api_keys_loaded: this.googleAI.apiKeys.length,
412
+ models_in_cache: this.googleAI.cachedModels.length,
413
+ models_last_fetched: this.googleAI.modelsLastFetch > 0 ? new Date(this.googleAI.modelsLastFetch).toISOString() : "never"
414
+ };
415
+ return new Response(JSON.stringify(status), { headers: { "Content-Type": "application/json" } });
416
  }
417
 
418
  async handleRequest(request: Request): Promise<Response> {
419
+ const corsHeaders = {
420
+ "Access-Control-Allow-Origin": "*",
421
+ "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
422
+ "Access-Control-Allow-Headers": "Content-Type, Authorization",
423
+ };
424
+
425
  if (request.method === "OPTIONS") return new Response(null, { headers: corsHeaders });
426
 
427
  const url = new URL(request.url);
428
  let response: Response;
429
 
430
+ if (url.pathname === "/health" || url.pathname === "/status") {
431
+ response = await this.handleStatus();
432
+ } else if (!this.authenticate(request)) {
433
+ response = new Response(JSON.stringify({ error: { message: "Unauthorized" } }), { status: 401 });
434
+ } else if (url.pathname === "/v1/chat/completions" && request.method === "POST") {
435
+ response = await this.handleChatCompletions(request);
436
+ } else if (url.pathname === "/v1/models" && request.method === "GET") {
437
+ response = await this.handleModels();
438
+ } else if (url.pathname === "/v1/audio/speech" && request.method === "POST") { // [新增] TTS 路由
439
+ response = await this.handleAudioSpeech(request);
440
+ } else {
441
+ response = new Response("Not Found", { status: 404 });
442
  }
443
 
444
  const finalHeaders = new Headers(response.headers);
445
+ Object.entries(corsHeaders).forEach(([key, value]) => finalHeaders.set(key, value));
446
+ return new Response(response.body, { status: response.status, headers: finalHeaders });
447
  }
448
  }
449
 
450
  // --- 服务器启动 ---
451
  const server = new OpenAICompatibleServer();
452
+
453
+ console.log("🚀 OpenAI Compatible Server with Google AI starting on port 7860...");
454
+ console.log(` Loaded ${server.googleAI.apiKeys.length} API key(s).`);
455
+ console.log(`📄 Max document size set to ${MAX_DOCUMENT_SIZE_MB}MB.`);
456
+
457
+ server.googleAI.fetchOfficialModels()
458
+ .then(models => console.log(`✅ Successfully pre-fetched ${models.length} generative models.`))
459
+ .catch(error => console.warn(`⚠️ Could not pre-fetch models: ${error.message}.`));
460
+
461
+ console.log("\n🔗 Endpoints:");
462
+ console.log(" POST /v1/chat/completions");
463
+ console.log(" POST /v1/audio/speech <-- [NEW] TTS Endpoint");
464
+ console.log(" GET /v1/models");
465
+ console.log(" GET /status");
466
+
467
+ await serve(
468
+ (request: Request) => server.handleRequest(request),
469
+ { port: 7860 }
470
+ );