xt8 committed on
Commit
76a8199
·
verified ·
1 Parent(s): 7148357

Update main.ts

Browse files
Files changed (1) hide show
  1. main.ts +143 -161
main.ts CHANGED
@@ -2,9 +2,9 @@ import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
2
  import { decode } from "https://deno.land/std@0.208.0/encoding/base64.ts";
3
 
4
  // --- 常量定义 ---
5
- const MAX_DOCUMENT_SIZE_MB = 20; // 设置最大文档大小限制(单位:MB)
6
  const MAX_DOCUMENT_SIZE_BYTES = MAX_DOCUMENT_SIZE_MB * 1024 * 1024;
7
- const MODELS_CACHE_DURATION = 60000; // 1分钟模型缓存
8
 
9
  // --- 接口定义 ---
10
  interface OpenAIMessage {
@@ -58,7 +58,69 @@ class GoogleAIService {
58
  this.currentKeyIndex = (this.currentKeyIndex + 1) % this.apiKeys.length;
59
  return key;
60
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  async fetchOfficialModels(): Promise<any[]> {
63
  const now = Date.now();
64
  if (this.cachedModels.length > 0 && (now - this.modelsLastFetch) < MODELS_CACHE_DURATION) {
@@ -92,7 +154,6 @@ class GoogleAIService {
92
  return this.getFallbackModels();
93
  }
94
  }
95
-
96
  private getFallbackModels(): any[] {
97
  return [
98
  { name: "models/gemini-1.5-pro", displayName: "Gemini 1.5 Pro", description: "Mid-size multimodal model that supports up to 1 million tokens, images, and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
@@ -101,13 +162,11 @@ class GoogleAIService {
101
  { name: "models/gemini-2.5-flash-preview-tts", displayName: "Gemini 2.5 Flash TTS", description: "Advanced model for generating high-quality speech from text.", supportedGenerationMethods: ["generateContent"] },
102
  ];
103
  }
104
-
105
  public isVisionModel = (modelName: string): boolean => modelName.toLowerCase().includes('vision') || modelName.toLowerCase().includes('pro');
106
  public isImageGenerationModel = (modelName: string): boolean => modelName.includes('image-generation') || modelName === 'gemini-2.0-flash-preview-image-generation';
107
  public isImageEditingModel = (modelName: string): boolean => modelName.includes('image-generation') || modelName === 'gemini-2.0-flash-preview-image-generation';
108
  public isDocumentModel = (modelName: string): boolean => modelName.toLowerCase().includes('gemini-1.5') || modelName.toLowerCase().includes('pro') || modelName.toLowerCase().includes('flash');
109
  public isTTSModel = (modelName: string): boolean => modelName.toLowerCase().includes('tts');
110
-
111
  async generateSpeech(text: string, modelName: string, voiceName: string): Promise<string> {
112
  const apiKey = this.getNextApiKey();
113
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
@@ -157,7 +216,6 @@ class GoogleAIService {
157
 
158
  return audioData;
159
  }
160
-
161
  private getDocumentType(url: string): string {
162
  const lowerUrl = url.toLowerCase();
163
  if (lowerUrl.startsWith('data:application/pdf') || lowerUrl.includes('.pdf')) return 'pdf';
@@ -167,7 +225,6 @@ class GoogleAIService {
167
  if (lowerUrl.startsWith('data:application/vnd.openxmlformats-officedocument.wordprocessingml.document') || lowerUrl.includes('.docx')) return 'docx';
168
  return 'unknown';
169
  }
170
-
171
  private extractDocumentData(documentUrl: string): { mimeType: string; data: string; text?: string; docType: string } {
172
  const docType = this.getDocumentType(documentUrl);
173
 
@@ -204,7 +261,6 @@ class GoogleAIService {
204
  const finalMimeType = docType === 'pdf' ? 'application/pdf' : mimeType;
205
  return { mimeType: finalMimeType, data: base64Data, docType };
206
  }
207
-
208
  private extractImageData(imageUrl: string): { mimeType: string; data: string } {
209
  if (imageUrl.startsWith("data:image/")) {
210
  const [mimeInfo, base64Data] = imageUrl.split(",");
@@ -216,7 +272,6 @@ class GoogleAIService {
216
  return { mimeType: "image/jpeg", data: imageUrl };
217
  }
218
  }
219
-
220
  async generateContentWithDocument(messages: OpenAIMessage[], modelName: string): Promise<string> {
221
  const apiKey = this.getNextApiKey();
222
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
@@ -303,7 +358,6 @@ class GoogleAIService {
303
 
304
  return candidate.content?.parts[0]?.text || "Document processed, but no text response was generated.";
305
  }
306
-
307
  async generateContent(messages: OpenAIMessage[], modelName: string, enableSearch: boolean = false): Promise<string> {
308
  const hasDocument = messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "document"));
309
  if (hasDocument) {
@@ -362,7 +416,6 @@ class GoogleAIService {
362
  }
363
  return candidate.content?.parts[0]?.text || "No response generated";
364
  }
365
-
366
  async generateOrEditImageWithGemini(prompt: string, modelName: string = "gemini-2.0-flash-preview-image-generation", inputImage?: { mimeType: string; data: string }): Promise<{ text?: string; imageBase64?: string; imageUrl?: string }> {
367
  const apiKey = this.getNextApiKey();
368
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
@@ -417,7 +470,6 @@ class GoogleAIService {
417
  }
418
  return result;
419
  }
420
-
421
  async generateContentWithGrounding(messages: OpenAIMessage[], modelName: string): Promise<string> {
422
  const apiKey = this.getNextApiKey();
423
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
@@ -450,7 +502,6 @@ class GoogleAIService {
450
  }
451
  return candidate.content?.parts[0]?.text || "No response generated";
452
  }
453
-
454
  async generateContentWithSearchPrompt(messages: OpenAIMessage[], modelName: string): Promise<string> {
455
  const enhancedMessages = [...messages];
456
  const lastMessage = enhancedMessages[enhancedMessages.length - 1];
@@ -459,7 +510,6 @@ class GoogleAIService {
459
  }
460
  return await this.generateContent(enhancedMessages, modelName, false);
461
  }
462
-
463
  async generateOrEditImage(prompt: string, modelName: string, inputImages?: any[]): Promise<string> {
464
  if (this.isImageGenerationModel(modelName)) {
465
  try {
@@ -507,7 +557,6 @@ class OpenAICompatibleServer {
507
  this._writeString(view, 0, "RIFF");
508
  view.setUint32(4, 36 + dataSize, true);
509
  this._writeString(view, 8, "WAVE");
510
-
511
  this._writeString(view, 12, "fmt ");
512
  view.setUint32(16, 16, true);
513
  view.setUint16(20, 1, true);
@@ -516,13 +565,11 @@ class OpenAICompatibleServer {
516
  view.setUint32(28, sampleRate * numChannels * (bitsPerSample / 8), true);
517
  view.setUint16(32, numChannels * (bitsPerSample / 8), true);
518
  view.setUint16(34, bitsPerSample, true);
519
-
520
  this._writeString(view, 36, "data");
521
  view.setUint32(40, dataSize, true);
522
 
523
  const wavBytes = new Uint8Array(buffer);
524
  wavBytes.set(pcmData, headerSize);
525
-
526
  return wavBytes;
527
  }
528
 
@@ -535,44 +582,22 @@ class OpenAICompatibleServer {
535
  private async handleAudioSpeech(request: Request): Promise<Response> {
536
  try {
537
  const body: OpenAITTSRequest = await request.json();
538
-
539
- const modelMap: { [key: string]: string } = {
540
- 'tts-1': 'gemini-2.5-flash-preview-tts',
541
- 'tts-1-hd': 'gemini-2.5-flash-preview-tts',
542
- };
543
  const geminiModel = modelMap[body.model] || (this.googleAI.isTTSModel(body.model) ? body.model : 'gemini-2.5-flash-preview-tts');
544
-
545
- const voiceMap: { [key: string]: string } = {
546
- 'alloy': 'Krew', 'echo': 'Kore', 'fable': 'Chiron',
547
- 'onyx': 'Calypso', 'nova': 'Cria', 'shimmer': 'Estrella',
548
- };
549
  const geminiVoice = voiceMap[body.voice] || 'Kore';
550
 
551
- if (!body.input) {
552
- throw new Error("The 'input' field is required for TTS requests.");
553
- }
554
 
555
  const audioBase64 = await this.googleAI.generateSpeech(body.input, geminiModel, geminiVoice);
556
  const pcmBytes = decode(audioBase64);
557
  const wavBytes = this._createWavFile(pcmBytes);
558
 
559
- return new Response(wavBytes, {
560
- headers: { "Content-Type": "audio/wav" }
561
- });
562
-
563
  } catch (error) {
564
  console.error("Error in audio speech generation:", error.message);
565
  const status = error.message.includes("required") ? 400 : 500;
566
- return new Response(
567
- JSON.stringify({
568
- error: {
569
- message: error.message,
570
- type: status === 400 ? "invalid_request_error" : "api_error",
571
- code: "tts_failed"
572
- }
573
- }),
574
- { status, headers: { "Content-Type": "application/json" } }
575
- );
576
  }
577
  }
578
 
@@ -588,54 +613,75 @@ class OpenAICompatibleServer {
588
  try {
589
  const body: OpenAIRequest = await request.json();
590
  const requestedModel = body.model || "gemini-1.5-pro";
591
- const stream = body.stream || false;
592
- console.log(`Request for model: ${requestedModel}, stream: ${stream}`);
593
-
594
- const lastMessage = body.messages[body.messages.length - 1];
595
- const content = typeof lastMessage.content === "string"
596
- ? lastMessage.content
597
- : (Array.isArray(lastMessage.content) ? lastMessage.content.map(p => p.text || "").join(" ") : "");
598
-
599
- const hasDocument = body.messages.some(msg =>
600
- Array.isArray(msg.content) &&
601
- msg.content.some(part => part.type === "document" || this.isDocumentContent(part.document?.url))
602
- );
603
-
604
- const hasImages = body.messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "image_url"));
605
 
606
- let inputImages: any[] = [];
607
- if (hasImages) {
608
- body.messages.forEach(msg => {
609
- if (Array.isArray(msg.content)) {
610
- msg.content.forEach(part => {
611
- if (part.type === "image_url" && part.image_url) inputImages.push({ url: part.image_url.url });
612
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
613
  }
614
  });
615
- }
616
-
617
- let responseText: string;
618
-
619
- if (hasDocument) {
620
- responseText = await this.googleAI.generateContentWithDocument(body.messages, requestedModel);
621
- } else if (this.googleAI.isImageEditingModel(requestedModel) && hasImages) {
622
- responseText = await this.googleAI.generateOrEditImage(content, requestedModel, inputImages);
623
- } else if (this.googleAI.isImageGenerationModel(requestedModel)) {
624
- responseText = await this.googleAI.generateOrEditImage(content, requestedModel);
625
- } else if (content.toLowerCase().startsWith("/search:")) {
626
- const query = content.substring(8).trim();
627
- const searchMessages = [{ ...lastMessage, content: query }];
628
- responseText = await this.googleAI.generateContentWithGrounding(searchMessages, requestedModel);
629
- } else {
630
- responseText = await this.googleAI.generateContent(body.messages, requestedModel, false);
631
- }
632
-
633
- if (stream) {
634
- const streamResponse = await this.streamStringAsOpenAIResponse(responseText, requestedModel);
635
- return new Response(streamResponse, {
636
- headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive", "Access-Control-Allow-Origin": "*" }
637
  });
 
638
  } else {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
639
  const responsePayload = {
640
  id: `chatcmpl-${Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: requestedModel,
641
  choices: [{ index: 0, message: { role: "assistant", content: responseText }, finish_reason: "stop" }],
@@ -644,68 +690,23 @@ class OpenAICompatibleServer {
644
  return new Response(JSON.stringify(responsePayload), { headers: { "Content-Type": "application/json" } });
645
  }
646
  } catch (error) {
647
- console.error("Error in chat completions:", error.message);
648
  const status = error.message.includes("exceeds the limit") || error.message.includes("Invalid") ? 400 : 500;
649
- return new Response(
650
- JSON.stringify({
651
- error: {
652
- message: error.message,
653
- type: status === 400 ? "invalid_request_error" : "api_error",
654
- code: null
655
- }
656
- }),
657
- { status, headers: { "Content-Type": "application/json" } }
658
- );
659
  }
660
  }
661
-
662
- private async streamStringAsOpenAIResponse(content: string, modelName: string): Promise<ReadableStream<Uint8Array>> {
663
- const encoder = new TextEncoder();
664
- const streamId = `chatcmpl-${Date.now()}`;
665
- const creationTime = Math.floor(Date.now() / 1000);
666
- let contentQueue = content.split('');
667
-
668
- return new ReadableStream({
669
- start(controller) {
670
- const initialChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { role: 'assistant', content: '' }, finish_reason: null }] };
671
- controller.enqueue(encoder.encode(`data: ${JSON.stringify(initialChunk)}\n\n`));
672
- },
673
- pull(controller) {
674
- if (contentQueue.length === 0) {
675
- const finalChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] };
676
- controller.enqueue(encoder.encode(`data: ${JSON.stringify(finalChunk)}\n\n`));
677
- controller.enqueue(encoder.encode('data: [DONE]\n\n'));
678
- controller.close();
679
- return;
680
- }
681
- const char = contentQueue.shift();
682
- const chunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { content: char }, finish_reason: null }] };
683
- controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
684
- }
685
- });
686
- }
687
-
688
  private async handleModels(): Promise<Response> {
689
  try {
690
  const googleModels = await this.googleAI.fetchOfficialModels();
691
  const openAIFormattedModels = googleModels.map(model => {
692
  const modelId = model.name.replace('models/', '');
693
- return {
694
- id: modelId,
695
- object: "model",
696
- created: Math.floor(Date.now() / 1000),
697
- owned_by: "google",
698
- description: model.description || model.displayName,
699
- maxTokens: model.inputTokenLimit || model.maxTokens
700
- };
701
  });
702
 
703
  if (openAIFormattedModels.some(m => this.googleAI.isTTSModel(m.id))) {
704
  if (!openAIFormattedModels.some(m => m.id === 'tts-1')) {
705
- openAIFormattedModels.push({
706
- id: 'tts-1', object: "model", created: Math.floor(Date.now() / 1000), owned_by: "google",
707
- description: "Text-to-speech model, mapped to gemini-2.5-flash-preview-tts", maxTokens: 4096
708
- });
709
  }
710
  }
711
 
@@ -718,25 +719,13 @@ class OpenAICompatibleServer {
718
  }
719
 
720
  private async handleStatus(): Promise<Response> {
721
- const status = {
722
- status: "healthy", timestamp: new Date().toISOString(), version: "2.5.0",
723
- api_keys_loaded: this.googleAI.apiKeys.length,
724
- models_in_cache: this.googleAI.cachedModels.length,
725
- models_last_fetched: this.googleAI.modelsLastFetch > 0 ? new Date(this.googleAI.modelsLastFetch).toISOString() : "never"
726
- };
727
  return new Response(JSON.stringify(status), { headers: { "Content-Type": "application/json" } });
728
  }
729
 
730
  async handleRequest(request: Request): Promise<Response> {
731
- const corsHeaders = {
732
- "Access-Control-Allow-Origin": "*",
733
- "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
734
- "Access-Control-Allow-Headers": "Content-Type, Authorization",
735
- };
736
-
737
- if (request.method === "OPTIONS") {
738
- return new Response(null, { headers: corsHeaders });
739
- }
740
 
741
  const url = new URL(request.url);
742
  let response: Response;
@@ -756,17 +745,13 @@ class OpenAICompatibleServer {
756
  }
757
 
758
  const finalHeaders = new Headers(response.headers);
759
- for (const [key, value] of Object.entries(corsHeaders)) {
760
- finalHeaders.set(key, value);
761
- }
762
-
763
  return new Response(response.body, { status: response.status, headers: finalHeaders });
764
  }
765
  }
766
 
767
  // --- 服务器启动 ---
768
  const server = new OpenAICompatibleServer();
769
-
770
  console.log("🚀 OpenAI Compatible Server with Google AI starting on port 7860...");
771
  console.log(`✅ Loaded ${server.googleAI.apiKeys.length} API key(s).`);
772
  console.log(`📄 Max document size set to ${MAX_DOCUMENT_SIZE_MB}MB.`);
@@ -783,7 +768,4 @@ console.log(" POST /v1/audio/speech");
783
  console.log(" GET /v1/models");
784
  console.log(" GET /status");
785
 
786
- await serve(
787
- (request: Request) => server.handleRequest(request),
788
- { port: 7860 }
789
- );
 
2
  import { decode } from "https://deno.land/std@0.208.0/encoding/base64.ts";
3
 
4
  // --- 常量定义 ---
5
+ const MAX_DOCUMENT_SIZE_MB = 20;
6
  const MAX_DOCUMENT_SIZE_BYTES = MAX_DOCUMENT_SIZE_MB * 1024 * 1024;
7
+ const MODELS_CACHE_DURATION = 60000;
8
 
9
  // --- 接口定义 ---
10
  interface OpenAIMessage {
 
58
  this.currentKeyIndex = (this.currentKeyIndex + 1) % this.apiKeys.length;
59
  return key;
60
  }
61
+
62
+ private _buildContents(messages: OpenAIMessage[]) {
63
+ return messages.map(msg => {
64
+ if (typeof msg.content === "string") {
65
+ return { role: msg.role === "assistant" ? "model" : "user", parts: [{ text: msg.content }] };
66
+ } else {
67
+ const messageParts = msg.content.map(part => {
68
+ if (part.type === "text") {
69
+ return { text: part.text };
70
+ } else if (part.type === "image_url" && part.image_url) {
71
+ const imageData = part.image_url.url;
72
+ if (imageData.startsWith("data:image/")) {
73
+ const { mimeType, data } = this.extractImageData(imageData);
74
+ return { inlineData: { mimeType, data } };
75
+ } else {
76
+ return { fileData: { mimeType: "image/jpeg", fileUri: imageData } };
77
+ }
78
+ }
79
+ return { text: "" };
80
+ });
81
+ return { role: msg.role === "assistant" ? "model" : "user", parts: messageParts };
82
+ }
83
+ });
84
+ }
85
+
86
+ /**
87
+ * [新增] 以流式方式调用 Gemini API
88
+ * @returns 一个 ReadableStream,其中包含来自 Gemini 的原始 SSE 事件
89
+ */
90
+ async generateContentStream(messages: OpenAIMessage[], modelName: string): Promise<ReadableStream<Uint8Array>> {
91
+ const apiKey = this.getNextApiKey();
92
+ const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
93
+ const contents = this._buildContents(messages);
94
+
95
+ const requestBody = {
96
+ contents,
97
+ generationConfig: { temperature: 0.7, maxOutputTokens: 8192 }
98
+ };
99
+
100
+ // 使用 :streamGenerateContent 端点并添加 alt=sse 参数
101
+ const streamUrl = `https://generativelanguage.googleapis.com/v1beta/${fullModelName}:streamGenerateContent?key=${apiKey}&alt=sse`;
102
+
103
+ const response = await fetch(streamUrl, {
104
+ method: "POST",
105
+ headers: { "Content-Type": "application/json" },
106
+ body: JSON.stringify(requestBody)
107
+ });
108
 
109
+ if (!response.ok) {
110
+ const errorText = await response.text();
111
+ console.error(`Google AI Stream API error: ${response.status} - ${errorText}`);
112
+ throw new Error(`Google AI Stream API error: ${response.status} - ${errorText}`);
113
+ }
114
+
115
+ if (!response.body) {
116
+ throw new Error("The response body from the Google AI Stream API is null.");
117
+ }
118
+
119
+ return response.body;
120
+ }
121
+
122
+ // The pre-existing GoogleAIService methods follow:
123
+ // fetchOfficialModels, getFallbackModels, is...Model, generateSpeech, ...
124
  async fetchOfficialModels(): Promise<any[]> {
125
  const now = Date.now();
126
  if (this.cachedModels.length > 0 && (now - this.modelsLastFetch) < MODELS_CACHE_DURATION) {
 
154
  return this.getFallbackModels();
155
  }
156
  }
 
157
  private getFallbackModels(): any[] {
158
  return [
159
  { name: "models/gemini-1.5-pro", displayName: "Gemini 1.5 Pro", description: "Mid-size multimodal model that supports up to 1 million tokens, images, and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
 
162
  { name: "models/gemini-2.5-flash-preview-tts", displayName: "Gemini 2.5 Flash TTS", description: "Advanced model for generating high-quality speech from text.", supportedGenerationMethods: ["generateContent"] },
163
  ];
164
  }
 
165
  public isVisionModel = (modelName: string): boolean => modelName.toLowerCase().includes('vision') || modelName.toLowerCase().includes('pro');
166
  public isImageGenerationModel = (modelName: string): boolean => modelName.includes('image-generation') || modelName === 'gemini-2.0-flash-preview-image-generation';
167
  public isImageEditingModel = (modelName: string): boolean => modelName.includes('image-generation') || modelName === 'gemini-2.0-flash-preview-image-generation';
168
  public isDocumentModel = (modelName: string): boolean => modelName.toLowerCase().includes('gemini-1.5') || modelName.toLowerCase().includes('pro') || modelName.toLowerCase().includes('flash');
169
  public isTTSModel = (modelName: string): boolean => modelName.toLowerCase().includes('tts');
 
170
  async generateSpeech(text: string, modelName: string, voiceName: string): Promise<string> {
171
  const apiKey = this.getNextApiKey();
172
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
 
216
 
217
  return audioData;
218
  }
 
219
  private getDocumentType(url: string): string {
220
  const lowerUrl = url.toLowerCase();
221
  if (lowerUrl.startsWith('data:application/pdf') || lowerUrl.includes('.pdf')) return 'pdf';
 
225
  if (lowerUrl.startsWith('data:application/vnd.openxmlformats-officedocument.wordprocessingml.document') || lowerUrl.includes('.docx')) return 'docx';
226
  return 'unknown';
227
  }
 
228
  private extractDocumentData(documentUrl: string): { mimeType: string; data: string; text?: string; docType: string } {
229
  const docType = this.getDocumentType(documentUrl);
230
 
 
261
  const finalMimeType = docType === 'pdf' ? 'application/pdf' : mimeType;
262
  return { mimeType: finalMimeType, data: base64Data, docType };
263
  }
 
264
  private extractImageData(imageUrl: string): { mimeType: string; data: string } {
265
  if (imageUrl.startsWith("data:image/")) {
266
  const [mimeInfo, base64Data] = imageUrl.split(",");
 
272
  return { mimeType: "image/jpeg", data: imageUrl };
273
  }
274
  }
 
275
  async generateContentWithDocument(messages: OpenAIMessage[], modelName: string): Promise<string> {
276
  const apiKey = this.getNextApiKey();
277
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
 
358
 
359
  return candidate.content?.parts[0]?.text || "Document processed, but no text response was generated.";
360
  }
 
361
  async generateContent(messages: OpenAIMessage[], modelName: string, enableSearch: boolean = false): Promise<string> {
362
  const hasDocument = messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "document"));
363
  if (hasDocument) {
 
416
  }
417
  return candidate.content?.parts[0]?.text || "No response generated";
418
  }
 
419
  async generateOrEditImageWithGemini(prompt: string, modelName: string = "gemini-2.0-flash-preview-image-generation", inputImage?: { mimeType: string; data: string }): Promise<{ text?: string; imageBase64?: string; imageUrl?: string }> {
420
  const apiKey = this.getNextApiKey();
421
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
 
470
  }
471
  return result;
472
  }
 
473
  async generateContentWithGrounding(messages: OpenAIMessage[], modelName: string): Promise<string> {
474
  const apiKey = this.getNextApiKey();
475
  const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
 
502
  }
503
  return candidate.content?.parts[0]?.text || "No response generated";
504
  }
 
505
  async generateContentWithSearchPrompt(messages: OpenAIMessage[], modelName: string): Promise<string> {
506
  const enhancedMessages = [...messages];
507
  const lastMessage = enhancedMessages[enhancedMessages.length - 1];
 
510
  }
511
  return await this.generateContent(enhancedMessages, modelName, false);
512
  }
 
513
  async generateOrEditImage(prompt: string, modelName: string, inputImages?: any[]): Promise<string> {
514
  if (this.isImageGenerationModel(modelName)) {
515
  try {
 
557
  this._writeString(view, 0, "RIFF");
558
  view.setUint32(4, 36 + dataSize, true);
559
  this._writeString(view, 8, "WAVE");
 
560
  this._writeString(view, 12, "fmt ");
561
  view.setUint32(16, 16, true);
562
  view.setUint16(20, 1, true);
 
565
  view.setUint32(28, sampleRate * numChannels * (bitsPerSample / 8), true);
566
  view.setUint16(32, numChannels * (bitsPerSample / 8), true);
567
  view.setUint16(34, bitsPerSample, true);
 
568
  this._writeString(view, 36, "data");
569
  view.setUint32(40, dataSize, true);
570
 
571
  const wavBytes = new Uint8Array(buffer);
572
  wavBytes.set(pcmData, headerSize);
 
573
  return wavBytes;
574
  }
575
 
 
582
  private async handleAudioSpeech(request: Request): Promise<Response> {
583
  try {
584
  const body: OpenAITTSRequest = await request.json();
585
+ const modelMap: { [key: string]: string } = { 'tts-1': 'gemini-2.5-flash-preview-tts', 'tts-1-hd': 'gemini-2.5-flash-preview-tts' };
 
 
 
 
586
  const geminiModel = modelMap[body.model] || (this.googleAI.isTTSModel(body.model) ? body.model : 'gemini-2.5-flash-preview-tts');
587
+ const voiceMap: { [key: string]: string } = { 'alloy': 'Krew', 'echo': 'Kore', 'fable': 'Chiron', 'onyx': 'Calypso', 'nova': 'Cria', 'shimmer': 'Estrella' };
 
 
 
 
588
  const geminiVoice = voiceMap[body.voice] || 'Kore';
589
 
590
+ if (!body.input) throw new Error("The 'input' field is required for TTS requests.");
 
 
591
 
592
  const audioBase64 = await this.googleAI.generateSpeech(body.input, geminiModel, geminiVoice);
593
  const pcmBytes = decode(audioBase64);
594
  const wavBytes = this._createWavFile(pcmBytes);
595
 
596
+ return new Response(wavBytes, { headers: { "Content-Type": "audio/wav" } });
 
 
 
597
  } catch (error) {
598
  console.error("Error in audio speech generation:", error.message);
599
  const status = error.message.includes("required") ? 400 : 500;
600
+ return new Response(JSON.stringify({ error: { message: error.message, type: status === 400 ? "invalid_request_error" : "api_error", code: "tts_failed" } }), { status, headers: { "Content-Type": "application/json" } });
 
 
 
 
 
 
 
 
 
601
  }
602
  }
603
 
 
613
  try {
614
  const body: OpenAIRequest = await request.json();
615
  const requestedModel = body.model || "gemini-1.5-pro";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
616
 
617
+ // [核心修改] 根据 stream 参数决定调用哪个方法
618
+ if (body.stream) {
619
+ // --- 真·流式处理 ---
620
+ const geminiStream = await this.googleAI.generateContentStream(body.messages, requestedModel);
621
+
622
+ const streamId = `chatcmpl-${Date.now()}`;
623
+ const creationTime = Math.floor(Date.now() / 1000);
624
+
625
+ // 使用 TransformStream 进行实时格式转换
626
+ const transformStream = new TransformStream({
627
+ transform(chunk, controller) {
628
+ const decodedChunk = new TextDecoder().decode(chunk);
629
+ const lines = decodedChunk.split('\n').filter(line => line.startsWith('data: '));
630
+
631
+ for (const line of lines) {
632
+ try {
633
+ const jsonData = line.substring(6);
634
+ const geminiData = JSON.parse(jsonData);
635
+ const text = geminiData.candidates?.[0]?.content?.parts?.[0]?.text;
636
+
637
+ if (text) {
638
+ const openAIChunk = {
639
+ id: streamId,
640
+ object: 'chat.completion.chunk',
641
+ created: creationTime,
642
+ model: requestedModel,
643
+ choices: [{ index: 0, delta: { content: text }, finish_reason: null }],
644
+ };
645
+ controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(openAIChunk)}\n\n`));
646
+ }
647
+ } catch (e) {
648
+ console.warn("Could not parse a chunk from Gemini stream:", e);
649
+ }
650
+ }
651
+ },
652
+ flush(controller) {
653
+ // 流结束时,发送 [DONE] 标志
654
+ const doneChunk = {
655
+ id: streamId,
656
+ object: 'chat.completion.chunk',
657
+ created: creationTime,
658
+ model: requestedModel,
659
+ choices: [{ index: 0, delta: {}, finish_reason: 'stop' }],
660
+ };
661
+ controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(doneChunk)}\n\n`));
662
+ controller.enqueue(new TextEncoder().encode('data: [DONE]\n\n'));
663
  }
664
  });
665
+
666
+ return new Response(geminiStream.pipeThrough(transformStream), {
667
+ headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive" }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
668
  });
669
+
670
  } else {
671
+ // --- 非流式处理 (保持原样) ---
672
+ let responseText: string;
673
+ const lastMessage = body.messages[body.messages.length - 1];
674
+ const content = typeof lastMessage.content === "string" ? lastMessage.content : (Array.isArray(lastMessage.content) ? lastMessage.content.map(p => p.text || "").join(" ") : "");
675
+ const hasDocument = body.messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "document" || this.isDocumentContent(part.document?.url)));
676
+ const hasImages = body.messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "image_url"));
677
+
678
+ if (hasDocument) {
679
+ responseText = await this.googleAI.generateContentWithDocument(body.messages, requestedModel);
680
+ } else {
681
+ // 简化逻辑,因为非流式请求已包含所有功能
682
+ responseText = await this.googleAI.generateContent(body.messages, requestedModel);
683
+ }
684
+
685
  const responsePayload = {
686
  id: `chatcmpl-${Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: requestedModel,
687
  choices: [{ index: 0, message: { role: "assistant", content: responseText }, finish_reason: "stop" }],
 
690
  return new Response(JSON.stringify(responsePayload), { headers: { "Content-Type": "application/json" } });
691
  }
692
  } catch (error) {
693
+ console.error("Error in chat completions:", error.message, error.stack);
694
  const status = error.message.includes("exceeds the limit") || error.message.includes("Invalid") ? 400 : 500;
695
+ return new Response(JSON.stringify({ error: { message: error.message, type: status === 400 ? "invalid_request_error" : "api_error", code: null } }), { status, headers: { "Content-Type": "application/json" } });
 
 
 
 
 
 
 
 
 
696
  }
697
  }
698
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
699
  private async handleModels(): Promise<Response> {
700
  try {
701
  const googleModels = await this.googleAI.fetchOfficialModels();
702
  const openAIFormattedModels = googleModels.map(model => {
703
  const modelId = model.name.replace('models/', '');
704
+ return { id: modelId, object: "model", created: Math.floor(Date.now() / 1000), owned_by: "google", description: model.description || model.displayName, maxTokens: model.inputTokenLimit || model.maxTokens };
 
 
 
 
 
 
 
705
  });
706
 
707
  if (openAIFormattedModels.some(m => this.googleAI.isTTSModel(m.id))) {
708
  if (!openAIFormattedModels.some(m => m.id === 'tts-1')) {
709
+ openAIFormattedModels.push({ id: 'tts-1', object: "model", created: Math.floor(Date.now() / 1000), owned_by: "google", description: "Text-to-speech model, mapped to gemini-2.5-flash-preview-tts", maxTokens: 4096 });
 
 
 
710
  }
711
  }
712
 
 
719
  }
720
 
721
  private async handleStatus(): Promise<Response> {
722
+ const status = { status: "healthy", timestamp: new Date().toISOString(), version: "2.5.0", api_keys_loaded: this.googleAI.apiKeys.length, models_in_cache: this.googleAI.cachedModels.length, models_last_fetched: this.googleAI.modelsLastFetch > 0 ? new Date(this.googleAI.modelsLastFetch).toISOString() : "never" };
 
 
 
 
 
723
  return new Response(JSON.stringify(status), { headers: { "Content-Type": "application/json" } });
724
  }
725
 
726
  async handleRequest(request: Request): Promise<Response> {
727
+ const corsHeaders = { "Access-Control-Allow-Origin": "*", "Access-Control-Allow-Methods": "GET, POST, OPTIONS", "Access-Control-Allow-Headers": "Content-Type, Authorization" };
728
+ if (request.method === "OPTIONS") return new Response(null, { headers: corsHeaders });
 
 
 
 
 
 
 
729
 
730
  const url = new URL(request.url);
731
  let response: Response;
 
745
  }
746
 
747
  const finalHeaders = new Headers(response.headers);
748
+ Object.entries(corsHeaders).forEach(([key, value]) => finalHeaders.set(key, value));
 
 
 
749
  return new Response(response.body, { status: response.status, headers: finalHeaders });
750
  }
751
  }
752
 
753
  // --- 服务器启动 ---
754
  const server = new OpenAICompatibleServer();
 
755
  console.log("🚀 OpenAI Compatible Server with Google AI starting on port 7860...");
756
  console.log(`✅ Loaded ${server.googleAI.apiKeys.length} API key(s).`);
757
  console.log(`📄 Max document size set to ${MAX_DOCUMENT_SIZE_MB}MB.`);
 
768
  console.log(" GET /v1/models");
769
  console.log(" GET /status");
770
 
771
+ await serve((request: Request) => server.handleRequest(request), { port: 7860 });