drriver committed on
Commit
fb48735
·
verified ·
1 Parent(s): 027d6df

Upload 9 files

Browse files
Files changed (9) hide show
  1. Dockerfile +14 -0
  2. app/__init__.py +0 -0
  3. app/gemini.py +346 -0
  4. app/index.html +306 -0
  5. app/main.py +1506 -0
  6. app/models.py +46 -0
  7. app/utils.py +275 -0
  8. requirements.txt +7 -0
  9. version.txt +1 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python 3.11 base keeps the image small.
FROM python:3.11-slim

WORKDIR /app

# App package plus the two metadata files read at runtime.
COPY ./app /app/app
COPY requirements.txt .
COPY version.txt .

RUN pip install --no-cache-dir -r requirements.txt

# Environment variables (set these in Hugging Face Spaces)
# ENV GEMINI_API_KEYS=your_key_1,your_key_2,your_key_3

# Port 7860 is the default port exposed by Hugging Face Spaces.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/__init__.py ADDED
File without changes
app/gemini.py ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ import os
4
+ import asyncio
5
+ import time
6
+ from app.models import ChatCompletionRequest, Message # 相对导入
7
+ from dataclasses import dataclass
8
+ from typing import Optional, Dict, Any, List
9
+ import httpx
10
+ import logging
11
+ from app.utils import format_log_message
12
+
13
logger = logging.getLogger('my_logger')

# Whether fake streaming is enabled (keep-alive newlines instead of real SSE); on by default.
FAKE_STREAMING = os.environ.get("FAKE_STREAMING", "true").lower() in ["true", "1", "yes"]
# Interval (seconds) between empty keep-alive chunks in fake-streaming mode.
FAKE_STREAMING_INTERVAL = float(os.environ.get("FAKE_STREAMING_INTERVAL", "1"))
+
20
@dataclass
class GeneratedText:
    """A chunk of model output: the text plus the reason generation stopped.

    finish_reason is None while generation is still in progress.
    """
    text: str
    finish_reason: Optional[str] = None
25
+
26
class ResponseWrapper:
    """Convenience wrapper around a raw Gemini generateContent JSON payload.

    Eagerly extracts the answer text, "thought" parts, finish reason and
    token-usage counts so callers can read them as simple properties.
    Missing or malformed fields degrade to "" / None instead of raising.
    """

    def __init__(self, data: Dict[Any, Any]):
        self._data = data
        self._text = self._extract_text()
        self._finish_reason = self._extract_finish_reason()
        self._prompt_token_count = self._extract_prompt_token_count()
        self._candidates_token_count = self._extract_candidates_token_count()
        self._total_token_count = self._extract_total_token_count()
        self._thoughts = self._extract_thoughts()
        # Pretty-printed copy of the raw payload, mainly for logging/debugging.
        self._json_dumps = json.dumps(self._data, indent=4, ensure_ascii=False)

    def _extract_thoughts(self) -> Optional[str]:
        """Concatenate every 'thought' part of the first candidate ('' if none).

        Fix: the previous version returned only the FIRST thought part,
        dropping any subsequent ones.
        """
        try:
            return "".join(
                part['text']
                for part in self._data['candidates'][0]['content']['parts']
                if 'thought' in part
            )
        except (KeyError, IndexError, TypeError):
            return ""

    def _extract_text(self) -> str:
        """Concatenate every non-'thought' text part of the first candidate.

        Fix: the previous version returned only the FIRST matching part,
        silently dropping the rest of a multi-part response.
        """
        try:
            return "".join(
                part['text']
                for part in self._data['candidates'][0]['content']['parts']
                if 'thought' not in part
            )
        except (KeyError, IndexError, TypeError):
            return ""

    def _extract_finish_reason(self) -> Optional[str]:
        """Return candidates[0].finishReason, or None if absent."""
        try:
            return self._data['candidates'][0].get('finishReason')
        except (KeyError, IndexError, TypeError, AttributeError):
            return None

    def _extract_prompt_token_count(self) -> Optional[int]:
        """Return usageMetadata.promptTokenCount, or None if absent."""
        return self._usage_value('promptTokenCount')

    def _extract_candidates_token_count(self) -> Optional[int]:
        """Return usageMetadata.candidatesTokenCount, or None if absent."""
        return self._usage_value('candidatesTokenCount')

    def _extract_total_token_count(self) -> Optional[int]:
        """Return usageMetadata.totalTokenCount, or None if absent."""
        return self._usage_value('totalTokenCount')

    def _usage_value(self, field: str) -> Optional[int]:
        """Shared lookup for usageMetadata fields; tolerant of missing/None metadata."""
        usage = self._data.get('usageMetadata')
        if isinstance(usage, dict):
            return usage.get(field)
        return None

    @property
    def text(self) -> str:
        return self._text

    @property
    def finish_reason(self) -> Optional[str]:
        return self._finish_reason

    @property
    def prompt_token_count(self) -> Optional[int]:
        return self._prompt_token_count

    @property
    def candidates_token_count(self) -> Optional[int]:
        return self._candidates_token_count

    @property
    def total_token_count(self) -> Optional[int]:
        return self._total_token_count

    @property
    def thoughts(self) -> Optional[str]:
        return self._thoughts

    @property
    def json_dumps(self) -> str:
        return self._json_dumps
107
+
108
class GeminiClient:
    """Async client for the Google Generative Language (Gemini) REST API.

    Supports real SSE streaming, a "fake streaming" keep-alive mode,
    non-streaming completion, and OpenAI-to-Gemini message conversion.
    """

    # Filled in at startup via list_available_models(); shared by all instances.
    AVAILABLE_MODELS = []
    # Extra model names injected via the EXTRA_MODELS env var (comma-separated).
    EXTRA_MODELS = os.environ.get("EXTRA_MODELS", "").split(",")

    def __init__(self, api_key: str):
        self.api_key = api_key

    async def stream_chat(self, request: ChatCompletionRequest, contents, safety_settings, system_instruction):
        """Yield text chunks for a streaming chat request.

        In FAKE_STREAMING mode this generator only emits newline keep-alive
        chunks; the real request is made by the caller (main.py), which
        cancels this generator once it has a response.
        """
        extra_log = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'N/A'}
        log_msg = format_log_message('INFO', "流式请求开始", extra=extra_log)
        logger.info(log_msg)

        # Check whether fake streaming is enabled.
        if FAKE_STREAMING:
            log_msg = format_log_message('INFO', "使用假流式请求模式(发送换行符保持连接)", extra=extra_log)
            logger.info(log_msg)

            try:
                # This branch no longer uses self.api_key directly; main.py owns
                # the API-key list and rotation.  Here we only keep the
                # connection alive until main.py obtains a response.

                # Keep emitting newlines until the caller cancels this generator.
                start_time = time.time()
                while True:
                    # A newline acts as the keep-alive payload.
                    yield "\n"
                    # Wait between keep-alive chunks.
                    await asyncio.sleep(FAKE_STREAMING_INTERVAL)

                    # Hard cap (300 s) so a forgotten generator cannot wait forever.
                    if time.time() - start_time > 300:
                        log_msg = format_log_message('WARNING', "假流式请求等待时间过长,强制结束", extra=extra_log)
                        logger.warning(log_msg)
                        # Raise a timeout so the caller can surface the failure.
                        error_msg = "假流式请求等待时间过长,所有API密钥均已尝试"
                        extra_log_timeout = {'key': self.api_key[:8], 'request_type': 'fake-stream', 'model': request.model, 'status_code': 'TIMEOUT', 'error_message': error_msg}
                        log_msg = format_log_message('ERROR', error_msg, extra=extra_log_timeout)
                        logger.error(log_msg)
                        raise TimeoutError(error_msg)

            except Exception as e:
                if not isinstance(e, asyncio.CancelledError):  # skip logging for cancellation (expected shutdown path)
                    error_msg = f"假流式处理期间发生错误: {str(e)}"
                    extra_log_error = {'key': self.api_key[:8], 'request_type': 'fake-stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
                    log_msg = format_log_message('ERROR', error_msg, extra=extra_log_error)
                    logger.error(log_msg)
                raise e
            finally:
                log_msg = format_log_message('INFO', "假流式请求结束", extra=extra_log)
                logger.info(log_msg)
        else:
            # Real streaming via the Gemini SSE endpoint.
            # NOTE(review): "think" in the model name routes to v1alpha — presumably
            # for thinking-preview models; confirm against current API docs.
            api_version = "v1alpha" if "think" in request.model else "v1beta"
            url = f"https://generativelanguage.googleapis.com/{api_version}/models/{request.model}:streamGenerateContent?key={self.api_key}&alt=sse"
            headers = {
                "Content-Type": "application/json",
            }
            data = {
                "contents": contents,
                "generationConfig": {
                    "temperature": request.temperature,
                    "maxOutputTokens": request.max_tokens,
                },
                "safetySettings": safety_settings,
            }
            if system_instruction:
                data["system_instruction"] = system_instruction

            async with httpx.AsyncClient() as client:
                async with client.stream("POST", url, headers=headers, json=data, timeout=600) as response:
                    # Accumulates partial SSE lines until a full JSON object parses.
                    buffer = b""
                    try:
                        async for line in response.aiter_lines():
                            if not line.strip():
                                continue
                            # Strip the SSE "data: " prefix before buffering.
                            if line.startswith("data: "):
                                line = line[len("data: "):]
                            buffer += line.encode('utf-8')
                            try:
                                data = json.loads(buffer.decode('utf-8'))
                                buffer = b""
                                if 'candidates' in data and data['candidates']:
                                    candidate = data['candidates'][0]
                                    if 'content' in candidate:
                                        content = candidate['content']
                                        if 'parts' in content and content['parts']:
                                            parts = content['parts']
                                            # Concatenate all text parts of this chunk.
                                            text = ""
                                            for part in parts:
                                                if 'text' in part:
                                                    text += part['text']
                                            if text:
                                                yield text

                                    # Any finish reason other than STOP means the
                                    # response was truncated — treat it as an error.
                                    if candidate.get("finishReason") and candidate.get("finishReason") != "STOP":
                                        error_msg = f"模型的响应被截断: {candidate.get('finishReason')}"
                                        extra_log_error = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
                                        log_msg = format_log_message('WARNING', error_msg, extra=extra_log_error)
                                        logger.warning(log_msg)
                                        raise ValueError(error_msg)

                                    # HIGH-probability safety ratings also abort the stream.
                                    if 'safetyRatings' in candidate:
                                        for rating in candidate['safetyRatings']:
                                            if rating['probability'] == 'HIGH':
                                                error_msg = f"模型的响应被截断: {rating['category']}"
                                                extra_log_safety = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
                                                log_msg = format_log_message('WARNING', error_msg, extra=extra_log_safety)
                                                logger.warning(log_msg)
                                                raise ValueError(error_msg)
                            except json.JSONDecodeError:
                                # Incomplete JSON — keep buffering more lines.
                                continue
                            except Exception as e:
                                error_msg = f"流式处理期间发生错误: {str(e)}"
                                extra_log_stream_error = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
                                log_msg = format_log_message('ERROR', error_msg, extra=extra_log_stream_error)
                                logger.error(log_msg)
                                raise e
                    except Exception as e:
                        raise e
                    finally:
                        log_msg = format_log_message('INFO', "流式请求结束", extra=extra_log)
                        logger.info(log_msg)

    def complete_chat(self, request: ChatCompletionRequest, contents, safety_settings, system_instruction):
        """Perform a blocking (non-stream) generateContent call.

        Returns a ResponseWrapper; raises on HTTP or network errors.
        """
        extra_log = {'key': self.api_key[:8], 'request_type': 'non-stream', 'model': request.model, 'status_code': 'N/A'}
        log_msg = format_log_message('INFO', "非流式请求开始", extra=extra_log)
        logger.info(log_msg)

        api_version = "v1alpha" if "think" in request.model else "v1beta"
        url = f"https://generativelanguage.googleapis.com/{api_version}/models/{request.model}:generateContent?key={self.api_key}"
        headers = {
            "Content-Type": "application/json",
        }
        data = {
            "contents": contents,
            "generationConfig": {
                "temperature": request.temperature,
                "maxOutputTokens": request.max_tokens,
            },
            "safetySettings": safety_settings,
        }
        if system_instruction:
            data["system_instruction"] = system_instruction

        try:
            response = requests.post(url, headers=headers, json=data)
            response.raise_for_status()

            log_msg = format_log_message('INFO', "非流式请求成功完成", extra=extra_log)
            logger.info(log_msg)

            return ResponseWrapper(response.json())
        except Exception as e:
            raise

    def convert_messages(self, messages, use_system_prompt=False):
        """Convert OpenAI-style messages into Gemini 'contents' format.

        Returns (gemini_history, system_instruction) on success, or a list of
        error strings if any message was invalid.  Leading 'system' messages
        are folded into the system instruction when use_system_prompt is set;
        consecutive messages with the same role are merged into one turn.
        """
        gemini_history = []
        errors = []
        system_instruction_text = ""
        # While True, leading 'system' messages accumulate into the instruction.
        is_system_phase = use_system_prompt
        for i, message in enumerate(messages):
            role = message.role
            content = message.content

            if isinstance(content, str):
                if is_system_phase and role == 'system':
                    if system_instruction_text:
                        system_instruction_text += "\n" + content
                    else:
                        system_instruction_text = content
                else:
                    # First non-system message ends the system phase for good.
                    is_system_phase = False

                    # Map OpenAI roles to Gemini roles (system→user, assistant→model).
                    if role in ['user', 'system']:
                        role_to_use = 'user'
                    elif role == 'assistant':
                        role_to_use = 'model'
                    else:
                        errors.append(f"Invalid role: {role}")
                        continue

                    # Merge consecutive same-role turns into a single entry.
                    if gemini_history and gemini_history[-1]['role'] == role_to_use:
                        gemini_history[-1]['parts'].append({"text": content})
                    else:
                        gemini_history.append(
                            {"role": role_to_use, "parts": [{"text": content}]})
            elif isinstance(content, list):
                # Multimodal content: a list of text and image_url items.
                parts = []
                for item in content:
                    if item.get('type') == 'text':
                        parts.append({"text": item.get('text')})
                    elif item.get('type') == 'image_url':
                        image_data = item.get('image_url', {}).get('url', '')
                        # Only data URIs are supported; split into mime type + base64 payload.
                        if image_data.startswith('data:image/'):
                            try:
                                mime_type, base64_data = image_data.split(';')[0].split(':')[1], image_data.split(',')[1]
                                parts.append({
                                    "inline_data": {
                                        "mime_type": mime_type,
                                        "data": base64_data
                                    }
                                })
                            except (IndexError, ValueError):
                                errors.append(
                                    f"Invalid data URI for image: {image_data}")
                        else:
                            errors.append(
                                f"Invalid image URL format for item: {item}")

                if parts:
                    if role in ['user', 'system']:
                        role_to_use = 'user'
                    elif role == 'assistant':
                        role_to_use = 'model'
                    else:
                        errors.append(f"Invalid role: {role}")
                        continue
                    if gemini_history and gemini_history[-1]['role'] == role_to_use:
                        gemini_history[-1]['parts'].extend(parts)
                    else:
                        gemini_history.append(
                            {"role": role_to_use, "parts": parts})
        if errors:
            return errors
        else:
            return gemini_history, {"parts": [{"text": system_instruction_text}]}

    @staticmethod
    async def list_available_models(api_key) -> list:
        """Fetch the model list from the v1beta API and append EXTRA_MODELS."""
        url = "https://generativelanguage.googleapis.com/v1beta/models?key={}".format(
            api_key)
        async with httpx.AsyncClient() as client:
            response = await client.get(url)
            response.raise_for_status()
            data = response.json()
            models = [model["name"] for model in data.get("models", [])]
            models.extend(GeminiClient.EXTRA_MODELS)
            return models
app/index.html ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<!-- Status dashboard for the Gemini API proxy (Jinja2 template rendered by main.py). -->
<html>
<head>
    <title>Gemini API 代理服务</title>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            line-height: 1.6;
            background-color: #f8f9fa;
        }
        h1, h2, h3 {
            color: #333;
            text-align: center;
            margin-bottom: 20px;
        }
        .info-box {
            background-color: #fff;
            border: 1px solid #dee2e6;
            border-radius: 8px;
            padding: 20px;
            margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.05);
        }
        .status {
            color: #28a745;
            font-weight: bold;
            font-size: 18px;
            margin-bottom: 20px;
            text-align: center;
        }
        .stats-grid {
            display: grid;
            grid-template-columns: repeat(3, 1fr);
            gap: 15px;
            margin-top: 15px;
            margin-bottom: 20px;
        }
        .stat-card {
            background-color: #e9ecef;
            padding: 15px;
            border-radius: 8px;
            text-align: center;
            box-shadow: 0 2px 4px rgba(0,0,0,0.05);
            transition: transform 0.2s;
        }
        .stat-card:hover {
            transform: translateY(-2px);
            box-shadow: 0 4px 8px rgba(0,0,0,0.1);
        }
        .stat-value {
            font-size: 24px;
            font-weight: bold;
            color: #007bff;
        }
        .stat-label {
            font-size: 14px;
            color: #6c757d;
            margin-top: 5px;
        }
        .section-title {
            color: #495057;
            border-bottom: 1px solid #dee2e6;
            padding-bottom: 10px;
            margin-bottom: 20px;
        }
        .log-container {
            background-color: #f5f5f5;
            border: 1px solid #ddd;
            border-radius: 8px;
            padding: 15px;
            margin-top: 20px;
            max-height: 500px;
            overflow-y: auto;
            font-family: monospace;
            font-size: 14px;
            line-height: 1.5;
        }
        .log-entry {
            margin-bottom: 8px;
            padding: 8px;
            border-radius: 4px;
        }
        .log-entry.INFO {
            background-color: #e8f4f8;
            border-left: 4px solid #17a2b8;
        }
        .log-entry.WARNING {
            background-color: #fff3cd;
            border-left: 4px solid #ffc107;
        }
        .log-entry.ERROR {
            background-color: #f8d7da;
            border-left: 4px solid #dc3545;
        }
        .log-entry.DEBUG {
            background-color: #d1ecf1;
            border-left: 4px solid #17a2b8;
        }
        .log-timestamp {
            color: #6c757d;
            font-size: 12px;
            margin-right: 10px;
        }
        .log-level {
            font-weight: bold;
            margin-right: 10px;
        }
        .log-level.INFO {
            color: #17a2b8;
        }
        .log-level.WARNING {
            color: #ffc107;
        }
        .log-level.ERROR {
            color: #dc3545;
        }
        .log-level.DEBUG {
            color: #17a2b8;
        }
        .log-message {
            color: #212529;
        }
        .refresh-button {
            display: block;
            margin: 20px auto;
            padding: 10px 20px;
            background-color: #007bff;
            color: white;
            border: none;
            border-radius: 4px;
            font-size: 16px;
            cursor: pointer;
            transition: background-color 0.2s;
        }
        .refresh-button:hover {
            background-color: #0069d9;
        }
        .log-filter {
            display: flex;
            justify-content: center;
            margin-bottom: 15px;
            gap: 10px;
        }
        .log-filter button {
            padding: 5px 10px;
            border: 1px solid #ddd;
            border-radius: 4px;
            background-color: #f8f9fa;
            cursor: pointer;
        }
        .log-filter button.active {
            background-color: #007bff;
            color: white;
            border-color: #007bff;
        }
    </style>
</head>
<body>
    <h1>🤖 Gemini API 代理服务</h1>

    <!-- Runtime status: key/model counts and recent call volume. -->
    <div class="info-box">
        <h2 class="section-title">🟢 运行状态</h2>
        <p class="status">服务运行中</p>

        <div class="stats-grid">
            <div class="stat-card">
                <div class="stat-value">{{ key_count }}</div>
                <div class="stat-label">可用API密钥数量</div>
            </div>
            <div class="stat-card">
                <div class="stat-value">{{ model_count }}</div>
                <div class="stat-label">可用模型数量</div>
            </div>
            <div class="stat-card">
                <div class="stat-value">{{ retry_count }}</div>
                <div class="stat-label">最大重试次数</div>
            </div>
        </div>

        <h3 class="section-title">API调用统计</h3>
        <div class="stats-grid">
            <div class="stat-card">
                <div class="stat-value">{{ last_24h_calls }}</div>
                <div class="stat-label">24小时内调用次数</div>
            </div>
            <div class="stat-card">
                <div class="stat-value">{{ hourly_calls }}</div>
                <div class="stat-label">一小时内调用次数</div>
            </div>
            <div class="stat-card">
                <div class="stat-value">{{ minute_calls }}</div>
                <div class="stat-label">一分钟内调用次数</div>
            </div>
        </div>
    </div>

    <!-- Environment/rate-limit configuration. -->
    <div class="info-box">
        <h2 class="section-title">⚙️ 环境配置</h2>
        <div class="stats-grid">
            <div class="stat-card">
                <div class="stat-value">{{ max_requests_per_minute }}</div>
                <div class="stat-label">每分钟请求限制</div>
            </div>
            <div class="stat-card">
                <div class="stat-value">{{ max_requests_per_day_per_ip }}</div>
                <div class="stat-label">每IP每日请求限制</div>
            </div>
            <div class="stat-card">
                <div class="stat-value">{{ current_time }}</div>
                <div class="stat-label">当前服务器时间</div>
            </div>
        </div>
    </div>

    <!-- Version display with an update banner when a newer release exists. -->
    <div class="info-box">
        <h2 class="section-title">📦 版本信息</h2>
        <div class="version-info" style="text-align: center; margin-bottom: 15px;">
            <div style="font-size: 18px; margin-bottom: 10px;">
                当前版本: <span style="font-weight: bold; color: #007bff;">{{ local_version }}</span>
            </div>
            {% if has_update %}
            <div style="display: flex; align-items: center; justify-content: center; margin-top: 15px;">
                <div style="background-color: #fef6e0; border: 1px solid #ffeeba; border-radius: 4px; padding: 10px 15px; display: inline-flex; align-items: center;">
                    <span style="color: #ff9800; margin-right: 10px;">
                        <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
                            <circle cx="12" cy="12" r="10"></circle>
                            <line x1="12" y1="8" x2="12" y2="12"></line>
                            <line x1="12" y1="16" x2="12.01" y2="16"></line>
                        </svg>
                    </span>
                    <span>
                        <strong>发现新版本!</strong> 最新版本: <span style="font-weight: bold; color: #28a745;">{{ remote_version }}</span>
                    </span>
                </div>
            </div>
            {% endif %}
        </div>
    </div>

    <!-- In-memory log viewer with client-side level filtering. -->
    <div class="info-box">
        <h2 class="section-title"> 系统日志</h2>
        <div class="log-filter">
            <button class="active" data-level="ALL">全部</button>
            <button data-level="INFO">信息</button>
            <button data-level="WARNING">警告</button>
            <button data-level="ERROR">错误</button>
        </div>
        <div class="log-container">
            {% for log in logs %}
            <div class="log-entry {{ log.level }}" data-level="{{ log.level }}">
                <span class="log-timestamp">{{ log.timestamp }}</span>
                <span class="log-level {{ log.level }}">{{ log.level }}</span>
                <span class="log-message">
                    {% if log.key != 'N/A' %}[{{ log.key }}]{% endif %}
                    {% if log.request_type != 'N/A' %}{{ log.request_type }}{% endif %}
                    {% if log.model != 'N/A' %}[{{ log.model }}]{% endif %}
                    {% if log.status_code != 'N/A' %}{{ log.status_code }}{% endif %}
                    : {{ log.message }}
                    {% if log.error_message %}
                    - {{ log.error_message }}
                    {% endif %}
                </span>
            </div>
            {% endfor %}
        </div>
        <button class="refresh-button" onclick="window.location.reload()">刷新日志</button>
    </div>

    <script>
        // Log filtering: show only entries matching the selected level.
        document.querySelectorAll('.log-filter button').forEach(button => {
            button.addEventListener('click', function() {
                // Clear the active state on every filter button.
                document.querySelectorAll('.log-filter button').forEach(btn => {
                    btn.classList.remove('active');
                });

                // Mark the clicked button as active.
                this.classList.add('active');

                const level = this.getAttribute('data-level');

                // Show or hide each log entry by its data-level attribute.
                document.querySelectorAll('.log-entry').forEach(entry => {
                    if (level === 'ALL' || entry.getAttribute('data-level') === level) {
                        entry.style.display = 'block';
                    } else {
                        entry.style.display = 'none';
                    }
                });
            });
        });

        // Auto-scroll to the newest log entry on page load.
        window.onload = function() {
            const logContainer = document.querySelector('.log-container');
            logContainer.scrollTop = logContainer.scrollHeight;
        };
    </script>
</body>
</html>
app/main.py ADDED
@@ -0,0 +1,1506 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, Request, Depends, status
2
+ from fastapi.responses import JSONResponse, StreamingResponse, HTMLResponse
3
+ from fastapi.staticfiles import StaticFiles
4
+ from fastapi.templating import Jinja2Templates
5
+ from .models import ChatCompletionRequest, ChatCompletionResponse, ErrorResponse, ModelList
6
+ from .gemini import GeminiClient, ResponseWrapper
7
+ from .utils import handle_gemini_error, protect_from_abuse, APIKeyManager, test_api_key, format_log_message, log_manager
8
+ import os
9
+ import json
10
+ import asyncio
11
+ from typing import Literal, Dict, Any, Optional
12
+ import random
13
+ import requests
14
+ from datetime import datetime, timedelta
15
+ from apscheduler.schedulers.background import BackgroundScheduler
16
+ import sys
17
+ import logging
18
+ from collections import defaultdict
19
+ import pathlib
20
+ import hashlib
21
+ import time
22
# Whether fake streaming is enabled (keep-alive newlines instead of real SSE); on by default.
FAKE_STREAMING = os.environ.get("FAKE_STREAMING", "true").lower() in ["true", "1", "yes"]
# Interval (seconds) between empty keep-alive chunks in fake-streaming mode.
FAKE_STREAMING_INTERVAL = float(os.environ.get("FAKE_STREAMING_INTERVAL", "1"))
# Silence uvicorn's own loggers; this app does its own structured request logging.
logging.getLogger("uvicorn").disabled = True
logging.getLogger("uvicorn.access").disabled = True

# Configure the application logger.
logger = logging.getLogger("my_logger")
logger.setLevel(logging.DEBUG)

# Template directory is the app package directory itself (index.html lives there).
BASE_DIR = pathlib.Path(__file__).parent
templates = Jinja2Templates(directory=str(BASE_DIR))

app = FastAPI()
37
+
38
+ # --------------- 缓存管理类 ---------------
39
+
40
class ResponseCacheManager:
    """Manage cached API responses with per-entry expiry and a max-entry cap."""

    def __init__(self, expiry_time: int, max_entries: int, remove_after_use: bool = True,
                 cache_dict: Dict[str, Dict[str, Any]] = None):
        # Use the caller-supplied dict (a shared module-level store) or a new one.
        self.cache = cache_dict if cache_dict is not None else {}
        self.expiry_time = expiry_time        # TTL in seconds for new entries
        self.max_entries = max_entries        # cap enforced by clean_if_needed()
        self.remove_after_use = remove_after_use  # policy flag; deletion is done by callers

    def get(self, cache_key: str):
        """Return (response, True) if cached and unexpired, else (None, False)."""
        now = time.time()
        if cache_key in self.cache and now < self.cache[cache_key].get('expiry_time', 0):
            cached_item = self.cache[cache_key]

            # Read the response without deleting it here; removal (when
            # remove_after_use is set) is the caller's responsibility.
            response = cached_item['response']

            # Cache hit.
            return response, True

        return None, False

    def store(self, cache_key: str, response, client_ip: str = None):
        """Store a response in the cache, stamping expiry and creation time."""
        now = time.time()
        self.cache[cache_key] = {
            'response': response,
            'expiry_time': now + self.expiry_time,
            'created_at': now,
            'client_ip': client_ip
        }

        log('info', f"响应已缓存: {cache_key[:8]}...",
            extra={'cache_operation': 'store', 'request_type': 'non-stream'})

        # Evict oldest entries if the cache grew past its cap.
        self.clean_if_needed()

    def clean_expired(self):
        """Delete every cache entry whose expiry time has passed."""
        now = time.time()
        expired_keys = [k for k, v in self.cache.items() if now > v.get('expiry_time', 0)]

        for key in expired_keys:
            del self.cache[key]
            log('info', f"清理过期缓存: {key[:8]}...", extra={'cache_operation': 'clean'})

    def clean_if_needed(self):
        """Evict the oldest entries when the cache exceeds max_entries."""
        if len(self.cache) <= self.max_entries:
            return

        # Order keys by creation time (oldest first).
        sorted_keys = sorted(self.cache.keys(),
                             key=lambda k: self.cache[k].get('created_at', 0))

        # Number of entries that must go.
        to_remove = len(self.cache) - self.max_entries

        # Drop the oldest entries.
        for key in sorted_keys[:to_remove]:
            del self.cache[key]
            log('info', f"缓存容量限制,删除旧缓存: {key[:8]}...", extra={'cache_operation': 'limit'})
105
+
106
+ # --------------- 活跃请求管理类 ---------------
107
+
108
class ActiveRequestsManager:
    """Track in-flight API request tasks so they can be joined, deduped or cancelled."""

    def __init__(self, requests_pool: Dict[str, asyncio.Task] = None):
        # Active tasks keyed by request identity; pool may be shared by the caller.
        self.active_requests = requests_pool if requests_pool is not None else {}

    def add(self, key: str, task: asyncio.Task):
        """Register a new in-flight task (stamps it with a creation time)."""
        task.creation_time = time.time()  # used by clean_long_running()
        self.active_requests[key] = task

    def get(self, key: str):
        """Return the task registered under key, or None."""
        return self.active_requests.get(key)

    def remove(self, key: str):
        """Remove a task by key; returns True if it was present."""
        if key in self.active_requests:
            del self.active_requests[key]
            return True
        return False

    def remove_by_prefix(self, prefix: str):
        """Remove all tasks whose key starts with prefix; returns the count removed."""
        keys_to_remove = [k for k in self.active_requests.keys() if k.startswith(prefix)]
        for key in keys_to_remove:
            self.remove(key)
        return len(keys_to_remove)

    def clean_completed(self):
        """Drop every task that has finished or been cancelled."""
        keys_to_remove = []

        for key, task in self.active_requests.items():
            if task.done() or task.cancelled():
                keys_to_remove.append(key)

        for key in keys_to_remove:
            self.remove(key)

        # if keys_to_remove:
        #     log('info', f"清理已完成请求任务: {len(keys_to_remove)}个", cleanup='active_requests')

    def clean_long_running(self, max_age_seconds: int = 300):
        """Cancel tasks that have been running longer than max_age_seconds."""
        now = time.time()
        long_running_keys = []

        for key, task in list(self.active_requests.items()):
            if (hasattr(task, 'creation_time') and
                task.creation_time < now - max_age_seconds and
                not task.done() and not task.cancelled()):

                long_running_keys.append(key)
                task.cancel()  # cancel the overdue task

        if long_running_keys:
            log('warning', f"取消长时间运行的任务: {len(long_running_keys)}个", cleanup='long_running_tasks')
166
+
167
+
168
+
169
+ # --------------- 全局实例 ---------------
170
+
171
# Dashboard/API password; surrounding quotes are stripped in case the env var
# was set with them included.
PASSWORD = os.environ.get("PASSWORD", "123").strip('"')
MAX_REQUESTS_PER_MINUTE = int(os.environ.get("MAX_REQUESTS_PER_MINUTE", "30"))
MAX_REQUESTS_PER_DAY_PER_IP = int(
    os.environ.get("MAX_REQUESTS_PER_DAY_PER_IP", "600"))
# MAX_RETRIES = int(os.environ.get('MaxRetries', '3').strip() or '3')
# Retry backoff bounds in seconds.  (Fix: the original assigned
# MAX_RETRY_DELAY = 16 twice on consecutive lines.)
RETRY_DELAY = 1
MAX_RETRY_DELAY = 16
179
+ safety_settings = [
180
+ {
181
+ "category": "HARM_CATEGORY_HARASSMENT",
182
+ "threshold": "BLOCK_NONE"
183
+ },
184
+ {
185
+ "category": "HARM_CATEGORY_HATE_SPEECH",
186
+ "threshold": "BLOCK_NONE"
187
+ },
188
+ {
189
+ "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
190
+ "threshold": "BLOCK_NONE"
191
+ },
192
+ {
193
+ "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
194
+ "threshold": "BLOCK_NONE"
195
+ },
196
+ {
197
+ "category": 'HARM_CATEGORY_CIVIC_INTEGRITY',
198
+ "threshold": 'BLOCK_NONE'
199
+ }
200
+ ]
201
+ safety_settings_g2 = [
202
+ {
203
+ "category": "HARM_CATEGORY_HARASSMENT",
204
+ "threshold": "OFF"
205
+ },
206
+ {
207
+ "category": "HARM_CATEGORY_HATE_SPEECH",
208
+ "threshold": "OFF"
209
+ },
210
+ {
211
+ "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
212
+ "threshold": "OFF"
213
+ },
214
+ {
215
+ "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
216
+ "threshold": "OFF"
217
+ },
218
+ {
219
+ "category": 'HARM_CATEGORY_CIVIC_INTEGRITY',
220
+ "threshold": 'OFF'
221
+ }
222
+ ]
223
+
224
key_manager = APIKeyManager()  # instantiate APIKeyManager; its key stack is built in __init__
current_api_key = key_manager.get_available_key()

# Response-cache configuration (overridable via environment variables).
CACHE_EXPIRY_TIME = int(os.environ.get("CACHE_EXPIRY_TIME", "1200"))  # default: 20 minutes
MAX_CACHE_ENTRIES = int(os.environ.get("MAX_CACHE_ENTRIES", "500"))  # default: at most 500 cached responses
REMOVE_CACHE_AFTER_USE = os.environ.get("REMOVE_CACHE_AFTER_USE", "true").lower() in ["true", "1", "yes"]

# Global cache dict that serves as the cache manager's internal storage.
# NOTE: all cache operations must go through response_cache_manager;
# never manipulate this dict directly.
response_cache: Dict[str, Dict[str, Any]] = {}

# Cache manager wrapping the dict above.
response_cache_manager = ResponseCacheManager(
    expiry_time=CACHE_EXPIRY_TIME,
    max_entries=MAX_CACHE_ENTRIES,
    remove_after_use=REMOVE_CACHE_AFTER_USE,
    cache_dict=response_cache  # share the same dict instance so both views stay consistent
)

# Active-request pool that serves as the request manager's internal storage.
# NOTE: all operations must go through active_requests_manager;
# never manipulate this dict directly.
active_requests_pool: Dict[str, asyncio.Task] = {}

# Manager that tracks in-flight request tasks (deduplication and cleanup).
active_requests_manager = ActiveRequestsManager(requests_pool=active_requests_pool)

# API call counters, bucketed by timestamp strings (see clean_expired_stats).
api_call_stats = {
    'last_24h': defaultdict(int),  # per-hour counts covering the last 24 hours
    'hourly': defaultdict(int),  # per-hour counts covering the last hour
    'minute': defaultdict(int),  # per-minute counts covering the last minute
}
257
+
258
# Purge expired entries from the API-call statistics buckets.
def clean_expired_stats():
    """Remove stale entries from every ``api_call_stats`` bucket.

    The three buckets share identical purge logic and differed only in key
    format and retention window, so the loop is factored into
    ``_purge_stats_bucket``. Malformed keys are deleted outright, matching
    the original behavior.
    """
    now = datetime.now()
    # 24-hour bucket: keys formatted '%Y-%m-%d %H:00', kept for 24 hours.
    _purge_stats_bucket(api_call_stats['last_24h'], '%Y-%m-%d %H:00', now - timedelta(hours=24))
    # Hourly bucket: same key format, kept for one hour.
    _purge_stats_bucket(api_call_stats['hourly'], '%Y-%m-%d %H:00', now - timedelta(hours=1))
    # Minute bucket: per-minute keys, kept for one minute.
    _purge_stats_bucket(api_call_stats['minute'], '%Y-%m-%d %H:%M', now - timedelta(minutes=1))


def _purge_stats_bucket(bucket, time_format, cutoff):
    """Delete every entry of ``bucket`` whose timestamp key is older than
    ``cutoff``; keys that fail to parse with ``time_format`` are deleted too."""
    # Iterate over a snapshot of the keys: deleting while iterating is illegal.
    for key in list(bucket.keys()):
        try:
            if datetime.strptime(key, time_format) < cutoff:
                del bucket[key]
        except ValueError:
            # Malformed key — drop it, as the original per-bucket loops did.
            del bucket[key]
293
+
294
# Record one API call in every aggregation bucket.
def update_api_call_stats():
    """Increment the per-hour and per-minute API-call counters and log totals."""
    now = datetime.now()
    hour_bucket = now.strftime('%Y-%m-%d %H:00')
    minute_bucket = now.strftime('%Y-%m-%d %H:%M')

    # Purge stale entries first so the totals logged below stay accurate.
    clean_expired_stats()

    for stats_name, bucket_key in (('last_24h', hour_bucket),
                                   ('hourly', hour_bucket),
                                   ('minute', minute_bucket)):
        api_call_stats[stats_name][bucket_key] += 1

    total_24h = sum(api_call_stats['last_24h'].values())
    total_hour = sum(api_call_stats['hourly'].values())
    total_minute = sum(api_call_stats['minute'].values())
    log('info', "API调用统计已更新: 24小时=%s, 1小时=%s, 1分钟=%s" % (total_24h, total_hour, total_minute))
309
+
310
+
311
def switch_api_key():
    """Rotate the module-level ``current_api_key`` to the next available key.

    Logs an error (without raising) when the key manager has exhausted
    every configured key.
    """
    global current_api_key
    next_key = key_manager.get_available_key()  # stack rotation handled inside the manager
    if not next_key:
        log('error', "API key 替换失败,所有API key都已尝试,请重新配置或稍后重试", extra={'key': 'N/A', 'request_type': 'switch_key', 'status_code': 'N/A'})
        return
    current_api_key = next_key
    log('info', f"API key 替换为 → {current_api_key[:8]}...", extra={'key': current_api_key[:8], 'request_type': 'switch_key'})
319
+
320
+
321
async def check_keys():
    """Probe every configured API key and return the list of valid ones.

    Each key is tested against the upstream API; an error is logged when
    no key at all survives validation.
    """
    valid_keys = []
    for candidate in key_manager.api_keys:
        ok = await test_api_key(candidate)
        log('info', f"API Key {candidate[:10]}... {'有效' if ok else '无效'}.")
        if ok:
            valid_keys.append(candidate)
    if not valid_keys:
        log('error', "没有可用的 API 密钥!", extra={'key': 'N/A', 'request_type': 'startup', 'status_code': 'N/A'})
    return valid_keys
332
+
333
+
334
# Version state (module-level; mutated by check_version()).
local_version = "0.0.0"  # version parsed from the local version.txt
remote_version = "0.0.0"  # latest version published in the upstream repository
has_update = False  # set True once remote_version is found to be newer
338
+
339
# Compare the local version.txt against the upstream repository.
async def check_version():
    """Refresh ``local_version``/``remote_version`` and flag updates.

    Reads version.txt (expected format ``key=X.Y.Z``), fetches the upstream
    copy from GitHub, and sets ``has_update`` when the remote version is
    strictly newer. Any failure is logged and swallowed — version checking
    must never break the service.
    """
    global local_version, remote_version, has_update
    try:
        # Read the local version.
        with open("version.txt", "r") as f:
            version_line = f.read().strip()
        local_version = version_line.split("=")[1] if "=" in version_line else "0.0.0"

        # Fetch the remote version.
        github_url = "https://raw.githubusercontent.com/wyeeeee/hajimi/refs/heads/main/version.txt"
        response = requests.get(github_url, timeout=5)
        if response.status_code == 200:
            version_line = response.text.strip()
            remote_version = version_line.split("=")[1] if "=" in version_line else "0.0.0"

            # Pad both part-lists to equal length, then rely on Python's
            # lexicographic list comparison — it is exactly the element-wise
            # compare the previous hand-written loop performed.
            local_parts = [int(x) for x in local_version.split(".")]
            remote_parts = [int(x) for x in remote_version.split(".")]
            width = max(len(local_parts), len(remote_parts))
            local_parts += [0] * (width - len(local_parts))
            remote_parts += [0] * (width - len(remote_parts))

            if remote_parts > local_parts:
                # Only ever set to True (matching the original's sticky flag).
                has_update = True

            log('info', f"版本检查: 本地版本 {local_version}, 远程版本 {remote_version}, 有更新: {has_update}")
        else:
            log('warning', f"无法获取远程版本信息,HTTP状态码: {response.status_code}")
    except Exception as e:
        log('error', f"版本检查失败: {str(e)}")
378
+
379
+
380
+ # --------------- 工具函数 ---------------
381
+
382
def log(level: str, message: str, **extra):
    """Unified logging helper: format the message and dispatch it at ``level``."""
    formatted = format_log_message(level.upper(), message, extra=extra)
    emit = getattr(logger, level.lower())  # e.g. logger.info / logger.error
    emit(formatted)
386
+
387
def translate_error(message: str) -> str:
    """Map well-known upstream error phrases to Chinese descriptions.

    Matching is case-insensitive and checked in a fixed order; unknown
    messages are returned unchanged.
    """
    lowered = message.lower()
    translations = (
        ("quota exceeded", "API 密钥配额已用尽"),
        ("invalid argument", "无效参数"),
        ("internal server error", "服务器内部错误"),
        ("service unavailable", "服务不可用"),
    )
    for needle, translated in translations:
        if needle in lowered:
            return translated
    return message
397
+
398
def create_chat_response(model: str, choices: list, id: str = None) -> ChatCompletionResponse:
    """Factory for a standard chat-completion response envelope.

    A millisecond-based id is generated when the caller does not supply one.
    """
    response_id = id if id else f"chatcmpl-{int(time.time()*1000)}"
    return ChatCompletionResponse(
        id=response_id,
        object="chat.completion",
        created=int(time.time()),
        model=model,
        choices=choices,
    )
407
+
408
def create_error_response(model: str, error_message: str) -> ChatCompletionResponse:
    """Factory for an error wrapped as an assistant message with
    ``finish_reason='error'``."""
    error_choice = {
        "index": 0,
        "message": {
            "role": "assistant",
            "content": error_message,
        },
        "finish_reason": "error",
    }
    return create_chat_response(model=model, choices=[error_choice])
421
+
422
def handle_exception(exc_type, exc_value, exc_traceback):
    """Global fallback for uncaught exceptions: log them in translated form.

    KeyboardInterrupt is delegated to the interpreter's ORIGINAL hook.
    BUG FIX: the previous code called ``sys.excepthook`` here — but this very
    function is assigned to ``sys.excepthook`` below, so a Ctrl-C would
    recurse infinitely. ``sys.__excepthook__`` is the untouched default.
    """
    if issubclass(exc_type, KeyboardInterrupt):
        sys.__excepthook__(exc_type, exc_value, exc_traceback)
        return
    error_message = translate_error(str(exc_value))
    log('error', f"未捕获的异常: {error_message}", status_code=500, error_message=error_message)

sys.excepthook = handle_exception
430
+
431
@app.on_event("startup")
async def startup_event():
    """One-time initialisation before the app serves traffic: start cleanup
    jobs, check for updates, validate the configured keys and discover the
    upstream model list."""
    log('info', "Starting Gemini API proxy...")

    # Start the periodic cache / task-pool / stats cleanup jobs.
    schedule_cache_cleanup()

    # Compare the local version against the upstream repository.
    await check_version()

    # Keep only the keys that pass validation.
    available_keys = await check_keys()
    if available_keys:
        key_manager.api_keys = available_keys
        key_manager._reset_key_stack()  # rebuild the randomized key stack at startup as well
        key_manager.show_all_keys()
        log('info', f"可用 API 密钥数量:{len(key_manager.api_keys)}")
        # The retry budget elsewhere equals the number of usable keys.
        log('info', f"最大重试次数设置为:{len(key_manager.api_keys)}")
        if key_manager.api_keys:
            # Fetch the upstream model catalogue with the first valid key and
            # strip the "models/" prefix to expose OpenAI-style model ids.
            all_models = await GeminiClient.list_available_models(key_manager.api_keys[0])
            GeminiClient.AVAILABLE_MODELS = [model.replace(
                "models/", "") for model in all_models]
            log('info', "Available models loaded.")
454
+
455
@app.get("/v1/models", response_model=ModelList)
def list_models():
    """Expose the cached model list in OpenAI-compatible format."""
    log('info', "Received request to list models", extra={'request_type': 'list_models', 'status_code': 200})
    model_entries = [
        {"id": name, "object": "model", "created": 1678888888, "owned_by": "organization-owner"}
        for name in GeminiClient.AVAILABLE_MODELS
    ]
    return ModelList(data=model_entries)
459
+
460
+
461
async def verify_password(request: Request):
    """FastAPI dependency enforcing bearer-token auth when PASSWORD is set.

    Raises HTTP 401 when the Authorization header is missing, malformed,
    or carries the wrong token. A falsy PASSWORD disables the check.
    """
    if not PASSWORD:
        return
    auth_header = request.headers.get("Authorization")
    if auth_header is None or not auth_header.startswith("Bearer "):
        raise HTTPException(
            status_code=401, detail="Unauthorized: Missing or invalid token")
    token = auth_header.split(" ")[1]
    if token != PASSWORD:
        raise HTTPException(
            status_code=401, detail="Unauthorized: Invalid token")
471
+
472
+
473
@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(request: ChatCompletionRequest, http_request: Request, _: None = Depends(verify_password)):
    """OpenAI-compatible chat endpoint with response caching and request
    deduplication: identical non-stream requests share one upstream call.
    Statement order here matters — do not reorder cache/pool operations."""
    # Client IP (used downstream for per-IP rate limiting).
    client_ip = http_request.client.host if http_request.client else "unknown"

    # Streaming requests bypass the cache entirely.
    if request.stream:
        return await process_request(request, http_request, "stream")

    # Full cache key for exact-match lookup.
    cache_key = generate_cache_key(request)

    log('info', f"请求缓存键: {cache_key[:8]}...",
        extra={'cache_key': cache_key[:8], 'request_type': 'non-stream'})

    # Exact-match cache lookup (the manager also checks expiry).
    cached_response, cache_hit = response_cache_manager.get(cache_key)
    if cache_hit:
        log('info', f"精确缓存命中: {cache_key[:8]}...",
            extra={'cache_operation': 'hit', 'request_type': 'non-stream'})

        # Also drop any in-flight task for this key — later requests should
        # not wait on work that is no longer needed.
        active_requests_manager.remove_by_prefix(f"cache:{cache_key}")

        # Single-use cache: delete the entry after serving it.
        if cache_key in response_cache_manager.cache:
            del response_cache_manager.cache[cache_key]
            log('info', f"缓存使用后已删除: {cache_key[:8]}...",
                extra={'cache_operation': 'used-and-removed', 'request_type': 'non-stream'})

        return cached_response

    # Pool key tying the in-flight task to this cache key.
    pool_key = f"cache:{cache_key}"

    # If an identical request is already in flight, piggyback on its task.
    active_task = active_requests_manager.get(pool_key)
    if active_task and not active_task.done():
        log('info', f"发现相同请求的进行中任务",
            extra={'request_type': 'non-stream', 'model': request.model})

        try:
            # Bounded wait so a stuck task cannot hang this request forever.
            await asyncio.wait_for(active_task, timeout=180)

            # Fetch the result that the finished task should have cached.
            cached_response, cache_hit = response_cache_manager.get(cache_key)
            if cache_hit:
                # Single-use semantics again: consume and delete.
                if cache_key in response_cache_manager.cache:
                    del response_cache_manager.cache[cache_key]
                    log('info', f"使用已完成任务的缓存后删除: {cache_key[:8]}...",
                        extra={'cache_operation': 'used-and-removed', 'request_type': 'non-stream'})

                return cached_response

            # Cache already consumed by someone else — fall back to the task's
            # raw result, wrapped in a FRESH response object so concurrent
            # callers never share (and mutate) the same instance.
            if active_task.done() and not active_task.cancelled():
                result = active_task.result()
                if result:
                    new_response = ChatCompletionResponse(
                        id=f"chatcmpl-{int(time.time()*1000)}",
                        object="chat.completion",
                        created=int(time.time()),
                        model=result.model,
                        choices=result.choices
                    )

                    # Deliberately NOT cached: this result corresponds to a
                    # cache entry that was already consumed.
                    return new_response
        except (asyncio.TimeoutError, asyncio.CancelledError) as e:
            # Waiting failed — log, tidy the pool, then fall through and
            # issue our own upstream request below.
            error_type = "超时" if isinstance(e, asyncio.TimeoutError) else "被取消"
            log('warning', f"等待已有任务{error_type}: {pool_key}",
                extra={'request_type': 'non-stream', 'model': request.model})

            if active_task.done() or active_task.cancelled():
                active_requests_manager.remove(pool_key)
                log('info', f"已从活跃请求池移除{error_type}任务: {pool_key}",
                    extra={'request_type': 'non-stream'})

    # No usable cache or task: start our own processing task and register it
    # so concurrent identical requests can await it.
    process_task = asyncio.create_task(
        process_request(request, http_request, "non-stream", cache_key=cache_key, client_ip=client_ip)
    )

    active_requests_manager.add(pool_key, process_task)

    try:
        response = await process_task
        return response
    except Exception as e:
        # Task failed — deregister it so a retry is not blocked by a dead entry.
        active_requests_manager.remove(pool_key)

        # Another concurrent task may still have produced a cached result.
        cached_response, cache_hit = response_cache_manager.get(cache_key)
        if cache_hit:
            log('info', f"任务失败但找到缓存,使用缓存结果: {cache_key[:8]}...",
                extra={'request_type': 'non-stream', 'model': request.model})
            return cached_response

        # Nothing to salvage — surface the original failure.
        raise
589
+
590
async def process_request(chat_request: ChatCompletionRequest, http_request: Request, request_type: Literal['stream', 'non-stream'], cache_key: str = None, client_ip: str = None):
    """Core dispatcher: validates the request, then walks the key pool.

    Two nested retry loops: the outer loop tries each available API key once;
    the inner loop retries the SAME key up to 3 times for transient server
    errors (as decided by handle_api_error). The break/continue flow is
    deliberate — do not restructure casually.
    """
    global current_api_key

    # Pre-flight checks: rate limiting, then model validation.
    protect_from_abuse(
        http_request, MAX_REQUESTS_PER_MINUTE, MAX_REQUESTS_PER_DAY_PER_IP)
    if chat_request.model not in GeminiClient.AVAILABLE_MODELS:
        error_msg = "无效的模型"
        extra_log = {'request_type': request_type, 'model': chat_request.model, 'status_code': 400, 'error_message': error_msg}
        log('error', error_msg, extra=extra_log)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail=error_msg)

    # Forget which keys any previous request already tried.
    key_manager.reset_tried_keys_for_request()

    # Convert OpenAI-style messages into Gemini format.
    # NOTE(review): convert_messages is invoked with the class itself passed as
    # the first argument — looks intentional (unbound-style call), but confirm.
    contents, system_instruction = GeminiClient.convert_messages(
        GeminiClient, chat_request.messages)

    # Retry budget: one attempt per configured API key.
    retry_attempts = len(key_manager.api_keys) if key_manager.api_keys else 1

    # Outer loop: one pass per API key.
    for attempt in range(1, retry_attempts + 1):
        current_api_key = key_manager.get_available_key()

        # Key pool exhausted mid-request.
        if current_api_key is None:
            log('warning', "没有可用的 API 密钥,跳过本次尝试",
                extra={'request_type': request_type, 'model': chat_request.model, 'status_code': 'N/A'})
            break

        log('info', f"第 {attempt}/{retry_attempts} 次尝试 ... 使用密钥: {current_api_key[:8]}...",
            extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})

        # Inner loop: retry the same key for transient server errors.
        server_error_retries = 3
        for server_retry in range(1, server_error_retries + 1):
            try:
                # Dispatch by request type.
                if chat_request.stream:
                    try:
                        return await process_stream_request(
                            chat_request,
                            http_request,
                            contents,
                            system_instruction,
                            current_api_key
                        )
                    except Exception as e:
                        # Do not answer the client yet: log the failure and
                        # re-raise so the outer handler can retry or switch keys.
                        error_detail = handle_gemini_error(e, current_api_key, key_manager)
                        log('error', f"流式请求失败: {error_detail}",
                            extra={'key': current_api_key[:8], 'request_type': 'stream', 'model': chat_request.model})
                        raise
                else:
                    return await process_nonstream_request(
                        chat_request,
                        http_request,
                        request_type,
                        contents,
                        system_instruction,
                        current_api_key,
                        cache_key,
                        client_ip
                    )
            except HTTPException as e:
                if e.status_code == status.HTTP_408_REQUEST_TIMEOUT:
                    # Client disconnected — no point retrying; propagate.
                    log('error', "客户端连接中断",
                        extra={'key': current_api_key[:8], 'request_type': request_type,
                               'model': chat_request.model, 'status_code': 408})
                    raise
                else:
                    raise
            except Exception as e:
                # Centralized error triage; also performs any retry backoff wait.
                error_result = await handle_api_error(
                    e,
                    current_api_key,
                    key_manager,
                    request_type,
                    chat_request.model,
                    server_retry - 1
                )

                # Drop any cached entry poisoned by this error.
                if error_result.get('remove_cache', False) and cache_key and cache_key in response_cache_manager.cache:
                    log('info', f"因API错误,删除缓存: {cache_key[:8]}...",
                        extra={'cache_operation': 'remove-on-error', 'request_type': request_type})
                    del response_cache_manager.cache[cache_key]

                if error_result.get('should_retry', False):
                    # Transient server error: retry the same key (backoff
                    # already happened inside handle_api_error).
                    continue
                elif error_result.get('should_switch_key', False) and attempt < retry_attempts:
                    # Leave the inner loop to move on to the next key.
                    log('info', f"API密钥 {current_api_key[:8]}... 失败,准备尝试下一个密钥",
                        extra={'key': current_api_key[:8], 'request_type': request_type})
                    break
                else:
                    # Unrecoverable, or retry budget spent.
                    break

    # Every key failed.
    msg = "所有API密钥均请求失败,请稍后重试"
    log('error', "API key 替换失败,所有API key都已尝试,请重新配置或稍后重试", extra={'key': 'N/A', 'request_type': 'switch_key', 'status_code': 'N/A'})

    # Streaming clients cannot receive an HTTP error after headers are sent,
    # so emit the failure as an SSE event stream instead.
    if chat_request.stream:
        async def error_generator():
            error_json = json.dumps({'error': {'message': msg, 'type': 'api_error'}})
            yield f"data: {error_json}\n\n"
            yield "data: [DONE]\n\n"

        return StreamingResponse(error_generator(), media_type="text/event-stream")
    else:
        # Non-stream clients get a standard HTTP 500.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=msg)
715
+
716
+
717
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Last-resort FastAPI handler: log the failure, answer with a JSON 500."""
    error_message = translate_error(str(exc))
    log('error', f"Unhandled exception: {error_message}",
        extra={'status_code': 500, 'error_message': error_message})
    payload = ErrorResponse(message=str(exc), type="internal_error").dict()
    return JSONResponse(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, content=payload)
723
+
724
+
725
@app.get("/", response_class=HTMLResponse)
async def root(request: Request):
    """Render the status dashboard (call stats, cache, task pool, version)."""
    # Clean stale data first so the figures below reflect the current state.
    clean_expired_stats()
    response_cache_manager.clean_expired()  # manager-mediated cache cleanup
    active_requests_manager.clean_completed()  # manager-mediated task cleanup
    await check_version()
    # Snapshot of "now" used for all window computations below.
    now = datetime.now()

    # Total calls over the last 24 hours.
    last_24h_calls = sum(api_call_stats['last_24h'].values())

    # Total calls within the last hour (bucket keys that fail to parse are skipped).
    one_hour_ago = now - timedelta(hours=1)
    hourly_calls = 0
    for hour_key, count in api_call_stats['hourly'].items():
        try:
            hour_time = datetime.strptime(hour_key, '%Y-%m-%d %H:00')
            if hour_time >= one_hour_ago:
                hourly_calls += count
        except ValueError:
            continue

    # Total calls within the last minute.
    one_minute_ago = now - timedelta(minutes=1)
    minute_calls = 0
    for minute_key, count in api_call_stats['minute'].items():
        try:
            minute_time = datetime.strptime(minute_key, '%Y-%m-%d %H:%M')
            if minute_time >= one_minute_ago:
                minute_calls += count
        except ValueError:
            continue

    # Most recent log lines for the dashboard.
    recent_logs = log_manager.get_recent_logs(50)

    # Cache statistics: total entries vs. still-valid (unexpired) entries.
    total_cache = len(response_cache_manager.cache)
    valid_cache = sum(1 for _, data in response_cache_manager.cache.items()
                      if time.time() < data.get('expiry_time', 0))
    cache_by_model = {}

    # Break valid cache entries down per model.
    for _, cache_data in response_cache_manager.cache.items():
        if time.time() < cache_data.get('expiry_time', 0):
            # NOTE(review): `.model` on the `{}` default would raise
            # AttributeError — this assumes every entry has a 'response'
            # object; worth confirming against the cache writer.
            model = cache_data.get('response', {}).model
            if model:
                if model in cache_by_model:
                    cache_by_model[model] += 1
                else:
                    cache_by_model[model] = 1

    # Reconnect-detection history size.
    history_count = len(client_request_history)

    # Active-request pool statistics.
    active_count = len(active_requests_manager.active_requests)
    active_done = sum(1 for task in active_requests_manager.active_requests.values() if task.done())
    active_pending = active_count - active_done

    # Template context for the dashboard page.
    context = {
        "key_count": len(key_manager.api_keys),
        "model_count": len(GeminiClient.AVAILABLE_MODELS),
        "retry_count": len(key_manager.api_keys),
        "last_24h_calls": last_24h_calls,
        "hourly_calls": hourly_calls,
        "minute_calls": minute_calls,
        "max_requests_per_minute": MAX_REQUESTS_PER_MINUTE,
        "max_requests_per_day_per_ip": MAX_REQUESTS_PER_DAY_PER_IP,
        "current_time": datetime.now().strftime('%H:%M:%S'),
        "logs": recent_logs,
        # Version information.
        "local_version": local_version,
        "remote_version": remote_version,
        "has_update": has_update,
        # Cache information.
        "cache_entries": total_cache,
        "valid_cache": valid_cache,
        "expired_cache": total_cache - valid_cache,
        "cache_expiry_time": CACHE_EXPIRY_TIME,
        "max_cache_entries": MAX_CACHE_ENTRIES,
        "cache_by_model": cache_by_model,
        "request_history_count": history_count,
        "enable_reconnect_detection": ENABLE_RECONNECT_DETECTION,
        "remove_cache_after_use": REMOVE_CACHE_AFTER_USE,
        # Active-request pool information.
        "active_count": active_count,
        "active_done": active_done,
        "active_pending": active_pending,
    }

    # Render through the Jinja2 template engine.
    return templates.TemplateResponse("index.html", {"request": request, **context})
822
+
823
# Map of client IP -> most recent request info, used to identify reconnects.
client_request_history: Dict[str, Dict[str, Any]] = {}
# How long (seconds) request-history entries are retained.
REQUEST_HISTORY_EXPIRY_TIME = int(os.environ.get("REQUEST_HISTORY_EXPIRY_TIME", "600"))  # default: 10 minutes
# Whether reconnect detection is enabled.
ENABLE_RECONNECT_DETECTION = os.environ.get("ENABLE_RECONNECT_DETECTION", "true").lower() in ["true", "1", "yes"]
829
+
830
# Periodic maintenance jobs (cache, task pool, statistics).
def schedule_cache_cleanup():
    """Start a background scheduler running all periodic cleanup jobs."""
    cleanup_scheduler = BackgroundScheduler()
    # Evict expired response-cache entries every minute.
    cleanup_scheduler.add_job(response_cache_manager.clean_expired, 'interval', minutes=1)
    # Drop finished tasks from the active-request pool every 30 seconds.
    cleanup_scheduler.add_job(active_requests_manager.clean_completed, 'interval', seconds=30)
    # Every 5 minutes, cancel tasks that have been running for over 300 seconds.
    cleanup_scheduler.add_job(active_requests_manager.clean_long_running, 'interval', minutes=5, args=[300])
    # Purge expired API-call statistics every 5 minutes.
    cleanup_scheduler.add_job(clean_expired_stats, 'interval', minutes=5)
    cleanup_scheduler.start()
838
+
839
# Build a unique cache key for a request.
def generate_cache_key(chat_request: ChatCompletionRequest) -> str:
    """Derive a deterministic MD5 cache key from the model and messages.

    Text content is included verbatim; base64 images contribute an MD5
    fingerprint instead of the raw payload. Keys are opaque to callers, so
    the fingerprint change below is interface-compatible.
    """
    # Dict capturing the request facts that define cache identity.
    request_data = {
        'model': chat_request.model,
        'messages': []
    }

    for msg in chat_request.messages:
        if isinstance(msg.content, str):
            request_data['messages'].append({'role': msg.role, 'content': msg.content})
        elif isinstance(msg.content, list):
            # Multimodal message: normalize each part.
            content_list = []
            for item in msg.content:
                if item.get('type') == 'text':
                    content_list.append({'type': 'text', 'text': item.get('text')})
                elif item.get('type') == 'image_url':
                    image_data = item.get('image_url', {}).get('url', '')
                    if image_data.startswith('data:image/'):
                        # BUG FIX: previously only image_data[:32] was hashed,
                        # which is almost entirely the constant
                        # "data:image/...;base64," prefix — different images
                        # collided onto the same cache key and could serve the
                        # wrong cached response. Hash the full data URI so
                        # distinct images yield distinct keys.
                        content_list.append({'type': 'image_url', 'hash': hashlib.md5(image_data.encode()).hexdigest()})
                    else:
                        # Plain URL: the URL itself identifies the image.
                        content_list.append({'type': 'image_url', 'url': image_data})
            request_data['messages'].append({'role': msg.role, 'content': content_list})
        # Other content shapes are intentionally skipped (as before).

    # Canonical JSON (sorted keys) -> stable MD5 hex digest.
    json_data = json.dumps(request_data, sort_keys=True)
    return hashlib.md5(json_data.encode()).hexdigest()
870
+
871
+ # 拆分process_request为更小的函数
872
+
873
+ async def process_stream_request(
874
+ chat_request: ChatCompletionRequest,
875
+ http_request: Request,
876
+ contents,
877
+ system_instruction,
878
+ current_api_key: str
879
+ ) -> StreamingResponse:
880
+ """处理流式API请求"""
881
+
882
+ # 创建一个直接流式响应的生成器函数
883
+ async def stream_response_generator():
884
+ # 如果启用了假流式模式,使用随机遍历API密钥的方式
885
+ if FAKE_STREAMING:
886
+ # 创建一个队列用于在任务之间传递数据
887
+ queue = asyncio.Queue()
888
+ keep_alive_task = None
889
+ api_request_task = None
890
+
891
+ try:
892
+ # 创建一个保持连接的任务,持续发送换行符
893
+ async def keep_alive_sender():
894
+ try:
895
+ # 创建一个Gemini客户端用于发送保持连接的换行符
896
+ keep_alive_client = GeminiClient(current_api_key)
897
+
898
+ # 启动保持连接的生成器
899
+ keep_alive_generator = keep_alive_client.stream_chat(
900
+ chat_request,
901
+ contents,
902
+ safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
903
+ system_instruction
904
+ )
905
+
906
+ # 持续发送换行符直到被取消
907
+ async for line in keep_alive_generator:
908
+ if line == "\n":
909
+ # 将换行符格式化为SSE格式
910
+ formatted_chunk = {
911
+ "id": "chatcmpl-keepalive",
912
+ "object": "chat.completion.chunk",
913
+ "created": int(time.time()),
914
+ "model": chat_request.model,
915
+ "choices": [{"delta": {"content": ""}, "index": 0, "finish_reason": None}]
916
+ }
917
+ # 将格式化的换行符放入队列
918
+ await queue.put(f"data: {json.dumps(formatted_chunk)}\n\n")
919
+ except asyncio.CancelledError:
920
+ log('info', "保持连接任务被取消",
921
+ extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
922
+ raise
923
+ except Exception as e:
924
+ log('error', f"保持连接任务出错: {str(e)}",
925
+ extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
926
+ # 将错误放入队列
927
+ await queue.put(None)
928
+ raise
929
+
930
+ # 创建一个任务来随机遍历API密钥并请求内容
931
+ async def api_request_handler():
932
+ success = False
933
+ try:
934
+ # 重置已尝试的密钥
935
+ key_manager.reset_tried_keys_for_request()
936
+
937
+ # 获取可用的API密钥
938
+ available_keys = key_manager.api_keys.copy()
939
+ random.shuffle(available_keys) # 随机打乱密钥顺序
940
+
941
+ # 遍历所有API密钥尝试获取响应
942
+ for attempt, api_key in enumerate(available_keys, 1):
943
+ try:
944
+ log('info', f"假流式模式: 尝试API密钥 {api_key[:8]}... ({attempt}/{len(available_keys)})",
945
+ extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
946
+
947
+ # 创建一个新的客户端使用当前API密钥
948
+ non_stream_client = GeminiClient(api_key)
949
+
950
+ # 使用非流式方式请求内容
951
+ response_content = await asyncio.to_thread(
952
+ non_stream_client.complete_chat,
953
+ chat_request,
954
+ contents,
955
+ safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
956
+ system_instruction
957
+ )
958
+
959
+ # 检查响应是否有效
960
+ if response_content and response_content.text:
961
+ log('info', f"假流式模式: API密钥 {api_key[:8]}... 成功获取响应",
962
+ extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
963
+
964
+ # 将完整响应分割成小块,模拟流式返回
965
+ full_text = response_content.text
966
+ chunk_size = max(len(full_text) // 10, 1) # 至少分成10块,每块至少1个字符
967
+
968
+ for i in range(0, len(full_text), chunk_size):
969
+ chunk = full_text[i:i+chunk_size]
970
+ formatted_chunk = {
971
+ "id": "chatcmpl-someid",
972
+ "object": "chat.completion.chunk",
973
+ "created": int(time.time()),
974
+ "model": chat_request.model,
975
+ "choices": [{"delta": {"role": "assistant", "content": chunk}, "index": 0, "finish_reason": None}]
976
+ }
977
+ # 将格式化的内容块放入队列
978
+ await queue.put(f"data: {json.dumps(formatted_chunk)}\n\n")
979
+
980
+ success = True
981
+ # 更新API调用统计
982
+ update_api_call_stats()
983
+ break # 成功获取响应,退出循环
984
+ else:
985
+ log('warning', f"假流式模式: API密钥 {api_key[:8]}... 返回空响应",
986
+ extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
987
+ except Exception as e:
988
+ error_detail = handle_gemini_error(e, api_key, key_manager)
989
+ log('error', f"假流式模式: API密钥 {api_key[:8]}... 请求失败: {error_detail}",
990
+ extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
991
+ # 继续尝试下一个API密钥
992
+
993
+ # 如果所有API密钥都尝试失败
994
+ if not success:
995
+ error_msg = "所有API密钥均请求失败,请稍后重试"
996
+ log('error', error_msg,
997
+ extra={'key': 'ALL', 'request_type': 'fake-stream', 'model': chat_request.model})
998
+
999
+ # 添加错误信息到队列
1000
+ error_json = {
1001
+ "id": "chatcmpl-error",
1002
+ "object": "chat.completion.chunk",
1003
+ "created": int(time.time()),
1004
+ "model": chat_request.model,
1005
+ "choices": [{"delta": {"content": f"\n\n[错误: {error_msg}]"}, "index": 0, "finish_reason": "error"}]
1006
+ }
1007
+ await queue.put(f"data: {json.dumps(error_json)}\n\n")
1008
+
1009
+ # 添加完成标记到队列
1010
+ await queue.put("data: [DONE]\n\n")
1011
+ # 添加None表示队列结束
1012
+ await queue.put(None)
1013
+
1014
+ except asyncio.CancelledError:
1015
+ log('info', "API请求任务被取消",
1016
+ extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
1017
+ # 添加None表示队列结束
1018
+ await queue.put(None)
1019
+ raise
1020
+ except Exception as e:
1021
+ log('error', f"API请求任务出错: {str(e)}",
1022
+ extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
1023
+ # 添加错误信息到队列
1024
+ error_json = {
1025
+ "id": "chatcmpl-error",
1026
+ "object": "chat.completion.chunk",
1027
+ "created": int(time.time()),
1028
+ "model": chat_request.model,
1029
+ "choices": [{"delta": {"content": f"\n\n[错误: {str(e)}]"}, "index": 0, "finish_reason": "error"}]
1030
+ }
1031
+ await queue.put(f"data: {json.dumps(error_json)}\n\n")
1032
+ await queue.put("data: [DONE]\n\n")
1033
+ # 添加None表示队列结束
1034
+ await queue.put(None)
1035
+ raise
1036
+
1037
+ # 启动保持连接的任务
1038
+ keep_alive_task = asyncio.create_task(keep_alive_sender())
1039
+ # 启动API请求任务
1040
+ api_request_task = asyncio.create_task(api_request_handler())
1041
+
1042
+ # 从队列中获取数据并发送给客户端
1043
+ while True:
1044
+ chunk = await queue.get()
1045
+ if chunk is None: # None表示队列结束
1046
+ break
1047
+ yield chunk
1048
+
1049
+ # 如果API请求任务已完成,取消保持连接任务
1050
+ if api_request_task.done() and not keep_alive_task.done():
1051
+ keep_alive_task.cancel()
1052
+
1053
+ except asyncio.CancelledError:
1054
+ log('info', "流式响应生成器被取消",
1055
+ extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
1056
+ # 取消所有任务
1057
+ if keep_alive_task and not keep_alive_task.done():
1058
+ keep_alive_task.cancel()
1059
+ if api_request_task and not api_request_task.done():
1060
+ api_request_task.cancel()
1061
+ except Exception as e:
1062
+ log('error', f"流式响应生成器出错: {str(e)}",
1063
+ extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
1064
+ # 取消所有任务
1065
+ if keep_alive_task and not keep_alive_task.done():
1066
+ keep_alive_task.cancel()
1067
+ if api_request_task and not api_request_task.done():
1068
+ api_request_task.cancel()
1069
+ # 发送错误信息给客户端
1070
+ error_json = {
1071
+ "id": "chatcmpl-error",
1072
+ "object": "chat.completion.chunk",
1073
+ "created": int(time.time()),
1074
+ "model": chat_request.model,
1075
+ "choices": [{"delta": {"content": f"\n\n[错误: {str(e)}]"}, "index": 0, "finish_reason": "error"}]
1076
+ }
1077
+ yield f"data: {json.dumps(error_json)}\n\n"
1078
+ yield "data: [DONE]\n\n"
1079
+ finally:
1080
+ # 确保所有任务都被取消
1081
+ if keep_alive_task and not keep_alive_task.done():
1082
+ keep_alive_task.cancel()
1083
+ if api_request_task and not api_request_task.done():
1084
+ api_request_task.cancel()
1085
+ else:
1086
+ # 原始流式请求处理逻辑
1087
+ gemini_client = GeminiClient(current_api_key)
1088
+ success = False
1089
+
1090
+ try:
1091
+ # 直接迭代生成器并发送响应块
1092
+ async for chunk in gemini_client.stream_chat(
1093
+ chat_request,
1094
+ contents,
1095
+ safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
1096
+ system_instruction
1097
+ ):
1098
+ # 空字符串跳过
1099
+ if not chunk:
1100
+ continue
1101
+
1102
+ formatted_chunk = {
1103
+ "id": "chatcmpl-someid",
1104
+ "object": "chat.completion.chunk",
1105
+ "created": int(time.time()),
1106
+ "model": chat_request.model,
1107
+ "choices": [{"delta": {"role": "assistant", "content": chunk}, "index": 0, "finish_reason": None}]
1108
+ }
1109
+ success = True # ��要有一个chunk成功,就标记为成功
1110
+ yield f"data: {json.dumps(formatted_chunk)}\n\n"
1111
+
1112
+ # 如果成功获取到响应,更新API调用统计
1113
+ if success:
1114
+ update_api_call_stats()
1115
+
1116
+ yield "data: [DONE]\n\n"
1117
+
1118
+ except asyncio.CancelledError:
1119
+ extra_log_cancel = {'key': current_api_key[:8], 'request_type': 'stream', 'model': chat_request.model, 'error_message': '客户端已断开连接'}
1120
+ log('info', "客户端连接已中断", extra=extra_log_cancel)
1121
+ except Exception as e:
1122
+ error_detail = handle_gemini_error(e, current_api_key, key_manager)
1123
+ log('error', f"流式请求失败: {error_detail}",
1124
+ extra={'key': current_api_key[:8], 'request_type': 'stream', 'model': chat_request.model})
1125
+ # 发送错误信息给客户端
1126
+ error_json = {
1127
+ "id": "chatcmpl-error",
1128
+ "object": "chat.completion.chunk",
1129
+ "created": int(time.time()),
1130
+ "model": chat_request.model,
1131
+ "choices": [{"delta": {"content": f"\n\n[错误: {error_detail}]"}, "index": 0, "finish_reason": "error"}]
1132
+ }
1133
+ yield f"data: {json.dumps(error_json)}\n\n"
1134
+ yield "data: [DONE]\n\n"
1135
+ # 重新抛出异常,这样process_request可以捕获它
1136
+ raise e
1137
+
1138
+ return StreamingResponse(stream_response_generator(), media_type="text/event-stream")
1139
+
1140
async def run_gemini_completion(
    gemini_client,
    chat_request: ChatCompletionRequest,
    contents,
    system_instruction,
    request_type: str,
    current_api_key: str
):
    """Run a non-streaming Gemini completion on a worker thread.

    The blocking ``complete_chat`` call is wrapped in a task and awaited
    behind ``asyncio.shield`` so a client disconnect (cancellation of this
    coroutine) does not abort the in-flight API request.

    Args:
        gemini_client: GeminiClient instance bound to ``current_api_key``.
        chat_request: Parsed OpenAI-style chat completion request.
        contents: Gemini-format message contents.
        system_instruction: Optional system instruction payload.
        request_type: Label used only for logging.
        current_api_key: The API key in use (first 8 chars are logged).

    Returns:
        The response object produced by ``gemini_client.complete_chat``.

    Raises:
        asyncio.CancelledError: When cancelled and the underlying request
            could not be (or was not) completed.
    """
    # Flag is stored on the function object itself, so the success log below
    # fires only once per process lifetime, not once per request.
    run_fn = run_gemini_completion

    try:
        # Run the blocking client call in a thread-backed task so it can
        # outlive a cancellation of this coroutine.
        response_future = asyncio.create_task(
            asyncio.to_thread(
                gemini_client.complete_chat,
                chat_request,
                contents,
                # 'gemini-2.0-flash-exp' models get the alternate safety settings.
                safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
                system_instruction
            )
        )

        # Shield so external cancellation cannot kill the underlying request.
        response_content = await asyncio.shield(response_future)

        # Log completion only on the first ever successful call (see flag above).
        if not hasattr(run_fn, 'logged_complete'):
            log('info', "非流式请求成功完成", extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
            run_fn.logged_complete = True
        return response_content
    except asyncio.CancelledError:
        # We were cancelled (typically: client disconnected). If the API call
        # is still running, keep waiting for it so the result is not wasted.
        if 'response_future' in locals() and not response_future.done():
            try:
                # Shield again and wait for the request to finish.
                response_content = await asyncio.shield(response_future)
                log('info', "API请求在客户端断开后完成", extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
                return response_content
            except Exception as e:
                extra_log_gemini_cancel = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message': f'API请求在客户端断开后失败: {str(e)}'}
                log('info', "API调用因客户端断开而失败", extra=extra_log_gemini_cancel)
                raise

        # The task never started or already finished/failed: log and propagate.
        extra_log_gemini_cancel = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message': '客户端断开导致API调用取消'}
        log('info', "API调用因客户端断开而取消", extra=extra_log_gemini_cancel)
        raise
1189
+
1190
async def check_client_disconnect(http_request: Request, current_api_key: str, request_type: str, model: str):
    """Poll the incoming HTTP connection until the client goes away.

    Checks every 0.5 s; returns True once the client has disconnected.
    """
    while not await http_request.is_disconnected():
        await asyncio.sleep(0.5)
    log('info', "客户端连接已中断,等待API请求完成",
        extra={'key': current_api_key[:8], 'request_type': request_type,
               'model': model, 'error_message': '检测到客户端断开连接'})
    return True
1198
+
1199
async def handle_client_disconnect(
    gemini_task: asyncio.Task,
    chat_request: ChatCompletionRequest,
    request_type: str,
    current_api_key: str,
    cache_key: str = None,
    client_ip: str = None
):
    """Finish a Gemini request whose client has already disconnected.

    Waits (shielded) for the in-flight API task so the work is not wasted,
    then builds a response object. Any pre-existing cache entry for this key
    is evicted rather than reused; on empty responses or errors an error
    response object is returned instead of None.
    """
    try:
        # Shield the API task so our own cancellation cannot abort it.
        response_content = await asyncio.shield(gemini_task)

        # Empty or missing response text: clean up and report an error.
        if response_content is None or response_content.text == "":
            if response_content is None:
                log('info', "客户端断开后API任务返回None",
                    extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
            else:
                extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'status_code': 204}
                log('info', "客户端断开后Gemini API 返回空响应", extra=extra_log)

            # Drop any cached entry for this key, since the response is empty.
            if cache_key and cache_key in response_cache_manager.cache:
                log('info', f"因空响应,删除缓存: {cache_key[:8]}...",
                    extra={'cache_operation': 'remove-on-empty', 'request_type': request_type})
                del response_cache_manager.cache[cache_key]

            # Return an error response instead of None.
            return create_error_response(chat_request.model, "AI未返回任何内容,请重试")

        # If a cache entry already exists for this key, delete it — we build
        # a fresh response below instead of serving the stale entry.
        cached_response, cache_hit = response_cache_manager.get(cache_key)
        if cache_hit:
            log('info', f"客户端断开但找到已存在缓存,将删除: {cache_key[:8]}...",
                extra={'cache_operation': 'disconnect-found-cache', 'request_type': request_type})

            # Delete defensively: verify presence before removing.
            if cache_key in response_cache_manager.cache:
                del response_cache_manager.cache[cache_key]

        # Build a new response. The client is gone, so this response will not
        # actually be delivered; caching it for a later retry is possible but
        # intentionally disabled below.
        # log('info', f"客户端断开后创建新缓存: {cache_key[:8] if cache_key else 'none'}...",
        #     extra={'cache_operation': 'create-after-disconnect', 'request_type': request_type})
        response = create_response(chat_request, response_content)

        # If caching-after-disconnect is ever desired, uncomment:
        # cache_response(response, cache_key, client_ip)

        return response
    except asyncio.CancelledError:
        # Even when cancelled, try to salvage a result from the task.
        log('info', "客户端断开后任务被取消,但我们仍会尝试完成",
            extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})

        # If the task actually finished (and was not itself cancelled),
        # use its result.
        if gemini_task.done() and not gemini_task.cancelled():
            try:
                response_content = gemini_task.result()

                # Same cache-eviction behavior as the normal path.
                cached_response, cache_hit = response_cache_manager.get(cache_key)
                if cache_hit:
                    log('info', f"任务被取消但找到已存在缓存,将删除: {cache_key[:8]}...",
                        extra={'cache_operation': 'cancel-found-cache', 'request_type': request_type})

                    if cache_key in response_cache_manager.cache:
                        del response_cache_manager.cache[cache_key]

                # Build (but do not cache) the response.
                response = create_response(chat_request, response_content)
                return response
            except Exception as inner_e:
                log('error', f"客户端断开后从已完成任务获取结果失败: {str(inner_e)}",
                    extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})

                # Evict the cache entry since fetching the result failed.
                if cache_key and cache_key in response_cache_manager.cache:
                    log('info', f"因任务获取结果失败,删除缓存: {cache_key[:8]}...",
                        extra={'cache_operation': 'remove-on-error', 'request_type': request_type})
                    del response_cache_manager.cache[cache_key]

        # Task never completed (or result retrieval failed above): return an
        # explicit error response rather than None.
        return create_error_response(chat_request.model, "请求处理过程中发生错误,请重试")
    except Exception as e:
        # Any other failure coming out of the API task.
        error_msg = str(e)
        extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message': error_msg}
        log('error', f"客户端断开后处理API响应时出错: {error_msg}", extra=extra_log)

        # Evict the cache entry since we hit an error.
        if cache_key and cache_key in response_cache_manager.cache:
            log('info', f"因API响应错误,删除缓存: {cache_key[:8]}...",
                extra={'cache_operation': 'remove-on-error', 'request_type': request_type})
            del response_cache_manager.cache[cache_key]

        # Return an error response instead of None.
        return create_error_response(chat_request.model, f"请求处理错误: {error_msg}")
1301
+
1302
async def process_nonstream_request(
    chat_request: ChatCompletionRequest,
    http_request: Request,
    request_type: str,
    contents,
    system_instruction,
    current_api_key: str,
    cache_key: str = None,
    client_ip: str = None
):
    """Handle a non-streaming chat completion request end-to-end.

    Races the Gemini API call against a client-disconnect watcher. If the
    client drops first, the API call is still completed (see
    ``handle_client_disconnect``). Responses are cached single-use: a cached
    entry is deleted as soon as it is served or stored.
    """
    gemini_client = GeminiClient(current_api_key)

    # Kick off the actual API call.
    gemini_task = asyncio.create_task(
        run_gemini_completion(
            gemini_client,
            chat_request,
            contents,
            system_instruction,
            request_type,
            current_api_key
        )
    )

    # And, in parallel, watch the HTTP connection for a client disconnect.
    disconnect_task = asyncio.create_task(
        check_client_disconnect(
            http_request,
            current_api_key,
            request_type,
            chat_request.model
        )
    )

    try:
        # Wait for whichever happens first: API completion or disconnect.
        done, pending = await asyncio.wait(
            [gemini_task, disconnect_task],
            return_when=asyncio.FIRST_COMPLETED
        )

        if disconnect_task in done:
            # Client went away first; still finish the API request so the
            # result can be cached/returned by the disconnect handler.
            return await handle_client_disconnect(
                gemini_task,
                chat_request,
                request_type,
                current_api_key,
                cache_key,
                client_ip
            )
        else:
            # API finished first: the disconnect watcher is no longer needed.
            disconnect_task.cancel()

            # Collect the response content.
            response_content = await gemini_task

            # If a cache entry already exists for this key, serve it instead
            # of creating a new one (single-use: delete after reading).
            cached_response, cache_hit = response_cache_manager.get(cache_key)
            if cache_hit:
                log('info', f"缓存已存在,直接返回: {cache_key[:8]}...",
                    extra={'cache_operation': 'use-existing', 'request_type': request_type})

                # Delete defensively: verify presence before removing.
                if cache_key in response_cache_manager.cache:
                    del response_cache_manager.cache[cache_key]
                    log('info', f"缓存使用后已删除: {cache_key[:8]}...",
                        extra={'cache_operation': 'used-and-removed', 'request_type': request_type})

                return cached_response

            # Build the response object.
            response = create_response(chat_request, response_content)

            # Store it (this also bumps the API call stats)…
            cache_response(response, cache_key, client_ip)

            # …then delete immediately so the cache entry is single-use.
            if cache_key and cache_key in response_cache_manager.cache:
                del response_cache_manager.cache[cache_key]
                log('info', f"缓存创建后立即删除: {cache_key[:8]}...",
                    extra={'cache_operation': 'store-and-remove', 'request_type': request_type})

            # Hand the response back to the caller.
            return response

    except asyncio.CancelledError:
        extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message':"请求被取消"}
        log('info', "请求取消", extra=extra_log)

        # On cancellation, first see whether a cached result already exists.
        cached_response, cache_hit = response_cache_manager.get(cache_key)
        if cache_hit:
            log('info', f"请求取消但找到有效缓存,使用缓存响应: {cache_key[:8]}...",
                extra={'cache_operation': 'use-cache-on-cancel', 'request_type': request_type})

            # Delete defensively: verify presence before removing.
            if cache_key in response_cache_manager.cache:
                del response_cache_manager.cache[cache_key]
                log('info', f"缓存使用后已删除: {cache_key[:8]}...",
                    extra={'cache_operation': 'used-and-removed', 'request_type': request_type})

            return cached_response

        # Otherwise try to let the in-flight API request run to completion.
        if not gemini_task.done():
            log('info', "请求取消但API请求尚未完成,继续等待...",
                extra={'key': current_api_key[:8], 'request_type': request_type})

            # Shield so the task survives this cancellation.
            response_content = await asyncio.shield(gemini_task)

            # Build the response…
            response = create_response(chat_request, response_content)

            # …and return it without caching.
            return response
        else:
            # Task already finished: take its result directly.
            response_content = gemini_task.result()

            # Build the response…
            response = create_response(chat_request, response_content)

            # …and return it without caching.
            return response

    except HTTPException as e:
        if e.status_code == status.HTTP_408_REQUEST_TIMEOUT:
            extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model,
                        'status_code': 408, 'error_message': '客户端连接中断'}
            log('error', "客户端连接中断,终止后续重试", extra=extra_log)
            raise
        else:
            raise
1438
+
1439
+ # 添加通用响应处理函数
1440
def create_response(
    chat_request, response_content
):
    """Build a standard (non-cached) chat completion response object.

    Wraps ``response_content.text`` in a single assistant choice with
    ``finish_reason`` "stop".
    """
    choice = {
        "index": 0,
        "message": {
            "role": "assistant",
            "content": response_content.text,
        },
        "finish_reason": "stop",
    }
    return create_chat_response(model=chat_request.model, choices=[choice])
1456
+
1457
def cache_response(response, cache_key, client_ip):
    """Store a completed response in the cache and bump the API call stats.

    Skips storage entirely when no cache key is given, and never overwrites
    an existing entry for the same key.
    """
    if not cache_key:
        return

    if cache_key in response_cache_manager.cache:
        # An entry already exists for this key; keep it rather than overwrite.
        log('info', f"缓存已存在,跳过存储: {cache_key[:8]}...",
            extra={'cache_operation': 'skip-existing', 'request_type': 'non-stream'})
    else:
        response_cache_manager.store(cache_key, response, client_ip)
        log('info', f"API响应已缓存: {cache_key[:8]}...",
            extra={'cache_operation': 'store-new', 'request_type': 'non-stream'})

    # Count this as one successful API call either way.
    update_api_call_stats()
1475
+
1476
+ # 统一的API错误处理函数
1477
async def handle_api_error(e, api_key, key_manager, request_type, model, retry_count=0):
    """Centralized API error handling with automatic retry for 500/503.

    Returns a dict describing what the caller should do:
    ``{'should_retry': bool, 'should_switch_key': bool (optional),
    'error': str, 'remove_cache': bool}``.

    Raises:
        HTTPException: With the original 500/503 status once retries (3) are
            exhausted.
    """
    error_detail = handle_gemini_error(e, api_key, key_manager)

    # NOTE(review): detects 500/503 by substring-matching str(e) — confirm
    # this cannot misfire on messages that merely contain "500"/"503".
    if isinstance(e, requests.exceptions.HTTPError) and ('500' in str(e) or '503' in str(e)):
        status_code = '500' if '500' in str(e) else '503'

        # Retry up to 3 times with exponential backoff, capped at MAX_RETRY_DELAY.
        if retry_count < 3:
            wait_time = min(RETRY_DELAY * (2 ** retry_count), MAX_RETRY_DELAY)
            # NOTE(review): log() is called here with bare keyword args;
            # elsewhere in this file it is called with extra={...} — verify
            # log() accepts both calling conventions.
            log('warning', f"Gemini服务器错误({status_code}),等待{wait_time}秒后重试 ({retry_count+1}/3)",
                key=api_key[:8], request_type=request_type, model=model, status_code=int(status_code))

            # Sleep, then signal the caller to retry with the same key.
            await asyncio.sleep(wait_time)
            return {'should_retry': True, 'error': error_detail, 'remove_cache': False}

        # Retries exhausted: surface the server error status directly.
        log('error', f"服务器错误({status_code})重试{retry_count}次后仍然失败",
            key=api_key[:8], request_type=request_type, model=model, status_code=int(status_code))

        # Do not switch keys for server-side errors; raise instead.
        raise HTTPException(status_code=int(status_code),
                           detail=f"Gemini API 服务器错误({status_code}),请稍后重试")

    # For every other error, tell the caller to switch to another key.
    log('error', f"API错误: {error_detail}",
        key=api_key[:8], request_type=request_type, model=model, error_message=error_detail)
    return {'should_retry': False, 'should_switch_key': True, 'error': error_detail, 'remove_cache': True}
app/models.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Optional, Union, Literal
2
+ from pydantic import BaseModel, Field
3
+
4
class Message(BaseModel):
    """A single chat message in OpenAI chat-completions format."""
    role: str     # e.g. "system" / "user" / "assistant"; not validated here
    content: str  # plain-text message body
7
+
8
class ChatCompletionRequest(BaseModel):
    """OpenAI-compatible /v1/chat/completions request body."""
    model: str                                   # target model name
    messages: List[Message]                      # conversation history, oldest first
    temperature: float = 0.7                     # sampling temperature
    top_p: Optional[float] = 1.0                 # nucleus sampling cutoff
    n: int = 1                                   # number of choices requested
    stream: bool = False                         # stream via SSE when True
    stop: Optional[Union[str, List[str]]] = None  # stop sequence(s)
    max_tokens: Optional[int] = None             # output token cap; None = provider default
    presence_penalty: Optional[float] = 0.0
    frequency_penalty: Optional[float] = 0.0
19
+
20
class Choice(BaseModel):
    """One generated completion choice."""
    index: int                          # position within the choices list
    message: Message                    # the assistant's reply
    finish_reason: Optional[str] = None  # e.g. "stop"; None while streaming
24
+
25
class Usage(BaseModel):
    """Token accounting for a completion; all counts default to zero."""
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0
29
+
30
class ChatCompletionResponse(BaseModel):
    """OpenAI-compatible chat completion response envelope."""
    id: str                                      # response identifier
    object: Literal["chat.completion"]           # fixed discriminator value
    created: int                                 # unix timestamp (seconds)
    model: str                                   # model that produced the reply
    choices: List[Choice]                        # generated choices
    usage: Usage = Field(default_factory=Usage)  # token counts; zeros by default
37
+
38
class ErrorResponse(BaseModel):
    """OpenAI-style error payload."""
    message: str               # human-readable description
    type: str                  # error category
    param: Optional[str] = None  # offending parameter, when applicable
    code: Optional[str] = None   # machine-readable error code
43
+
44
class ModelList(BaseModel):
    """OpenAI-compatible /v1/models listing."""
    object: str = "list"  # fixed discriminator value
    data: List[Dict]      # one dict per available model
app/utils.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from fastapi import HTTPException, Request
3
+ import time
4
+ import re
5
+ from datetime import datetime, timedelta
6
+ from apscheduler.schedulers.background import BackgroundScheduler
7
+ import os
8
+ import requests
9
+ import httpx
10
+ from threading import Lock
11
+ import logging
12
+ import sys
13
+ from collections import deque
14
+
15
+ DEBUG = os.environ.get("DEBUG", "false").lower() == "true"
16
+ LOG_FORMAT_DEBUG = '%(asctime)s - %(levelname)s - [%(key)s]-%(request_type)s-[%(model)s]-%(status_code)s: %(message)s - %(error_message)s'
17
+ LOG_FORMAT_NORMAL = '[%(asctime)s] [%(levelname)s] [%(key)s]-%(request_type)s-[%(model)s]-%(status_code)s: %(message)s'
18
+
19
+ # 配置 logger
20
+ logger = logging.getLogger("my_logger")
21
+ logger.setLevel(logging.DEBUG)
22
+
23
+ # 控制台处理器
24
+ console_handler = logging.StreamHandler()
25
+ console_formatter = logging.Formatter('%(message)s')
26
+ console_handler.setFormatter(console_formatter)
27
+ logger.addHandler(console_handler)
28
+
29
+ # 日志缓存,用于在网页上显示最近的日志
30
class LogManager:
    """Thread-safe ring buffer holding the most recent log entries.

    Backs the web UI's "recent logs" view; the oldest entry is discarded
    automatically once ``max_logs`` is reached.
    """

    def __init__(self, max_logs=100):
        # deque with maxlen evicts from the left on overflow, so appends
        # always succeed and memory stays bounded.
        self.logs = deque(maxlen=max_logs)
        self.lock = Lock()

    def add_log(self, log_entry):
        """Append one entry, evicting the oldest if the buffer is full."""
        with self.lock:
            self.logs.append(log_entry)

    def get_recent_logs(self, count=50):
        """Return up to the last *count* entries, oldest first."""
        with self.lock:
            snapshot = list(self.logs)
        return snapshot[-count:]
42
+
43
+ # 创建日志管理器实例
44
+ log_manager = LogManager()
45
+
46
def format_log_message(level, message, extra=None):
    """Render a log line and record it in the in-memory log buffer.

    Args:
        level: Log level name, e.g. 'INFO'.
        message: Human-readable log message.
        extra: Optional dict supplying 'key', 'request_type', 'model',
            'status_code' and 'error_message'; missing fields default to
            'N/A' ('' for error_message).

    Returns:
        The formatted log line (debug or normal format depending on DEBUG).
    """
    extra = extra or {}
    # Take a single timestamp so the rendered line and the stored entry
    # always agree, even if the call straddles a second boundary.
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Shared fields used by both the format string and the buffered entry.
    fields = {
        'key': extra.get('key', 'N/A'),
        'request_type': extra.get('request_type', 'N/A'),
        'model': extra.get('model', 'N/A'),
        'status_code': extra.get('status_code', 'N/A'),
        'error_message': extra.get('error_message', ''),
    }

    log_format = LOG_FORMAT_DEBUG if DEBUG else LOG_FORMAT_NORMAL
    formatted_log = log_format % {
        'asctime': timestamp,
        'levelname': level,
        'message': message,
        **fields,
    }

    # Mirror the entry into the LogManager so the web page can display it.
    log_entry = {
        'timestamp': timestamp,
        'level': level,
        'message': message,
        'formatted': formatted_log,
        **fields,
    }
    log_manager.add_log(log_entry)

    return formatted_log
76
+
77
+
78
class APIKeyManager:
    """Rotates Gemini API keys, handing out a random untried key per request."""

    def __init__(self):
        # Extract every well-formed key from the GEMINI_API_KEYS env var.
        self.api_keys = re.findall(
            r"AIzaSy[a-zA-Z0-9_-]{33}", os.environ.get('GEMINI_API_KEYS', ""))
        self.key_stack = []
        self._reset_key_stack()
        self.scheduler = BackgroundScheduler()
        self.scheduler.start()
        # Keys already attempted while serving the current request.
        self.tried_keys_for_request = set()

    def _reset_key_stack(self):
        """Rebuild the stack from a freshly shuffled copy of all keys."""
        reshuffled = list(self.api_keys)  # copy so api_keys itself is untouched
        random.shuffle(reshuffled)
        self.key_stack = reshuffled

    def _pop_untried(self):
        """Pop keys until one not yet tried this request is found, else None."""
        while self.key_stack:
            candidate = self.key_stack.pop()
            if candidate not in self.tried_keys_for_request:
                self.tried_keys_for_request.add(candidate)
                return candidate
        return None

    def get_available_key(self):
        """Return an untried key, refilling the stack once if it runs dry.

        Returns None when no keys are configured or every key has already
        been tried for the current request.
        """
        key = self._pop_untried()
        if key is not None:
            return key

        if not self.api_keys:
            logger.error(format_log_message('ERROR', "没有配置任何 API 密钥!"))
            return None

        # Refill with a new shuffle and try exactly one more pass.
        self._reset_key_stack()
        return self._pop_untried()

    def show_all_keys(self):
        """Log the number of configured keys and a redacted listing."""
        logger.info(format_log_message('INFO', f"当前可用API key个数: {len(self.api_keys)} "))
        for i, api_key in enumerate(self.api_keys):
            logger.info(format_log_message('INFO', f"API Key{i}: {api_key[:8]}...{api_key[-3:]}"))

    def reset_tried_keys_for_request(self):
        """Forget which keys were tried; call at the start of a new request."""
        self.tried_keys_for_request = set()
141
+
142
+
143
def handle_gemini_error(error, current_api_key, key_manager) -> str:
    """Map an exception from a Gemini call to a short human-readable message.

    Logs each case with the (redacted) API key. ``key_manager`` is currently
    unused: the blacklist calls that consumed it are commented out.
    """
    if isinstance(error, requests.exceptions.HTTPError):
        status_code = error.response.status_code
        if status_code == 400:
            try:
                error_data = error.response.json()
                if 'error' in error_data:
                    # NOTE(review): compares the payload's `code` to the
                    # literal "invalid_argument"; confirm against the real
                    # Gemini error body (often status "INVALID_ARGUMENT").
                    if error_data['error'].get('code') == "invalid_argument":
                        error_message = "无效的 API 密钥"
                        extra_log_invalid_key = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
                        log_msg = format_log_message('ERROR', f"{current_api_key[:8]} ... {current_api_key[-3:]} → 无效,可能已过期或被删除", extra=extra_log_invalid_key)
                        logger.error(log_msg)
                        # key_manager.blacklist_key(current_api_key)

                        return error_message
                    # Generic 400: surface the server-provided message.
                    error_message = error_data['error'].get(
                        'message', 'Bad Request')
                    extra_log_400 = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
                    log_msg = format_log_message('WARNING', f"400 错误请求: {error_message}", extra=extra_log_400)
                    logger.warning(log_msg)
                    return f"400 错误请求: {error_message}"
            except ValueError:
                # Response body was not valid JSON.
                error_message = "400 错误请求:响应不是有效的JSON格式"
                extra_log_400_json = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
                log_msg = format_log_message('WARNING', error_message, extra=extra_log_400_json)
                logger.warning(log_msg)
                return error_message

        elif status_code == 429:
            # Quota exhausted (or other rate limiting).
            error_message = "API 密钥配额已用尽或其他原因"
            extra_log_429 = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
            log_msg = format_log_message('WARNING', f"{current_api_key[:8]} ... {current_api_key[-3:]} → 429 官方资源耗尽或其他原因", extra=extra_log_429)
            logger.warning(log_msg)
            # key_manager.blacklist_key(current_api_key)

            return error_message

        elif status_code == 403:
            # Permission denied for this key.
            error_message = "权限被拒绝"
            extra_log_403 = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
            log_msg = format_log_message('ERROR', f"{current_api_key[:8]} ... {current_api_key[-3:]} → 403 权限被拒绝", extra=extra_log_403)
            logger.error(log_msg)
            # key_manager.blacklist_key(current_api_key)

            return error_message
        elif status_code == 500:
            # Upstream internal server error.
            error_message = "服务器内部错误"
            extra_log_500 = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
            log_msg = format_log_message('WARNING', f"{current_api_key[:8]} ... {current_api_key[-3:]} → 500 服务器内部错误", extra=extra_log_500)
            logger.warning(log_msg)

            return "Gemini API 内部错误"

        elif status_code == 503:
            # Upstream temporarily unavailable.
            error_message = "服务不可用"
            extra_log_503 = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
            log_msg = format_log_message('WARNING', f"{current_api_key[:8]} ... {current_api_key[-3:]} → 503 服务不可用", extra=extra_log_503)
            logger.warning(log_msg)

            return "Gemini API 服务不可用"
        else:
            # Any other HTTP status.
            error_message = f"未知错误: {status_code}"
            extra_log_other = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
            log_msg = format_log_message('WARNING', f"{current_api_key[:8]} ... {current_api_key[-3:]} → {status_code} 未知错误", extra=extra_log_other)
            logger.warning(log_msg)

            return f"未知错误/模型不可用: {status_code}"

    elif isinstance(error, requests.exceptions.ConnectionError):
        error_message = "连接错误"
        log_msg = format_log_message('WARNING', error_message, extra={'error_message': error_message})
        logger.warning(log_msg)
        return error_message

    elif isinstance(error, requests.exceptions.Timeout):
        error_message = "请求超时"
        log_msg = format_log_message('WARNING', error_message, extra={'error_message': error_message})
        logger.warning(log_msg)
        return error_message
    else:
        # Fallback for anything that is not a requests exception.
        error_message = f"发生未知错误: {error}"
        log_msg = format_log_message('ERROR', error_message, extra={'error_message': error_message})
        logger.error(log_msg)
        return error_message
227
+
228
+
229
async def test_api_key(api_key: str) -> bool:
    """Return True iff *api_key* can successfully list models on the Gemini API."""
    url = "https://generativelanguage.googleapis.com/v1beta/models?key={}".format(api_key)
    try:
        async with httpx.AsyncClient() as client:
            resp = await client.get(url)
            resp.raise_for_status()
    except Exception:
        # Any network or HTTP-status failure means the key is unusable.
        return False
    return True
241
+
242
+
243
# Shared rate-limit bucket table: key -> (count, first_seen_timestamp).
rate_limit_data = {}
rate_limit_lock = Lock()

# Entries older than this are dropped on each call so the table cannot grow
# without bound (minute buckets become dead after 60 s, day buckets after 24 h;
# previously stale keys were never removed — a slow memory leak).
_RATE_LIMIT_MAX_AGE_SECONDS = 60 * 60 * 24


def protect_from_abuse(request: "Request", max_requests_per_minute: int = 30, max_requests_per_day_per_ip: int = 600):
    """Enforce a per-path per-minute and a per-IP per-day rate limit.

    Args:
        request: Incoming FastAPI request; only ``url.path`` and
            ``client.host`` are read.
        max_requests_per_minute: Cap per URL path per wall-clock minute.
        max_requests_per_day_per_ip: Cap per client IP per calendar day.

    Raises:
        HTTPException: 429 when either limit is exceeded.
    """
    now = int(time.time())
    minute = now // 60
    day = now // (60 * 60 * 24)

    # Bucket keys: one per (path, minute), one per (client IP, day).
    minute_key = f"{request.url.path}:{minute}"
    day_key = f"{request.client.host}:{day}"

    with rate_limit_lock:
        # Evict expired buckets so the dict stays bounded over time.
        stale = [k for k, (_, ts) in rate_limit_data.items()
                 if now - ts >= _RATE_LIMIT_MAX_AGE_SECONDS]
        for k in stale:
            del rate_limit_data[k]

        minute_count, minute_timestamp = rate_limit_data.get(
            minute_key, (0, now))
        if now - minute_timestamp >= 60:
            # Bucket is from a previous window; start counting afresh.
            minute_count = 0
            minute_timestamp = now
        minute_count += 1
        rate_limit_data[minute_key] = (minute_count, minute_timestamp)

        day_count, day_timestamp = rate_limit_data.get(day_key, (0, now))
        if now - day_timestamp >= 86400:
            day_count = 0
            day_timestamp = now
        day_count += 1
        rate_limit_data[day_key] = (day_count, day_timestamp)

    if minute_count > max_requests_per_minute:
        raise HTTPException(status_code=429, detail={
            "message": "Too many requests per minute", "limit": max_requests_per_minute})
    if day_count > max_requests_per_day_per_ip:
        raise HTTPException(status_code=429, detail={"message": "Too many requests per day from this IP", "limit": max_requests_per_day_per_ip})
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ httpx
4
+ python-dotenv
5
+ requests
6
+ apscheduler
7
+ jinja2
version.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ version=0.0.3