darling777 committed on
Commit
276bc7c
·
verified ·
1 Parent(s): 9fb1d46

Delete hajimi.v0.0.3

Browse files
hajimi.v0.0.3/Dockerfile DELETED
@@ -1,14 +0,0 @@
1
# Slim Python base image keeps the final image small.
FROM python:3.11-slim

WORKDIR /app

# Copy the application package and dependency manifests.
COPY ./app /app/app
COPY requirements.txt .
COPY version.txt .

RUN pip install --no-cache-dir -r requirements.txt

# Environment variables (set these in Hugging Face Spaces)
# ENV GEMINI_API_KEYS=your_key_1,your_key_2,your_key_3

# Port 7860 is the default port exposed by Hugging Face Spaces.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hajimi.v0.0.3/app/__init__.py DELETED
File without changes
hajimi.v0.0.3/app/gemini.py DELETED
@@ -1,346 +0,0 @@
1
- import requests
2
- import json
3
- import os
4
- import asyncio
5
- import time
6
- from app.models import ChatCompletionRequest, Message # 相对导入
7
- from dataclasses import dataclass
8
- from typing import Optional, Dict, Any, List
9
- import httpx
10
- import logging
11
- from app.utils import format_log_message
12
-
13
# Shared application logger (handlers are configured in app.main).
logger = logging.getLogger('my_logger')

# Whether fake streaming is enabled (the stream endpoint emits keep-alive
# newlines while the real request is performed elsewhere). Default: enabled.
FAKE_STREAMING = os.environ.get("FAKE_STREAMING", "true").lower() in ["true", "1", "yes"]
# Interval (seconds) between keep-alive newlines in fake-streaming mode.
FAKE_STREAMING_INTERVAL = float(os.environ.get("FAKE_STREAMING_INTERVAL", "1"))
19
-
20
@dataclass
class GeneratedText:
    """A chunk of generated model text plus the finish reason, if any."""
    text: str
    finish_reason: Optional[str] = None
24
-
25
-
26
class ResponseWrapper:
    """Convenience accessor over a raw Gemini ``generateContent`` response dict.

    All fields are extracted once at construction time and exposed through
    read-only properties. Missing or malformed fields yield ``""`` / ``None``
    instead of raising, so callers can use the wrapper unconditionally.
    """

    def __init__(self, data: Dict[Any, Any]):
        self._data = data
        self._text = self._extract_text()
        self._finish_reason = self._extract_finish_reason()
        self._prompt_token_count = self._extract_prompt_token_count()
        self._candidates_token_count = self._extract_candidates_token_count()
        self._total_token_count = self._extract_total_token_count()
        self._thoughts = self._extract_thoughts()
        # Pretty-printed copy of the raw payload, handy for logging/debugging.
        self._json_dumps = json.dumps(self._data, indent=4, ensure_ascii=False)

    def _extract_thoughts(self) -> Optional[str]:
        """Concatenate the text of every 'thought' part of the first candidate.

        Fix: the original returned only the first thought part.
        """
        try:
            return "".join(
                part['text']
                for part in self._data['candidates'][0]['content']['parts']
                if 'thought' in part
            )
        except (KeyError, IndexError, TypeError):
            return ""

    def _extract_text(self) -> str:
        """Concatenate the text of every non-'thought' part of the first candidate.

        Fix: the original returned only the FIRST matching part, silently
        dropping text when the API splits a candidate into several parts.
        """
        try:
            return "".join(
                part['text']
                for part in self._data['candidates'][0]['content']['parts']
                if 'thought' not in part
            )
        except (KeyError, IndexError, TypeError):
            return ""

    def _extract_finish_reason(self) -> Optional[str]:
        """Return candidates[0].finishReason, or None when absent."""
        try:
            return self._data['candidates'][0].get('finishReason')
        except (KeyError, IndexError, TypeError):
            return None

    def _extract_prompt_token_count(self) -> Optional[int]:
        """Return usageMetadata.promptTokenCount, or None when absent."""
        try:
            return self._data['usageMetadata'].get('promptTokenCount')
        except (KeyError, AttributeError):
            return None

    def _extract_candidates_token_count(self) -> Optional[int]:
        """Return usageMetadata.candidatesTokenCount, or None when absent."""
        try:
            return self._data['usageMetadata'].get('candidatesTokenCount')
        except (KeyError, AttributeError):
            return None

    def _extract_total_token_count(self) -> Optional[int]:
        """Return usageMetadata.totalTokenCount, or None when absent."""
        try:
            return self._data['usageMetadata'].get('totalTokenCount')
        except (KeyError, AttributeError):
            return None

    @property
    def text(self) -> str:
        return self._text

    @property
    def finish_reason(self) -> Optional[str]:
        return self._finish_reason

    @property
    def prompt_token_count(self) -> Optional[int]:
        return self._prompt_token_count

    @property
    def candidates_token_count(self) -> Optional[int]:
        return self._candidates_token_count

    @property
    def total_token_count(self) -> Optional[int]:
        return self._total_token_count

    @property
    def thoughts(self) -> Optional[str]:
        return self._thoughts

    @property
    def json_dumps(self) -> str:
        return self._json_dumps
106
-
107
-
108
class GeminiClient:
    """Thin client for the Google Gemini generateContent API (stream + non-stream)."""

    # Model names discovered at startup (populated externally by main.py).
    AVAILABLE_MODELS = []
    # Extra model names injected via the EXTRA_MODELS env var (comma separated).
    EXTRA_MODELS = os.environ.get("EXTRA_MODELS", "").split(",")

    def __init__(self, api_key: str):
        self.api_key = api_key

    async def stream_chat(self, request: ChatCompletionRequest, contents, safety_settings, system_instruction):
        """Yield text chunks for a streaming chat completion.

        In fake-streaming mode this generator only emits keep-alive newlines
        while the real (non-stream) request is performed by the caller
        (main.py owns the key rotation); otherwise it proxies Gemini's SSE
        stream and yields each text delta.
        """
        extra_log = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'N/A'}
        log_msg = format_log_message('INFO', "流式请求开始", extra=extra_log)
        logger.info(log_msg)

        # Check whether fake streaming is enabled.
        if FAKE_STREAMING:
            log_msg = format_log_message('INFO', "使用假流式请求模式(发送换行符保持连接)", extra=extra_log)
            logger.info(log_msg)

            try:
                # This branch no longer uses self.api_key directly; main.py
                # supplies and manages the API key list. Here we only keep the
                # connection alive by emitting newlines until the caller
                # cancels this generator.
                start_time = time.time()
                while True:
                    # Emit a newline as the keep-alive message.
                    yield "\n"
                    # Wait before the next keep-alive.
                    await asyncio.sleep(FAKE_STREAMING_INTERVAL)

                    # Safety valve: never wait longer than 300 seconds.
                    if time.time() - start_time > 300:
                        log_msg = format_log_message('WARNING', "假流式请求等待时间过长,强制结束", extra=extra_log)
                        logger.warning(log_msg)
                        # Raise a timeout for the caller to handle.
                        error_msg = "假流式请求等待时间过长,所有API密钥均已尝试"
                        extra_log_timeout = {'key': self.api_key[:8], 'request_type': 'fake-stream', 'model': request.model, 'status_code': 'TIMEOUT', 'error_message': error_msg}
                        log_msg = format_log_message('ERROR', error_msg, extra=extra_log_timeout)
                        logger.error(log_msg)
                        raise TimeoutError(error_msg)

            except Exception as e:
                # Skip logging for cancellation. NOTE(review): since Python 3.8
                # asyncio.CancelledError derives from BaseException and is never
                # caught by `except Exception`, so this guard is defensive only.
                if not isinstance(e, asyncio.CancelledError):
                    error_msg = f"假流式处理期间发生错误: {str(e)}"
                    extra_log_error = {'key': self.api_key[:8], 'request_type': 'fake-stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
                    log_msg = format_log_message('ERROR', error_msg, extra=extra_log_error)
                    logger.error(log_msg)
                raise e
            finally:
                log_msg = format_log_message('INFO', "假流式请求结束", extra=extra_log)
                logger.info(log_msg)
        else:
            # Real SSE streaming against the Gemini API.
            # "think" models are only available on the v1alpha API surface.
            api_version = "v1alpha" if "think" in request.model else "v1beta"
            url = f"https://generativelanguage.googleapis.com/{api_version}/models/{request.model}:streamGenerateContent?key={self.api_key}&alt=sse"
            headers = {
                "Content-Type": "application/json",
            }
            data = {
                "contents": contents,
                "generationConfig": {
                    "temperature": request.temperature,
                    "maxOutputTokens": request.max_tokens,
                },
                "safetySettings": safety_settings,
            }
            if system_instruction:
                data["system_instruction"] = system_instruction

            async with httpx.AsyncClient() as client:
                async with client.stream("POST", url, headers=headers, json=data, timeout=600) as response:
                    # Accumulates partial SSE payloads until they parse as JSON.
                    buffer = b""
                    try:
                        async for line in response.aiter_lines():
                            if not line.strip():
                                continue
                            if line.startswith("data: "):
                                line = line[len("data: "):]
                            buffer += line.encode('utf-8')
                            try:
                                # NOTE(review): rebinding `data` here shadows the
                                # request payload built above.
                                data = json.loads(buffer.decode('utf-8'))
                                buffer = b""
                                if 'candidates' in data and data['candidates']:
                                    candidate = data['candidates'][0]
                                    if 'content' in candidate:
                                        content = candidate['content']
                                        if 'parts' in content and content['parts']:
                                            parts = content['parts']
                                            text = ""
                                            for part in parts:
                                                if 'text' in part:
                                                    text += part['text']
                                            if text:
                                                yield text

                                    # Any finish reason other than STOP means the
                                    # response was truncated — surface as an error.
                                    if candidate.get("finishReason") and candidate.get("finishReason") != "STOP":
                                        error_msg = f"模型的响应被截断: {candidate.get('finishReason')}"
                                        extra_log_error = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
                                        log_msg = format_log_message('WARNING', error_msg, extra=extra_log_error)
                                        logger.warning(log_msg)
                                        raise ValueError(error_msg)

                                    # High-probability safety flags also abort the stream.
                                    if 'safetyRatings' in candidate:
                                        for rating in candidate['safetyRatings']:
                                            if rating['probability'] == 'HIGH':
                                                error_msg = f"模型的响应被截断: {rating['category']}"
                                                extra_log_safety = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
                                                log_msg = format_log_message('WARNING', error_msg, extra=extra_log_safety)
                                                logger.warning(log_msg)
                                                raise ValueError(error_msg)
                            except json.JSONDecodeError:
                                # Incomplete JSON chunk — keep buffering.
                                continue
                            except Exception as e:
                                error_msg = f"流式处理期间发生错误: {str(e)}"
                                extra_log_stream_error = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
                                log_msg = format_log_message('ERROR', error_msg, extra=extra_log_stream_error)
                                logger.error(log_msg)
                                raise e
                    except Exception as e:
                        raise e
                    finally:
                        log_msg = format_log_message('INFO', "流式请求结束", extra=extra_log)
                        logger.info(log_msg)

    def complete_chat(self, request: ChatCompletionRequest, contents, safety_settings, system_instruction):
        """Perform a blocking (non-stream) generateContent request.

        Returns a ResponseWrapper; raises on HTTP or network errors.
        """
        extra_log = {'key': self.api_key[:8], 'request_type': 'non-stream', 'model': request.model, 'status_code': 'N/A'}
        log_msg = format_log_message('INFO', "非流式请求开始", extra=extra_log)
        logger.info(log_msg)

        # "think" models are only available on the v1alpha API surface.
        api_version = "v1alpha" if "think" in request.model else "v1beta"
        url = f"https://generativelanguage.googleapis.com/{api_version}/models/{request.model}:generateContent?key={self.api_key}"
        headers = {
            "Content-Type": "application/json",
        }
        data = {
            "contents": contents,
            "generationConfig": {
                "temperature": request.temperature,
                "maxOutputTokens": request.max_tokens,
            },
            "safetySettings": safety_settings,
        }
        if system_instruction:
            data["system_instruction"] = system_instruction

        try:
            response = requests.post(url, headers=headers, json=data)
            response.raise_for_status()

            log_msg = format_log_message('INFO', "非流式请求成功完成", extra=extra_log)
            logger.info(log_msg)

            return ResponseWrapper(response.json())
        except Exception as e:
            raise

    def convert_messages(self, messages, use_system_prompt=False):
        """Convert OpenAI-style messages to Gemini history format.

        Returns a list of error strings when any message is invalid, otherwise
        a ``(gemini_history, system_instruction)`` tuple. When
        ``use_system_prompt`` is True, leading 'system' messages are folded
        into the system instruction instead of the history.
        """
        gemini_history = []
        errors = []
        system_instruction_text = ""
        # While in the system phase, consecutive leading 'system' messages are
        # accumulated into system_instruction_text.
        is_system_phase = use_system_prompt
        for i, message in enumerate(messages):
            role = message.role
            content = message.content

            if isinstance(content, str):
                if is_system_phase and role == 'system':
                    # Merge consecutive system messages with newlines.
                    if system_instruction_text:
                        system_instruction_text += "\n" + content
                    else:
                        system_instruction_text = content
                else:
                    is_system_phase = False

                    # Gemini only knows 'user' and 'model' roles.
                    if role in ['user', 'system']:
                        role_to_use = 'user'
                    elif role == 'assistant':
                        role_to_use = 'model'
                    else:
                        errors.append(f"Invalid role: {role}")
                        continue

                    # Merge consecutive same-role messages into one turn.
                    if gemini_history and gemini_history[-1]['role'] == role_to_use:
                        gemini_history[-1]['parts'].append({"text": content})
                    else:
                        gemini_history.append(
                            {"role": role_to_use, "parts": [{"text": content}]})
            elif isinstance(content, list):
                # Multimodal content: a list of text and image_url items.
                parts = []
                for item in content:
                    if item.get('type') == 'text':
                        parts.append({"text": item.get('text')})
                    elif item.get('type') == 'image_url':
                        image_data = item.get('image_url', {}).get('url', '')
                        if image_data.startswith('data:image/'):
                            # Split a data URI into its MIME type and base64 payload.
                            try:
                                mime_type, base64_data = image_data.split(';')[0].split(':')[1], image_data.split(',')[1]
                                parts.append({
                                    "inline_data": {
                                        "mime_type": mime_type,
                                        "data": base64_data
                                    }
                                })
                            except (IndexError, ValueError):
                                errors.append(
                                    f"Invalid data URI for image: {image_data}")
                        else:
                            errors.append(
                                f"Invalid image URL format for item: {item}")

                if parts:
                    if role in ['user', 'system']:
                        role_to_use = 'user'
                    elif role == 'assistant':
                        role_to_use = 'model'
                    else:
                        errors.append(f"Invalid role: {role}")
                        continue
                    if gemini_history and gemini_history[-1]['role'] == role_to_use:
                        gemini_history[-1]['parts'].extend(parts)
                    else:
                        gemini_history.append(
                            {"role": role_to_use, "parts": parts})
        if errors:
            return errors
        else:
            return gemini_history, {"parts": [{"text": system_instruction_text}]}

    @staticmethod
    async def list_available_models(api_key) -> list:
        """Fetch model names from the Gemini API and append EXTRA_MODELS."""
        url = "https://generativelanguage.googleapis.com/v1beta/models?key={}".format(
            api_key)
        async with httpx.AsyncClient() as client:
            response = await client.get(url)
            response.raise_for_status()
            data = response.json()
            models = [model["name"] for model in data.get("models", [])]
            models.extend(GeminiClient.EXTRA_MODELS)
            return models
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hajimi.v0.0.3/app/index.html DELETED
@@ -1,306 +0,0 @@
1
- <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <title>Gemini API 代理服务</title>
5
- <meta charset="UTF-8">
6
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
- <style>
8
- body {
9
- font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
10
- max-width: 1200px;
11
- margin: 0 auto;
12
- padding: 20px;
13
- line-height: 1.6;
14
- background-color: #f8f9fa;
15
- }
16
- h1, h2, h3 {
17
- color: #333;
18
- text-align: center;
19
- margin-bottom: 20px;
20
- }
21
- .info-box {
22
- background-color: #fff;
23
- border: 1px solid #dee2e6;
24
- border-radius: 8px;
25
- padding: 20px;
26
- margin-bottom: 20px;
27
- box-shadow: 0 2px 4px rgba(0,0,0,0.05);
28
- }
29
- .status {
30
- color: #28a745;
31
- font-weight: bold;
32
- font-size: 18px;
33
- margin-bottom: 20px;
34
- text-align: center;
35
- }
36
- .stats-grid {
37
- display: grid;
38
- grid-template-columns: repeat(3, 1fr);
39
- gap: 15px;
40
- margin-top: 15px;
41
- margin-bottom: 20px;
42
- }
43
- .stat-card {
44
- background-color: #e9ecef;
45
- padding: 15px;
46
- border-radius: 8px;
47
- text-align: center;
48
- box-shadow: 0 2px 4px rgba(0,0,0,0.05);
49
- transition: transform 0.2s;
50
- }
51
- .stat-card:hover {
52
- transform: translateY(-2px);
53
- box-shadow: 0 4px 8px rgba(0,0,0,0.1);
54
- }
55
- .stat-value {
56
- font-size: 24px;
57
- font-weight: bold;
58
- color: #007bff;
59
- }
60
- .stat-label {
61
- font-size: 14px;
62
- color: #6c757d;
63
- margin-top: 5px;
64
- }
65
- .section-title {
66
- color: #495057;
67
- border-bottom: 1px solid #dee2e6;
68
- padding-bottom: 10px;
69
- margin-bottom: 20px;
70
- }
71
- .log-container {
72
- background-color: #f5f5f5;
73
- border: 1px solid #ddd;
74
- border-radius: 8px;
75
- padding: 15px;
76
- margin-top: 20px;
77
- max-height: 500px;
78
- overflow-y: auto;
79
- font-family: monospace;
80
- font-size: 14px;
81
- line-height: 1.5;
82
- }
83
- .log-entry {
84
- margin-bottom: 8px;
85
- padding: 8px;
86
- border-radius: 4px;
87
- }
88
- .log-entry.INFO {
89
- background-color: #e8f4f8;
90
- border-left: 4px solid #17a2b8;
91
- }
92
- .log-entry.WARNING {
93
- background-color: #fff3cd;
94
- border-left: 4px solid #ffc107;
95
- }
96
- .log-entry.ERROR {
97
- background-color: #f8d7da;
98
- border-left: 4px solid #dc3545;
99
- }
100
- .log-entry.DEBUG {
101
- background-color: #d1ecf1;
102
- border-left: 4px solid #17a2b8;
103
- }
104
- .log-timestamp {
105
- color: #6c757d;
106
- font-size: 12px;
107
- margin-right: 10px;
108
- }
109
- .log-level {
110
- font-weight: bold;
111
- margin-right: 10px;
112
- }
113
- .log-level.INFO {
114
- color: #17a2b8;
115
- }
116
- .log-level.WARNING {
117
- color: #ffc107;
118
- }
119
- .log-level.ERROR {
120
- color: #dc3545;
121
- }
122
- .log-level.DEBUG {
123
- color: #17a2b8;
124
- }
125
- .log-message {
126
- color: #212529;
127
- }
128
- .refresh-button {
129
- display: block;
130
- margin: 20px auto;
131
- padding: 10px 20px;
132
- background-color: #007bff;
133
- color: white;
134
- border: none;
135
- border-radius: 4px;
136
- font-size: 16px;
137
- cursor: pointer;
138
- transition: background-color 0.2s;
139
- }
140
- .refresh-button:hover {
141
- background-color: #0069d9;
142
- }
143
- .log-filter {
144
- display: flex;
145
- justify-content: center;
146
- margin-bottom: 15px;
147
- gap: 10px;
148
- }
149
- .log-filter button {
150
- padding: 5px 10px;
151
- border: 1px solid #ddd;
152
- border-radius: 4px;
153
- background-color: #f8f9fa;
154
- cursor: pointer;
155
- }
156
- .log-filter button.active {
157
- background-color: #007bff;
158
- color: white;
159
- border-color: #007bff;
160
- }
161
- </style>
162
- </head>
163
- <body>
164
- <h1>🤖 Gemini API 代理服务</h1>
165
-
166
- <div class="info-box">
167
- <h2 class="section-title">🟢 运行状态</h2>
168
- <p class="status">服务运行中</p>
169
-
170
- <div class="stats-grid">
171
- <div class="stat-card">
172
- <div class="stat-value">{{ key_count }}</div>
173
- <div class="stat-label">可用API密钥数量</div>
174
- </div>
175
- <div class="stat-card">
176
- <div class="stat-value">{{ model_count }}</div>
177
- <div class="stat-label">可用模型数量</div>
178
- </div>
179
- <div class="stat-card">
180
- <div class="stat-value">{{ retry_count }}</div>
181
- <div class="stat-label">最大重试次数</div>
182
- </div>
183
- </div>
184
-
185
- <h3 class="section-title">API调用统计</h3>
186
- <div class="stats-grid">
187
- <div class="stat-card">
188
- <div class="stat-value">{{ last_24h_calls }}</div>
189
- <div class="stat-label">24小时内调用次数</div>
190
- </div>
191
- <div class="stat-card">
192
- <div class="stat-value">{{ hourly_calls }}</div>
193
- <div class="stat-label">一小时内调用次数</div>
194
- </div>
195
- <div class="stat-card">
196
- <div class="stat-value">{{ minute_calls }}</div>
197
- <div class="stat-label">一分钟内调用次数</div>
198
- </div>
199
- </div>
200
- </div>
201
-
202
- <div class="info-box">
203
- <h2 class="section-title">⚙️ 环境配置</h2>
204
- <div class="stats-grid">
205
- <div class="stat-card">
206
- <div class="stat-value">{{ max_requests_per_minute }}</div>
207
- <div class="stat-label">每分钟请求限制</div>
208
- </div>
209
- <div class="stat-card">
210
- <div class="stat-value">{{ max_requests_per_day_per_ip }}</div>
211
- <div class="stat-label">每IP每日请求限制</div>
212
- </div>
213
- <div class="stat-card">
214
- <div class="stat-value">{{ current_time }}</div>
215
- <div class="stat-label">当前服务器时间</div>
216
- </div>
217
- </div>
218
- </div>
219
-
220
- <div class="info-box">
221
- <h2 class="section-title">📦 版本信息</h2>
222
- <div class="version-info" style="text-align: center; margin-bottom: 15px;">
223
- <div style="font-size: 18px; margin-bottom: 10px;">
224
- 当前版本: <span style="font-weight: bold; color: #007bff;">{{ local_version }}</span>
225
- </div>
226
- {% if has_update %}
227
- <div style="display: flex; align-items: center; justify-content: center; margin-top: 15px;">
228
- <div style="background-color: #fef6e0; border: 1px solid #ffeeba; border-radius: 4px; padding: 10px 15px; display: inline-flex; align-items: center;">
229
- <span style="color: #ff9800; margin-right: 10px;">
230
- <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
231
- <circle cx="12" cy="12" r="10"></circle>
232
- <line x1="12" y1="8" x2="12" y2="12"></line>
233
- <line x1="12" y1="16" x2="12.01" y2="16"></line>
234
- </svg>
235
- </span>
236
- <span>
237
- <strong>发现新版本!</strong> 最新版本: <span style="font-weight: bold; color: #28a745;">{{ remote_version }}</span>
238
- </span>
239
- </div>
240
- </div>
241
- {% endif %}
242
- </div>
243
- </div>
244
-
245
- <div class="info-box">
246
- <h2 class="section-title">系统日志</h2>
247
- <div class="log-filter">
248
- <button class="active" data-level="ALL">全部</button>
249
- <button data-level="INFO">信息</button>
250
- <button data-level="WARNING">警告</button>
251
- <button data-level="ERROR">错误</button>
252
- </div>
253
- <div class="log-container">
254
- {% for log in logs %}
255
- <div class="log-entry {{ log.level }}" data-level="{{ log.level }}">
256
- <span class="log-timestamp">{{ log.timestamp }}</span>
257
- <span class="log-level {{ log.level }}">{{ log.level }}</span>
258
- <span class="log-message">
259
- {% if log.key != 'N/A' %}[{{ log.key }}]{% endif %}
260
- {% if log.request_type != 'N/A' %}{{ log.request_type }}{% endif %}
261
- {% if log.model != 'N/A' %}[{{ log.model }}]{% endif %}
262
- {% if log.status_code != 'N/A' %}{{ log.status_code }}{% endif %}
263
- : {{ log.message }}
264
- {% if log.error_message %}
265
- - {{ log.error_message }}
266
- {% endif %}
267
- </span>
268
- </div>
269
- {% endfor %}
270
- </div>
271
- <button class="refresh-button" onclick="window.location.reload()">刷新日志</button>
272
- </div>
273
-
274
- <script>
275
- // 日志过滤功能
276
- document.querySelectorAll('.log-filter button').forEach(button => {
277
- button.addEventListener('click', function() {
278
- // 移除所有按钮的active类
279
- document.querySelectorAll('.log-filter button').forEach(btn => {
280
- btn.classList.remove('active');
281
- });
282
-
283
- // 为当前按钮添加active类
284
- this.classList.add('active');
285
-
286
- const level = this.getAttribute('data-level');
287
-
288
- // 显示或隐藏日志条目
289
- document.querySelectorAll('.log-entry').forEach(entry => {
290
- if (level === 'ALL' || entry.getAttribute('data-level') === level) {
291
- entry.style.display = 'block';
292
- } else {
293
- entry.style.display = 'none';
294
- }
295
- });
296
- });
297
- });
298
-
299
- // 页面加载时自动滚动到日志底部
300
- window.onload = function() {
301
- const logContainer = document.querySelector('.log-container');
302
- logContainer.scrollTop = logContainer.scrollHeight;
303
- };
304
- </script>
305
- </body>
306
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hajimi.v0.0.3/app/main.py DELETED
@@ -1,1506 +0,0 @@
1
- from fastapi import FastAPI, HTTPException, Request, Depends, status
2
- from fastapi.responses import JSONResponse, StreamingResponse, HTMLResponse
3
- from fastapi.staticfiles import StaticFiles
4
- from fastapi.templating import Jinja2Templates
5
- from .models import ChatCompletionRequest, ChatCompletionResponse, ErrorResponse, ModelList
6
- from .gemini import GeminiClient, ResponseWrapper
7
- from .utils import handle_gemini_error, protect_from_abuse, APIKeyManager, test_api_key, format_log_message, log_manager
8
- import os
9
- import json
10
- import asyncio
11
- from typing import Literal, Dict, Any, Optional
12
- import random
13
- import requests
14
- from datetime import datetime, timedelta
15
- from apscheduler.schedulers.background import BackgroundScheduler
16
- import sys
17
- import logging
18
- from collections import defaultdict
19
- import pathlib
20
- import hashlib
21
- import time
22
# Whether fake streaming is enabled (keep-alive newlines while the real
# request completes). Default: enabled. Mirrors app.gemini's setting.
FAKE_STREAMING = os.environ.get("FAKE_STREAMING", "true").lower() in ["true", "1", "yes"]
# Interval (seconds) between keep-alive newlines in fake-streaming mode.
FAKE_STREAMING_INTERVAL = float(os.environ.get("FAKE_STREAMING_INTERVAL", "1"))
# Silence uvicorn's own loggers; this app performs its own request logging.
logging.getLogger("uvicorn").disabled = True
logging.getLogger("uvicorn.access").disabled = True

# Configure the application logger.
logger = logging.getLogger("my_logger")
logger.setLevel(logging.DEBUG)

# Template directory: the app/ package directory itself (index.html lives here).
BASE_DIR = pathlib.Path(__file__).parent
templates = Jinja2Templates(directory=str(BASE_DIR))

app = FastAPI()
37
-
38
- # --------------- 缓存管理类 ---------------
39
-
40
class ResponseCacheManager:
    """Manage cached API responses with per-entry expiry and a size cap.

    All cache access elsewhere in this module should go through this class
    rather than touching the backing dict directly.
    """

    def __init__(self, expiry_time: int, max_entries: int, remove_after_use: bool = True,
                 cache_dict: Dict[str, Dict[str, Any]] = None):
        # Use the provided backing dict (shared with module-level code) or create one.
        self.cache = cache_dict if cache_dict is not None else {}
        self.expiry_time = expiry_time      # seconds a cached entry stays valid
        self.max_entries = max_entries      # cap on number of cached entries
        # NOTE(review): stored but never consulted in get() — confirm whether
        # one-shot cache removal was intended here.
        self.remove_after_use = remove_after_use

    def get(self, cache_key: str):
        """Return (response, True) when present and unexpired, else (None, False)."""
        now = time.time()
        if cache_key in self.cache and now < self.cache[cache_key].get('expiry_time', 0):
            cached_item = self.cache[cache_key]

            # Fetch the response without deleting the entry here.
            response = cached_item['response']

            # Return the cached response.
            return response, True

        return None, False

    def store(self, cache_key: str, response, client_ip: str = None):
        """Store a response, stamping creation and expiry times."""
        now = time.time()
        self.cache[cache_key] = {
            'response': response,
            'expiry_time': now + self.expiry_time,
            'created_at': now,
            'client_ip': client_ip
        }

        log('info', f"响应已缓存: {cache_key[:8]}...",
            extra={'cache_operation': 'store', 'request_type': 'non-stream'})

        # Evict oldest entries if the cache is over capacity.
        self.clean_if_needed()

    def clean_expired(self):
        """Drop every expired cache entry."""
        now = time.time()
        expired_keys = [k for k, v in self.cache.items() if now > v.get('expiry_time', 0)]

        for key in expired_keys:
            del self.cache[key]
            log('info', f"清理过期缓存: {key[:8]}...", extra={'cache_operation': 'clean'})

    def clean_if_needed(self):
        """Evict the oldest entries when the cache exceeds max_entries."""
        if len(self.cache) <= self.max_entries:
            return

        # Sort keys by creation time, oldest first.
        sorted_keys = sorted(self.cache.keys(),
                             key=lambda k: self.cache[k].get('created_at', 0))

        # Number of entries that must be removed.
        to_remove = len(self.cache) - self.max_entries

        # Delete the oldest entries.
        for key in sorted_keys[:to_remove]:
            del self.cache[key]
            log('info', f"缓存容量限制,删除旧缓存: {key[:8]}...", extra={'cache_operation': 'limit'})
105
-
106
- # --------------- 活跃请求管理类 ---------------
107
-
108
class ActiveRequestsManager:
    """Track in-flight asyncio request tasks keyed by request signature."""

    def __init__(self, requests_pool: Dict[str, asyncio.Task] = None):
        # Backing dict for active tasks (shared with module-level code if given).
        self.active_requests = requests_pool if requests_pool is not None else {}

    def add(self, key: str, task: asyncio.Task):
        """Register a new active task, stamping it with its creation time."""
        task.creation_time = time.time()  # consumed later by clean_long_running()
        self.active_requests[key] = task

    def get(self, key: str):
        """Return the active task for key, or None."""
        return self.active_requests.get(key)

    def remove(self, key: str):
        """Remove a task by key; return True when something was removed."""
        if key in self.active_requests:
            del self.active_requests[key]
            return True
        return False

    def remove_by_prefix(self, prefix: str):
        """Remove every task whose key starts with prefix; return the count removed."""
        keys_to_remove = [k for k in self.active_requests.keys() if k.startswith(prefix)]
        for key in keys_to_remove:
            self.remove(key)
        return len(keys_to_remove)

    def clean_completed(self):
        """Drop every task that has finished or been cancelled."""
        keys_to_remove = []

        for key, task in self.active_requests.items():
            if task.done() or task.cancelled():
                keys_to_remove.append(key)

        for key in keys_to_remove:
            self.remove(key)

        # if keys_to_remove:
        #     log('info', f"清理已完成请求任务: {len(keys_to_remove)}个", cleanup='active_requests')

    def clean_long_running(self, max_age_seconds: int = 300):
        """Cancel tasks that have been running longer than max_age_seconds.

        NOTE(review): cancelled tasks stay in the pool here — presumably a
        later clean_completed() sweep removes them; confirm that ordering.
        """
        now = time.time()
        long_running_keys = []

        for key, task in list(self.active_requests.items()):
            if (hasattr(task, 'creation_time') and
                    task.creation_time < now - max_age_seconds and
                    not task.done() and not task.cancelled()):

                long_running_keys.append(key)
                task.cancel()  # cancel the long-running task

        if long_running_keys:
            log('warning', f"取消长时间运行的任务: {len(long_running_keys)}个", cleanup='long_running_tasks')
166
-
167
-
168
-
169
# --------------- Global configuration ---------------

# Web-UI password; surrounding double quotes (if any) are stripped.
PASSWORD = os.environ.get("PASSWORD", "123").strip('"')
MAX_REQUESTS_PER_MINUTE = int(os.environ.get("MAX_REQUESTS_PER_MINUTE", "30"))
MAX_REQUESTS_PER_DAY_PER_IP = int(
    os.environ.get("MAX_REQUESTS_PER_DAY_PER_IP", "600"))
# MAX_RETRIES = int(os.environ.get('MaxRetries', '3').strip() or '3')
# Retry pacing bounds (seconds).
RETRY_DELAY = 1
MAX_RETRY_DELAY = 16  # fix: this constant was assigned twice in a row

# Safety settings disabling all content blocking (Gemini 1.x threshold syntax).
safety_settings = [
    {
        "category": "HARM_CATEGORY_HARASSMENT",
        "threshold": "BLOCK_NONE"
    },
    {
        "category": "HARM_CATEGORY_HATE_SPEECH",
        "threshold": "BLOCK_NONE"
    },
    {
        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "threshold": "BLOCK_NONE"
    },
    {
        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
        "threshold": "BLOCK_NONE"
    },
    {
        "category": 'HARM_CATEGORY_CIVIC_INTEGRITY',
        "threshold": 'BLOCK_NONE'
    }
]
# Gemini 2.x models use "OFF" rather than "BLOCK_NONE" for the same effect.
safety_settings_g2 = [
    {
        "category": "HARM_CATEGORY_HARASSMENT",
        "threshold": "OFF"
    },
    {
        "category": "HARM_CATEGORY_HATE_SPEECH",
        "threshold": "OFF"
    },
    {
        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "threshold": "OFF"
    },
    {
        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
        "threshold": "OFF"
    },
    {
        "category": 'HARM_CATEGORY_CIVIC_INTEGRITY',
        "threshold": 'OFF'
    }
]
223
-
224
# API key manager; the key stack is initialized inside its __init__.
key_manager = APIKeyManager()
current_api_key = key_manager.get_available_key()

# Response-cache configuration.
CACHE_EXPIRY_TIME = int(os.environ.get("CACHE_EXPIRY_TIME", "1200"))  # default: 20 minutes
MAX_CACHE_ENTRIES = int(os.environ.get("MAX_CACHE_ENTRIES", "500"))  # default: at most 500 cached responses
REMOVE_CACHE_AFTER_USE = os.environ.get("REMOVE_CACHE_AFTER_USE", "true").lower() in ["true", "1", "yes"]

# Backing store for the response cache.
# NOTE: always go through response_cache_manager; never touch this dict directly.
response_cache: Dict[str, Dict[str, Any]] = {}

# Cache manager wrapping the shared dict above.
response_cache_manager = ResponseCacheManager(
    expiry_time=CACHE_EXPIRY_TIME,
    max_entries=MAX_CACHE_ENTRIES,
    remove_after_use=REMOVE_CACHE_AFTER_USE,
    cache_dict=response_cache  # same dict instance, kept consistent by construction
)

# Backing pool for in-flight request tasks.
# NOTE: always go through active_requests_manager; never touch this dict directly.
active_requests_pool: Dict[str, asyncio.Task] = {}

# Manager wrapping the shared pool above.
active_requests_manager = ActiveRequestsManager(requests_pool=active_requests_pool)

# Rolling API-call counters, keyed by formatted time-bucket strings.
api_call_stats = {
    'last_24h': defaultdict(int),  # per-hour buckets covering the past 24 hours
    'hourly': defaultdict(int),    # per-hour buckets covering the past hour
    'minute': defaultdict(int),    # per-minute buckets covering the past minute
}
257
-
258
# Purge counter buckets that have aged out of their window.
def clean_expired_stats():
    """Remove expired bucket keys from all three api_call_stats windows."""
    now = datetime.now()

    # Drop hour buckets older than 24 hours.
    for hour_key in list(api_call_stats['last_24h'].keys()):
        try:
            hour_time = datetime.strptime(hour_key, '%Y-%m-%d %H:00')
            if (now - hour_time).total_seconds() > 24 * 3600:  # older than 24 h
                del api_call_stats['last_24h'][hour_key]
        except ValueError:
            # Malformed bucket key — just drop it.
            del api_call_stats['last_24h'][hour_key]

    # Drop hour buckets older than one hour.
    one_hour_ago = now - timedelta(hours=1)
    for hour_key in list(api_call_stats['hourly'].keys()):
        try:
            hour_time = datetime.strptime(hour_key, '%Y-%m-%d %H:00')
            if hour_time < one_hour_ago:
                del api_call_stats['hourly'][hour_key]
        except ValueError:
            # Malformed bucket key — just drop it.
            del api_call_stats['hourly'][hour_key]

    # Drop minute buckets older than one minute.
    one_minute_ago = now - timedelta(minutes=1)
    for minute_key in list(api_call_stats['minute'].keys()):
        try:
            minute_time = datetime.strptime(minute_key, '%Y-%m-%d %H:%M')
            if minute_time < one_minute_ago:
                del api_call_stats['minute'][minute_key]
        except ValueError:
            # Malformed bucket key — just drop it.
            del api_call_stats['minute'][minute_key]
293
-
294
- # 更新API调用统计的函数
295
- def update_api_call_stats():
296
- now = datetime.now()
297
- hour_key = now.strftime('%Y-%m-%d %H:00')
298
- minute_key = now.strftime('%Y-%m-%d %H:%M')
299
-
300
- # 检查并清理过期统计
301
- clean_expired_stats()
302
-
303
- # 更新统计
304
- api_call_stats['last_24h'][hour_key] += 1
305
- api_call_stats['hourly'][hour_key] += 1
306
- api_call_stats['minute'][minute_key] += 1
307
-
308
- log('info', "API调用统计已更新: 24小时=%s, 1小时=%s, 1分钟=%s" % (sum(api_call_stats['last_24h'].values()), sum(api_call_stats['hourly'].values()), sum(api_call_stats['minute'].values())))
309
-
310
-
311
- def switch_api_key():
312
- global current_api_key
313
- key = key_manager.get_available_key() # get_available_key 会处理栈的逻辑
314
- if key:
315
- current_api_key = key
316
- log('info', f"API key 替换为 → {current_api_key[:8]}...", extra={'key': current_api_key[:8], 'request_type': 'switch_key'})
317
- else:
318
- log('error', "API key 替换失败,所有API key都已尝试,请重新配置或稍后重试", extra={'key': 'N/A', 'request_type': 'switch_key', 'status_code': 'N/A'})
319
-
320
-
321
- async def check_keys():
322
- available_keys = []
323
- for key in key_manager.api_keys:
324
- is_valid = await test_api_key(key)
325
- status_msg = "有效" if is_valid else "无效"
326
- log('info', f"API Key {key[:10]}... {status_msg}.")
327
- if is_valid:
328
- available_keys.append(key)
329
- if not available_keys:
330
- log('error', "没有可用的 API 密钥!", extra={'key': 'N/A', 'request_type': 'startup', 'status_code': 'N/A'})
331
- return available_keys
332
-
333
-
334
- # 存储版本信息的全局变量
335
- local_version = "0.0.0"
336
- remote_version = "0.0.0"
337
- has_update = False
338
-
339
- # 检查版本更新
340
- async def check_version():
341
- global local_version, remote_version, has_update
342
- try:
343
- # 读取本地版本
344
- with open("version.txt", "r") as f:
345
- version_line = f.read().strip()
346
- local_version = version_line.split("=")[1] if "=" in version_line else "0.0.0"
347
-
348
- # 获取远程版本
349
- github_url = "https://raw.githubusercontent.com/wyeeeee/hajimi/refs/heads/main/version.txt"
350
- response = requests.get(github_url, timeout=5)
351
- if response.status_code == 200:
352
- version_line = response.text.strip()
353
- remote_version = version_line.split("=")[1] if "=" in version_line else "0.0.0"
354
-
355
- # 比较版本号
356
- local_parts = [int(x) for x in local_version.split(".")]
357
- remote_parts = [int(x) for x in remote_version.split(".")]
358
-
359
- # 确保两个列表长度相同
360
- while len(local_parts) < len(remote_parts):
361
- local_parts.append(0)
362
- while len(remote_parts) < len(local_parts):
363
- remote_parts.append(0)
364
-
365
- # 比较版本号
366
- for i in range(len(local_parts)):
367
- if remote_parts[i] > local_parts[i]:
368
- has_update = True
369
- break
370
- elif remote_parts[i] < local_parts[i]:
371
- break
372
-
373
- log('info', f"版本检查: 本地版本 {local_version}, 远程版本 {remote_version}, 有更新: {has_update}")
374
- else:
375
- log('warning', f"无法获取远程版本信息,HTTP状态码: {response.status_code}")
376
- except Exception as e:
377
- log('error', f"版本检查失败: {str(e)}")
378
-
379
-
380
- # --------------- 工具函数 ---------------
381
-
382
- def log(level: str, message: str, **extra):
383
- """简化日志记录的统一函数"""
384
- msg = format_log_message(level.upper(), message, extra=extra)
385
- getattr(logger, level.lower())(msg)
386
-
387
- def translate_error(message: str) -> str:
388
- if "quota exceeded" in message.lower():
389
- return "API 密钥配额已用尽"
390
- if "invalid argument" in message.lower():
391
- return "无效参数"
392
- if "internal server error" in message.lower():
393
- return "服务器内部错误"
394
- if "service unavailable" in message.lower():
395
- return "服务不可用"
396
- return message
397
-
398
- def create_chat_response(model: str, choices: list, id: str = None) -> ChatCompletionResponse:
399
- """创建标准响应对象的工厂函数"""
400
- return ChatCompletionResponse(
401
- id=id or f"chatcmpl-{int(time.time()*1000)}",
402
- object="chat.completion",
403
- created=int(time.time()),
404
- model=model,
405
- choices=choices
406
- )
407
-
408
- def create_error_response(model: str, error_message: str) -> ChatCompletionResponse:
409
- """创建错误响应对象的工厂函数"""
410
- return create_chat_response(
411
- model=model,
412
- choices=[{
413
- "index": 0,
414
- "message": {
415
- "role": "assistant",
416
- "content": error_message
417
- },
418
- "finish_reason": "error"
419
- }]
420
- )
421
-
422
- def handle_exception(exc_type, exc_value, exc_traceback):
423
- if issubclass(exc_type, KeyboardInterrupt):
424
- sys.excepthook(exc_type, exc_value, exc_traceback)
425
- return
426
- error_message = translate_error(str(exc_value))
427
- log('error', f"未捕获的异常: {error_message}", status_code=500, error_message=error_message)
428
-
429
- sys.excepthook = handle_exception
430
-
431
- @app.on_event("startup")
432
- async def startup_event():
433
- log('info', "Starting Gemini API proxy...")
434
-
435
- # 启动缓存清理定时任务
436
- schedule_cache_cleanup()
437
-
438
- # 检查版本
439
- await check_version()
440
-
441
- available_keys = await check_keys()
442
- if available_keys:
443
- key_manager.api_keys = available_keys
444
- key_manager._reset_key_stack() # 启动时也确保创建随机栈
445
- key_manager.show_all_keys()
446
- log('info', f"可用 API 密钥数量:{len(key_manager.api_keys)}")
447
- # MAX_RETRIES = len(key_manager.api_keys)
448
- log('info', f"最大重试次数设置为:{len(key_manager.api_keys)}") # 添加日志
449
- if key_manager.api_keys:
450
- all_models = await GeminiClient.list_available_models(key_manager.api_keys[0])
451
- GeminiClient.AVAILABLE_MODELS = [model.replace(
452
- "models/", "") for model in all_models]
453
- log('info', "Available models loaded.")
454
-
455
- @app.get("/v1/models", response_model=ModelList)
456
- def list_models():
457
- log('info', "Received request to list models", extra={'request_type': 'list_models', 'status_code': 200})
458
- return ModelList(data=[{"id": model, "object": "model", "created": 1678888888, "owned_by": "organization-owner"} for model in GeminiClient.AVAILABLE_MODELS])
459
-
460
-
461
- async def verify_password(request: Request):
462
- if PASSWORD:
463
- auth_header = request.headers.get("Authorization")
464
- if not auth_header or not auth_header.startswith("Bearer "):
465
- raise HTTPException(
466
- status_code=401, detail="Unauthorized: Missing or invalid token")
467
- token = auth_header.split(" ")[1]
468
- if token != PASSWORD:
469
- raise HTTPException(
470
- status_code=401, detail="Unauthorized: Invalid token")
471
-
472
-
473
- @app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
474
- async def chat_completions(request: ChatCompletionRequest, http_request: Request, _: None = Depends(verify_password)):
475
- # 获取客户端IP
476
- client_ip = http_request.client.host if http_request.client else "unknown"
477
-
478
- # 流式请求直接处理,不使用缓存
479
- if request.stream:
480
- return await process_request(request, http_request, "stream")
481
-
482
- # 生成完整缓存键 - 用于精确匹配
483
- cache_key = generate_cache_key(request)
484
-
485
- # 记录请求缓存键信息
486
- log('info', f"请求缓存键: {cache_key[:8]}...",
487
- extra={'cache_key': cache_key[:8], 'request_type': 'non-stream'})
488
-
489
- # 检查精确缓存是否存在且未过期
490
- cached_response, cache_hit = response_cache_manager.get(cache_key)
491
- if cache_hit:
492
- # 精确缓存命中
493
- log('info', f"精确缓存命中: {cache_key[:8]}...",
494
- extra={'cache_operation': 'hit', 'request_type': 'non-stream'})
495
-
496
- # 同时清理相关的活跃任务,避免后续请求等待已经不需要的任务
497
- active_requests_manager.remove_by_prefix(f"cache:{cache_key}")
498
-
499
- # 安全删除缓存
500
- if cache_key in response_cache_manager.cache:
501
- del response_cache_manager.cache[cache_key]
502
- log('info', f"缓存使用后已删除: {cache_key[:8]}...",
503
- extra={'cache_operation': 'used-and-removed', 'request_type': 'non-stream'})
504
-
505
- # 返回缓存响应
506
- return cached_response
507
-
508
- # 构建包含缓存键的活跃请求池键
509
- pool_key = f"cache:{cache_key}"
510
-
511
- # 查找所有使用相同缓存键的活跃任务
512
- active_task = active_requests_manager.get(pool_key)
513
- if active_task and not active_task.done():
514
- log('info', f"发现相同请求的进行中任务",
515
- extra={'request_type': 'non-stream', 'model': request.model})
516
-
517
- # 等待已有任务完成
518
- try:
519
- # 设置超时,避免无限等待
520
- await asyncio.wait_for(active_task, timeout=180)
521
-
522
- # 通过缓存管理器获取已完成任务的结果
523
- cached_response, cache_hit = response_cache_manager.get(cache_key)
524
- if cache_hit:
525
- # 安全删除缓存
526
- if cache_key in response_cache_manager.cache:
527
- del response_cache_manager.cache[cache_key]
528
- log('info', f"使用已完成任务的缓存后删除: {cache_key[:8]}...",
529
- extra={'cache_operation': 'used-and-removed', 'request_type': 'non-stream'})
530
-
531
- return cached_response
532
-
533
- # 如果缓存已被清除或不存在,使用任务结果
534
- if active_task.done() and not active_task.cancelled():
535
- result = active_task.result()
536
- if result:
537
- # log('info', f"使用已完成任务的原始结果",
538
- # extra={'request_type': 'non-stream', 'model': request.model})
539
-
540
- # 使用原始结果时,我们需要创建一个新的响应对象
541
- # 避免使用可能已被其他请求修改的对象
542
- new_response = ChatCompletionResponse(
543
- id=f"chatcmpl-{int(time.time()*1000)}",
544
- object="chat.completion",
545
- created=int(time.time()),
546
- model=result.model,
547
- choices=result.choices
548
- )
549
-
550
- # 不要缓存此结果,因为它很可能是一个已存在但被使用后清除的缓存
551
- return new_response
552
- except (asyncio.TimeoutError, asyncio.CancelledError) as e:
553
- # 任务超时或被取消的情况下,记录日志然后让代码继续执行
554
- error_type = "超时" if isinstance(e, asyncio.TimeoutError) else "被取消"
555
- log('warning', f"等待已有任务{error_type}: {pool_key}",
556
- extra={'request_type': 'non-stream', 'model': request.model})
557
-
558
- # 从活跃请求池移除该任务
559
- if active_task.done() or active_task.cancelled():
560
- active_requests_manager.remove(pool_key)
561
- log('info', f"已从活跃请求池移除{error_type}任务: {pool_key}",
562
- extra={'request_type': 'non-stream'})
563
-
564
- # 创建请求处理任务
565
- process_task = asyncio.create_task(
566
- process_request(request, http_request, "non-stream", cache_key=cache_key, client_ip=client_ip)
567
- )
568
-
569
- # 将任务添加到活跃请求池
570
- active_requests_manager.add(pool_key, process_task)
571
-
572
- # 等待任务完成
573
- try:
574
- response = await process_task
575
- return response
576
- except Exception as e:
577
- # 如果任务失败,从活跃请求池中移除
578
- active_requests_manager.remove(pool_key)
579
-
580
- # 检查是否已有缓存的结果(可能是由另一个任务创建的)
581
- cached_response, cache_hit = response_cache_manager.get(cache_key)
582
- if cache_hit:
583
- log('info', f"任务失败但找到缓存,使用缓存结果: {cache_key[:8]}...",
584
- extra={'request_type': 'non-stream', 'model': request.model})
585
- return cached_response
586
-
587
- # 重新抛出异常
588
- raise
589
-
590
- async def process_request(chat_request: ChatCompletionRequest, http_request: Request, request_type: Literal['stream', 'non-stream'], cache_key: str = None, client_ip: str = None):
591
- """处理API请求的主函数,根据需要处理流式或非流式请求"""
592
- global current_api_key
593
-
594
- # 请求前基本检查
595
- protect_from_abuse(
596
- http_request, MAX_REQUESTS_PER_MINUTE, MAX_REQUESTS_PER_DAY_PER_IP)
597
- if chat_request.model not in GeminiClient.AVAILABLE_MODELS:
598
- error_msg = "无效的模型"
599
- extra_log = {'request_type': request_type, 'model': chat_request.model, 'status_code': 400, 'error_message': error_msg}
600
- log('error', error_msg, extra=extra_log)
601
- raise HTTPException(
602
- status_code=status.HTTP_400_BAD_REQUEST, detail=error_msg)
603
-
604
- # 重置已尝试的密钥
605
- key_manager.reset_tried_keys_for_request()
606
-
607
- # 转换消息格式
608
- contents, system_instruction = GeminiClient.convert_messages(
609
- GeminiClient, chat_request.messages)
610
-
611
- # 设置重试次数(使用可用API密钥数量作为最大重试次数)
612
- retry_attempts = len(key_manager.api_keys) if key_manager.api_keys else 1
613
-
614
- # 尝试使用不同API密钥
615
- for attempt in range(1, retry_attempts + 1):
616
- # 获取下一个密钥
617
- current_api_key = key_manager.get_available_key()
618
-
619
- # 检查API密钥是否可用
620
- if current_api_key is None:
621
- log('warning', "没有可用的 API 密钥,跳过本次尝试",
622
- extra={'request_type': request_type, 'model': chat_request.model, 'status_code': 'N/A'})
623
- break
624
-
625
- # 记录当前尝试的密钥信息
626
- log('info', f"第 {attempt}/{retry_attempts} 次尝试 ... 使用密钥: {current_api_key[:8]}...",
627
- extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
628
-
629
- # 服务器错误重试逻辑
630
- server_error_retries = 3
631
- for server_retry in range(1, server_error_retries + 1):
632
- try:
633
- # 根据请求类型分别处理
634
- if chat_request.stream:
635
- try:
636
- return await process_stream_request(
637
- chat_request,
638
- http_request,
639
- contents,
640
- system_instruction,
641
- current_api_key
642
- )
643
- except Exception as e:
644
- # 捕获流式请求的异常,但不立即返回错误
645
- # 记录错误并继续尝试下一个API密钥
646
- error_detail = handle_gemini_error(e, current_api_key, key_manager)
647
- log('error', f"流式请求失败: {error_detail}",
648
- extra={'key': current_api_key[:8], 'request_type': 'stream', 'model': chat_request.model})
649
- # 不返回错误,而是抛出异常让外层循环处理
650
- raise
651
- else:
652
- return await process_nonstream_request(
653
- chat_request,
654
- http_request,
655
- request_type,
656
- contents,
657
- system_instruction,
658
- current_api_key,
659
- cache_key,
660
- client_ip
661
- )
662
- except HTTPException as e:
663
- if e.status_code == status.HTTP_408_REQUEST_TIMEOUT:
664
- log('error', "客户端连接中断",
665
- extra={'key': current_api_key[:8], 'request_type': request_type,
666
- 'model': chat_request.model, 'status_code': 408})
667
- raise
668
- else:
669
- raise
670
- except Exception as e:
671
- # 使用统一的API错误处理函数
672
- error_result = await handle_api_error(
673
- e,
674
- current_api_key,
675
- key_manager,
676
- request_type,
677
- chat_request.model,
678
- server_retry - 1
679
- )
680
-
681
- # 如果需要删除缓存,清除缓存
682
- if error_result.get('remove_cache', False) and cache_key and cache_key in response_cache_manager.cache:
683
- log('info', f"因API错误,删除缓存: {cache_key[:8]}...",
684
- extra={'cache_operation': 'remove-on-error', 'request_type': request_type})
685
- del response_cache_manager.cache[cache_key]
686
-
687
- if error_result.get('should_retry', False):
688
- # 服务器错误需要重试(等待已在handle_api_error中完成)
689
- continue
690
- elif error_result.get('should_switch_key', False) and attempt < retry_attempts:
691
- # 跳出服务器错误重试循环,获取下一个可用密钥
692
- log('info', f"API密钥 {current_api_key[:8]}... 失败,准备尝试下一个密钥",
693
- extra={'key': current_api_key[:8], 'request_type': request_type})
694
- break
695
- else:
696
- # 无法处理的错误或已达到重试上限
697
- break
698
-
699
- # 如果所有尝试都失败
700
- msg = "所有API密钥均请求失败,请稍后重试"
701
- log('error', "API key 替换失败,所有API key都已尝试,请重新配置或稍后重试", extra={'key': 'N/A', 'request_type': 'switch_key', 'status_code': 'N/A'})
702
-
703
- # 对于流式请求,创建一个特殊的StreamingResponse返回错误
704
- if chat_request.stream:
705
- async def error_generator():
706
- error_json = json.dumps({'error': {'message': msg, 'type': 'api_error'}})
707
- yield f"data: {error_json}\n\n"
708
- yield "data: [DONE]\n\n"
709
-
710
- return StreamingResponse(error_generator(), media_type="text/event-stream")
711
- else:
712
- # 非流式请求使用标准HTTP异常
713
- raise HTTPException(
714
- status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=msg)
715
-
716
-
717
- @app.exception_handler(Exception)
718
- async def global_exception_handler(request: Request, exc: Exception):
719
- error_message = translate_error(str(exc))
720
- extra_log_unhandled_exception = {'status_code': 500, 'error_message': error_message}
721
- log('error', f"Unhandled exception: {error_message}", extra=extra_log_unhandled_exception)
722
- return JSONResponse(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, content=ErrorResponse(message=str(exc), type="internal_error").dict())
723
-
724
-
725
- @app.get("/", response_class=HTMLResponse)
726
- async def root(request: Request):
727
- # 先清理过期数据,确保统计数据是最新的
728
- clean_expired_stats()
729
- response_cache_manager.clean_expired() # 使用管理器清理缓存
730
- active_requests_manager.clean_completed() # 使用管理器清理活跃请求
731
- await check_version()
732
- # 获取当前统计数据
733
- now = datetime.now()
734
-
735
- # 计算过去24小时的调用总数
736
- last_24h_calls = sum(api_call_stats['last_24h'].values())
737
-
738
- # 计算过去一小时内的调用总数
739
- one_hour_ago = now - timedelta(hours=1)
740
- hourly_calls = 0
741
- for hour_key, count in api_call_stats['hourly'].items():
742
- try:
743
- hour_time = datetime.strptime(hour_key, '%Y-%m-%d %H:00')
744
- if hour_time >= one_hour_ago:
745
- hourly_calls += count
746
- except ValueError:
747
- continue
748
-
749
- # 计算过去一分钟内的调用总数
750
- one_minute_ago = now - timedelta(minutes=1)
751
- minute_calls = 0
752
- for minute_key, count in api_call_stats['minute'].items():
753
- try:
754
- minute_time = datetime.strptime(minute_key, '%Y-%m-%d %H:%M')
755
- if minute_time >= one_minute_ago:
756
- minute_calls += count
757
- except ValueError:
758
- continue
759
-
760
- # 获取最近的日志
761
- recent_logs = log_manager.get_recent_logs(50) # 获取最近50条日志
762
-
763
- # 获取缓存统计
764
- total_cache = len(response_cache_manager.cache)
765
- valid_cache = sum(1 for _, data in response_cache_manager.cache.items()
766
- if time.time() < data.get('expiry_time', 0))
767
- cache_by_model = {}
768
-
769
- # 分析缓存数据
770
- for _, cache_data in response_cache_manager.cache.items():
771
- if time.time() < cache_data.get('expiry_time', 0):
772
- # 按模型统计缓存
773
- model = cache_data.get('response', {}).model
774
- if model:
775
- if model in cache_by_model:
776
- cache_by_model[model] += 1
777
- else:
778
- cache_by_model[model] = 1
779
-
780
- # 获取请求历史统计
781
- history_count = len(client_request_history)
782
-
783
- # 获取活跃请求统计
784
- active_count = len(active_requests_manager.active_requests)
785
- active_done = sum(1 for task in active_requests_manager.active_requests.values() if task.done())
786
- active_pending = active_count - active_done
787
-
788
- # 准备模板上下文
789
- context = {
790
- "key_count": len(key_manager.api_keys),
791
- "model_count": len(GeminiClient.AVAILABLE_MODELS),
792
- "retry_count": len(key_manager.api_keys),
793
- "last_24h_calls": last_24h_calls,
794
- "hourly_calls": hourly_calls,
795
- "minute_calls": minute_calls,
796
- "max_requests_per_minute": MAX_REQUESTS_PER_MINUTE,
797
- "max_requests_per_day_per_ip": MAX_REQUESTS_PER_DAY_PER_IP,
798
- "current_time": datetime.now().strftime('%H:%M:%S'),
799
- "logs": recent_logs,
800
- # 添加版本信息
801
- "local_version": local_version,
802
- "remote_version": remote_version,
803
- "has_update": has_update,
804
- # 添加缓存信息
805
- "cache_entries": total_cache,
806
- "valid_cache": valid_cache,
807
- "expired_cache": total_cache - valid_cache,
808
- "cache_expiry_time": CACHE_EXPIRY_TIME,
809
- "max_cache_entries": MAX_CACHE_ENTRIES,
810
- "cache_by_model": cache_by_model,
811
- "request_history_count": history_count,
812
- "enable_reconnect_detection": ENABLE_RECONNECT_DETECTION,
813
- "remove_cache_after_use": REMOVE_CACHE_AFTER_USE,
814
- # 添加活跃请求池信息
815
- "active_count": active_count,
816
- "active_done": active_done,
817
- "active_pending": active_pending,
818
- }
819
-
820
- # 使用Jinja2模板引擎正确渲染HTML
821
- return templates.TemplateResponse("index.html", {"request": request, **context})
822
-
823
- # 客户端IP到最近请求的映射,用于识别重连请求
824
- client_request_history: Dict[str, Dict[str, Any]] = {}
825
- # 请求历史记录保留时间(秒)
826
- REQUEST_HISTORY_EXPIRY_TIME = int(os.environ.get("REQUEST_HISTORY_EXPIRY_TIME", "600")) # 默认10分钟
827
- # 是否启用重连检测
828
- ENABLE_RECONNECT_DETECTION = os.environ.get("ENABLE_RECONNECT_DETECTION", "true").lower() in ["true", "1", "yes"]
829
-
830
- # 定期清理缓存的定时任务
831
- def schedule_cache_cleanup():
832
- scheduler = BackgroundScheduler()
833
- scheduler.add_job(response_cache_manager.clean_expired, 'interval', minutes=1) # 每分钟清理过期缓存
834
- scheduler.add_job(active_requests_manager.clean_completed, 'interval', seconds=30) # 每30秒清理已完成的活跃请求
835
- scheduler.add_job(active_requests_manager.clean_long_running, 'interval', minutes=5, args=[300]) # 每5分钟清理运行超过5分钟的任务
836
- scheduler.add_job(clean_expired_stats, 'interval', minutes=5) # 每5分钟清理过期的统计数��
837
- scheduler.start()
838
-
839
- # 生成请求的唯一缓存键
840
- def generate_cache_key(chat_request: ChatCompletionRequest) -> str:
841
- # 创建包含请求关键信息的字典
842
- request_data = {
843
- 'model': chat_request.model,
844
- 'messages': []
845
- }
846
-
847
- # 添加消息内容
848
- for msg in chat_request.messages:
849
- if isinstance(msg.content, str):
850
- message_data = {'role': msg.role, 'content': msg.content}
851
- request_data['messages'].append(message_data)
852
- elif isinstance(msg.content, list):
853
- content_list = []
854
- for item in msg.content:
855
- if item.get('type') == 'text':
856
- content_list.append({'type': 'text', 'text': item.get('text')})
857
- # 对于图像数据,我们只使用标识符而不是全部数据
858
- elif item.get('type') == 'image_url':
859
- image_data = item.get('image_url', {}).get('url', '')
860
- if image_data.startswith('data:image/'):
861
- # 对于base64图像,使用前32字符作为标识符
862
- content_list.append({'type': 'image_url', 'hash': hashlib.md5(image_data[:32].encode()).hexdigest()})
863
- else:
864
- content_list.append({'type': 'image_url', 'url': image_data})
865
- request_data['messages'].append({'role': msg.role, 'content': content_list})
866
-
867
- # 将字典转换为JSON字符串并计算哈希值
868
- json_data = json.dumps(request_data, sort_keys=True)
869
- return hashlib.md5(json_data.encode()).hexdigest()
870
-
871
- # 拆分process_request为更小的函数
872
-
873
- async def process_stream_request(
874
- chat_request: ChatCompletionRequest,
875
- http_request: Request,
876
- contents,
877
- system_instruction,
878
- current_api_key: str
879
- ) -> StreamingResponse:
880
- """处理流式API请求"""
881
-
882
- # 创建一个直接流式响应的生成器函数
883
- async def stream_response_generator():
884
- # 如果启用了假流式模式,使用随机遍历API密钥的方式
885
- if FAKE_STREAMING:
886
- # 创建一个队列用于在任务之间传递数据
887
- queue = asyncio.Queue()
888
- keep_alive_task = None
889
- api_request_task = None
890
-
891
- try:
892
- # 创建一个保持连接的任务,持续发送换行符
893
- async def keep_alive_sender():
894
- try:
895
- # 创建一个Gemini客户端用于发送保持连接的换行符
896
- keep_alive_client = GeminiClient(current_api_key)
897
-
898
- # 启动保持连接的生成器
899
- keep_alive_generator = keep_alive_client.stream_chat(
900
- chat_request,
901
- contents,
902
- safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
903
- system_instruction
904
- )
905
-
906
- # 持续发送换行符直到被取消
907
- async for line in keep_alive_generator:
908
- if line == "\n":
909
- # 将换行符格式化为SSE格式
910
- formatted_chunk = {
911
- "id": "chatcmpl-keepalive",
912
- "object": "chat.completion.chunk",
913
- "created": int(time.time()),
914
- "model": chat_request.model,
915
- "choices": [{"delta": {"content": ""}, "index": 0, "finish_reason": None}]
916
- }
917
- # 将格式化的换行符放入队列
918
- await queue.put(f"data: {json.dumps(formatted_chunk)}\n\n")
919
- except asyncio.CancelledError:
920
- log('info', "保持连接任务被取消",
921
- extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
922
- raise
923
- except Exception as e:
924
- log('error', f"保持连接任务出错: {str(e)}",
925
- extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
926
- # 将错误放入队列
927
- await queue.put(None)
928
- raise
929
-
930
- # 创建一个任务来随机遍历API密钥并请求内容
931
- async def api_request_handler():
932
- success = False
933
- try:
934
- # 重置已尝试的密钥
935
- key_manager.reset_tried_keys_for_request()
936
-
937
- # 获取可用的API密钥
938
- available_keys = key_manager.api_keys.copy()
939
- random.shuffle(available_keys) # 随机打乱密钥顺序
940
-
941
- # 遍历所有API密钥尝试获取响应
942
- for attempt, api_key in enumerate(available_keys, 1):
943
- try:
944
- log('info', f"假流式模式: 尝试API密钥 {api_key[:8]}... ({attempt}/{len(available_keys)})",
945
- extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
946
-
947
- # 创建一个新的客户端使用当前API密钥
948
- non_stream_client = GeminiClient(api_key)
949
-
950
- # 使用非流式方式请求内容
951
- response_content = await asyncio.to_thread(
952
- non_stream_client.complete_chat,
953
- chat_request,
954
- contents,
955
- safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
956
- system_instruction
957
- )
958
-
959
- # 检查响应是否有效
960
- if response_content and response_content.text:
961
- log('info', f"假流式模式: API密钥 {api_key[:8]}... 成功获取响应",
962
- extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
963
-
964
- # 将完整响应分割成小块,模拟流式返回
965
- full_text = response_content.text
966
- chunk_size = max(len(full_text) // 10, 1) # 至少分成10块,每块至少1个字符
967
-
968
- for i in range(0, len(full_text), chunk_size):
969
- chunk = full_text[i:i+chunk_size]
970
- formatted_chunk = {
971
- "id": "chatcmpl-someid",
972
- "object": "chat.completion.chunk",
973
- "created": int(time.time()),
974
- "model": chat_request.model,
975
- "choices": [{"delta": {"role": "assistant", "content": chunk}, "index": 0, "finish_reason": None}]
976
- }
977
- # 将格式化的内容块放入队列
978
- await queue.put(f"data: {json.dumps(formatted_chunk)}\n\n")
979
-
980
- success = True
981
- # 更新API调用统计
982
- update_api_call_stats()
983
- break # 成功获取响应,退出循环
984
- else:
985
- log('warning', f"假流式模式: API密钥 {api_key[:8]}... 返回空响应",
986
- extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
987
- except Exception as e:
988
- error_detail = handle_gemini_error(e, api_key, key_manager)
989
- log('error', f"假流式模式: API密钥 {api_key[:8]}... 请求失败: {error_detail}",
990
- extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
991
- # 继续尝试下一个API密钥
992
-
993
- # 如果所有API密钥都尝试失败
994
- if not success:
995
- error_msg = "所有API密钥均请求失败,请稍后重试"
996
- log('error', error_msg,
997
- extra={'key': 'ALL', 'request_type': 'fake-stream', 'model': chat_request.model})
998
-
999
- # 添加错误信息到队列
1000
- error_json = {
1001
- "id": "chatcmpl-error",
1002
- "object": "chat.completion.chunk",
1003
- "created": int(time.time()),
1004
- "model": chat_request.model,
1005
- "choices": [{"delta": {"content": f"\n\n[错误: {error_msg}]"}, "index": 0, "finish_reason": "error"}]
1006
- }
1007
- await queue.put(f"data: {json.dumps(error_json)}\n\n")
1008
-
1009
- # 添加完成标记到队列
1010
- await queue.put("data: [DONE]\n\n")
1011
- # 添加None表示队列结束
1012
- await queue.put(None)
1013
-
1014
- except asyncio.CancelledError:
1015
- log('info', "API请求任务被取消",
1016
- extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
1017
- # 添加None表示队列结束
1018
- await queue.put(None)
1019
- raise
1020
- except Exception as e:
1021
- log('error', f"API请求任务出错: {str(e)}",
1022
- extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
1023
- # 添加错误信息到队列
1024
- error_json = {
1025
- "id": "chatcmpl-error",
1026
- "object": "chat.completion.chunk",
1027
- "created": int(time.time()),
1028
- "model": chat_request.model,
1029
- "choices": [{"delta": {"content": f"\n\n[错误: {str(e)}]"}, "index": 0, "finish_reason": "error"}]
1030
- }
1031
- await queue.put(f"data: {json.dumps(error_json)}\n\n")
1032
- await queue.put("data: [DONE]\n\n")
1033
- # 添加None表示队列结束
1034
- await queue.put(None)
1035
- raise
1036
-
1037
- # 启动保持连接的任务
1038
- keep_alive_task = asyncio.create_task(keep_alive_sender())
1039
- # 启动API请求任务
1040
- api_request_task = asyncio.create_task(api_request_handler())
1041
-
1042
- # 从队列中获取数据并发送给客户端
1043
- while True:
1044
- chunk = await queue.get()
1045
- if chunk is None: # None表示队列结束
1046
- break
1047
- yield chunk
1048
-
1049
- # 如果API请求任务已完成,取消保持连接任务
1050
- if api_request_task.done() and not keep_alive_task.done():
1051
- keep_alive_task.cancel()
1052
-
1053
- except asyncio.CancelledError:
1054
- log('info', "流式响应生成器被取消",
1055
- extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
1056
- # 取消所有任务
1057
- if keep_alive_task and not keep_alive_task.done():
1058
- keep_alive_task.cancel()
1059
- if api_request_task and not api_request_task.done():
1060
- api_request_task.cancel()
1061
- except Exception as e:
1062
- log('error', f"流式响应生成器出错: {str(e)}",
1063
- extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
1064
- # 取消所有任务
1065
- if keep_alive_task and not keep_alive_task.done():
1066
- keep_alive_task.cancel()
1067
- if api_request_task and not api_request_task.done():
1068
- api_request_task.cancel()
1069
- # 发送错误信息给客户端
1070
- error_json = {
1071
- "id": "chatcmpl-error",
1072
- "object": "chat.completion.chunk",
1073
- "created": int(time.time()),
1074
- "model": chat_request.model,
1075
- "choices": [{"delta": {"content": f"\n\n[错误: {str(e)}]"}, "index": 0, "finish_reason": "error"}]
1076
- }
1077
- yield f"data: {json.dumps(error_json)}\n\n"
1078
- yield "data: [DONE]\n\n"
1079
- finally:
1080
- # 确保所有任务都被取消
1081
- if keep_alive_task and not keep_alive_task.done():
1082
- keep_alive_task.cancel()
1083
- if api_request_task and not api_request_task.done():
1084
- api_request_task.cancel()
1085
- else:
1086
- # 原始流式请求处理逻辑
1087
- gemini_client = GeminiClient(current_api_key)
1088
- success = False
1089
-
1090
- try:
1091
- # 直接迭代生成器并发送响应块
1092
- async for chunk in gemini_client.stream_chat(
1093
- chat_request,
1094
- contents,
1095
- safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
1096
- system_instruction
1097
- ):
1098
- # 空字符串跳过
1099
- if not chunk:
1100
- continue
1101
-
1102
- formatted_chunk = {
1103
- "id": "chatcmpl-someid",
1104
- "object": "chat.completion.chunk",
1105
- "created": int(time.time()),
1106
- "model": chat_request.model,
1107
- "choices": [{"delta": {"role": "assistant", "content": chunk}, "index": 0, "finish_reason": None}]
1108
- }
1109
- success = True # ��要有一个chunk成功,就标记为成功
1110
- yield f"data: {json.dumps(formatted_chunk)}\n\n"
1111
-
1112
- # 如果成功获取到响应,更新API调用统计
1113
- if success:
1114
- update_api_call_stats()
1115
-
1116
- yield "data: [DONE]\n\n"
1117
-
1118
- except asyncio.CancelledError:
1119
- extra_log_cancel = {'key': current_api_key[:8], 'request_type': 'stream', 'model': chat_request.model, 'error_message': '客户端已断开连接'}
1120
- log('info', "客户端连接已中断", extra=extra_log_cancel)
1121
- except Exception as e:
1122
- error_detail = handle_gemini_error(e, current_api_key, key_manager)
1123
- log('error', f"流式请求失败: {error_detail}",
1124
- extra={'key': current_api_key[:8], 'request_type': 'stream', 'model': chat_request.model})
1125
- # 发送错误信息给客户端
1126
- error_json = {
1127
- "id": "chatcmpl-error",
1128
- "object": "chat.completion.chunk",
1129
- "created": int(time.time()),
1130
- "model": chat_request.model,
1131
- "choices": [{"delta": {"content": f"\n\n[错误: {error_detail}]"}, "index": 0, "finish_reason": "error"}]
1132
- }
1133
- yield f"data: {json.dumps(error_json)}\n\n"
1134
- yield "data: [DONE]\n\n"
1135
- # 重新抛出异常,这样process_request可以捕获它
1136
- raise e
1137
-
1138
- return StreamingResponse(stream_response_generator(), media_type="text/event-stream")
1139
-
1140
- async def run_gemini_completion(
1141
- gemini_client,
1142
- chat_request: ChatCompletionRequest,
1143
- contents,
1144
- system_instruction,
1145
- request_type: str,
1146
- current_api_key: str
1147
- ):
1148
- """运行Gemini非流式请求"""
1149
- # 记录函数调用状态
1150
- run_fn = run_gemini_completion
1151
-
1152
- try:
1153
- # 创建一个不会被客户端断开影响的任务
1154
- response_future = asyncio.create_task(
1155
- asyncio.to_thread(
1156
- gemini_client.complete_chat,
1157
- chat_request,
1158
- contents,
1159
- safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
1160
- system_instruction
1161
- )
1162
- )
1163
-
1164
- # 使用shield防止任务被外部取消
1165
- response_content = await asyncio.shield(response_future)
1166
-
1167
- # 只在第一次调用时记录完成日志
1168
- if not hasattr(run_fn, 'logged_complete'):
1169
- log('info', "非流式请求成功完成", extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
1170
- run_fn.logged_complete = True
1171
- return response_content
1172
- except asyncio.CancelledError:
1173
- # 即使任务被取消,我们也确保正在进行的API请求能够完成
1174
- if 'response_future' in locals() and not response_future.done():
1175
- try:
1176
- # 使用shield确保任务不被取消,并等待它完成
1177
- response_content = await asyncio.shield(response_future)
1178
- log('info', "API请求在客户端断开后完成", extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
1179
- return response_content
1180
- except Exception as e:
1181
- extra_log_gemini_cancel = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message': f'API请求在客户端断开后失败: {str(e)}'}
1182
- log('info', "API调用因客户端断开而失败", extra=extra_log_gemini_cancel)
1183
- raise
1184
-
1185
- # 如果任务尚未开始或已经失败,记录日志
1186
- extra_log_gemini_cancel = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message': '客户端断开导致API调用取消'}
1187
- log('info', "API调用因客户端断开而取消", extra=extra_log_gemini_cancel)
1188
- raise
1189
-
1190
- async def check_client_disconnect(http_request: Request, current_api_key: str, request_type: str, model: str):
1191
- """检查客户端是否断开连接"""
1192
- while True:
1193
- if await http_request.is_disconnected():
1194
- extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': model, 'error_message': '检测到客户端断开连接'}
1195
- log('info', "客户端连接已中断,等待API请求完成", extra=extra_log)
1196
- return True
1197
- await asyncio.sleep(0.5)
1198
-
1199
- async def handle_client_disconnect(
1200
- gemini_task: asyncio.Task,
1201
- chat_request: ChatCompletionRequest,
1202
- request_type: str,
1203
- current_api_key: str,
1204
- cache_key: str = None,
1205
- client_ip: str = None
1206
- ):
1207
-
1208
- try:
1209
- # 等待API任务完成,使用shield防止它被取消
1210
- response_content = await asyncio.shield(gemini_task)
1211
-
1212
- # 检查响应文本��否为空
1213
- if response_content is None or response_content.text == "":
1214
- if response_content is None:
1215
- log('info', "客户端断开后API任务返回None",
1216
- extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
1217
- else:
1218
- extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'status_code': 204}
1219
- log('info', "客户端断开后Gemini API 返回空响应", extra=extra_log)
1220
-
1221
- # 删除任何现有缓存,因为响应为空
1222
- if cache_key and cache_key in response_cache_manager.cache:
1223
- log('info', f"因空响应,删除缓存: {cache_key[:8]}...",
1224
- extra={'cache_operation': 'remove-on-empty', 'request_type': request_type})
1225
- del response_cache_manager.cache[cache_key]
1226
-
1227
- # 返回错误响应而不是None
1228
- return create_error_response(chat_request.model, "AI未返回任何内容,请重试")
1229
-
1230
- # 首先检查是否有现有缓存
1231
- cached_response, cache_hit = response_cache_manager.get(cache_key)
1232
- if cache_hit:
1233
- log('info', f"客户端断开但找到已存在缓存,将删除: {cache_key[:8]}...",
1234
- extra={'cache_operation': 'disconnect-found-cache', 'request_type': request_type})
1235
-
1236
- # 安全删除缓存
1237
- if cache_key in response_cache_manager.cache:
1238
- del response_cache_manager.cache[cache_key]
1239
-
1240
- # 不返回缓存,而是创建新响应并缓存
1241
-
1242
- # 创建新响应
1243
- # log('info', f"客户端断开后创建新缓存: {cache_key[:8] if cache_key else 'none'}...",
1244
- # extra={'cache_operation': 'create-after-disconnect', 'request_type': request_type})
1245
- response = create_response(chat_request, response_content)
1246
-
1247
- # 客户端已断开,此响应不会实际发送,可以考虑将其缓存以供后续使用
1248
- # 如果确实需要缓存,则可以取消下面的注释
1249
- # cache_response(response, cache_key, client_ip)
1250
-
1251
- return response
1252
- except asyncio.CancelledError:
1253
- # 对于取消异常,仍然尝试继续完成任务
1254
- log('info', "客户端断开后任务被取消,但我们仍会尝试完成",
1255
- extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
1256
-
1257
- # 检查任务是否已经完成
1258
- if gemini_task.done() and not gemini_task.cancelled():
1259
- try:
1260
- response_content = gemini_task.result()
1261
-
1262
- # 首先检查是否有现有缓存
1263
- cached_response, cache_hit = response_cache_manager.get(cache_key)
1264
- if cache_hit:
1265
- log('info', f"任务被取消但找到已存在缓存,将删除: {cache_key[:8]}...",
1266
- extra={'cache_operation': 'cancel-found-cache', 'request_type': request_type})
1267
-
1268
- # 安全删除缓存
1269
- if cache_key in response_cache_manager.cache:
1270
- del response_cache_manager.cache[cache_key]
1271
-
1272
- # 创建但不缓存响应
1273
- response = create_response(chat_request, response_content)
1274
- return response
1275
- except Exception as inner_e:
1276
- log('error', f"客户端断开后从已完成任务获取结果失败: {str(inner_e)}",
1277
- extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
1278
-
1279
- # 删除缓存,因为出现错误
1280
- if cache_key and cache_key in response_cache_manager.cache:
1281
- log('info', f"因任务获取结果失败,删除缓存: {cache_key[:8]}...",
1282
- extra={'cache_operation': 'remove-on-error', 'request_type': request_type})
1283
- del response_cache_manager.cache[cache_key]
1284
-
1285
- # 创建错误响应而不是返回None
1286
- return create_error_response(chat_request.model, "请求处理过程中发生错误,请重试")
1287
- except Exception as e:
1288
- # 处理API任务异常
1289
- error_msg = str(e)
1290
- extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message': error_msg}
1291
- log('error', f"客户端断开后处理API响应时出错: {error_msg}", extra=extra_log)
1292
-
1293
- # 删除缓存,因为出现错误
1294
- if cache_key and cache_key in response_cache_manager.cache:
1295
- log('info', f"因API响应错误,删除缓存: {cache_key[:8]}...",
1296
- extra={'cache_operation': 'remove-on-error', 'request_type': request_type})
1297
- del response_cache_manager.cache[cache_key]
1298
-
1299
- # 创建错误响应而不是返回None
1300
- return create_error_response(chat_request.model, f"请求处理错误: {error_msg}")
1301
-
1302
- async def process_nonstream_request(
1303
- chat_request: ChatCompletionRequest,
1304
- http_request: Request,
1305
- request_type: str,
1306
- contents,
1307
- system_instruction,
1308
- current_api_key: str,
1309
- cache_key: str = None,
1310
- client_ip: str = None
1311
- ):
1312
- """处理非流式API请求"""
1313
- gemini_client = GeminiClient(current_api_key)
1314
-
1315
- # 创建任务
1316
- gemini_task = asyncio.create_task(
1317
- run_gemini_completion(
1318
- gemini_client,
1319
- chat_request,
1320
- contents,
1321
- system_instruction,
1322
- request_type,
1323
- current_api_key
1324
- )
1325
- )
1326
-
1327
- disconnect_task = asyncio.create_task(
1328
- check_client_disconnect(
1329
- http_request,
1330
- current_api_key,
1331
- request_type,
1332
- chat_request.model
1333
- )
1334
- )
1335
-
1336
- try:
1337
- # 先等待看是否API任务先完成,或者客户端先断开连接
1338
- done, pending = await asyncio.wait(
1339
- [gemini_task, disconnect_task],
1340
- return_when=asyncio.FIRST_COMPLETED
1341
- )
1342
-
1343
- if disconnect_task in done:
1344
- # 客户端已断开连接,但我们仍继续完成API请求以便缓存结果
1345
- return await handle_client_disconnect(
1346
- gemini_task,
1347
- chat_request,
1348
- request_type,
1349
- current_api_key,
1350
- cache_key,
1351
- client_ip
1352
- )
1353
- else:
1354
- # API任务先完成,取消断开检测任务
1355
- disconnect_task.cancel()
1356
-
1357
- # 获取响应内容
1358
- response_content = await gemini_task
1359
-
1360
- # 检查缓存是否已经存在,如果存在则不再创建新缓存
1361
- cached_response, cache_hit = response_cache_manager.get(cache_key)
1362
- if cache_hit:
1363
- log('info', f"缓存已存在,直接返回: {cache_key[:8]}...",
1364
- extra={'cache_operation': 'use-existing', 'request_type': request_type})
1365
-
1366
- # 安全删除缓存
1367
- if cache_key in response_cache_manager.cache:
1368
- del response_cache_manager.cache[cache_key]
1369
- log('info', f"缓存使用后已删除: {cache_key[:8]}...",
1370
- extra={'cache_operation': 'used-and-removed', 'request_type': request_type})
1371
-
1372
- return cached_response
1373
-
1374
- # 创建响应
1375
- response = create_response(chat_request, response_content)
1376
-
1377
- # 缓存响应
1378
- cache_response(response, cache_key, client_ip)
1379
-
1380
- # 立即删除缓存,确保只能使用一次
1381
- if cache_key and cache_key in response_cache_manager.cache:
1382
- del response_cache_manager.cache[cache_key]
1383
- log('info', f"缓存创建后立即删除: {cache_key[:8]}...",
1384
- extra={'cache_operation': 'store-and-remove', 'request_type': request_type})
1385
-
1386
- # 返回响应
1387
- return response
1388
-
1389
- except asyncio.CancelledError:
1390
- extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message':"请求被取消"}
1391
- log('info', "请求取消", extra=extra_log)
1392
-
1393
- # 在请求被取消时先检查缓存中是否已有结果
1394
- cached_response, cache_hit = response_cache_manager.get(cache_key)
1395
- if cache_hit:
1396
- log('info', f"请求取消但找到有效缓存,使用缓存响应: {cache_key[:8]}...",
1397
- extra={'cache_operation': 'use-cache-on-cancel', 'request_type': request_type})
1398
-
1399
- # 安全删除缓存
1400
- if cache_key in response_cache_manager.cache:
1401
- del response_cache_manager.cache[cache_key]
1402
- log('info', f"缓存使用后已删除: {cache_key[:8]}...",
1403
- extra={'cache_operation': 'used-and-removed', 'request_type': request_type})
1404
-
1405
- return cached_response
1406
-
1407
- # 尝试完成正在进行的API请求
1408
- if not gemini_task.done():
1409
- log('info', "请求取消但API请求尚未完成,继续等待...",
1410
- extra={'key': current_api_key[:8], 'request_type': request_type})
1411
-
1412
- # 使用shield确保任务不会被取消
1413
- response_content = await asyncio.shield(gemini_task)
1414
-
1415
- # 创建响应
1416
- response = create_response(chat_request, response_content)
1417
-
1418
- # 不缓存这个响应,直接返回
1419
- return response
1420
- else:
1421
- # 任务已完成,获取结果
1422
- response_content = gemini_task.result()
1423
-
1424
- # 创建响应
1425
- response = create_response(chat_request, response_content)
1426
-
1427
- # 不缓存这个响应,直接返回
1428
- return response
1429
-
1430
- except HTTPException as e:
1431
- if e.status_code == status.HTTP_408_REQUEST_TIMEOUT:
1432
- extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model,
1433
- 'status_code': 408, 'error_message': '客户端连接中断'}
1434
- log('error', "客户端连接中断,终止后续重试", extra=extra_log)
1435
- raise
1436
- else:
1437
- raise
1438
-
1439
- # 添加通用响应处理函数
1440
- def create_response(
1441
- chat_request, response_content
1442
- ):
1443
- """创建标准响应对象但不缓存"""
1444
- # 创建响应对象
1445
- return create_chat_response(
1446
- model=chat_request.model,
1447
- choices=[{
1448
- "index": 0,
1449
- "message": {
1450
- "role": "assistant",
1451
- "content": response_content.text
1452
- },
1453
- "finish_reason": "stop"
1454
- }]
1455
- )
1456
-
1457
- def cache_response(response, cache_key, client_ip):
1458
- """将响应存入缓存"""
1459
- if not cache_key:
1460
- return
1461
-
1462
- # 先检查缓存是否已存在
1463
- existing_cache = cache_key in response_cache_manager.cache
1464
-
1465
- if existing_cache:
1466
- log('info', f"缓存已存在,跳过存储: {cache_key[:8]}...",
1467
- extra={'cache_operation': 'skip-existing', 'request_type': 'non-stream'})
1468
- else:
1469
- response_cache_manager.store(cache_key, response, client_ip)
1470
- log('info', f"API响应已缓存: {cache_key[:8]}...",
1471
- extra={'cache_operation': 'store-new', 'request_type': 'non-stream'})
1472
-
1473
- # 更新API调用统计
1474
- update_api_call_stats()
1475
-
1476
- # 统一的API错误处理函数
1477
- async def handle_api_error(e, api_key, key_manager, request_type, model, retry_count=0):
1478
- """统一处理API错误,对500和503错误实现自动重试机制"""
1479
- error_detail = handle_gemini_error(e, api_key, key_manager)
1480
-
1481
- # 处理500和503服务器错误
1482
- if isinstance(e, requests.exceptions.HTTPError) and ('500' in str(e) or '503' in str(e)):
1483
- status_code = '500' if '500' in str(e) else '503'
1484
-
1485
- # 最多重试3次
1486
- if retry_count < 3:
1487
- wait_time = min(RETRY_DELAY * (2 ** retry_count), MAX_RETRY_DELAY)
1488
- log('warning', f"Gemini服务器错误({status_code}),等待{wait_time}秒后重试 ({retry_count+1}/3)",
1489
- key=api_key[:8], request_type=request_type, model=model, status_code=int(status_code))
1490
-
1491
- # 等待后返回重试信号
1492
- await asyncio.sleep(wait_time)
1493
- return {'should_retry': True, 'error': error_detail, 'remove_cache': False}
1494
-
1495
- # 重试次数用尽,直接返回错误状态码
1496
- log('error', f"服务器错误({status_code})重试{retry_count}次后仍然失败",
1497
- key=api_key[:8], request_type=request_type, model=model, status_code=int(status_code))
1498
-
1499
- # 不建议切换密钥,直接抛出HTTP异常
1500
- raise HTTPException(status_code=int(status_code),
1501
- detail=f"Gemini API 服务器错误({status_code}),请稍后重试")
1502
-
1503
- # 对于其他错误,返回切换密钥的信号
1504
- log('error', f"API错误: {error_detail}",
1505
- key=api_key[:8], request_type=request_type, model=model, error_message=error_detail)
1506
- return {'should_retry': False, 'should_switch_key': True, 'error': error_detail, 'remove_cache': True}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hajimi.v0.0.3/app/models.py DELETED
@@ -1,46 +0,0 @@
1
- from typing import List, Dict, Optional, Union, Literal
2
- from pydantic import BaseModel, Field
3
-
4
- class Message(BaseModel):
5
- role: str
6
- content: str
7
-
8
- class ChatCompletionRequest(BaseModel):
9
- model: str
10
- messages: List[Message]
11
- temperature: float = 0.7
12
- top_p: Optional[float] = 1.0
13
- n: int = 1
14
- stream: bool = False
15
- stop: Optional[Union[str, List[str]]] = None
16
- max_tokens: Optional[int] = None
17
- presence_penalty: Optional[float] = 0.0
18
- frequency_penalty: Optional[float] = 0.0
19
-
20
- class Choice(BaseModel):
21
- index: int
22
- message: Message
23
- finish_reason: Optional[str] = None
24
-
25
- class Usage(BaseModel):
26
- prompt_tokens: int = 0
27
- completion_tokens: int = 0
28
- total_tokens: int = 0
29
-
30
- class ChatCompletionResponse(BaseModel):
31
- id: str
32
- object: Literal["chat.completion"]
33
- created: int
34
- model: str
35
- choices: List[Choice]
36
- usage: Usage = Field(default_factory=Usage)
37
-
38
- class ErrorResponse(BaseModel):
39
- message: str
40
- type: str
41
- param: Optional[str] = None
42
- code: Optional[str] = None
43
-
44
- class ModelList(BaseModel):
45
- object: str = "list"
46
- data: List[Dict]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hajimi.v0.0.3/app/utils.py DELETED
@@ -1,275 +0,0 @@
1
- import random
2
- from fastapi import HTTPException, Request
3
- import time
4
- import re
5
- from datetime import datetime, timedelta
6
- from apscheduler.schedulers.background import BackgroundScheduler
7
- import os
8
- import requests
9
- import httpx
10
- from threading import Lock
11
- import logging
12
- import sys
13
- from collections import deque
14
-
15
- DEBUG = os.environ.get("DEBUG", "false").lower() == "true"
16
- LOG_FORMAT_DEBUG = '%(asctime)s - %(levelname)s - [%(key)s]-%(request_type)s-[%(model)s]-%(status_code)s: %(message)s - %(error_message)s'
17
- LOG_FORMAT_NORMAL = '[%(asctime)s] [%(levelname)s] [%(key)s]-%(request_type)s-[%(model)s]-%(status_code)s: %(message)s'
18
-
19
- # 配置 logger
20
- logger = logging.getLogger("my_logger")
21
- logger.setLevel(logging.DEBUG)
22
-
23
- # 控制台处理器
24
- console_handler = logging.StreamHandler()
25
- console_formatter = logging.Formatter('%(message)s')
26
- console_handler.setFormatter(console_formatter)
27
- logger.addHandler(console_handler)
28
-
29
- # 日志缓存,用于在网页上显示最近的日志
30
- class LogManager:
31
- def __init__(self, max_logs=100):
32
- self.logs = deque(maxlen=max_logs) # 使用双端队列存储最近的日志
33
- self.lock = Lock()
34
-
35
- def add_log(self, log_entry):
36
- with self.lock:
37
- self.logs.append(log_entry)
38
-
39
- def get_recent_logs(self, count=50):
40
- with self.lock:
41
- return list(self.logs)[-count:]
42
-
43
- # 创建日志管理器实例
44
- log_manager = LogManager()
45
-
46
- def format_log_message(level, message, extra=None):
47
- extra = extra or {}
48
- log_values = {
49
- 'asctime': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
50
- 'levelname': level,
51
- 'key': extra.get('key', 'N/A'),
52
- 'request_type': extra.get('request_type', 'N/A'),
53
- 'model': extra.get('model', 'N/A'),
54
- 'status_code': extra.get('status_code', 'N/A'),
55
- 'error_message': extra.get('error_message', ''),
56
- 'message': message
57
- }
58
- log_format = LOG_FORMAT_DEBUG if DEBUG else LOG_FORMAT_NORMAL
59
- formatted_log = log_format % log_values
60
-
61
- # 将格式化后的日志添加到日志管理器
62
- log_entry = {
63
- 'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
64
- 'level': level,
65
- 'key': extra.get('key', 'N/A'),
66
- 'request_type': extra.get('request_type', 'N/A'),
67
- 'model': extra.get('model', 'N/A'),
68
- 'status_code': extra.get('status_code', 'N/A'),
69
- 'message': message,
70
- 'error_message': extra.get('error_message', ''),
71
- 'formatted': formatted_log
72
- }
73
- log_manager.add_log(log_entry)
74
-
75
- return formatted_log
76
-
77
-
78
- class APIKeyManager:
79
- def __init__(self):
80
- self.api_keys = re.findall(
81
- r"AIzaSy[a-zA-Z0-9_-]{33}", os.environ.get('GEMINI_API_KEYS', ""))
82
- self.key_stack = [] # 初始化密钥栈
83
- self._reset_key_stack() # 初始化时创建随机密钥栈
84
- # self.api_key_blacklist = set()
85
- # self.api_key_blacklist_duration = 60
86
- self.scheduler = BackgroundScheduler()
87
- self.scheduler.start()
88
- self.tried_keys_for_request = set() # 用于跟踪当前请求尝试中已试过的 key
89
-
90
- def _reset_key_stack(self):
91
- """创建并随机化密钥栈"""
92
- shuffled_keys = self.api_keys[:] # 创建 api_keys 的副本以避免直接修改原列表
93
- random.shuffle(shuffled_keys)
94
- self.key_stack = shuffled_keys
95
-
96
-
97
- def get_available_key(self):
98
- """从栈顶获取密钥,栈空时重新生成 (修改后)"""
99
- while self.key_stack:
100
- key = self.key_stack.pop()
101
- # if key not in self.api_key_blacklist and key not in self.tried_keys_for_request:
102
- if key not in self.tried_keys_for_request:
103
- self.tried_keys_for_request.add(key)
104
- return key
105
-
106
- if not self.api_keys:
107
- log_msg = format_log_message('ERROR', "没有配置任何 API 密钥!")
108
- logger.error(log_msg)
109
- return None
110
-
111
- self._reset_key_stack() # 重新生成密钥栈
112
-
113
- # 再次尝试从新栈中获取密钥 (迭代一次)
114
- while self.key_stack:
115
- key = self.key_stack.pop()
116
- # if key not in self.api_key_blacklist and key not in self.tried_keys_for_request:
117
- if key not in self.tried_keys_for_request:
118
- self.tried_keys_for_request.add(key)
119
- return key
120
-
121
- return None
122
-
123
-
124
- def show_all_keys(self):
125
- log_msg = format_log_message('INFO', f"当前可用API key个数: {len(self.api_keys)} ")
126
- logger.info(log_msg)
127
- for i, api_key in enumerate(self.api_keys):
128
- log_msg = format_log_message('INFO', f"API Key{i}: {api_key[:8]}...{api_key[-3:]}")
129
- logger.info(log_msg)
130
-
131
- # def blacklist_key(self, key):
132
- # log_msg = format_log_message('WARNING', f"{key[:8]} → 暂时禁用 {self.api_key_blacklist_duration} 秒")
133
- # logger.warning(log_msg)
134
- # self.api_key_blacklist.add(key)
135
- # self.scheduler.add_job(lambda: self.api_key_blacklist.discard(key), 'date',
136
- # run_date=datetime.now() + timedelta(seconds=self.api_key_blacklist_duration))
137
-
138
- def reset_tried_keys_for_request(self):
139
- """在新的请求尝试时重置已尝试的 key 集合"""
140
- self.tried_keys_for_request = set()
141
-
142
-
143
- def handle_gemini_error(error, current_api_key, key_manager) -> str:
144
- if isinstance(error, requests.exceptions.HTTPError):
145
- status_code = error.response.status_code
146
- if status_code == 400:
147
- try:
148
- error_data = error.response.json()
149
- if 'error' in error_data:
150
- if error_data['error'].get('code') == "invalid_argument":
151
- error_message = "无效的 API 密钥"
152
- extra_log_invalid_key = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
153
- log_msg = format_log_message('ERROR', f"{current_api_key[:8]} ... {current_api_key[-3:]} → 无效,可能已过期或被删除", extra=extra_log_invalid_key)
154
- logger.error(log_msg)
155
- # key_manager.blacklist_key(current_api_key)
156
-
157
- return error_message
158
- error_message = error_data['error'].get(
159
- 'message', 'Bad Request')
160
- extra_log_400 = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
161
- log_msg = format_log_message('WARNING', f"400 错误请求: {error_message}", extra=extra_log_400)
162
- logger.warning(log_msg)
163
- return f"400 错误请求: {error_message}"
164
- except ValueError:
165
- error_message = "400 错误请求:响应不是有效的JSON格式"
166
- extra_log_400_json = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
167
- log_msg = format_log_message('WARNING', error_message, extra=extra_log_400_json)
168
- logger.warning(log_msg)
169
- return error_message
170
-
171
- elif status_code == 429:
172
- error_message = "API 密钥配额已用尽或其他原因"
173
- extra_log_429 = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
174
- log_msg = format_log_message('WARNING', f"{current_api_key[:8]} ... {current_api_key[-3:]} → 429 官方资源耗尽或其他原因", extra=extra_log_429)
175
- logger.warning(log_msg)
176
- # key_manager.blacklist_key(current_api_key)
177
-
178
- return error_message
179
-
180
- elif status_code == 403:
181
- error_message = "权限被拒绝"
182
- extra_log_403 = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
183
- log_msg = format_log_message('ERROR', f"{current_api_key[:8]} ... {current_api_key[-3:]} → 403 权限被拒绝", extra=extra_log_403)
184
- logger.error(log_msg)
185
- # key_manager.blacklist_key(current_api_key)
186
-
187
- return error_message
188
- elif status_code == 500:
189
- error_message = "服务器内部错误"
190
- extra_log_500 = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
191
- log_msg = format_log_message('WARNING', f"{current_api_key[:8]} ... {current_api_key[-3:]} → 500 服务器内部错误", extra=extra_log_500)
192
- logger.warning(log_msg)
193
-
194
- return "Gemini API 内部错误"
195
-
196
- elif status_code == 503:
197
- error_message = "服务不可用"
198
- extra_log_503 = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
199
- log_msg = format_log_message('WARNING', f"{current_api_key[:8]} ... {current_api_key[-3:]} → 503 服务不可用", extra=extra_log_503)
200
- logger.warning(log_msg)
201
-
202
- return "Gemini API 服务不可用"
203
- else:
204
- error_message = f"未知错误: {status_code}"
205
- extra_log_other = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
206
- log_msg = format_log_message('WARNING', f"{current_api_key[:8]} ... {current_api_key[-3:]} → {status_code} 未知错误", extra=extra_log_other)
207
- logger.warning(log_msg)
208
-
209
- return f"未知错误/模型不可用: {status_code}"
210
-
211
- elif isinstance(error, requests.exceptions.ConnectionError):
212
- error_message = "连接错误"
213
- log_msg = format_log_message('WARNING', error_message, extra={'error_message': error_message})
214
- logger.warning(log_msg)
215
- return error_message
216
-
217
- elif isinstance(error, requests.exceptions.Timeout):
218
- error_message = "请求超时"
219
- log_msg = format_log_message('WARNING', error_message, extra={'error_message': error_message})
220
- logger.warning(log_msg)
221
- return error_message
222
- else:
223
- error_message = f"发生未知错误: {error}"
224
- log_msg = format_log_message('ERROR', error_message, extra={'error_message': error_message})
225
- logger.error(log_msg)
226
- return error_message
227
-
228
-
229
- async def test_api_key(api_key: str) -> bool:
230
- """
231
- 测试 API 密钥是否有效。
232
- """
233
- try:
234
- url = "https://generativelanguage.googleapis.com/v1beta/models?key={}".format(api_key)
235
- async with httpx.AsyncClient() as client:
236
- response = await client.get(url)
237
- response.raise_for_status()
238
- return True
239
- except Exception:
240
- return False
241
-
242
-
243
- rate_limit_data = {}
244
- rate_limit_lock = Lock()
245
-
246
-
247
- def protect_from_abuse(request: Request, max_requests_per_minute: int = 30, max_requests_per_day_per_ip: int = 600):
248
- now = int(time.time())
249
- minute = now // 60
250
- day = now // (60 * 60 * 24)
251
-
252
- minute_key = f"{request.url.path}:{minute}"
253
- day_key = f"{request.client.host}:{day}"
254
-
255
- with rate_limit_lock:
256
- minute_count, minute_timestamp = rate_limit_data.get(
257
- minute_key, (0, now))
258
- if now - minute_timestamp >= 60:
259
- minute_count = 0
260
- minute_timestamp = now
261
- minute_count += 1
262
- rate_limit_data[minute_key] = (minute_count, minute_timestamp)
263
-
264
- day_count, day_timestamp = rate_limit_data.get(day_key, (0, now))
265
- if now - day_timestamp >= 86400:
266
- day_count = 0
267
- day_timestamp = now
268
- day_count += 1
269
- rate_limit_data[day_key] = (day_count, day_timestamp)
270
-
271
- if minute_count > max_requests_per_minute:
272
- raise HTTPException(status_code=429, detail={
273
- "message": "Too many requests per minute", "limit": max_requests_per_minute})
274
- if day_count > max_requests_per_day_per_ip:
275
- raise HTTPException(status_code=429, detail={"message": "Too many requests per day from this IP", "limit": max_requests_per_day_per_ip})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hajimi.v0.0.3/requirements.txt DELETED
@@ -1,7 +0,0 @@
1
- fastapi
2
- uvicorn
3
- httpx
4
- python-dotenv
5
- requests
6
- apscheduler
7
- jinja2
 
 
 
 
 
 
 
 
hajimi.v0.0.3/version.txt DELETED
@@ -1 +0,0 @@
1
- version=0.0.3