Upload 9 files
Browse files- Dockerfile +14 -0
- app/__init__.py +0 -0
- app/gemini.py +346 -0
- app/index.html +306 -0
- app/main.py +1506 -0
- app/models.py +46 -0
- app/utils.py +275 -0
- requirements.txt +7 -0
- version.txt +1 -0
Dockerfile
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Lightweight image for the Gemini API proxy (FastAPI + uvicorn).
FROM python:3.11-slim

WORKDIR /app

# Application code plus the two metadata files read at runtime.
COPY ./app /app/app
COPY requirements.txt .
COPY version.txt .

RUN pip install --no-cache-dir -r requirements.txt

# Environment variables (set these in Hugging Face Spaces, not here)
# ENV GEMINI_API_KEYS=your_key_1,your_key_2,your_key_3

# Port 7860 is the port Hugging Face Spaces exposes by default.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
app/__init__.py
ADDED
|
File without changes
|
app/gemini.py
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
import asyncio
|
| 5 |
+
import time
|
| 6 |
+
from app.models import ChatCompletionRequest, Message # 相对导入
|
| 7 |
+
from dataclasses import dataclass
|
| 8 |
+
from typing import Optional, Dict, Any, List
|
| 9 |
+
import httpx
|
| 10 |
+
import logging
|
| 11 |
+
from app.utils import format_log_message
|
| 12 |
+
|
| 13 |
+
# Shared application logger; handlers are presumably configured elsewhere
# in the package (TODO confirm — only the name 'my_logger' is visible here).
logger = logging.getLogger('my_logger')

# Whether fake streaming is enabled (default: enabled). In this mode
# stream_chat only emits newline keep-alive chunks while the real
# response is obtained by the caller (see GeminiClient.stream_chat).
FAKE_STREAMING = os.environ.get("FAKE_STREAMING", "true").lower() in ["true", "1", "yes"]
# Interval in seconds between empty keep-alive chunks in fake-streaming mode.
FAKE_STREAMING_INTERVAL = float(os.environ.get("FAKE_STREAMING_INTERVAL", "1"))
|
| 19 |
+
|
| 20 |
+
@dataclass
class GeneratedText:
    """A piece of model output paired with its optional finish reason."""
    # The generated text content.
    text: str
    # Why generation stopped (e.g. "STOP"), when reported by the API.
    finish_reason: Optional[str] = None
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class ResponseWrapper:
    """Read-only view over a raw Gemini ``generateContent`` JSON response.

    The commonly used fields (text, thoughts, finish reason, token
    counts) are extracted once at construction time and exposed as
    properties.  Missing or malformed fields yield ``""`` / ``None``
    instead of raising, so callers never have to guard their reads.
    """

    def __init__(self, data: Dict[Any, Any]):
        self._data = data
        self._text = self._extract_text()
        self._finish_reason = self._extract_finish_reason()
        self._prompt_token_count = self._extract_prompt_token_count()
        self._candidates_token_count = self._extract_candidates_token_count()
        self._total_token_count = self._extract_total_token_count()
        self._thoughts = self._extract_thoughts()
        # Pretty-printed copy of the raw payload, useful for logging/debugging.
        self._json_dumps = json.dumps(self._data, indent=4, ensure_ascii=False)

    def _parts(self) -> List[Dict[str, Any]]:
        """Return the parts list of the first candidate, or [] when absent."""
        try:
            return self._data['candidates'][0]['content']['parts']
        except (KeyError, IndexError, TypeError):
            return []

    def _extract_thoughts(self) -> Optional[str]:
        """Text of the first 'thought' part, or "" when the model sent none."""
        for part in self._parts():
            if 'thought' in part:
                return part['text']
        return ""

    def _extract_text(self) -> str:
        """Text of the first non-'thought' part, or "" when absent.

        Note: intentionally returns only the FIRST such part, matching
        the original behavior; multi-part answers are not concatenated.
        """
        for part in self._parts():
            if 'thought' not in part:
                return part['text']
        return ""

    def _extract_finish_reason(self) -> Optional[str]:
        """The first candidate's 'finishReason', or None when absent."""
        try:
            return self._data['candidates'][0].get('finishReason')
        except (KeyError, IndexError, TypeError):
            return None

    def _usage_value(self, key: str) -> Optional[int]:
        """Look up one counter in 'usageMetadata'; None when missing.

        Replaces three copy-pasted extractor bodies and also tolerates a
        non-dict 'usageMetadata' value (the old code would have raised
        an uncaught AttributeError in that case).
        """
        usage = self._data.get('usageMetadata')
        return usage.get(key) if isinstance(usage, dict) else None

    def _extract_prompt_token_count(self) -> Optional[int]:
        return self._usage_value('promptTokenCount')

    def _extract_candidates_token_count(self) -> Optional[int]:
        return self._usage_value('candidatesTokenCount')

    def _extract_total_token_count(self) -> Optional[int]:
        return self._usage_value('totalTokenCount')

    @property
    def text(self) -> str:
        return self._text

    @property
    def finish_reason(self) -> Optional[str]:
        return self._finish_reason

    @property
    def prompt_token_count(self) -> Optional[int]:
        return self._prompt_token_count

    @property
    def candidates_token_count(self) -> Optional[int]:
        return self._candidates_token_count

    @property
    def total_token_count(self) -> Optional[int]:
        return self._total_token_count

    @property
    def thoughts(self) -> Optional[str]:
        return self._thoughts

    @property
    def json_dumps(self) -> str:
        return self._json_dumps
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
class GeminiClient:
    """Thin client for the Google Generative Language (Gemini) REST API.

    One instance wraps a single API key; key rotation/retry is handled
    by the caller (main.py), not here.
    """

    # Populated elsewhere (presumably at startup via list_available_models
    # — TODO confirm against main.py).
    AVAILABLE_MODELS = []
    # Extra model names injected via the EXTRA_MODELS env var (comma-separated).
    # NOTE(review): an unset/empty env var yields [""] — an empty-string model
    # entry; verify callers filter it out.
    EXTRA_MODELS = os.environ.get("EXTRA_MODELS", "").split(",")

    def __init__(self, api_key: str):
        self.api_key = api_key

    async def stream_chat(self, request: ChatCompletionRequest, contents, safety_settings, system_instruction):
        """Async generator yielding response text chunks for a streaming request.

        In fake-streaming mode (FAKE_STREAMING) it only yields "\\n"
        keep-alive chunks while the real answer is produced by the caller;
        otherwise it proxies the SSE stream from the Gemini API.
        Raises TimeoutError (fake mode, >300s) or ValueError (truncated /
        safety-blocked response).
        """
        extra_log = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'N/A'}
        log_msg = format_log_message('INFO', "流式请求开始", extra=extra_log)
        logger.info(log_msg)

        # Check whether fake streaming is enabled.
        if FAKE_STREAMING:
            log_msg = format_log_message('INFO', "使用假流式请求模式(发送换行符保持连接)", extra=extra_log)
            logger.info(log_msg)

            try:
                # This branch no longer uses self.api_key directly; main.py
                # owns the API-key list and produces the real response.
                # Here we only keep emitting newline chunks until main.py
                # gets its answer and cancels this generator.
                start_time = time.time()
                while True:
                    # Newline chunk acts as the keep-alive message.
                    yield "\n"
                    # Pause between keep-alives.
                    await asyncio.sleep(FAKE_STREAMING_INTERVAL)

                    # Safety valve: abort after 300s to prevent waiting forever.
                    if time.time() - start_time > 300:
                        log_msg = format_log_message('WARNING', "假流式请求等待时间过长,强制结束", extra=extra_log)
                        logger.warning(log_msg)
                        # Raise a timeout so the caller can handle it.
                        error_msg = "假流式请求等待时间过长,所有API密钥均已尝试"
                        extra_log_timeout = {'key': self.api_key[:8], 'request_type': 'fake-stream', 'model': request.model, 'status_code': 'TIMEOUT', 'error_message': error_msg}
                        log_msg = format_log_message('ERROR', error_msg, extra=extra_log_timeout)
                        logger.error(log_msg)
                        raise TimeoutError(error_msg)

            except Exception as e:
                if not isinstance(e, asyncio.CancelledError):  # skip logging for cancellation (the normal exit path)
                    error_msg = f"假流式处理期间发生错误: {str(e)}"
                    extra_log_error = {'key': self.api_key[:8], 'request_type': 'fake-stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
                    log_msg = format_log_message('ERROR', error_msg, extra=extra_log_error)
                    logger.error(log_msg)
                raise e
            finally:
                log_msg = format_log_message('INFO', "假流式请求结束", extra=extra_log)
                logger.info(log_msg)
        else:
            # Original (real) streaming logic via the SSE endpoint.
            # "think" models are only served on the v1alpha API surface.
            api_version = "v1alpha" if "think" in request.model else "v1beta"
            url = f"https://generativelanguage.googleapis.com/{api_version}/models/{request.model}:streamGenerateContent?key={self.api_key}&alt=sse"
            headers = {
                "Content-Type": "application/json",
            }
            data = {
                "contents": contents,
                "generationConfig": {
                    "temperature": request.temperature,
                    "maxOutputTokens": request.max_tokens,
                },
                "safetySettings": safety_settings,
            }
            if system_instruction:
                data["system_instruction"] = system_instruction

            async with httpx.AsyncClient() as client:
                async with client.stream("POST", url, headers=headers, json=data, timeout=600) as response:
                    # Accumulates partial SSE payloads until they parse as JSON.
                    buffer = b""
                    try:
                        async for line in response.aiter_lines():
                            if not line.strip():
                                continue
                            if line.startswith("data: "):
                                line = line[len("data: "):]
                            buffer += line.encode('utf-8')
                            try:
                                # NOTE(review): `data` here shadows the request
                                # payload dict built above — harmless after the
                                # request is sent, but confusing to read.
                                data = json.loads(buffer.decode('utf-8'))
                                buffer = b""
                                if 'candidates' in data and data['candidates']:
                                    candidate = data['candidates'][0]
                                    if 'content' in candidate:
                                        content = candidate['content']
                                        if 'parts' in content and content['parts']:
                                            parts = content['parts']
                                            # Concatenate every text part in this chunk.
                                            text = ""
                                            for part in parts:
                                                if 'text' in part:
                                                    text += part['text']
                                            if text:
                                                yield text

                                    # Any finish reason other than STOP means the
                                    # answer was truncated — surface it as an error.
                                    if candidate.get("finishReason") and candidate.get("finishReason") != "STOP":
                                        error_msg = f"模型的响应被截断: {candidate.get('finishReason')}"
                                        extra_log_error = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
                                        log_msg = format_log_message('WARNING', error_msg, extra=extra_log_error)
                                        logger.warning(log_msg)
                                        raise ValueError(error_msg)

                                    # A HIGH safety-probability rating is treated
                                    # the same as a truncated response.
                                    if 'safetyRatings' in candidate:
                                        for rating in candidate['safetyRatings']:
                                            if rating['probability'] == 'HIGH':
                                                error_msg = f"模型的响应被截断: {rating['category']}"
                                                extra_log_safety = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
                                                log_msg = format_log_message('WARNING', error_msg, extra=extra_log_safety)
                                                logger.warning(log_msg)
                                                raise ValueError(error_msg)
                            except json.JSONDecodeError:
                                # Incomplete JSON — keep buffering more lines.
                                continue
                            except Exception as e:
                                error_msg = f"流式处理期间发生错误: {str(e)}"
                                extra_log_stream_error = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
                                log_msg = format_log_message('ERROR', error_msg, extra=extra_log_stream_error)
                                logger.error(log_msg)
                                raise e
                    except Exception as e:
                        raise e
                    finally:
                        log_msg = format_log_message('INFO', "流式请求结束", extra=extra_log)
                        logger.info(log_msg)

    def complete_chat(self, request: ChatCompletionRequest, contents, safety_settings, system_instruction):
        """Perform a blocking (non-streaming) generateContent request.

        Returns a ResponseWrapper on success; re-raises any requests /
        HTTP error (raise_for_status) to the caller.
        """
        extra_log = {'key': self.api_key[:8], 'request_type': 'non-stream', 'model': request.model, 'status_code': 'N/A'}
        log_msg = format_log_message('INFO', "非流式请求开始", extra=extra_log)
        logger.info(log_msg)

        # "think" models are only served on the v1alpha API surface.
        api_version = "v1alpha" if "think" in request.model else "v1beta"
        url = f"https://generativelanguage.googleapis.com/{api_version}/models/{request.model}:generateContent?key={self.api_key}"
        headers = {
            "Content-Type": "application/json",
        }
        data = {
            "contents": contents,
            "generationConfig": {
                "temperature": request.temperature,
                "maxOutputTokens": request.max_tokens,
            },
            "safetySettings": safety_settings,
        }
        if system_instruction:
            data["system_instruction"] = system_instruction

        try:
            response = requests.post(url, headers=headers, json=data)
            response.raise_for_status()

            log_msg = format_log_message('INFO', "非流式请求成功完成", extra=extra_log)
            logger.info(log_msg)

            return ResponseWrapper(response.json())
        except Exception as e:
            raise

    def convert_messages(self, messages, use_system_prompt=False):
        """Convert OpenAI-style messages into Gemini 'contents' format.

        Leading 'system' messages are folded into a system instruction when
        use_system_prompt is True; consecutive same-role messages are merged
        into one Gemini turn. Supports plain-string content and the list
        form with 'text' / 'image_url' (data-URI) items.

        NOTE(review): inconsistent return type — returns a plain list of
        error strings on failure, but a (history, system_instruction) tuple
        on success; callers must type-check the result.
        """
        gemini_history = []
        errors = []
        system_instruction_text = ""
        # While True, leading 'system' messages accumulate into the system
        # instruction instead of the chat history.
        is_system_phase = use_system_prompt
        for i, message in enumerate(messages):
            role = message.role
            content = message.content

            if isinstance(content, str):
                if is_system_phase and role == 'system':
                    if system_instruction_text:
                        system_instruction_text += "\n" + content
                    else:
                        system_instruction_text = content
                else:
                    # First non-system message ends the system phase for good.
                    is_system_phase = False

                    # Gemini only knows 'user' and 'model' roles.
                    if role in ['user', 'system']:
                        role_to_use = 'user'
                    elif role == 'assistant':
                        role_to_use = 'model'
                    else:
                        errors.append(f"Invalid role: {role}")
                        continue

                    # Merge consecutive same-role messages into one turn.
                    if gemini_history and gemini_history[-1]['role'] == role_to_use:
                        gemini_history[-1]['parts'].append({"text": content})
                    else:
                        gemini_history.append(
                            {"role": role_to_use, "parts": [{"text": content}]})
            elif isinstance(content, list):
                parts = []
                for item in content:
                    if item.get('type') == 'text':
                        parts.append({"text": item.get('text')})
                    elif item.get('type') == 'image_url':
                        image_data = item.get('image_url', {}).get('url', '')
                        # Only data URIs are supported; split into MIME type
                        # and base64 payload for Gemini's inline_data format.
                        if image_data.startswith('data:image/'):
                            try:
                                mime_type, base64_data = image_data.split(';')[0].split(':')[1], image_data.split(',')[1]
                                parts.append({
                                    "inline_data": {
                                        "mime_type": mime_type,
                                        "data": base64_data
                                    }
                                })
                            except (IndexError, ValueError):
                                errors.append(
                                    f"Invalid data URI for image: {image_data}")
                        else:
                            errors.append(
                                f"Invalid image URL format for item: {item}")

                if parts:
                    if role in ['user', 'system']:
                        role_to_use = 'user'
                    elif role == 'assistant':
                        role_to_use = 'model'
                    else:
                        errors.append(f"Invalid role: {role}")
                        continue
                    if gemini_history and gemini_history[-1]['role'] == role_to_use:
                        gemini_history[-1]['parts'].extend(parts)
                    else:
                        gemini_history.append(
                            {"role": role_to_use, "parts": parts})
        if errors:
            return errors
        else:
            return gemini_history, {"parts": [{"text": system_instruction_text}]}

    @staticmethod
    async def list_available_models(api_key) -> list:
        """Fetch model names from the API and append EXTRA_MODELS entries.

        Raises httpx.HTTPStatusError on a non-2xx response.
        """
        url = "https://generativelanguage.googleapis.com/v1beta/models?key={}".format(
            api_key)
        async with httpx.AsyncClient() as client:
            response = await client.get(url)
            response.raise_for_status()
            data = response.json()
            models = [model["name"] for model in data.get("models", [])]
            models.extend(GeminiClient.EXTRA_MODELS)
            return models
|
app/index.html
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html>
|
| 3 |
+
<head>
|
| 4 |
+
<title>Gemini API 代理服务</title>
|
| 5 |
+
<meta charset="UTF-8">
|
| 6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 7 |
+
<style>
|
| 8 |
+
body {
|
| 9 |
+
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
|
| 10 |
+
max-width: 1200px;
|
| 11 |
+
margin: 0 auto;
|
| 12 |
+
padding: 20px;
|
| 13 |
+
line-height: 1.6;
|
| 14 |
+
background-color: #f8f9fa;
|
| 15 |
+
}
|
| 16 |
+
h1, h2, h3 {
|
| 17 |
+
color: #333;
|
| 18 |
+
text-align: center;
|
| 19 |
+
margin-bottom: 20px;
|
| 20 |
+
}
|
| 21 |
+
.info-box {
|
| 22 |
+
background-color: #fff;
|
| 23 |
+
border: 1px solid #dee2e6;
|
| 24 |
+
border-radius: 8px;
|
| 25 |
+
padding: 20px;
|
| 26 |
+
margin-bottom: 20px;
|
| 27 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
|
| 28 |
+
}
|
| 29 |
+
.status {
|
| 30 |
+
color: #28a745;
|
| 31 |
+
font-weight: bold;
|
| 32 |
+
font-size: 18px;
|
| 33 |
+
margin-bottom: 20px;
|
| 34 |
+
text-align: center;
|
| 35 |
+
}
|
| 36 |
+
.stats-grid {
|
| 37 |
+
display: grid;
|
| 38 |
+
grid-template-columns: repeat(3, 1fr);
|
| 39 |
+
gap: 15px;
|
| 40 |
+
margin-top: 15px;
|
| 41 |
+
margin-bottom: 20px;
|
| 42 |
+
}
|
| 43 |
+
.stat-card {
|
| 44 |
+
background-color: #e9ecef;
|
| 45 |
+
padding: 15px;
|
| 46 |
+
border-radius: 8px;
|
| 47 |
+
text-align: center;
|
| 48 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
|
| 49 |
+
transition: transform 0.2s;
|
| 50 |
+
}
|
| 51 |
+
.stat-card:hover {
|
| 52 |
+
transform: translateY(-2px);
|
| 53 |
+
box-shadow: 0 4px 8px rgba(0,0,0,0.1);
|
| 54 |
+
}
|
| 55 |
+
.stat-value {
|
| 56 |
+
font-size: 24px;
|
| 57 |
+
font-weight: bold;
|
| 58 |
+
color: #007bff;
|
| 59 |
+
}
|
| 60 |
+
.stat-label {
|
| 61 |
+
font-size: 14px;
|
| 62 |
+
color: #6c757d;
|
| 63 |
+
margin-top: 5px;
|
| 64 |
+
}
|
| 65 |
+
.section-title {
|
| 66 |
+
color: #495057;
|
| 67 |
+
border-bottom: 1px solid #dee2e6;
|
| 68 |
+
padding-bottom: 10px;
|
| 69 |
+
margin-bottom: 20px;
|
| 70 |
+
}
|
| 71 |
+
.log-container {
|
| 72 |
+
background-color: #f5f5f5;
|
| 73 |
+
border: 1px solid #ddd;
|
| 74 |
+
border-radius: 8px;
|
| 75 |
+
padding: 15px;
|
| 76 |
+
margin-top: 20px;
|
| 77 |
+
max-height: 500px;
|
| 78 |
+
overflow-y: auto;
|
| 79 |
+
font-family: monospace;
|
| 80 |
+
font-size: 14px;
|
| 81 |
+
line-height: 1.5;
|
| 82 |
+
}
|
| 83 |
+
.log-entry {
|
| 84 |
+
margin-bottom: 8px;
|
| 85 |
+
padding: 8px;
|
| 86 |
+
border-radius: 4px;
|
| 87 |
+
}
|
| 88 |
+
.log-entry.INFO {
|
| 89 |
+
background-color: #e8f4f8;
|
| 90 |
+
border-left: 4px solid #17a2b8;
|
| 91 |
+
}
|
| 92 |
+
.log-entry.WARNING {
|
| 93 |
+
background-color: #fff3cd;
|
| 94 |
+
border-left: 4px solid #ffc107;
|
| 95 |
+
}
|
| 96 |
+
.log-entry.ERROR {
|
| 97 |
+
background-color: #f8d7da;
|
| 98 |
+
border-left: 4px solid #dc3545;
|
| 99 |
+
}
|
| 100 |
+
.log-entry.DEBUG {
|
| 101 |
+
background-color: #d1ecf1;
|
| 102 |
+
border-left: 4px solid #17a2b8;
|
| 103 |
+
}
|
| 104 |
+
.log-timestamp {
|
| 105 |
+
color: #6c757d;
|
| 106 |
+
font-size: 12px;
|
| 107 |
+
margin-right: 10px;
|
| 108 |
+
}
|
| 109 |
+
.log-level {
|
| 110 |
+
font-weight: bold;
|
| 111 |
+
margin-right: 10px;
|
| 112 |
+
}
|
| 113 |
+
.log-level.INFO {
|
| 114 |
+
color: #17a2b8;
|
| 115 |
+
}
|
| 116 |
+
.log-level.WARNING {
|
| 117 |
+
color: #ffc107;
|
| 118 |
+
}
|
| 119 |
+
.log-level.ERROR {
|
| 120 |
+
color: #dc3545;
|
| 121 |
+
}
|
| 122 |
+
.log-level.DEBUG {
|
| 123 |
+
color: #17a2b8;
|
| 124 |
+
}
|
| 125 |
+
.log-message {
|
| 126 |
+
color: #212529;
|
| 127 |
+
}
|
| 128 |
+
.refresh-button {
|
| 129 |
+
display: block;
|
| 130 |
+
margin: 20px auto;
|
| 131 |
+
padding: 10px 20px;
|
| 132 |
+
background-color: #007bff;
|
| 133 |
+
color: white;
|
| 134 |
+
border: none;
|
| 135 |
+
border-radius: 4px;
|
| 136 |
+
font-size: 16px;
|
| 137 |
+
cursor: pointer;
|
| 138 |
+
transition: background-color 0.2s;
|
| 139 |
+
}
|
| 140 |
+
.refresh-button:hover {
|
| 141 |
+
background-color: #0069d9;
|
| 142 |
+
}
|
| 143 |
+
.log-filter {
|
| 144 |
+
display: flex;
|
| 145 |
+
justify-content: center;
|
| 146 |
+
margin-bottom: 15px;
|
| 147 |
+
gap: 10px;
|
| 148 |
+
}
|
| 149 |
+
.log-filter button {
|
| 150 |
+
padding: 5px 10px;
|
| 151 |
+
border: 1px solid #ddd;
|
| 152 |
+
border-radius: 4px;
|
| 153 |
+
background-color: #f8f9fa;
|
| 154 |
+
cursor: pointer;
|
| 155 |
+
}
|
| 156 |
+
.log-filter button.active {
|
| 157 |
+
background-color: #007bff;
|
| 158 |
+
color: white;
|
| 159 |
+
border-color: #007bff;
|
| 160 |
+
}
|
| 161 |
+
</style>
|
| 162 |
+
</head>
|
| 163 |
+
<body>
|
| 164 |
+
<h1>🤖 Gemini API 代理服务</h1>
|
| 165 |
+
|
| 166 |
+
<div class="info-box">
|
| 167 |
+
<h2 class="section-title">🟢 运行状态</h2>
|
| 168 |
+
<p class="status">服务运行中</p>
|
| 169 |
+
|
| 170 |
+
<div class="stats-grid">
|
| 171 |
+
<div class="stat-card">
|
| 172 |
+
<div class="stat-value">{{ key_count }}</div>
|
| 173 |
+
<div class="stat-label">可用API密钥数量</div>
|
| 174 |
+
</div>
|
| 175 |
+
<div class="stat-card">
|
| 176 |
+
<div class="stat-value">{{ model_count }}</div>
|
| 177 |
+
<div class="stat-label">可用模型数量</div>
|
| 178 |
+
</div>
|
| 179 |
+
<div class="stat-card">
|
| 180 |
+
<div class="stat-value">{{ retry_count }}</div>
|
| 181 |
+
<div class="stat-label">最大重试次数</div>
|
| 182 |
+
</div>
|
| 183 |
+
</div>
|
| 184 |
+
|
| 185 |
+
<h3 class="section-title">API调用统计</h3>
|
| 186 |
+
<div class="stats-grid">
|
| 187 |
+
<div class="stat-card">
|
| 188 |
+
<div class="stat-value">{{ last_24h_calls }}</div>
|
| 189 |
+
<div class="stat-label">24小时内调用次数</div>
|
| 190 |
+
</div>
|
| 191 |
+
<div class="stat-card">
|
| 192 |
+
<div class="stat-value">{{ hourly_calls }}</div>
|
| 193 |
+
<div class="stat-label">一小时内调用次数</div>
|
| 194 |
+
</div>
|
| 195 |
+
<div class="stat-card">
|
| 196 |
+
<div class="stat-value">{{ minute_calls }}</div>
|
| 197 |
+
<div class="stat-label">一分钟内调用次数</div>
|
| 198 |
+
</div>
|
| 199 |
+
</div>
|
| 200 |
+
</div>
|
| 201 |
+
|
| 202 |
+
<div class="info-box">
|
| 203 |
+
<h2 class="section-title">⚙️ 环境配置</h2>
|
| 204 |
+
<div class="stats-grid">
|
| 205 |
+
<div class="stat-card">
|
| 206 |
+
<div class="stat-value">{{ max_requests_per_minute }}</div>
|
| 207 |
+
<div class="stat-label">每分钟请求限制</div>
|
| 208 |
+
</div>
|
| 209 |
+
<div class="stat-card">
|
| 210 |
+
<div class="stat-value">{{ max_requests_per_day_per_ip }}</div>
|
| 211 |
+
<div class="stat-label">每IP每日请求限制</div>
|
| 212 |
+
</div>
|
| 213 |
+
<div class="stat-card">
|
| 214 |
+
<div class="stat-value">{{ current_time }}</div>
|
| 215 |
+
<div class="stat-label">当前服务器时间</div>
|
| 216 |
+
</div>
|
| 217 |
+
</div>
|
| 218 |
+
</div>
|
| 219 |
+
|
| 220 |
+
<div class="info-box">
|
| 221 |
+
<h2 class="section-title">📦 版本信息</h2>
|
| 222 |
+
<div class="version-info" style="text-align: center; margin-bottom: 15px;">
|
| 223 |
+
<div style="font-size: 18px; margin-bottom: 10px;">
|
| 224 |
+
当前版本: <span style="font-weight: bold; color: #007bff;">{{ local_version }}</span>
|
| 225 |
+
</div>
|
| 226 |
+
{% if has_update %}
|
| 227 |
+
<div style="display: flex; align-items: center; justify-content: center; margin-top: 15px;">
|
| 228 |
+
<div style="background-color: #fef6e0; border: 1px solid #ffeeba; border-radius: 4px; padding: 10px 15px; display: inline-flex; align-items: center;">
|
| 229 |
+
<span style="color: #ff9800; margin-right: 10px;">
|
| 230 |
+
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
| 231 |
+
<circle cx="12" cy="12" r="10"></circle>
|
| 232 |
+
<line x1="12" y1="8" x2="12" y2="12"></line>
|
| 233 |
+
<line x1="12" y1="16" x2="12.01" y2="16"></line>
|
| 234 |
+
</svg>
|
| 235 |
+
</span>
|
| 236 |
+
<span>
|
| 237 |
+
<strong>发现新版本!</strong> 最新版本: <span style="font-weight: bold; color: #28a745;">{{ remote_version }}</span>
|
| 238 |
+
</span>
|
| 239 |
+
</div>
|
| 240 |
+
</div>
|
| 241 |
+
{% endif %}
|
| 242 |
+
</div>
|
| 243 |
+
</div>
|
| 244 |
+
|
| 245 |
+
<div class="info-box">
|
| 246 |
+
<h2 class="section-title"> 系统日志</h2>
|
| 247 |
+
<div class="log-filter">
|
| 248 |
+
<button class="active" data-level="ALL">全部</button>
|
| 249 |
+
<button data-level="INFO">信息</button>
|
| 250 |
+
<button data-level="WARNING">警告</button>
|
| 251 |
+
<button data-level="ERROR">错误</button>
|
| 252 |
+
</div>
|
| 253 |
+
<div class="log-container">
|
| 254 |
+
{% for log in logs %}
|
| 255 |
+
<div class="log-entry {{ log.level }}" data-level="{{ log.level }}">
|
| 256 |
+
<span class="log-timestamp">{{ log.timestamp }}</span>
|
| 257 |
+
<span class="log-level {{ log.level }}">{{ log.level }}</span>
|
| 258 |
+
<span class="log-message">
|
| 259 |
+
{% if log.key != 'N/A' %}[{{ log.key }}]{% endif %}
|
| 260 |
+
{% if log.request_type != 'N/A' %}{{ log.request_type }}{% endif %}
|
| 261 |
+
{% if log.model != 'N/A' %}[{{ log.model }}]{% endif %}
|
| 262 |
+
{% if log.status_code != 'N/A' %}{{ log.status_code }}{% endif %}
|
| 263 |
+
: {{ log.message }}
|
| 264 |
+
{% if log.error_message %}
|
| 265 |
+
- {{ log.error_message }}
|
| 266 |
+
{% endif %}
|
| 267 |
+
</span>
|
| 268 |
+
</div>
|
| 269 |
+
{% endfor %}
|
| 270 |
+
</div>
|
| 271 |
+
<button class="refresh-button" onclick="window.location.reload()">刷新日志</button>
|
| 272 |
+
</div>
|
| 273 |
+
|
| 274 |
+
    <script>
        // Log filtering: each filter button shows only entries of its level.
        document.querySelectorAll('.log-filter button').forEach(button => {
            button.addEventListener('click', function() {
                // Remove the active class from every filter button.
                document.querySelectorAll('.log-filter button').forEach(btn => {
                    btn.classList.remove('active');
                });

                // Mark the clicked button as the active filter.
                this.classList.add('active');

                const level = this.getAttribute('data-level');

                // Show or hide each log entry based on the selected level.
                document.querySelectorAll('.log-entry').forEach(entry => {
                    if (level === 'ALL' || entry.getAttribute('data-level') === level) {
                        entry.style.display = 'block';
                    } else {
                        entry.style.display = 'none';
                    }
                });
            });
        });

        // On page load, scroll the log container to the newest (bottom) entry.
        window.onload = function() {
            const logContainer = document.querySelector('.log-container');
            logContainer.scrollTop = logContainer.scrollHeight;
        };
    </script>
|
| 305 |
+
</body>
|
| 306 |
+
</html>
|
app/main.py
ADDED
|
@@ -0,0 +1,1506 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Standard library
import asyncio
import hashlib
import hmac
import json
import logging
import os
import pathlib
import random
import sys
import time
from collections import defaultdict
from datetime import datetime, timedelta
from typing import Literal, Dict, Any, Optional

# Third-party
import requests
from apscheduler.schedulers.background import BackgroundScheduler
from fastapi import FastAPI, HTTPException, Request, Depends, status
from fastapi.responses import JSONResponse, StreamingResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates

# Local application
from .models import ChatCompletionRequest, ChatCompletionResponse, ErrorResponse, ModelList
from .gemini import GeminiClient, ResponseWrapper
from .utils import handle_gemini_error, protect_from_abuse, APIKeyManager, test_api_key, format_log_message, log_manager
|
| 22 |
+
# Whether to emulate streaming by emitting periodic keep-alive chunks while the
# real (non-streaming) upstream call completes. Enabled by default.
FAKE_STREAMING = os.environ.get("FAKE_STREAMING", "true").lower() in ["true", "1", "yes"]
# Interval (seconds) between empty keep-alive chunks for fake-streaming requests.
FAKE_STREAMING_INTERVAL = float(os.environ.get("FAKE_STREAMING_INTERVAL", "1"))

# Silence uvicorn's default loggers; this app emits its own formatted log lines.
logging.getLogger("uvicorn").disabled = True
logging.getLogger("uvicorn.access").disabled = True

# Application logger configuration.
logger = logging.getLogger("my_logger")
logger.setLevel(logging.DEBUG)

# Template directory: the package directory itself (serves the bundled HTML page).
BASE_DIR = pathlib.Path(__file__).parent
templates = Jinja2Templates(directory=str(BASE_DIR))

app = FastAPI()
|
| 37 |
+
|
| 38 |
+
# --------------- 缓存管理类 ---------------
|
| 39 |
+
|
| 40 |
+
class ResponseCacheManager:
    """Manages cached API responses with TTL expiry and a bounded entry count."""

    def __init__(self, expiry_time: int, max_entries: int, remove_after_use: bool = True,
                 cache_dict: Dict[str, Dict[str, Any]] = None):
        # Reuse a caller-supplied dict when given so external references stay in sync.
        if cache_dict is None:
            cache_dict = {}
        self.cache = cache_dict
        self.expiry_time = expiry_time
        self.max_entries = max_entries
        self.remove_after_use = remove_after_use

    def get(self, cache_key: str):
        """Return (response, True) for a live cache entry, else (None, False)."""
        entry = self.cache.get(cache_key)
        if entry is not None and time.time() < entry.get('expiry_time', 0):
            # Present and not yet expired: hand back the stored response without
            # deleting it here — callers decide when to evict.
            return entry['response'], True
        return None, False

    def store(self, cache_key: str, response, client_ip: str = None):
        """Insert a response into the cache, then enforce the size limit."""
        timestamp = time.time()
        self.cache[cache_key] = {
            'response': response,
            'expiry_time': timestamp + self.expiry_time,
            'created_at': timestamp,
            'client_ip': client_ip
        }

        log('info', f"响应已缓存: {cache_key[:8]}...",
            extra={'cache_operation': 'store', 'request_type': 'non-stream'})

        # Evict the oldest entries if the insert pushed us over capacity.
        self.clean_if_needed()

    def clean_expired(self):
        """Drop every entry whose TTL has elapsed."""
        cutoff = time.time()
        stale = [k for k, v in self.cache.items() if cutoff > v.get('expiry_time', 0)]
        for key in stale:
            del self.cache[key]
            log('info', f"清理过期缓存: {key[:8]}...", extra={'cache_operation': 'clean'})

    def clean_if_needed(self):
        """Evict the oldest entries when the cache exceeds its size limit."""
        overflow = len(self.cache) - self.max_entries
        if overflow <= 0:
            return

        # Oldest first, ordered by insertion timestamp.
        oldest_first = sorted(self.cache, key=lambda k: self.cache[k].get('created_at', 0))
        for key in oldest_first[:overflow]:
            del self.cache[key]
            log('info', f"缓存容量限制,删除旧缓存: {key[:8]}...", extra={'cache_operation': 'limit'})
|
| 105 |
+
|
| 106 |
+
# --------------- 活跃请求管理类 ---------------
|
| 107 |
+
|
| 108 |
+
class ActiveRequestsManager:
    """Tracks in-flight API request tasks so duplicate requests can share results."""

    def __init__(self, requests_pool: Dict[str, asyncio.Task] = None):
        # Reuse an externally supplied pool dict when provided.
        self.active_requests = {} if requests_pool is None else requests_pool

    def add(self, key: str, task: asyncio.Task):
        """Register *task* under *key*, stamping it with its creation time."""
        task.creation_time = time.time()  # consulted later to find runaway tasks
        self.active_requests[key] = task

    def get(self, key: str):
        """Return the task registered under *key*, or None when absent."""
        return self.active_requests.get(key)

    def remove(self, key: str):
        """Deregister *key*; return True when an entry was actually removed."""
        if key not in self.active_requests:
            return False
        del self.active_requests[key]
        return True

    def remove_by_prefix(self, prefix: str):
        """Remove every task whose key starts with *prefix*; return the count."""
        matching = [k for k in self.active_requests.keys() if k.startswith(prefix)]
        for k in matching:
            self.remove(k)
        return len(matching)

    def clean_completed(self):
        """Drop every task that has finished or been cancelled."""
        finished = []
        for key, task in self.active_requests.items():
            if task.done() or task.cancelled():
                finished.append(key)
        for key in finished:
            self.remove(key)

        # if finished:
        #     log('info', f"清理已完成请求任务: {len(finished)}个", cleanup='active_requests')

    def clean_long_running(self, max_age_seconds: int = 300):
        """Cancel (and log) tasks still running after *max_age_seconds*."""
        deadline = time.time() - max_age_seconds
        stale = []
        for key, task in list(self.active_requests.items()):
            if (hasattr(task, 'creation_time')
                    and task.creation_time < deadline
                    and not task.done() and not task.cancelled()):
                stale.append(key)
                task.cancel()  # abort the runaway task
        if stale:
            log('warning', f"取消长时间运行的任务: {len(stale)}个", cleanup='long_running_tasks')
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
# --------------- Global instances & configuration ---------------
|
| 170 |
+
|
| 171 |
+
# Bearer-token password protecting the API endpoints (surrounding quotes stripped).
PASSWORD = os.environ.get("PASSWORD", "123").strip('"')
# Per-client abuse limits enforced by protect_from_abuse().
MAX_REQUESTS_PER_MINUTE = int(os.environ.get("MAX_REQUESTS_PER_MINUTE", "30"))
MAX_REQUESTS_PER_DAY_PER_IP = int(
    os.environ.get("MAX_REQUESTS_PER_DAY_PER_IP", "600"))
# MAX_RETRIES = int(os.environ.get('MaxRetries', '3').strip() or '3')
# Retry pacing (seconds) for upstream calls.
RETRY_DELAY = 1
MAX_RETRY_DELAY = 16  # fix: was assigned twice in the original; duplicate removed

# Safety settings for Gemini 1.x models: disable every content filter.
safety_settings = [
    {
        "category": "HARM_CATEGORY_HARASSMENT",
        "threshold": "BLOCK_NONE"
    },
    {
        "category": "HARM_CATEGORY_HATE_SPEECH",
        "threshold": "BLOCK_NONE"
    },
    {
        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "threshold": "BLOCK_NONE"
    },
    {
        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
        "threshold": "BLOCK_NONE"
    },
    {
        "category": "HARM_CATEGORY_CIVIC_INTEGRITY",
        "threshold": "BLOCK_NONE"
    }
]
# Gemini 2.x models expect "OFF" rather than "BLOCK_NONE" for the same effect.
safety_settings_g2 = [
    {
        "category": "HARM_CATEGORY_HARASSMENT",
        "threshold": "OFF"
    },
    {
        "category": "HARM_CATEGORY_HATE_SPEECH",
        "threshold": "OFF"
    },
    {
        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "threshold": "OFF"
    },
    {
        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
        "threshold": "OFF"
    },
    {
        "category": "HARM_CATEGORY_CIVIC_INTEGRITY",
        "threshold": "OFF"
    }
]

key_manager = APIKeyManager()  # the key stack is initialized inside __init__
current_api_key = key_manager.get_available_key()

# Response-cache tuning.
CACHE_EXPIRY_TIME = int(os.environ.get("CACHE_EXPIRY_TIME", "1200"))  # default: 20 minutes
MAX_CACHE_ENTRIES = int(os.environ.get("MAX_CACHE_ENTRIES", "500"))  # default: at most 500 cached responses
REMOVE_CACHE_AFTER_USE = os.environ.get("REMOVE_CACHE_AFTER_USE", "true").lower() in ["true", "1", "yes"]

# Backing store for the response cache. All cache operations must go through
# response_cache_manager; never mutate this dict directly.
response_cache: Dict[str, Dict[str, Any]] = {}

# Cache manager sharing the dict above so external references stay consistent.
response_cache_manager = ResponseCacheManager(
    expiry_time=CACHE_EXPIRY_TIME,
    max_entries=MAX_CACHE_ENTRIES,
    remove_after_use=REMOVE_CACHE_AFTER_USE,
    cache_dict=response_cache
)

# Pool of in-flight request tasks. All operations must go through
# active_requests_manager; never mutate this dict directly.
active_requests_pool: Dict[str, asyncio.Task] = {}

# Manager wrapping the pool above.
active_requests_manager = ActiveRequestsManager(requests_pool=active_requests_pool)

# API call counters bucketed by time window.
api_call_stats = {
    'last_24h': defaultdict(int),  # per-hour counts covering the last 24 hours
    'hourly': defaultdict(int),    # per-hour counts covering the last hour
    'minute': defaultdict(int),    # per-minute counts covering the last minute
}
|
| 257 |
+
|
| 258 |
+
# 清理过期统计数据的函数
|
| 259 |
+
def clean_expired_stats():
|
| 260 |
+
now = datetime.now()
|
| 261 |
+
|
| 262 |
+
# 清理24小时前的数据
|
| 263 |
+
for hour_key in list(api_call_stats['last_24h'].keys()):
|
| 264 |
+
try:
|
| 265 |
+
hour_time = datetime.strptime(hour_key, '%Y-%m-%d %H:00')
|
| 266 |
+
if (now - hour_time).total_seconds() > 24 * 3600: # 超过24小时
|
| 267 |
+
del api_call_stats['last_24h'][hour_key]
|
| 268 |
+
except ValueError:
|
| 269 |
+
# 如果键格式不正确,直接删除
|
| 270 |
+
del api_call_stats['last_24h'][hour_key]
|
| 271 |
+
|
| 272 |
+
# 清理一小时前的小时统计数据
|
| 273 |
+
one_hour_ago = now - timedelta(hours=1)
|
| 274 |
+
for hour_key in list(api_call_stats['hourly'].keys()):
|
| 275 |
+
try:
|
| 276 |
+
hour_time = datetime.strptime(hour_key, '%Y-%m-%d %H:00')
|
| 277 |
+
if hour_time < one_hour_ago:
|
| 278 |
+
del api_call_stats['hourly'][hour_key]
|
| 279 |
+
except ValueError:
|
| 280 |
+
# 如果键格式不正确,直接删除
|
| 281 |
+
del api_call_stats['hourly'][hour_key]
|
| 282 |
+
|
| 283 |
+
# 清理一分钟前的分钟统计数据
|
| 284 |
+
one_minute_ago = now - timedelta(minutes=1)
|
| 285 |
+
for minute_key in list(api_call_stats['minute'].keys()):
|
| 286 |
+
try:
|
| 287 |
+
minute_time = datetime.strptime(minute_key, '%Y-%m-%d %H:%M')
|
| 288 |
+
if minute_time < one_minute_ago:
|
| 289 |
+
del api_call_stats['minute'][minute_key]
|
| 290 |
+
except ValueError:
|
| 291 |
+
# 如果键格式不正确,直接删除
|
| 292 |
+
del api_call_stats['minute'][minute_key]
|
| 293 |
+
|
| 294 |
+
# 更新API调用统计的函数
|
| 295 |
+
def update_api_call_stats():
    """Record one API call in every time-bucketed counter, pruning stale data first."""
    current = datetime.now()
    hour_bucket = current.strftime('%Y-%m-%d %H:00')
    minute_bucket = current.strftime('%Y-%m-%d %H:%M')

    # Drop expired buckets first so the totals below reflect the live windows.
    clean_expired_stats()

    # Count this call in each window.
    for stats_key, bucket_key in (('last_24h', hour_bucket), ('hourly', hour_bucket), ('minute', minute_bucket)):
        api_call_stats[stats_key][bucket_key] += 1

    log('info', "API调用统计已更新: 24小时=%s, 1小时=%s, 1分钟=%s" % (sum(api_call_stats['last_24h'].values()), sum(api_call_stats['hourly'].values()), sum(api_call_stats['minute'].values())))
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
def switch_api_key():
    """Rotate the module-level current_api_key to the next available key."""
    global current_api_key
    replacement = key_manager.get_available_key()  # handles the key-stack rotation
    if not replacement:
        # Every key has been tried and none is usable.
        log('error', "API key 替换失败,所有API key都已尝试,请重新配置或稍后重试", extra={'key': 'N/A', 'request_type': 'switch_key', 'status_code': 'N/A'})
        return
    current_api_key = replacement
    log('info', f"API key 替换为 → {current_api_key[:8]}...", extra={'key': current_api_key[:8], 'request_type': 'switch_key'})
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
async def check_keys():
    """Probe every configured API key and return the list of keys that pass."""
    usable = []
    for candidate in key_manager.api_keys:
        is_valid = await test_api_key(candidate)
        log('info', f"API Key {candidate[:10]}... {'有效' if is_valid else '无效'}.")
        if is_valid:
            usable.append(candidate)
    if not usable:
        # No key worked at all — the proxy cannot serve requests.
        log('error', "没有可用的 API 密钥!", extra={'key': 'N/A', 'request_type': 'startup', 'status_code': 'N/A'})
    return usable
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
# Global variables holding version information (filled in by check_version()).
|
| 335 |
+
local_version = "0.0.0"   # version read from the bundled version.txt by check_version()
remote_version = "0.0.0"  # latest version fetched from GitHub by check_version()
has_update = False        # True when remote_version is strictly newer than local_version
|
| 338 |
+
|
| 339 |
+
# 检查版本更新
|
| 340 |
+
async def check_version():
|
| 341 |
+
global local_version, remote_version, has_update
|
| 342 |
+
try:
|
| 343 |
+
# 读取本地版本
|
| 344 |
+
with open("version.txt", "r") as f:
|
| 345 |
+
version_line = f.read().strip()
|
| 346 |
+
local_version = version_line.split("=")[1] if "=" in version_line else "0.0.0"
|
| 347 |
+
|
| 348 |
+
# 获取远程版本
|
| 349 |
+
github_url = "https://raw.githubusercontent.com/wyeeeee/hajimi/refs/heads/main/version.txt"
|
| 350 |
+
response = requests.get(github_url, timeout=5)
|
| 351 |
+
if response.status_code == 200:
|
| 352 |
+
version_line = response.text.strip()
|
| 353 |
+
remote_version = version_line.split("=")[1] if "=" in version_line else "0.0.0"
|
| 354 |
+
|
| 355 |
+
# 比较版本号
|
| 356 |
+
local_parts = [int(x) for x in local_version.split(".")]
|
| 357 |
+
remote_parts = [int(x) for x in remote_version.split(".")]
|
| 358 |
+
|
| 359 |
+
# 确保两个列表长度相同
|
| 360 |
+
while len(local_parts) < len(remote_parts):
|
| 361 |
+
local_parts.append(0)
|
| 362 |
+
while len(remote_parts) < len(local_parts):
|
| 363 |
+
remote_parts.append(0)
|
| 364 |
+
|
| 365 |
+
# 比较版本号
|
| 366 |
+
for i in range(len(local_parts)):
|
| 367 |
+
if remote_parts[i] > local_parts[i]:
|
| 368 |
+
has_update = True
|
| 369 |
+
break
|
| 370 |
+
elif remote_parts[i] < local_parts[i]:
|
| 371 |
+
break
|
| 372 |
+
|
| 373 |
+
log('info', f"版本检查: 本地版本 {local_version}, 远程版本 {remote_version}, 有更新: {has_update}")
|
| 374 |
+
else:
|
| 375 |
+
log('warning', f"无法获取远程版本信息,HTTP状态码: {response.status_code}")
|
| 376 |
+
except Exception as e:
|
| 377 |
+
log('error', f"版本检查失败: {str(e)}")
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
# --------------- 工具函数 ---------------
|
| 381 |
+
|
| 382 |
+
def log(level: str, message: str, **extra):
    """Unified logging helper: format *message* via format_log_message and emit
    it on the module logger at the method named by *level* (e.g. 'info')."""
    formatted = format_log_message(level.upper(), message, extra=extra)
    emit = getattr(logger, level.lower())
    emit(formatted)
|
| 386 |
+
|
| 387 |
+
# Ordered (substring, translation) pairs; first match wins, preserving the
# precedence of the original if-chain.
_ERROR_TRANSLATIONS = (
    ("quota exceeded", "API 密钥配额已用尽"),
    ("invalid argument", "无效参数"),
    ("internal server error", "服务器内部错误"),
    ("service unavailable", "服务不可用"),
)

def translate_error(message: str) -> str:
    """Translate known upstream error messages to Chinese; pass others through.

    Matching is case-insensitive. The message is lowercased once instead of
    once per comparison as in the original.
    """
    lowered = message.lower()
    for needle, translation in _ERROR_TRANSLATIONS:
        if needle in lowered:
            return translation
    return message
|
| 397 |
+
|
| 398 |
+
def create_chat_response(model: str, choices: list, id: str = None) -> ChatCompletionResponse:
    """Factory for a standard chat-completion response object.

    When *id* is falsy, a fresh id of the form "chatcmpl-<ms timestamp>" is
    generated. (The parameter name `id` shadows the builtin but is part of the
    public interface, so it is kept.)
    """
    response_id = id if id else f"chatcmpl-{int(time.time()*1000)}"
    return ChatCompletionResponse(
        id=response_id,
        object="chat.completion",
        created=int(time.time()),
        model=model,
        choices=choices
    )
|
| 407 |
+
|
| 408 |
+
def create_error_response(model: str, error_message: str) -> ChatCompletionResponse:
    """Factory for an error response delivered as an assistant message.

    The error text becomes the message content with finish_reason "error",
    so OpenAI-compatible clients can render it.
    """
    error_choice = {
        "index": 0,
        "message": {
            "role": "assistant",
            "content": error_message
        },
        "finish_reason": "error"
    }
    return create_chat_response(model=model, choices=[error_choice])
|
| 421 |
+
|
| 422 |
+
def handle_exception(exc_type, exc_value, exc_traceback):
    """Global excepthook: log uncaught exceptions through the app logger.

    KeyboardInterrupt is delegated to the interpreter's original hook so
    Ctrl-C still produces the normal traceback/exit behavior.
    """
    if issubclass(exc_type, KeyboardInterrupt):
        # BUG FIX: the original called sys.excepthook here, which — after the
        # assignment below — is this very function, causing infinite recursion
        # on KeyboardInterrupt. Delegate to the interpreter's pristine hook.
        sys.__excepthook__(exc_type, exc_value, exc_traceback)
        return
    error_message = translate_error(str(exc_value))
    log('error', f"未捕获的异常: {error_message}", status_code=500, error_message=error_message)

sys.excepthook = handle_exception
|
| 430 |
+
|
| 431 |
+
@app.on_event("startup")
async def startup_event():
    """FastAPI startup hook: validate keys, check for updates, load the model list."""
    log('info', "Starting Gemini API proxy...")

    # Start the periodic cache-cleanup job.
    schedule_cache_cleanup()

    # Compare the local version with the upstream repository.
    await check_version()

    available_keys = await check_keys()
    if available_keys:
        # Keep only the keys that passed validation and rebuild the random stack.
        key_manager.api_keys = available_keys
        key_manager._reset_key_stack()  # ensure a fresh randomized key stack at startup
        key_manager.show_all_keys()
        log('info', f"可用 API 密钥数量:{len(key_manager.api_keys)}")
        # MAX_RETRIES = len(key_manager.api_keys)
        log('info', f"最大重试次数设置为:{len(key_manager.api_keys)}")  # retry count mirrors key count
        if key_manager.api_keys:
            # Fetch the model catalogue once with the first valid key and strip
            # the "models/" prefix to expose OpenAI-style model ids.
            all_models = await GeminiClient.list_available_models(key_manager.api_keys[0])
            GeminiClient.AVAILABLE_MODELS = [model.replace(
                "models/", "") for model in all_models]
            log('info', "Available models loaded.")
|
| 454 |
+
|
| 455 |
+
@app.get("/v1/models", response_model=ModelList)
def list_models():
    """Return the cached model catalogue in OpenAI list format."""
    log('info', "Received request to list models", extra={'request_type': 'list_models', 'status_code': 200})
    entries = [
        {"id": model, "object": "model", "created": 1678888888, "owned_by": "organization-owner"}
        for model in GeminiClient.AVAILABLE_MODELS
    ]
    return ModelList(data=entries)
|
| 459 |
+
|
| 460 |
+
|
| 461 |
+
async def verify_password(request: Request):
    """FastAPI dependency enforcing Bearer-token auth when PASSWORD is set.

    Raises HTTPException(401) on a missing/malformed Authorization header or a
    wrong token. SECURITY FIX: the token is compared with hmac.compare_digest
    instead of `!=`, so the check is constant-time and does not leak token
    prefix information through timing on this untrusted input.
    """
    if PASSWORD:
        auth_header = request.headers.get("Authorization")
        if not auth_header or not auth_header.startswith("Bearer "):
            raise HTTPException(
                status_code=401, detail="Unauthorized: Missing or invalid token")
        token = auth_header.split(" ")[1]
        # Encode both sides: compare_digest on str rejects non-ASCII input.
        if not hmac.compare_digest(token.encode("utf-8"), PASSWORD.encode("utf-8")):
            raise HTTPException(
                status_code=401, detail="Unauthorized: Invalid token")
|
| 471 |
+
|
| 472 |
+
|
| 473 |
+
@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(request: ChatCompletionRequest, http_request: Request, _: None = Depends(verify_password)):
    """OpenAI-compatible chat endpoint with response caching and request dedup.

    Streaming requests bypass the cache entirely. Non-streaming requests are
    keyed by a hash of the request; identical concurrent requests share one
    in-flight task, and a completed response is served from cache exactly once
    (single-use semantics) before being evicted.
    """
    # Client IP, used for per-IP bookkeeping downstream.
    client_ip = http_request.client.host if http_request.client else "unknown"

    # Streaming requests are processed directly and never cached.
    if request.stream:
        return await process_request(request, http_request, "stream")

    # Full cache key for exact-match lookups.
    cache_key = generate_cache_key(request)

    # Log this request's cache key.
    log('info', f"请求缓存键: {cache_key[:8]}...",
        extra={'cache_key': cache_key[:8], 'request_type': 'non-stream'})

    # Exact-match cache hit?
    cached_response, cache_hit = response_cache_manager.get(cache_key)
    if cache_hit:
        # Exact cache hit.
        log('info', f"精确缓存命中: {cache_key[:8]}...",
            extra={'cache_operation': 'hit', 'request_type': 'non-stream'})

        # Also drop any related in-flight task so later requests don't wait on it.
        active_requests_manager.remove_by_prefix(f"cache:{cache_key}")

        # Safely delete the entry (single-use cache semantics).
        if cache_key in response_cache_manager.cache:
            del response_cache_manager.cache[cache_key]
            log('info', f"缓存使用后已删除: {cache_key[:8]}...",
                extra={'cache_operation': 'used-and-removed', 'request_type': 'non-stream'})

        # Serve the cached response.
        return cached_response

    # Key for the active-request pool, namespaced by the cache key.
    pool_key = f"cache:{cache_key}"

    # Is an identical request already in flight?
    active_task = active_requests_manager.get(pool_key)
    if active_task and not active_task.done():
        log('info', f"发现相同请求的进行中任务",
            extra={'request_type': 'non-stream', 'model': request.model})

        # Wait for the existing task to finish.
        try:
            # Bounded wait so we never hang forever.
            await asyncio.wait_for(active_task, timeout=180)

            # Fetch the finished task's result via the cache manager.
            cached_response, cache_hit = response_cache_manager.get(cache_key)
            if cache_hit:
                # Safely delete the entry after this single use.
                if cache_key in response_cache_manager.cache:
                    del response_cache_manager.cache[cache_key]
                    log('info', f"使用已完成任务的缓存后删除: {cache_key[:8]}...",
                        extra={'cache_operation': 'used-and-removed', 'request_type': 'non-stream'})

                return cached_response

            # Cache already consumed or missing: fall back to the task's own result.
            if active_task.done() and not active_task.cancelled():
                result = active_task.result()
                if result:
                    # log('info', f"使用已完成任务的原始结果",
                    #     extra={'request_type': 'non-stream', 'model': request.model})

                    # Build a fresh response object from the raw result so we
                    # never hand out an object another request may have mutated.
                    new_response = ChatCompletionResponse(
                        id=f"chatcmpl-{int(time.time()*1000)}",
                        object="chat.completion",
                        created=int(time.time()),
                        model=result.model,
                        choices=result.choices
                    )

                    # Do not re-cache: this result most likely came from a cache
                    # entry that was already consumed and evicted.
                    return new_response
        except (asyncio.TimeoutError, asyncio.CancelledError) as e:
            # On timeout/cancellation, log and fall through to issue a new request.
            error_type = "超时" if isinstance(e, asyncio.TimeoutError) else "被取消"
            log('warning', f"等待已有任务{error_type}: {pool_key}",
                extra={'request_type': 'non-stream', 'model': request.model})

            # Remove the dead task from the active pool.
            if active_task.done() or active_task.cancelled():
                active_requests_manager.remove(pool_key)
                log('info', f"已从活跃请求池移除{error_type}任务: {pool_key}",
                    extra={'request_type': 'non-stream'})

    # Create the processing task for this request.
    process_task = asyncio.create_task(
        process_request(request, http_request, "non-stream", cache_key=cache_key, client_ip=client_ip)
    )

    # Register the task in the active-request pool.
    active_requests_manager.add(pool_key, process_task)

    # Await completion.
    try:
        response = await process_task
        return response
    except Exception as e:
        # On failure, drop the task from the active pool.
        active_requests_manager.remove(pool_key)

        # Another task may have produced a cached result in the meantime.
        cached_response, cache_hit = response_cache_manager.get(cache_key)
        if cache_hit:
            log('info', f"任务失败但找到缓存,使用缓存结果: {cache_key[:8]}...",
                extra={'request_type': 'non-stream', 'model': request.model})
            return cached_response

        # Re-raise the original exception.
        raise
|
| 589 |
+
|
| 590 |
+
async def process_request(chat_request: ChatCompletionRequest, http_request: Request, request_type: Literal['stream', 'non-stream'], cache_key: str = None, client_ip: str = None):
|
| 591 |
+
"""处理API请求的主函数,根据需要处理流式或非流式请求"""
|
| 592 |
+
global current_api_key
|
| 593 |
+
|
| 594 |
+
# 请求前基本检查
|
| 595 |
+
protect_from_abuse(
|
| 596 |
+
http_request, MAX_REQUESTS_PER_MINUTE, MAX_REQUESTS_PER_DAY_PER_IP)
|
| 597 |
+
if chat_request.model not in GeminiClient.AVAILABLE_MODELS:
|
| 598 |
+
error_msg = "无效的模型"
|
| 599 |
+
extra_log = {'request_type': request_type, 'model': chat_request.model, 'status_code': 400, 'error_message': error_msg}
|
| 600 |
+
log('error', error_msg, extra=extra_log)
|
| 601 |
+
raise HTTPException(
|
| 602 |
+
status_code=status.HTTP_400_BAD_REQUEST, detail=error_msg)
|
| 603 |
+
|
| 604 |
+
# 重置已尝试的密钥
|
| 605 |
+
key_manager.reset_tried_keys_for_request()
|
| 606 |
+
|
| 607 |
+
# 转换消息格式
|
| 608 |
+
contents, system_instruction = GeminiClient.convert_messages(
|
| 609 |
+
GeminiClient, chat_request.messages)
|
| 610 |
+
|
| 611 |
+
# 设置重试次数(使用可用API密钥数量作为最大重试次数)
|
| 612 |
+
retry_attempts = len(key_manager.api_keys) if key_manager.api_keys else 1
|
| 613 |
+
|
| 614 |
+
# 尝试使用不同API密钥
|
| 615 |
+
for attempt in range(1, retry_attempts + 1):
|
| 616 |
+
# 获取下一个密钥
|
| 617 |
+
current_api_key = key_manager.get_available_key()
|
| 618 |
+
|
| 619 |
+
# 检查API密钥是否可用
|
| 620 |
+
if current_api_key is None:
|
| 621 |
+
log('warning', "没有可用的 API 密钥,跳过本次尝试",
|
| 622 |
+
extra={'request_type': request_type, 'model': chat_request.model, 'status_code': 'N/A'})
|
| 623 |
+
break
|
| 624 |
+
|
| 625 |
+
# 记录当前尝试的密钥信息
|
| 626 |
+
log('info', f"第 {attempt}/{retry_attempts} 次尝试 ... 使用密钥: {current_api_key[:8]}...",
|
| 627 |
+
extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
|
| 628 |
+
|
| 629 |
+
# 服务器错误重试逻辑
|
| 630 |
+
server_error_retries = 3
|
| 631 |
+
for server_retry in range(1, server_error_retries + 1):
|
| 632 |
+
try:
|
| 633 |
+
# 根据请求类型分别处理
|
| 634 |
+
if chat_request.stream:
|
| 635 |
+
try:
|
| 636 |
+
return await process_stream_request(
|
| 637 |
+
chat_request,
|
| 638 |
+
http_request,
|
| 639 |
+
contents,
|
| 640 |
+
system_instruction,
|
| 641 |
+
current_api_key
|
| 642 |
+
)
|
| 643 |
+
except Exception as e:
|
| 644 |
+
# 捕获流式请求的异常,但不立即返回错误
|
| 645 |
+
# 记录错误并继续尝试下一个API密钥
|
| 646 |
+
error_detail = handle_gemini_error(e, current_api_key, key_manager)
|
| 647 |
+
log('error', f"流式请求失败: {error_detail}",
|
| 648 |
+
extra={'key': current_api_key[:8], 'request_type': 'stream', 'model': chat_request.model})
|
| 649 |
+
# 不返回错误,而是抛出异常让外层循环处理
|
| 650 |
+
raise
|
| 651 |
+
else:
|
| 652 |
+
return await process_nonstream_request(
|
| 653 |
+
chat_request,
|
| 654 |
+
http_request,
|
| 655 |
+
request_type,
|
| 656 |
+
contents,
|
| 657 |
+
system_instruction,
|
| 658 |
+
current_api_key,
|
| 659 |
+
cache_key,
|
| 660 |
+
client_ip
|
| 661 |
+
)
|
| 662 |
+
except HTTPException as e:
|
| 663 |
+
if e.status_code == status.HTTP_408_REQUEST_TIMEOUT:
|
| 664 |
+
log('error', "客户端连接中断",
|
| 665 |
+
extra={'key': current_api_key[:8], 'request_type': request_type,
|
| 666 |
+
'model': chat_request.model, 'status_code': 408})
|
| 667 |
+
raise
|
| 668 |
+
else:
|
| 669 |
+
raise
|
| 670 |
+
except Exception as e:
|
| 671 |
+
# 使用统一的API错误处理函数
|
| 672 |
+
error_result = await handle_api_error(
|
| 673 |
+
e,
|
| 674 |
+
current_api_key,
|
| 675 |
+
key_manager,
|
| 676 |
+
request_type,
|
| 677 |
+
chat_request.model,
|
| 678 |
+
server_retry - 1
|
| 679 |
+
)
|
| 680 |
+
|
| 681 |
+
# 如果需要删除缓存,清除缓存
|
| 682 |
+
if error_result.get('remove_cache', False) and cache_key and cache_key in response_cache_manager.cache:
|
| 683 |
+
log('info', f"因API错误,删除缓存: {cache_key[:8]}...",
|
| 684 |
+
extra={'cache_operation': 'remove-on-error', 'request_type': request_type})
|
| 685 |
+
del response_cache_manager.cache[cache_key]
|
| 686 |
+
|
| 687 |
+
if error_result.get('should_retry', False):
|
| 688 |
+
# 服务器错误需要重试(等待已在handle_api_error中完成)
|
| 689 |
+
continue
|
| 690 |
+
elif error_result.get('should_switch_key', False) and attempt < retry_attempts:
|
| 691 |
+
# 跳出服务器错误重试循环,获取下一个可用密钥
|
| 692 |
+
log('info', f"API密钥 {current_api_key[:8]}... 失败,准备尝试下一个密钥",
|
| 693 |
+
extra={'key': current_api_key[:8], 'request_type': request_type})
|
| 694 |
+
break
|
| 695 |
+
else:
|
| 696 |
+
# 无法处理的错误或已达到重试上限
|
| 697 |
+
break
|
| 698 |
+
|
| 699 |
+
# 如果所有尝试都失败
|
| 700 |
+
msg = "所有API密钥均请求失败,请稍后重试"
|
| 701 |
+
log('error', "API key 替换失败,所有API key都已尝试,请重新配置或稍后重试", extra={'key': 'N/A', 'request_type': 'switch_key', 'status_code': 'N/A'})
|
| 702 |
+
|
| 703 |
+
# 对于流式请求,创建一个特殊的StreamingResponse返回错误
|
| 704 |
+
if chat_request.stream:
|
| 705 |
+
async def error_generator():
|
| 706 |
+
error_json = json.dumps({'error': {'message': msg, 'type': 'api_error'}})
|
| 707 |
+
yield f"data: {error_json}\n\n"
|
| 708 |
+
yield "data: [DONE]\n\n"
|
| 709 |
+
|
| 710 |
+
return StreamingResponse(error_generator(), media_type="text/event-stream")
|
| 711 |
+
else:
|
| 712 |
+
# 非流式请求使用标准HTTP异常
|
| 713 |
+
raise HTTPException(
|
| 714 |
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=msg)
|
| 715 |
+
|
| 716 |
+
|
| 717 |
+
@app.exception_handler(Exception)
|
| 718 |
+
async def global_exception_handler(request: Request, exc: Exception):
|
| 719 |
+
error_message = translate_error(str(exc))
|
| 720 |
+
extra_log_unhandled_exception = {'status_code': 500, 'error_message': error_message}
|
| 721 |
+
log('error', f"Unhandled exception: {error_message}", extra=extra_log_unhandled_exception)
|
| 722 |
+
return JSONResponse(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, content=ErrorResponse(message=str(exc), type="internal_error").dict())
|
| 723 |
+
|
| 724 |
+
|
| 725 |
+
@app.get("/", response_class=HTMLResponse)
|
| 726 |
+
async def root(request: Request):
|
| 727 |
+
# 先清理过期数据,确保统计数据是最新的
|
| 728 |
+
clean_expired_stats()
|
| 729 |
+
response_cache_manager.clean_expired() # 使用管理器清理缓存
|
| 730 |
+
active_requests_manager.clean_completed() # 使用管理器清理活跃请求
|
| 731 |
+
await check_version()
|
| 732 |
+
# 获取当前统计数据
|
| 733 |
+
now = datetime.now()
|
| 734 |
+
|
| 735 |
+
# 计算过去24小时的调用总数
|
| 736 |
+
last_24h_calls = sum(api_call_stats['last_24h'].values())
|
| 737 |
+
|
| 738 |
+
# 计算过去一小时内的调用总数
|
| 739 |
+
one_hour_ago = now - timedelta(hours=1)
|
| 740 |
+
hourly_calls = 0
|
| 741 |
+
for hour_key, count in api_call_stats['hourly'].items():
|
| 742 |
+
try:
|
| 743 |
+
hour_time = datetime.strptime(hour_key, '%Y-%m-%d %H:00')
|
| 744 |
+
if hour_time >= one_hour_ago:
|
| 745 |
+
hourly_calls += count
|
| 746 |
+
except ValueError:
|
| 747 |
+
continue
|
| 748 |
+
|
| 749 |
+
# 计算过去一分钟内的调用总数
|
| 750 |
+
one_minute_ago = now - timedelta(minutes=1)
|
| 751 |
+
minute_calls = 0
|
| 752 |
+
for minute_key, count in api_call_stats['minute'].items():
|
| 753 |
+
try:
|
| 754 |
+
minute_time = datetime.strptime(minute_key, '%Y-%m-%d %H:%M')
|
| 755 |
+
if minute_time >= one_minute_ago:
|
| 756 |
+
minute_calls += count
|
| 757 |
+
except ValueError:
|
| 758 |
+
continue
|
| 759 |
+
|
| 760 |
+
# 获取最近的日志
|
| 761 |
+
recent_logs = log_manager.get_recent_logs(50) # 获取最近50条日志
|
| 762 |
+
|
| 763 |
+
# 获取缓存统计
|
| 764 |
+
total_cache = len(response_cache_manager.cache)
|
| 765 |
+
valid_cache = sum(1 for _, data in response_cache_manager.cache.items()
|
| 766 |
+
if time.time() < data.get('expiry_time', 0))
|
| 767 |
+
cache_by_model = {}
|
| 768 |
+
|
| 769 |
+
# 分析缓存数据
|
| 770 |
+
for _, cache_data in response_cache_manager.cache.items():
|
| 771 |
+
if time.time() < cache_data.get('expiry_time', 0):
|
| 772 |
+
# 按模型统计缓存
|
| 773 |
+
model = cache_data.get('response', {}).model
|
| 774 |
+
if model:
|
| 775 |
+
if model in cache_by_model:
|
| 776 |
+
cache_by_model[model] += 1
|
| 777 |
+
else:
|
| 778 |
+
cache_by_model[model] = 1
|
| 779 |
+
|
| 780 |
+
# 获取请求历史统计
|
| 781 |
+
history_count = len(client_request_history)
|
| 782 |
+
|
| 783 |
+
# 获取活跃请求统计
|
| 784 |
+
active_count = len(active_requests_manager.active_requests)
|
| 785 |
+
active_done = sum(1 for task in active_requests_manager.active_requests.values() if task.done())
|
| 786 |
+
active_pending = active_count - active_done
|
| 787 |
+
|
| 788 |
+
# 准备模板上下文
|
| 789 |
+
context = {
|
| 790 |
+
"key_count": len(key_manager.api_keys),
|
| 791 |
+
"model_count": len(GeminiClient.AVAILABLE_MODELS),
|
| 792 |
+
"retry_count": len(key_manager.api_keys),
|
| 793 |
+
"last_24h_calls": last_24h_calls,
|
| 794 |
+
"hourly_calls": hourly_calls,
|
| 795 |
+
"minute_calls": minute_calls,
|
| 796 |
+
"max_requests_per_minute": MAX_REQUESTS_PER_MINUTE,
|
| 797 |
+
"max_requests_per_day_per_ip": MAX_REQUESTS_PER_DAY_PER_IP,
|
| 798 |
+
"current_time": datetime.now().strftime('%H:%M:%S'),
|
| 799 |
+
"logs": recent_logs,
|
| 800 |
+
# 添加版本信息
|
| 801 |
+
"local_version": local_version,
|
| 802 |
+
"remote_version": remote_version,
|
| 803 |
+
"has_update": has_update,
|
| 804 |
+
# 添加缓存信息
|
| 805 |
+
"cache_entries": total_cache,
|
| 806 |
+
"valid_cache": valid_cache,
|
| 807 |
+
"expired_cache": total_cache - valid_cache,
|
| 808 |
+
"cache_expiry_time": CACHE_EXPIRY_TIME,
|
| 809 |
+
"max_cache_entries": MAX_CACHE_ENTRIES,
|
| 810 |
+
"cache_by_model": cache_by_model,
|
| 811 |
+
"request_history_count": history_count,
|
| 812 |
+
"enable_reconnect_detection": ENABLE_RECONNECT_DETECTION,
|
| 813 |
+
"remove_cache_after_use": REMOVE_CACHE_AFTER_USE,
|
| 814 |
+
# 添加活跃请求池信息
|
| 815 |
+
"active_count": active_count,
|
| 816 |
+
"active_done": active_done,
|
| 817 |
+
"active_pending": active_pending,
|
| 818 |
+
}
|
| 819 |
+
|
| 820 |
+
# 使用Jinja2模板引擎正确渲染HTML
|
| 821 |
+
return templates.TemplateResponse("index.html", {"request": request, **context})
|
| 822 |
+
|
| 823 |
+
# 客户端IP到最近请求的映射,用于识别重连请求
|
| 824 |
+
client_request_history: Dict[str, Dict[str, Any]] = {}
|
| 825 |
+
# 请求历史记录保留时间(秒)
|
| 826 |
+
REQUEST_HISTORY_EXPIRY_TIME = int(os.environ.get("REQUEST_HISTORY_EXPIRY_TIME", "600")) # 默认10分钟
|
| 827 |
+
# 是否启用重连检测
|
| 828 |
+
ENABLE_RECONNECT_DETECTION = os.environ.get("ENABLE_RECONNECT_DETECTION", "true").lower() in ["true", "1", "yes"]
|
| 829 |
+
|
| 830 |
+
# 定期清理缓存的定时任务
|
| 831 |
+
def schedule_cache_cleanup():
|
| 832 |
+
scheduler = BackgroundScheduler()
|
| 833 |
+
scheduler.add_job(response_cache_manager.clean_expired, 'interval', minutes=1) # 每分钟清理过期缓存
|
| 834 |
+
scheduler.add_job(active_requests_manager.clean_completed, 'interval', seconds=30) # 每30秒清理已完成的活跃请求
|
| 835 |
+
scheduler.add_job(active_requests_manager.clean_long_running, 'interval', minutes=5, args=[300]) # 每5分钟清理运行超过5分钟的任务
|
| 836 |
+
scheduler.add_job(clean_expired_stats, 'interval', minutes=5) # 每5分钟清理过期的统计数��
|
| 837 |
+
scheduler.start()
|
| 838 |
+
|
| 839 |
+
# 生成请求的唯一缓存键
|
| 840 |
+
def generate_cache_key(chat_request: ChatCompletionRequest) -> str:
|
| 841 |
+
# 创建包含请求关键信息的字典
|
| 842 |
+
request_data = {
|
| 843 |
+
'model': chat_request.model,
|
| 844 |
+
'messages': []
|
| 845 |
+
}
|
| 846 |
+
|
| 847 |
+
# 添加消息内容
|
| 848 |
+
for msg in chat_request.messages:
|
| 849 |
+
if isinstance(msg.content, str):
|
| 850 |
+
message_data = {'role': msg.role, 'content': msg.content}
|
| 851 |
+
request_data['messages'].append(message_data)
|
| 852 |
+
elif isinstance(msg.content, list):
|
| 853 |
+
content_list = []
|
| 854 |
+
for item in msg.content:
|
| 855 |
+
if item.get('type') == 'text':
|
| 856 |
+
content_list.append({'type': 'text', 'text': item.get('text')})
|
| 857 |
+
# 对于图像数据,我们只使用标识符而不是全部数据
|
| 858 |
+
elif item.get('type') == 'image_url':
|
| 859 |
+
image_data = item.get('image_url', {}).get('url', '')
|
| 860 |
+
if image_data.startswith('data:image/'):
|
| 861 |
+
# 对于base64图像,使用前32字符作为标识符
|
| 862 |
+
content_list.append({'type': 'image_url', 'hash': hashlib.md5(image_data[:32].encode()).hexdigest()})
|
| 863 |
+
else:
|
| 864 |
+
content_list.append({'type': 'image_url', 'url': image_data})
|
| 865 |
+
request_data['messages'].append({'role': msg.role, 'content': content_list})
|
| 866 |
+
|
| 867 |
+
# 将字典转换为JSON字符串并计算哈希值
|
| 868 |
+
json_data = json.dumps(request_data, sort_keys=True)
|
| 869 |
+
return hashlib.md5(json_data.encode()).hexdigest()
|
| 870 |
+
|
| 871 |
+
# 拆分process_request为更小的函数
|
| 872 |
+
|
| 873 |
+
async def process_stream_request(
|
| 874 |
+
chat_request: ChatCompletionRequest,
|
| 875 |
+
http_request: Request,
|
| 876 |
+
contents,
|
| 877 |
+
system_instruction,
|
| 878 |
+
current_api_key: str
|
| 879 |
+
) -> StreamingResponse:
|
| 880 |
+
"""处理流式API请求"""
|
| 881 |
+
|
| 882 |
+
# 创建一个直接流式响应的生成器函数
|
| 883 |
+
async def stream_response_generator():
|
| 884 |
+
# 如果启用了假流式模式,使用随机遍历API密钥的方式
|
| 885 |
+
if FAKE_STREAMING:
|
| 886 |
+
# 创建一个队列用于在任务之间传递数据
|
| 887 |
+
queue = asyncio.Queue()
|
| 888 |
+
keep_alive_task = None
|
| 889 |
+
api_request_task = None
|
| 890 |
+
|
| 891 |
+
try:
|
| 892 |
+
# 创建一个保持连接的任务,持续发送换行符
|
| 893 |
+
async def keep_alive_sender():
|
| 894 |
+
try:
|
| 895 |
+
# 创建一个Gemini客户端用于发送保持连接的换行符
|
| 896 |
+
keep_alive_client = GeminiClient(current_api_key)
|
| 897 |
+
|
| 898 |
+
# 启动保持连接的生成器
|
| 899 |
+
keep_alive_generator = keep_alive_client.stream_chat(
|
| 900 |
+
chat_request,
|
| 901 |
+
contents,
|
| 902 |
+
safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
|
| 903 |
+
system_instruction
|
| 904 |
+
)
|
| 905 |
+
|
| 906 |
+
# 持续发送换行符直到被取消
|
| 907 |
+
async for line in keep_alive_generator:
|
| 908 |
+
if line == "\n":
|
| 909 |
+
# 将换行符格式化为SSE格式
|
| 910 |
+
formatted_chunk = {
|
| 911 |
+
"id": "chatcmpl-keepalive",
|
| 912 |
+
"object": "chat.completion.chunk",
|
| 913 |
+
"created": int(time.time()),
|
| 914 |
+
"model": chat_request.model,
|
| 915 |
+
"choices": [{"delta": {"content": ""}, "index": 0, "finish_reason": None}]
|
| 916 |
+
}
|
| 917 |
+
# 将格式化的换行符放入队列
|
| 918 |
+
await queue.put(f"data: {json.dumps(formatted_chunk)}\n\n")
|
| 919 |
+
except asyncio.CancelledError:
|
| 920 |
+
log('info', "保持连接任务被取消",
|
| 921 |
+
extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
|
| 922 |
+
raise
|
| 923 |
+
except Exception as e:
|
| 924 |
+
log('error', f"保持连接任务出错: {str(e)}",
|
| 925 |
+
extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
|
| 926 |
+
# 将错误放入队列
|
| 927 |
+
await queue.put(None)
|
| 928 |
+
raise
|
| 929 |
+
|
| 930 |
+
# 创建一个任务来随机遍历API密钥并请求内容
|
| 931 |
+
async def api_request_handler():
|
| 932 |
+
success = False
|
| 933 |
+
try:
|
| 934 |
+
# 重置已尝试的密钥
|
| 935 |
+
key_manager.reset_tried_keys_for_request()
|
| 936 |
+
|
| 937 |
+
# 获取可用的API密钥
|
| 938 |
+
available_keys = key_manager.api_keys.copy()
|
| 939 |
+
random.shuffle(available_keys) # 随机打乱密钥顺序
|
| 940 |
+
|
| 941 |
+
# 遍历所有API密钥尝试获取响应
|
| 942 |
+
for attempt, api_key in enumerate(available_keys, 1):
|
| 943 |
+
try:
|
| 944 |
+
log('info', f"假流式模式: 尝试API密钥 {api_key[:8]}... ({attempt}/{len(available_keys)})",
|
| 945 |
+
extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
|
| 946 |
+
|
| 947 |
+
# 创建一个新的客户端使用当前API密钥
|
| 948 |
+
non_stream_client = GeminiClient(api_key)
|
| 949 |
+
|
| 950 |
+
# 使用非流式方式请求内容
|
| 951 |
+
response_content = await asyncio.to_thread(
|
| 952 |
+
non_stream_client.complete_chat,
|
| 953 |
+
chat_request,
|
| 954 |
+
contents,
|
| 955 |
+
safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
|
| 956 |
+
system_instruction
|
| 957 |
+
)
|
| 958 |
+
|
| 959 |
+
# 检查响应是否有效
|
| 960 |
+
if response_content and response_content.text:
|
| 961 |
+
log('info', f"假流式模式: API密钥 {api_key[:8]}... 成功获取响应",
|
| 962 |
+
extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
|
| 963 |
+
|
| 964 |
+
# 将完整响应分割成小块,模拟流式返回
|
| 965 |
+
full_text = response_content.text
|
| 966 |
+
chunk_size = max(len(full_text) // 10, 1) # 至少分成10块,每块至少1个字符
|
| 967 |
+
|
| 968 |
+
for i in range(0, len(full_text), chunk_size):
|
| 969 |
+
chunk = full_text[i:i+chunk_size]
|
| 970 |
+
formatted_chunk = {
|
| 971 |
+
"id": "chatcmpl-someid",
|
| 972 |
+
"object": "chat.completion.chunk",
|
| 973 |
+
"created": int(time.time()),
|
| 974 |
+
"model": chat_request.model,
|
| 975 |
+
"choices": [{"delta": {"role": "assistant", "content": chunk}, "index": 0, "finish_reason": None}]
|
| 976 |
+
}
|
| 977 |
+
# 将格式化的内容块放入队列
|
| 978 |
+
await queue.put(f"data: {json.dumps(formatted_chunk)}\n\n")
|
| 979 |
+
|
| 980 |
+
success = True
|
| 981 |
+
# 更新API调用统计
|
| 982 |
+
update_api_call_stats()
|
| 983 |
+
break # 成功获取响应,退出循环
|
| 984 |
+
else:
|
| 985 |
+
log('warning', f"假流式模式: API密钥 {api_key[:8]}... 返回空响应",
|
| 986 |
+
extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
|
| 987 |
+
except Exception as e:
|
| 988 |
+
error_detail = handle_gemini_error(e, api_key, key_manager)
|
| 989 |
+
log('error', f"假流式模式: API密钥 {api_key[:8]}... 请求失败: {error_detail}",
|
| 990 |
+
extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
|
| 991 |
+
# 继续尝试下一个API密钥
|
| 992 |
+
|
| 993 |
+
# 如果所有API密钥都尝试失败
|
| 994 |
+
if not success:
|
| 995 |
+
error_msg = "所有API密钥均请求失败,请稍后重试"
|
| 996 |
+
log('error', error_msg,
|
| 997 |
+
extra={'key': 'ALL', 'request_type': 'fake-stream', 'model': chat_request.model})
|
| 998 |
+
|
| 999 |
+
# 添加错误信息到队列
|
| 1000 |
+
error_json = {
|
| 1001 |
+
"id": "chatcmpl-error",
|
| 1002 |
+
"object": "chat.completion.chunk",
|
| 1003 |
+
"created": int(time.time()),
|
| 1004 |
+
"model": chat_request.model,
|
| 1005 |
+
"choices": [{"delta": {"content": f"\n\n[错误: {error_msg}]"}, "index": 0, "finish_reason": "error"}]
|
| 1006 |
+
}
|
| 1007 |
+
await queue.put(f"data: {json.dumps(error_json)}\n\n")
|
| 1008 |
+
|
| 1009 |
+
# 添加完成标记到队列
|
| 1010 |
+
await queue.put("data: [DONE]\n\n")
|
| 1011 |
+
# 添加None表示队列结束
|
| 1012 |
+
await queue.put(None)
|
| 1013 |
+
|
| 1014 |
+
except asyncio.CancelledError:
|
| 1015 |
+
log('info', "API请求任务被取消",
|
| 1016 |
+
extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
|
| 1017 |
+
# 添加None表示队列结束
|
| 1018 |
+
await queue.put(None)
|
| 1019 |
+
raise
|
| 1020 |
+
except Exception as e:
|
| 1021 |
+
log('error', f"API请求任务出错: {str(e)}",
|
| 1022 |
+
extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
|
| 1023 |
+
# 添加错误信息到队列
|
| 1024 |
+
error_json = {
|
| 1025 |
+
"id": "chatcmpl-error",
|
| 1026 |
+
"object": "chat.completion.chunk",
|
| 1027 |
+
"created": int(time.time()),
|
| 1028 |
+
"model": chat_request.model,
|
| 1029 |
+
"choices": [{"delta": {"content": f"\n\n[错误: {str(e)}]"}, "index": 0, "finish_reason": "error"}]
|
| 1030 |
+
}
|
| 1031 |
+
await queue.put(f"data: {json.dumps(error_json)}\n\n")
|
| 1032 |
+
await queue.put("data: [DONE]\n\n")
|
| 1033 |
+
# 添加None表示队列结束
|
| 1034 |
+
await queue.put(None)
|
| 1035 |
+
raise
|
| 1036 |
+
|
| 1037 |
+
# 启动保持连接的任务
|
| 1038 |
+
keep_alive_task = asyncio.create_task(keep_alive_sender())
|
| 1039 |
+
# 启动API请求任务
|
| 1040 |
+
api_request_task = asyncio.create_task(api_request_handler())
|
| 1041 |
+
|
| 1042 |
+
# 从队列中获取数据并发送给客户端
|
| 1043 |
+
while True:
|
| 1044 |
+
chunk = await queue.get()
|
| 1045 |
+
if chunk is None: # None表示队列结束
|
| 1046 |
+
break
|
| 1047 |
+
yield chunk
|
| 1048 |
+
|
| 1049 |
+
# 如果API请求任务已完成,取消保持连接任务
|
| 1050 |
+
if api_request_task.done() and not keep_alive_task.done():
|
| 1051 |
+
keep_alive_task.cancel()
|
| 1052 |
+
|
| 1053 |
+
except asyncio.CancelledError:
|
| 1054 |
+
log('info', "流式响应生成器被取消",
|
| 1055 |
+
extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
|
| 1056 |
+
# 取消所有任务
|
| 1057 |
+
if keep_alive_task and not keep_alive_task.done():
|
| 1058 |
+
keep_alive_task.cancel()
|
| 1059 |
+
if api_request_task and not api_request_task.done():
|
| 1060 |
+
api_request_task.cancel()
|
| 1061 |
+
except Exception as e:
|
| 1062 |
+
log('error', f"流式响应生成器出错: {str(e)}",
|
| 1063 |
+
extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
|
| 1064 |
+
# 取消所有任务
|
| 1065 |
+
if keep_alive_task and not keep_alive_task.done():
|
| 1066 |
+
keep_alive_task.cancel()
|
| 1067 |
+
if api_request_task and not api_request_task.done():
|
| 1068 |
+
api_request_task.cancel()
|
| 1069 |
+
# 发送错误信息给客户端
|
| 1070 |
+
error_json = {
|
| 1071 |
+
"id": "chatcmpl-error",
|
| 1072 |
+
"object": "chat.completion.chunk",
|
| 1073 |
+
"created": int(time.time()),
|
| 1074 |
+
"model": chat_request.model,
|
| 1075 |
+
"choices": [{"delta": {"content": f"\n\n[错误: {str(e)}]"}, "index": 0, "finish_reason": "error"}]
|
| 1076 |
+
}
|
| 1077 |
+
yield f"data: {json.dumps(error_json)}\n\n"
|
| 1078 |
+
yield "data: [DONE]\n\n"
|
| 1079 |
+
finally:
|
| 1080 |
+
# 确保所有任务都被取消
|
| 1081 |
+
if keep_alive_task and not keep_alive_task.done():
|
| 1082 |
+
keep_alive_task.cancel()
|
| 1083 |
+
if api_request_task and not api_request_task.done():
|
| 1084 |
+
api_request_task.cancel()
|
| 1085 |
+
else:
|
| 1086 |
+
# 原始流式请求处理逻辑
|
| 1087 |
+
gemini_client = GeminiClient(current_api_key)
|
| 1088 |
+
success = False
|
| 1089 |
+
|
| 1090 |
+
try:
|
| 1091 |
+
# 直接迭代生成器并发送响应块
|
| 1092 |
+
async for chunk in gemini_client.stream_chat(
|
| 1093 |
+
chat_request,
|
| 1094 |
+
contents,
|
| 1095 |
+
safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
|
| 1096 |
+
system_instruction
|
| 1097 |
+
):
|
| 1098 |
+
# 空字符串跳过
|
| 1099 |
+
if not chunk:
|
| 1100 |
+
continue
|
| 1101 |
+
|
| 1102 |
+
formatted_chunk = {
|
| 1103 |
+
"id": "chatcmpl-someid",
|
| 1104 |
+
"object": "chat.completion.chunk",
|
| 1105 |
+
"created": int(time.time()),
|
| 1106 |
+
"model": chat_request.model,
|
| 1107 |
+
"choices": [{"delta": {"role": "assistant", "content": chunk}, "index": 0, "finish_reason": None}]
|
| 1108 |
+
}
|
| 1109 |
+
success = True # ��要有一个chunk成功,就标记为成功
|
| 1110 |
+
yield f"data: {json.dumps(formatted_chunk)}\n\n"
|
| 1111 |
+
|
| 1112 |
+
# 如果成功获取到响应,更新API调用统计
|
| 1113 |
+
if success:
|
| 1114 |
+
update_api_call_stats()
|
| 1115 |
+
|
| 1116 |
+
yield "data: [DONE]\n\n"
|
| 1117 |
+
|
| 1118 |
+
except asyncio.CancelledError:
|
| 1119 |
+
extra_log_cancel = {'key': current_api_key[:8], 'request_type': 'stream', 'model': chat_request.model, 'error_message': '客户端已断开连接'}
|
| 1120 |
+
log('info', "客户端连接已中断", extra=extra_log_cancel)
|
| 1121 |
+
except Exception as e:
|
| 1122 |
+
error_detail = handle_gemini_error(e, current_api_key, key_manager)
|
| 1123 |
+
log('error', f"流式请求失败: {error_detail}",
|
| 1124 |
+
extra={'key': current_api_key[:8], 'request_type': 'stream', 'model': chat_request.model})
|
| 1125 |
+
# 发送错误信息给客户端
|
| 1126 |
+
error_json = {
|
| 1127 |
+
"id": "chatcmpl-error",
|
| 1128 |
+
"object": "chat.completion.chunk",
|
| 1129 |
+
"created": int(time.time()),
|
| 1130 |
+
"model": chat_request.model,
|
| 1131 |
+
"choices": [{"delta": {"content": f"\n\n[错误: {error_detail}]"}, "index": 0, "finish_reason": "error"}]
|
| 1132 |
+
}
|
| 1133 |
+
yield f"data: {json.dumps(error_json)}\n\n"
|
| 1134 |
+
yield "data: [DONE]\n\n"
|
| 1135 |
+
# 重新抛出异常,这样process_request可以捕获它
|
| 1136 |
+
raise e
|
| 1137 |
+
|
| 1138 |
+
return StreamingResponse(stream_response_generator(), media_type="text/event-stream")
|
| 1139 |
+
|
| 1140 |
+
async def run_gemini_completion(
|
| 1141 |
+
gemini_client,
|
| 1142 |
+
chat_request: ChatCompletionRequest,
|
| 1143 |
+
contents,
|
| 1144 |
+
system_instruction,
|
| 1145 |
+
request_type: str,
|
| 1146 |
+
current_api_key: str
|
| 1147 |
+
):
|
| 1148 |
+
"""运行Gemini非流式请求"""
|
| 1149 |
+
# 记录函数调用状态
|
| 1150 |
+
run_fn = run_gemini_completion
|
| 1151 |
+
|
| 1152 |
+
try:
|
| 1153 |
+
# 创建一个不会被客户端断开影响的任务
|
| 1154 |
+
response_future = asyncio.create_task(
|
| 1155 |
+
asyncio.to_thread(
|
| 1156 |
+
gemini_client.complete_chat,
|
| 1157 |
+
chat_request,
|
| 1158 |
+
contents,
|
| 1159 |
+
safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
|
| 1160 |
+
system_instruction
|
| 1161 |
+
)
|
| 1162 |
+
)
|
| 1163 |
+
|
| 1164 |
+
# 使用shield防止任务被外部取消
|
| 1165 |
+
response_content = await asyncio.shield(response_future)
|
| 1166 |
+
|
| 1167 |
+
# 只在第一次调用时记录完成日志
|
| 1168 |
+
if not hasattr(run_fn, 'logged_complete'):
|
| 1169 |
+
log('info', "非流式请求成功完成", extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
|
| 1170 |
+
run_fn.logged_complete = True
|
| 1171 |
+
return response_content
|
| 1172 |
+
except asyncio.CancelledError:
|
| 1173 |
+
# 即使任务被取消,我们也确保正在进行的API请求能够完成
|
| 1174 |
+
if 'response_future' in locals() and not response_future.done():
|
| 1175 |
+
try:
|
| 1176 |
+
# 使用shield确保任务不被取消,并等待它完成
|
| 1177 |
+
response_content = await asyncio.shield(response_future)
|
| 1178 |
+
log('info', "API请求在客户端断开后完成", extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
|
| 1179 |
+
return response_content
|
| 1180 |
+
except Exception as e:
|
| 1181 |
+
extra_log_gemini_cancel = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message': f'API请求在客户端断开后失败: {str(e)}'}
|
| 1182 |
+
log('info', "API调用因客户端断开而失败", extra=extra_log_gemini_cancel)
|
| 1183 |
+
raise
|
| 1184 |
+
|
| 1185 |
+
# 如果任务尚未开始或已经失败,记录日志
|
| 1186 |
+
extra_log_gemini_cancel = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message': '客户端断开导致API调用取消'}
|
| 1187 |
+
log('info', "API调用因客户端断开而取消", extra=extra_log_gemini_cancel)
|
| 1188 |
+
raise
|
| 1189 |
+
|
| 1190 |
+
async def check_client_disconnect(http_request: Request, current_api_key: str, request_type: str, model: str):
|
| 1191 |
+
"""检查客户端是否断开连接"""
|
| 1192 |
+
while True:
|
| 1193 |
+
if await http_request.is_disconnected():
|
| 1194 |
+
extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': model, 'error_message': '检测到客户端断开连接'}
|
| 1195 |
+
log('info', "客户端连接已中断,等待API请求完成", extra=extra_log)
|
| 1196 |
+
return True
|
| 1197 |
+
await asyncio.sleep(0.5)
|
| 1198 |
+
|
| 1199 |
+
async def handle_client_disconnect(
|
| 1200 |
+
gemini_task: asyncio.Task,
|
| 1201 |
+
chat_request: ChatCompletionRequest,
|
| 1202 |
+
request_type: str,
|
| 1203 |
+
current_api_key: str,
|
| 1204 |
+
cache_key: str = None,
|
| 1205 |
+
client_ip: str = None
|
| 1206 |
+
):
|
| 1207 |
+
|
| 1208 |
+
try:
|
| 1209 |
+
# 等待API任务完成,使用shield防止它被取消
|
| 1210 |
+
response_content = await asyncio.shield(gemini_task)
|
| 1211 |
+
|
| 1212 |
+
# 检查响应文本��否为空
|
| 1213 |
+
if response_content is None or response_content.text == "":
|
| 1214 |
+
if response_content is None:
|
| 1215 |
+
log('info', "客户端断开后API任务返回None",
|
| 1216 |
+
extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
|
| 1217 |
+
else:
|
| 1218 |
+
extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'status_code': 204}
|
| 1219 |
+
log('info', "客户端断开后Gemini API 返回空响应", extra=extra_log)
|
| 1220 |
+
|
| 1221 |
+
# 删除任何现有缓存,因为响应为空
|
| 1222 |
+
if cache_key and cache_key in response_cache_manager.cache:
|
| 1223 |
+
log('info', f"因空响应,删除缓存: {cache_key[:8]}...",
|
| 1224 |
+
extra={'cache_operation': 'remove-on-empty', 'request_type': request_type})
|
| 1225 |
+
del response_cache_manager.cache[cache_key]
|
| 1226 |
+
|
| 1227 |
+
# 返回错误响应而不是None
|
| 1228 |
+
return create_error_response(chat_request.model, "AI未返回任何内容,请重试")
|
| 1229 |
+
|
| 1230 |
+
# 首先检查是否有现有缓存
|
| 1231 |
+
cached_response, cache_hit = response_cache_manager.get(cache_key)
|
| 1232 |
+
if cache_hit:
|
| 1233 |
+
log('info', f"客户端断开但找到已存在缓存,将删除: {cache_key[:8]}...",
|
| 1234 |
+
extra={'cache_operation': 'disconnect-found-cache', 'request_type': request_type})
|
| 1235 |
+
|
| 1236 |
+
# 安全删除缓存
|
| 1237 |
+
if cache_key in response_cache_manager.cache:
|
| 1238 |
+
del response_cache_manager.cache[cache_key]
|
| 1239 |
+
|
| 1240 |
+
# 不返回缓存,而是创建新响应并缓存
|
| 1241 |
+
|
| 1242 |
+
# 创建新响应
|
| 1243 |
+
# log('info', f"客户端断开后创建新缓存: {cache_key[:8] if cache_key else 'none'}...",
|
| 1244 |
+
# extra={'cache_operation': 'create-after-disconnect', 'request_type': request_type})
|
| 1245 |
+
response = create_response(chat_request, response_content)
|
| 1246 |
+
|
| 1247 |
+
# 客户端已断开,此响应不会实际发送,可以考虑将其缓存以供后续使用
|
| 1248 |
+
# 如果确实需要缓存,则可以取消下面的注释
|
| 1249 |
+
# cache_response(response, cache_key, client_ip)
|
| 1250 |
+
|
| 1251 |
+
return response
|
| 1252 |
+
except asyncio.CancelledError:
|
| 1253 |
+
# 对于取消异常,仍然尝试继续完成任务
|
| 1254 |
+
log('info', "客户端断开后任务被取消,但我们仍会尝试完成",
|
| 1255 |
+
extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
|
| 1256 |
+
|
| 1257 |
+
# 检查任务是否已经完成
|
| 1258 |
+
if gemini_task.done() and not gemini_task.cancelled():
|
| 1259 |
+
try:
|
| 1260 |
+
response_content = gemini_task.result()
|
| 1261 |
+
|
| 1262 |
+
# 首先检查是否有现有缓存
|
| 1263 |
+
cached_response, cache_hit = response_cache_manager.get(cache_key)
|
| 1264 |
+
if cache_hit:
|
| 1265 |
+
log('info', f"任务被取消但找到已存在缓存,将删除: {cache_key[:8]}...",
|
| 1266 |
+
extra={'cache_operation': 'cancel-found-cache', 'request_type': request_type})
|
| 1267 |
+
|
| 1268 |
+
# 安全删除缓存
|
| 1269 |
+
if cache_key in response_cache_manager.cache:
|
| 1270 |
+
del response_cache_manager.cache[cache_key]
|
| 1271 |
+
|
| 1272 |
+
# 创建但不缓存响应
|
| 1273 |
+
response = create_response(chat_request, response_content)
|
| 1274 |
+
return response
|
| 1275 |
+
except Exception as inner_e:
|
| 1276 |
+
log('error', f"客户端断开后从已完成任务获取结果失败: {str(inner_e)}",
|
| 1277 |
+
extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
|
| 1278 |
+
|
| 1279 |
+
# 删除缓存,因为出现错误
|
| 1280 |
+
if cache_key and cache_key in response_cache_manager.cache:
|
| 1281 |
+
log('info', f"因任务获取结果失败,删除缓存: {cache_key[:8]}...",
|
| 1282 |
+
extra={'cache_operation': 'remove-on-error', 'request_type': request_type})
|
| 1283 |
+
del response_cache_manager.cache[cache_key]
|
| 1284 |
+
|
| 1285 |
+
# 创建错误响应而不是返回None
|
| 1286 |
+
return create_error_response(chat_request.model, "请求处理过程中发生错误,请重试")
|
| 1287 |
+
except Exception as e:
|
| 1288 |
+
# 处理API任务异常
|
| 1289 |
+
error_msg = str(e)
|
| 1290 |
+
extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message': error_msg}
|
| 1291 |
+
log('error', f"客户端断开后处理API响应时出错: {error_msg}", extra=extra_log)
|
| 1292 |
+
|
| 1293 |
+
# 删除缓存,因为出现错误
|
| 1294 |
+
if cache_key and cache_key in response_cache_manager.cache:
|
| 1295 |
+
log('info', f"因API响应错误,删除缓存: {cache_key[:8]}...",
|
| 1296 |
+
extra={'cache_operation': 'remove-on-error', 'request_type': request_type})
|
| 1297 |
+
del response_cache_manager.cache[cache_key]
|
| 1298 |
+
|
| 1299 |
+
# 创建错误响应而不是返回None
|
| 1300 |
+
return create_error_response(chat_request.model, f"请求处理错误: {error_msg}")
|
| 1301 |
+
|
| 1302 |
+
async def process_nonstream_request(
    chat_request: ChatCompletionRequest,
    http_request: Request,
    request_type: str,
    contents,
    system_instruction,
    current_api_key: str,
    cache_key: str = None,
    client_ip: str = None
):
    """Handle a non-streaming API request.

    Races the upstream Gemini completion against client-disconnect
    detection.  Cached responses are single-use: every cache hit (and every
    freshly stored entry) is deleted immediately after being consumed.

    Args:
        chat_request: Parsed OpenAI-style chat completion request.
        http_request: Incoming FastAPI request (used for disconnect checks).
        request_type: Label used in logs (e.g. stream vs non-stream).
        contents: Prepared Gemini message contents.
        system_instruction: Prepared system instruction for Gemini.
        current_api_key: Gemini API key selected for this attempt.
        cache_key: Optional key into the shared response cache.
        client_ip: Optional client IP recorded alongside cached entries.

    Returns:
        A chat completion response object (fresh or from cache).
    """
    gemini_client = GeminiClient(current_api_key)

    # Launch the upstream Gemini call as a task so it can run concurrently
    # with disconnect detection.
    gemini_task = asyncio.create_task(
        run_gemini_completion(
            gemini_client,
            chat_request,
            contents,
            system_instruction,
            request_type,
            current_api_key
        )
    )

    # Concurrently watch for the client dropping the connection.
    disconnect_task = asyncio.create_task(
        check_client_disconnect(
            http_request,
            current_api_key,
            request_type,
            chat_request.model
        )
    )

    try:
        # Wait for whichever happens first: the API task finishing or the
        # client disconnecting.
        done, pending = await asyncio.wait(
            [gemini_task, disconnect_task],
            return_when=asyncio.FIRST_COMPLETED
        )

        if disconnect_task in done:
            # Client already disconnected, but we still let the API request
            # finish so its result can be cached for a retry.
            return await handle_client_disconnect(
                gemini_task,
                chat_request,
                request_type,
                current_api_key,
                cache_key,
                client_ip
            )
        else:
            # API task finished first; cancel the disconnect watcher.
            disconnect_task.cancel()

            # Collect the response content.
            response_content = await gemini_task

            # If a cache entry already exists, return it instead of creating
            # a new one (single-use semantics: delete after reading).
            cached_response, cache_hit = response_cache_manager.get(cache_key)
            if cache_hit:
                log('info', f"缓存已存在,直接返回: {cache_key[:8]}...",
                    extra={'cache_operation': 'use-existing', 'request_type': request_type})

                # Safely delete the consumed cache entry.
                if cache_key in response_cache_manager.cache:
                    del response_cache_manager.cache[cache_key]
                    log('info', f"缓存使用后已删除: {cache_key[:8]}...",
                        extra={'cache_operation': 'used-and-removed', 'request_type': request_type})

                return cached_response

            # Build the response object.
            response = create_response(chat_request, response_content)

            # Store it in the cache (also updates API call statistics).
            cache_response(response, cache_key, client_ip)

            # Immediately delete the entry so it can only ever be used once.
            if cache_key and cache_key in response_cache_manager.cache:
                del response_cache_manager.cache[cache_key]
                log('info', f"缓存创建后立即删除: {cache_key[:8]}...",
                    extra={'cache_operation': 'store-and-remove', 'request_type': request_type})

            # Return the response.
            return response

    except asyncio.CancelledError:
        extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message':"请求被取消"}
        log('info', "请求取消", extra=extra_log)

        # On cancellation, first check whether a cached result already exists.
        cached_response, cache_hit = response_cache_manager.get(cache_key)
        if cache_hit:
            log('info', f"请求取消但找到有效缓存,使用缓存响应: {cache_key[:8]}...",
                extra={'cache_operation': 'use-cache-on-cancel', 'request_type': request_type})

            # Safely delete the consumed cache entry.
            if cache_key in response_cache_manager.cache:
                del response_cache_manager.cache[cache_key]
                log('info', f"缓存使用后已删除: {cache_key[:8]}...",
                    extra={'cache_operation': 'used-and-removed', 'request_type': request_type})

            return cached_response

        # Otherwise try to let the in-flight API request run to completion.
        if not gemini_task.done():
            log('info', "请求取消但API请求尚未完成,继续等待...",
                extra={'key': current_api_key[:8], 'request_type': request_type})

            # shield() keeps the inner task alive even though we were cancelled.
            response_content = await asyncio.shield(gemini_task)

            # Build the response.
            response = create_response(chat_request, response_content)

            # Return directly without caching.
            return response
        else:
            # Task already finished; take its result.
            response_content = gemini_task.result()

            # Build the response.
            response = create_response(chat_request, response_content)

            # Return directly without caching.
            return response

    except HTTPException as e:
        if e.status_code == status.HTTP_408_REQUEST_TIMEOUT:
            # Client connection dropped; stop any further retries upstream.
            extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model,
                         'status_code': 408, 'error_message': '客户端连接中断'}
            log('error', "客户端连接中断,终止后续重试", extra=extra_log)
            raise
        else:
            raise
|
| 1439 |
+
# Shared helper for turning Gemini output into an OpenAI-style response.
def create_response(
    chat_request, response_content
):
    """Build a standard chat-completion response object without caching it.

    Args:
        chat_request: The original request (only ``model`` is read).
        response_content: Gemini result; its ``text`` attribute becomes the
            assistant message body.

    Returns:
        A single-choice chat completion response.
    """
    assistant_message = {
        "role": "assistant",
        "content": response_content.text
    }
    first_choice = {
        "index": 0,
        "message": assistant_message,
        "finish_reason": "stop"
    }
    return create_chat_response(model=chat_request.model, choices=[first_choice])
+
def cache_response(response, cache_key, client_ip):
    """Store *response* in the shared response cache and update call stats.

    A missing ``cache_key`` means caching is disabled for this request, so
    the function returns without touching the cache or the statistics.
    """
    if not cache_key:
        return

    # Never overwrite an entry that is already present.
    if cache_key in response_cache_manager.cache:
        log('info', f"缓存已存在,跳过存储: {cache_key[:8]}...",
            extra={'cache_operation': 'skip-existing', 'request_type': 'non-stream'})
    else:
        response_cache_manager.store(cache_key, response, client_ip)
        log('info', f"API响应已缓存: {cache_key[:8]}...",
            extra={'cache_operation': 'store-new', 'request_type': 'non-stream'})

    # Record the successful API call in the global statistics.
    update_api_call_stats()
|
| 1476 |
+
# Unified API error handling.
async def handle_api_error(e, api_key, key_manager, request_type, model, retry_count=0):
    """Uniformly handle an API error; 500/503 server errors are auto-retried.

    Returns a dict describing what the caller should do next
    (``should_retry`` / ``should_switch_key`` / ``remove_cache``), or raises
    ``HTTPException`` once server-error retries are exhausted.
    """
    error_detail = handle_gemini_error(e, api_key, key_manager)

    error_text = str(e)
    is_server_error = (
        isinstance(e, requests.exceptions.HTTPError)
        and ('500' in error_text or '503' in error_text)
    )
    if is_server_error:
        status_code = '500' if '500' in error_text else '503'

        if retry_count >= 3:
            # Retries exhausted: surface the upstream status code directly.
            log('error', f"服务器错误({status_code})重试{retry_count}次后仍然失败",
                key=api_key[:8], request_type=request_type, model=model, status_code=int(status_code))

            # Do not switch keys; raise straight to the caller.
            raise HTTPException(status_code=int(status_code),
                                detail=f"Gemini API 服务器错误({status_code}),请稍后重试")

        # Exponential backoff, capped at MAX_RETRY_DELAY.
        wait_time = min(RETRY_DELAY * (2 ** retry_count), MAX_RETRY_DELAY)
        log('warning', f"Gemini服务器错误({status_code}),等待{wait_time}秒后重试 ({retry_count+1}/3)",
            key=api_key[:8], request_type=request_type, model=model, status_code=int(status_code))

        # Sleep, then signal the caller to retry with the same key.
        await asyncio.sleep(wait_time)
        return {'should_retry': True, 'error': error_detail, 'remove_cache': False}

    # Any other error: signal the caller to switch to another API key.
    log('error', f"API错误: {error_detail}",
        key=api_key[:8], request_type=request_type, model=model, error_message=error_detail)
    return {'should_retry': False, 'should_switch_key': True, 'error': error_detail, 'remove_cache': True}
app/models.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict, Optional, Union, Literal
|
| 2 |
+
from pydantic import BaseModel, Field
|
| 3 |
+
|
| 4 |
+
class Message(BaseModel):
    """One chat turn in the OpenAI-compatible schema."""
    role: str     # e.g. "system", "user" or "assistant" -- not validated here
    content: str  # plain-text message body
+
|
| 8 |
+
class ChatCompletionRequest(BaseModel):
    """OpenAI-compatible ``/chat/completions`` request body."""
    model: str                                    # target model name
    messages: List[Message]                       # conversation history, oldest first
    temperature: float = 0.7                      # sampling temperature
    top_p: Optional[float] = 1.0                  # nucleus-sampling cutoff
    n: int = 1                                    # number of choices requested
    stream: bool = False                          # True -> streaming response
    stop: Optional[Union[str, List[str]]] = None  # stop sequence(s)
    max_tokens: Optional[int] = None              # completion length cap (None = provider default)
    presence_penalty: Optional[float] = 0.0
    frequency_penalty: Optional[float] = 0.0
+
|
| 20 |
+
class Choice(BaseModel):
    """A single generated completion within a response."""
    index: int                          # position within the choices list
    message: Message                    # the assistant's reply
    finish_reason: Optional[str] = None  # e.g. "stop"; None while streaming
+
|
| 25 |
+
class Usage(BaseModel):
    """Token accounting for a completion; defaults to all zeros."""
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0
+
|
| 30 |
+
class ChatCompletionResponse(BaseModel):
    """OpenAI-compatible ``/chat/completions`` response body."""
    id: str                             # response identifier
    object: Literal["chat.completion"]  # fixed object-type tag
    created: int                        # creation timestamp (Unix seconds)
    model: str                          # model that produced the reply
    choices: List[Choice]               # generated completions
    usage: Usage = Field(default_factory=Usage)  # token accounting (zeros by default)
+
|
| 38 |
+
class ErrorResponse(BaseModel):
    """OpenAI-style error payload."""
    message: str                 # human-readable error description
    type: str                    # error category
    param: Optional[str] = None  # offending request parameter, if any
    code: Optional[str] = None   # machine-readable error code, if any
+
|
| 44 |
+
class ModelList(BaseModel):
    """OpenAI-compatible ``/models`` listing."""
    object: str = "list"
    data: List[Dict]  # one dict per available model
app/utils.py
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from fastapi import HTTPException, Request
|
| 3 |
+
import time
|
| 4 |
+
import re
|
| 5 |
+
from datetime import datetime, timedelta
|
| 6 |
+
from apscheduler.schedulers.background import BackgroundScheduler
|
| 7 |
+
import os
|
| 8 |
+
import requests
|
| 9 |
+
import httpx
|
| 10 |
+
from threading import Lock
|
| 11 |
+
import logging
|
| 12 |
+
import sys
|
| 13 |
+
from collections import deque
|
| 14 |
+
|
| 15 |
+
# Debug mode switches to the more verbose log-line template below.
DEBUG = os.environ.get("DEBUG", "false").lower() == "true"
# %-style templates filled by format_log_message(); the DEBUG variant adds
# the error_message field at the end.
LOG_FORMAT_DEBUG = '%(asctime)s - %(levelname)s - [%(key)s]-%(request_type)s-[%(model)s]-%(status_code)s: %(message)s - %(error_message)s'
LOG_FORMAT_NORMAL = '[%(asctime)s] [%(levelname)s] [%(key)s]-%(request_type)s-[%(model)s]-%(status_code)s: %(message)s'

# Configure the module logger.
logger = logging.getLogger("my_logger")
logger.setLevel(logging.DEBUG)

# Console handler: messages are pre-formatted by format_log_message(),
# so the handler emits them verbatim.
console_handler = logging.StreamHandler()
console_formatter = logging.Formatter('%(message)s')
console_handler.setFormatter(console_formatter)
logger.addHandler(console_handler)
| 28 |
+
|
| 29 |
+
# In-memory log buffer backing the web UI's "recent logs" view.
class LogManager:
    """Thread-safe ring buffer holding the most recent log entries."""

    def __init__(self, max_logs=100):
        # A bounded deque silently evicts the oldest entry on overflow.
        self.logs = deque(maxlen=max_logs)
        self.lock = Lock()

    def add_log(self, log_entry):
        """Append one entry, evicting the oldest when the buffer is full."""
        with self.lock:
            self.logs.append(log_entry)

    def get_recent_logs(self, count=50):
        """Return up to *count* most recent entries, oldest first."""
        with self.lock:
            snapshot = list(self.logs)
        return snapshot[-count:]
|
| 43 |
+
# Module-level singleton used by format_log_message() to retain recent logs.
log_manager = LogManager()
| 45 |
+
|
| 46 |
+
def format_log_message(level, message, extra=None):
    """Format one log line and record it in the in-memory log buffer.

    Picks the DEBUG or normal %-template, renders it from *message* plus the
    optional *extra* fields, stores a structured copy via ``log_manager``,
    and returns the rendered string for the caller to emit.
    """
    extra = extra or {}
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    key = extra.get('key', 'N/A')
    request_type = extra.get('request_type', 'N/A')
    model = extra.get('model', 'N/A')
    status_code = extra.get('status_code', 'N/A')
    error_message = extra.get('error_message', '')

    template = LOG_FORMAT_DEBUG if DEBUG else LOG_FORMAT_NORMAL
    formatted_log = template % {
        'asctime': timestamp,
        'levelname': level,
        'key': key,
        'request_type': request_type,
        'model': model,
        'status_code': status_code,
        'error_message': error_message,
        'message': message,
    }

    # Keep a structured copy for the web UI's recent-logs view.
    log_manager.add_log({
        'timestamp': timestamp,
        'level': level,
        'key': key,
        'request_type': request_type,
        'model': model,
        'status_code': status_code,
        'message': message,
        'error_message': error_message,
        'formatted': formatted_log,
    })

    return formatted_log
| 76 |
+
|
| 77 |
+
|
| 78 |
+
class APIKeyManager:
    """Manages the pool of Gemini API keys.

    Keys are parsed from the ``GEMINI_API_KEYS`` environment variable and
    served from a shuffled stack so load is spread across keys.  Within one
    request attempt each key is handed out at most once, tracked through
    ``tried_keys_for_request``.
    """

    def __init__(self):
        # Gemini API keys look like "AIzaSy" followed by 33 [a-zA-Z0-9_-] chars.
        self.api_keys = re.findall(
            r"AIzaSy[a-zA-Z0-9_-]{33}", os.environ.get('GEMINI_API_KEYS', ""))
        self.key_stack = []        # shuffled stack of keys to hand out
        self._reset_key_stack()    # build the initial randomized stack
        self.scheduler = BackgroundScheduler()
        self.scheduler.start()
        # Keys already tried during the current request attempt.
        self.tried_keys_for_request = set()

    def _reset_key_stack(self):
        """Rebuild the key stack as a shuffled copy of all configured keys."""
        shuffled_keys = self.api_keys[:]  # copy so the master list is untouched
        random.shuffle(shuffled_keys)
        self.key_stack = shuffled_keys

    def _pop_untried_key(self):
        """Pop keys off the stack until one not yet tried this request is found.

        Returns the key (after marking it tried), or None if the stack is
        exhausted.
        """
        while self.key_stack:
            key = self.key_stack.pop()
            if key not in self.tried_keys_for_request:
                self.tried_keys_for_request.add(key)
                return key
        return None

    def get_available_key(self):
        """Return a key not yet tried for the current request, or None.

        When the stack runs dry it is reshuffled once from the full key list
        and searched again before giving up.
        """
        key = self._pop_untried_key()
        if key is not None:
            return key

        if not self.api_keys:
            log_msg = format_log_message('ERROR', "没有配置任何 API 密钥!")
            logger.error(log_msg)
            return None

        # Stack exhausted: rebuild it once and retry.
        self._reset_key_stack()
        return self._pop_untried_key()

    def show_all_keys(self):
        """Log how many keys are configured and a masked form of each."""
        log_msg = format_log_message('INFO', f"当前可用API key个数: {len(self.api_keys)} ")
        logger.info(log_msg)
        for i, api_key in enumerate(self.api_keys):
            log_msg = format_log_message('INFO', f"API Key{i}: {api_key[:8]}...{api_key[-3:]}")
            logger.info(log_msg)

    def reset_tried_keys_for_request(self):
        """Clear the per-request 'already tried' set (call at request start)."""
        self.tried_keys_for_request = set()
+
|
| 142 |
+
|
| 143 |
+
def handle_gemini_error(error, current_api_key, key_manager) -> str:
    """Translate an upstream error into a short, loggable description string.

    Logs every error with the masked API key and returns a human-readable
    message; it never raises.  NOTE(review): a 400 response whose JSON body
    lacks an 'error' field falls through and implicitly returns None --
    confirm callers tolerate that.
    """
    if isinstance(error, requests.exceptions.HTTPError):
        status_code = error.response.status_code
        if status_code == 400:
            # 400 may mean an invalid key or a malformed request; inspect the
            # JSON body to tell them apart.
            try:
                error_data = error.response.json()
                if 'error' in error_data:
                    if error_data['error'].get('code') == "invalid_argument":
                        # Key rejected outright (expired or deleted).
                        error_message = "无效的 API 密钥"
                        extra_log_invalid_key = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
                        log_msg = format_log_message('ERROR', f"{current_api_key[:8]} ... {current_api_key[-3:]} → 无效,可能已过期或被删除", extra=extra_log_invalid_key)
                        logger.error(log_msg)
                        # key_manager.blacklist_key(current_api_key)

                        return error_message
                    # Other 400: report the upstream message.
                    error_message = error_data['error'].get(
                        'message', 'Bad Request')
                    extra_log_400 = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
                    log_msg = format_log_message('WARNING', f"400 错误请求: {error_message}", extra=extra_log_400)
                    logger.warning(log_msg)
                    return f"400 错误请求: {error_message}"
            except ValueError:
                # Response body was not valid JSON.
                error_message = "400 错误请求:响应不是有效的JSON格式"
                extra_log_400_json = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
                log_msg = format_log_message('WARNING', error_message, extra=extra_log_400_json)
                logger.warning(log_msg)
                return error_message

        elif status_code == 429:
            # Quota exhausted / rate limited for this key.
            error_message = "API 密钥配额已用尽或其他原因"
            extra_log_429 = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
            log_msg = format_log_message('WARNING', f"{current_api_key[:8]} ... {current_api_key[-3:]} → 429 官方资源耗尽或其他原因", extra=extra_log_429)
            logger.warning(log_msg)
            # key_manager.blacklist_key(current_api_key)

            return error_message

        elif status_code == 403:
            # Permission denied for this key.
            error_message = "权限被拒绝"
            extra_log_403 = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
            log_msg = format_log_message('ERROR', f"{current_api_key[:8]} ... {current_api_key[-3:]} → 403 权限被拒绝", extra=extra_log_403)
            logger.error(log_msg)
            # key_manager.blacklist_key(current_api_key)

            return error_message
        elif status_code == 500:
            # Upstream internal error -- usually transient.
            error_message = "服务器内部错误"
            extra_log_500 = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
            log_msg = format_log_message('WARNING', f"{current_api_key[:8]} ... {current_api_key[-3:]} → 500 服务器内部错误", extra=extra_log_500)
            logger.warning(log_msg)

            return "Gemini API 内部错误"

        elif status_code == 503:
            # Upstream temporarily unavailable.
            error_message = "服务不可用"
            extra_log_503 = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
            log_msg = format_log_message('WARNING', f"{current_api_key[:8]} ... {current_api_key[-3:]} → 503 服务不可用", extra=extra_log_503)
            logger.warning(log_msg)

            return "Gemini API 服务不可用"
        else:
            # Any other HTTP status.
            error_message = f"未知错误: {status_code}"
            extra_log_other = {'key': current_api_key[:8], 'status_code': status_code, 'error_message': error_message}
            log_msg = format_log_message('WARNING', f"{current_api_key[:8]} ... {current_api_key[-3:]} → {status_code} 未知错误", extra=extra_log_other)
            logger.warning(log_msg)

            return f"未知错误/模型不可用: {status_code}"

    elif isinstance(error, requests.exceptions.ConnectionError):
        error_message = "连接错误"
        log_msg = format_log_message('WARNING', error_message, extra={'error_message': error_message})
        logger.warning(log_msg)
        return error_message

    elif isinstance(error, requests.exceptions.Timeout):
        error_message = "请求超时"
        log_msg = format_log_message('WARNING', error_message, extra={'error_message': error_message})
        logger.warning(log_msg)
        return error_message
    else:
        # Anything else (programming errors, unexpected exception types).
        error_message = f"发生未知错误: {error}"
        log_msg = format_log_message('ERROR', error_message, extra={'error_message': error_message})
        logger.error(log_msg)
        return error_message
| 227 |
+
|
| 228 |
+
|
| 229 |
+
async def test_api_key(api_key: str) -> bool:
    """Probe whether *api_key* is valid by listing models.

    Returns True when the upstream call succeeds, False on any failure
    (HTTP error, network error, timeout).
    """
    url = "https://generativelanguage.googleapis.com/v1beta/models?key={}".format(api_key)
    try:
        async with httpx.AsyncClient() as client:
            resp = await client.get(url)
            resp.raise_for_status()
        return True
    except Exception:
        # Any failure at all means the key is unusable right now.
        return False
+
|
| 242 |
+
|
| 243 |
+
rate_limit_data = {}
|
| 244 |
+
rate_limit_lock = Lock()
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
def protect_from_abuse(request: Request, max_requests_per_minute: int = 30, max_requests_per_day_per_ip: int = 600):
    """Simple in-memory rate limiting.

    Enforces a per-path requests-per-minute cap and a per-client-IP
    requests-per-day cap.  Raises ``HTTPException(429)`` when either limit
    is exceeded.

    Fix: stale bucket entries are now pruned on each call -- previously the
    minute/day keys (which embed the window index and are therefore never
    reused) accumulated in ``rate_limit_data`` forever, leaking memory.
    """
    now = int(time.time())
    minute = now // 60
    day = now // (60 * 60 * 24)

    # Keys embed the window index, so each new minute/day yields a fresh key.
    minute_key = f"{request.url.path}:{minute}"
    day_key = f"{request.client.host}:{day}"

    with rate_limit_lock:
        # Prune entries older than a full day: their embedded window index
        # can no longer match the current minute or day bucket, so they are
        # guaranteed dead.  This bounds the dict to at most ~1 day of keys.
        stale_keys = [k for k, (_, ts) in rate_limit_data.items() if now - ts >= 86400]
        for k in stale_keys:
            del rate_limit_data[k]

        minute_count, minute_timestamp = rate_limit_data.get(
            minute_key, (0, now))
        if now - minute_timestamp >= 60:
            # Window rolled over; restart the counter.
            minute_count = 0
            minute_timestamp = now
        minute_count += 1
        rate_limit_data[minute_key] = (minute_count, minute_timestamp)

        day_count, day_timestamp = rate_limit_data.get(day_key, (0, now))
        if now - day_timestamp >= 86400:
            day_count = 0
            day_timestamp = now
        day_count += 1
        rate_limit_data[day_key] = (day_count, day_timestamp)

    if minute_count > max_requests_per_minute:
        raise HTTPException(status_code=429, detail={
            "message": "Too many requests per minute", "limit": max_requests_per_minute})
    if day_count > max_requests_per_day_per_ip:
        raise HTTPException(status_code=429, detail={"message": "Too many requests per day from this IP", "limit": max_requests_per_day_per_ip})
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
httpx
|
| 4 |
+
python-dotenv
|
| 5 |
+
requests
|
| 6 |
+
apscheduler
|
| 7 |
+
jinja2
|
version.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
version=0.0.3
|