clash-linux committed on
Commit
8fea9ab
·
verified ·
1 Parent(s): 832e5ec

Upload 32 files

Browse files
app/api/auth.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import HTTPException, Request
2
+
3
# Password-verification dependency
async def verify_password(request: Request, PASSWORD: str = None):
    """Validate that the request's Bearer token matches the configured password.

    Args:
        request: Incoming HTTP request; the token is read from the
            ``Authorization`` header.
        PASSWORD: Expected password. When falsy (None/empty), authentication
            is disabled and the check is skipped entirely.

    Raises:
        HTTPException: 401 when the header is missing, not a Bearer scheme,
            or the token does not match.
    """
    if PASSWORD:
        auth_header = request.headers.get("Authorization")
        if not auth_header or not auth_header.startswith("Bearer "):
            raise HTTPException(
                status_code=401, detail="Unauthorized: Missing or invalid token")
        token = auth_header.split(" ")[1]
        # Constant-time comparison: a plain `!=` short-circuits on the first
        # differing byte and can leak the password via response timing.
        import secrets
        if not secrets.compare_digest(token, PASSWORD):
            raise HTTPException(
                status_code=401, detail="Unauthorized: Invalid token")
app/api/client_disconnect.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import time
3
+ from fastapi import Request
4
+ from app.models import ChatCompletionRequest
5
+ from app.utils import create_error_response
6
+ from .logging_utils import log
7
+
8
# Client-disconnect watcher
async def check_client_disconnect(http_request: Request, current_api_key: str, request_type: str, model: str):
    """Poll the request until the client drops the connection, then return True.

    Runs forever while the client stays connected, checking twice per second.
    """
    poll_interval = 0.5
    while not await http_request.is_disconnected():
        await asyncio.sleep(poll_interval)
    log('info', "客户端连接已中断,等待API请求完成",
        extra={'key': current_api_key[:8], 'request_type': request_type,
               'model': model, 'error_message': '检测到客户端断开连接'})
    return True
17
+
18
# Handler invoked once the client has disconnected mid-request
async def handle_client_disconnect(
    gemini_task: asyncio.Task,
    chat_request: ChatCompletionRequest,
    request_type: str,
    current_api_key: str,
    response_cache_manager,
    cache_key: str = None,
    client_ip: str = None
):
    """Finish the in-flight Gemini task after the client has gone away.

    Waits (shielded) for ``gemini_task``, cleans up any cache entry keyed by
    ``cache_key``, and always returns a response object (real or error) —
    never None — even though the caller's client can no longer receive it.

    Args:
        gemini_task: The still-running task produced by run_gemini_completion.
        chat_request: Original chat request (used for model name / response build).
        request_type: Label used only for logging ('stream'/'non-stream').
        current_api_key: API key in use; first 8 chars are logged.
        response_cache_manager: Cache with a dict-like ``.cache`` and ``.get()``.
        cache_key: Optional cache key for this request's response.
        client_ip: Unused here beyond the (commented-out) cache call.
    """
    try:
        # Wait for the API task; shield() keeps our own cancellation from
        # propagating into it, so the upstream request can still complete.
        response_content = await asyncio.shield(gemini_task)

        # Empty result: either the task returned None or the wrapper's text
        # is empty. NOTE(review): assumes response objects expose `.text` —
        # confirm against the ResponseWrapper contract.
        if response_content is None or response_content.text == "":
            if response_content is None:
                log('info', "客户端断开后API任务返回None",
                    extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
            else:
                extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'status_code': 204}
                log('info', "客户端断开后Gemini API 返回空响应", extra=extra_log)

            # Drop any existing cache entry, since the response is empty.
            if cache_key and cache_key in response_cache_manager.cache:
                log('info', f"因空响应,删除缓存: {cache_key[:8]}...",
                    extra={'cache_operation': 'remove-on-empty', 'request_type': request_type})
                del response_cache_manager.cache[cache_key]

            # Return an error response instead of None.
            return create_error_response(chat_request.model, "AI未返回任何内容,请重试")

        # Check for a pre-existing cache entry first.
        cached_response, cache_hit = response_cache_manager.get(cache_key)
        if cache_hit:
            log('info', f"客户端断开但找到已存在缓存,将删除: {cache_key[:8]}...",
                extra={'cache_operation': 'disconnect-found-cache', 'request_type': request_type})

            # Delete defensively (the key may already have been evicted).
            if cache_key in response_cache_manager.cache:
                del response_cache_manager.cache[cache_key]

            # Intentionally does NOT return the cached value: a fresh
            # response is built from the just-completed task instead.

        # Build a new response object. Import is local to avoid a
        # module-level import cycle with app.utils.
        from app.utils.response import create_response
        response = create_response(chat_request, response_content)

        # The client is gone, so this response will never be delivered;
        # uncomment below to cache it for a later retry by the same key.
        # cache_response(response, cache_key, client_ip)

        return response
    except asyncio.CancelledError:
        # Even on cancellation, try to salvage a result from the task.
        log('info', "客户端断开后任务被取消,但我们仍会尝试完成",
            extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})

        # Only a task that finished (and was not itself cancelled) has a result.
        if gemini_task.done() and not gemini_task.cancelled():
            try:
                response_content = gemini_task.result()

                # Same pre-existing-cache cleanup as the happy path above.
                cached_response, cache_hit = response_cache_manager.get(cache_key)
                if cache_hit:
                    log('info', f"任务被取消但找到已存在缓存,将删除: {cache_key[:8]}...",
                        extra={'cache_operation': 'cancel-found-cache', 'request_type': request_type})

                    # Delete defensively.
                    if cache_key in response_cache_manager.cache:
                        del response_cache_manager.cache[cache_key]

                # Build (but do not cache) the response.
                from app.utils.response import create_response
                response = create_response(chat_request, response_content)
                return response
            except Exception as inner_e:
                log('error', f"客户端断开后从已完成任务获取结果失败: {str(inner_e)}",
                    extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})

        # Either the task never finished or fetching its result failed:
        # drop the cache entry and report an error.
        if cache_key and cache_key in response_cache_manager.cache:
            log('info', f"因任务获取结果失败,删除缓存: {cache_key[:8]}...",
                extra={'cache_operation': 'remove-on-error', 'request_type': request_type})
            del response_cache_manager.cache[cache_key]

        # Return an error response instead of None.
        return create_error_response(chat_request.model, "请求处理过程中发生错误,请重试")
    except Exception as e:
        # Any other failure while awaiting/processing the API task.
        error_msg = str(e)
        extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message': error_msg}
        log('error', f"客户端断开后处理API响应时出错: {error_msg}", extra=extra_log)

        # Drop the cache entry because of the error.
        if cache_key and cache_key in response_cache_manager.cache:
            log('info', f"因API响应错误,删除缓存: {cache_key[:8]}...",
                extra={'cache_operation': 'remove-on-error', 'request_type': request_type})
            del response_cache_manager.cache[cache_key]

        # Return an error response instead of None.
        return create_error_response(chat_request.model, f"请求处理错误: {error_msg}")
app/api/gemini_handlers.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from app.models import ChatCompletionRequest
3
+ from app.services import GeminiClient
4
+ from .logging_utils import log
5
+
6
# Gemini completion-request runner
async def run_gemini_completion(
    gemini_client,
    chat_request: ChatCompletionRequest,
    contents,
    system_instruction,
    request_type: str,
    current_api_key: str,
    safety_settings,
    safety_settings_g2
):
    """Run a non-streaming Gemini request on a worker thread.

    The blocking ``complete_chat`` call is moved off the event loop with
    ``asyncio.to_thread`` and shielded so a client disconnect cannot cancel
    the upstream API call mid-flight.

    Returns the response object from ``gemini_client.complete_chat``; re-raises
    ``asyncio.CancelledError`` if the call could not be salvaged.
    """
    # Function-attribute flag: used below to emit the completion log only
    # once per process, not once per call.
    run_fn = run_gemini_completion

    try:
        # Run the blocking client call in a thread; wrapping it in its own
        # task lets us keep a handle to it across cancellation.
        response_future = asyncio.create_task(
            asyncio.to_thread(
                gemini_client.complete_chat,
                chat_request,
                contents,
                # g2 safety settings apply only to the 2.0 flash-exp models.
                safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
                system_instruction
            )
        )

        # shield() keeps external cancellation from killing the API call.
        response_content = await asyncio.shield(response_future)

        # Log completion only on the first ever call (see run_fn above).
        if not hasattr(run_fn, 'logged_complete'):
            log('info', "非流式请求成功完成", extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
            run_fn.logged_complete = True
        return response_content
    except asyncio.CancelledError:
        # We were cancelled (typically: client disconnected). If the API call
        # is still in flight, wait for it anyway so the result isn't wasted.
        if 'response_future' in locals() and not response_future.done():
            try:
                # Shield again and wait for the in-flight call to finish.
                response_content = await asyncio.shield(response_future)
                log('info', "API请求在客户端断开后完成", extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
                return response_content
            except Exception as e:
                extra_log_gemini_cancel = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message': f'API请求在客户端断开后失败: {str(e)}'}
                log('info', "API调用因客户端断开而失败", extra=extra_log_gemini_cancel)
                raise

        # Task never started or already finished/failed: just log and
        # propagate the cancellation.
        extra_log_gemini_cancel = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message': '客户端断开导致API调用取消'}
        log('info', "API调用因客户端断开而取消", extra=extra_log_gemini_cancel)
        raise
app/api/logging_utils.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from app.utils import format_log_message
3
+
4
+ # 获取logger
5
+ logger = logging.getLogger("my_logger")
6
+
7
# Unified logging helper
def log(level: str, message: str, **extra):
    """Emit *message* at *level* through the module logger.

    Arbitrary keyword arguments are gathered into a dict and passed to
    ``format_log_message`` as structured context.
    """
    level_name = level.lower()
    formatted = format_log_message(level.upper(), message, extra=extra)
    emit = getattr(logger, level_name)
    emit(formatted)
app/api/nonstream_handlers.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from fastapi import HTTPException, status, Request
3
+ from app.models import ChatCompletionRequest
4
+ from app.services import GeminiClient
5
+ from app.utils import cache_response, update_api_call_stats
6
+ from .logging_utils import log
7
+ from .client_disconnect import check_client_disconnect, handle_client_disconnect
8
+ from .gemini_handlers import run_gemini_completion
9
+
10
# Non-streaming request processor
async def process_nonstream_request(
    chat_request: ChatCompletionRequest,
    http_request: Request,
    request_type: str,
    contents,
    system_instruction,
    current_api_key: str,
    response_cache_manager,
    active_requests_manager,
    safety_settings,
    safety_settings_g2,
    api_call_stats,
    cache_key: str = None,
    client_ip: str = None
):
    """Handle a non-streaming API request.

    Races the Gemini completion task against a client-disconnect watcher:
    if the client disconnects first, handling is delegated to
    handle_client_disconnect; otherwise the response is built, cached once,
    and the cache entry is immediately removed (single-use semantics).

    Returns a response object, or raises HTTPException on 408/other HTTP errors.
    """
    gemini_client = GeminiClient(current_api_key)

    # Task 1: the actual Gemini completion.
    gemini_task = asyncio.create_task(
        run_gemini_completion(
            gemini_client,
            chat_request,
            contents,
            system_instruction,
            request_type,
            current_api_key,
            safety_settings,
            safety_settings_g2
        )
    )

    # Task 2: watcher that resolves when the client disconnects.
    disconnect_task = asyncio.create_task(
        check_client_disconnect(
            http_request,
            current_api_key,
            request_type,
            chat_request.model
        )
    )

    try:
        # Race: whichever of {API completion, client disconnect} happens first.
        done, pending = await asyncio.wait(
            [gemini_task, disconnect_task],
            return_when=asyncio.FIRST_COMPLETED
        )

        if disconnect_task in done:
            # Client left first; still let the API request finish so the
            # result can be handled/cached.
            return await handle_client_disconnect(
                gemini_task,
                chat_request,
                request_type,
                current_api_key,
                response_cache_manager,
                cache_key,
                client_ip
            )
        else:
            # API finished first; stop watching for disconnects.
            disconnect_task.cancel()

            # Task is already done, so this await returns (or raises) immediately.
            response_content = await gemini_task

            # If a cached response already exists for this key, return it
            # instead of building a new one (and consume the entry).
            cached_response, cache_hit = response_cache_manager.get(cache_key)
            if cache_hit:
                log('info', f"缓存已存在,直接返回: {cache_key[:8]}...",
                    extra={'cache_operation': 'use-existing', 'request_type': request_type})

                # Delete defensively; entry may have been evicted meanwhile.
                if cache_key in response_cache_manager.cache:
                    del response_cache_manager.cache[cache_key]
                    log('info', f"缓存使用后已删除: {cache_key[:8]}...",
                        extra={'cache_operation': 'used-and-removed', 'request_type': request_type})

                return cached_response

            # Build the response. Local import avoids a circular import.
            from app.utils.response import create_response
            response = create_response(chat_request, response_content)

            # Store in the cache (also updates API-call stats).
            cache_response(response, cache_key, client_ip, response_cache_manager, update_api_call_stats, api_key=current_api_key)

            # Immediately remove the entry so the cache is single-use.
            if cache_key and cache_key in response_cache_manager.cache:
                del response_cache_manager.cache[cache_key]
                log('info', f"缓存创建后立即删除: {cache_key[:8]}...",
                    extra={'cache_operation': 'store-and-remove', 'request_type': request_type})

            # Return the freshly built response.
            return response

    except asyncio.CancelledError:
        # The whole handler was cancelled (e.g. by the active-requests pool).
        extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message':"请求被取消"}
        log('info', "请求取消", extra=extra_log)

        # Prefer a cached result if one already exists.
        cached_response, cache_hit = response_cache_manager.get(cache_key)
        if cache_hit:
            log('info', f"请求取消但找到有效缓存,使用缓存响应: {cache_key[:8]}...",
                extra={'cache_operation': 'use-cache-on-cancel', 'request_type': request_type})

            # Delete defensively (single-use cache).
            if cache_key in response_cache_manager.cache:
                del response_cache_manager.cache[cache_key]
                log('info', f"缓存使用后已删除: {cache_key[:8]}...",
                    extra={'cache_operation': 'used-and-removed', 'request_type': request_type})

            return cached_response

        # No cache: try to ride out the in-flight API request.
        if not gemini_task.done():
            log('info', "请求取消但API请求尚未完成,继续等待...",
                extra={'key': current_api_key[:8], 'request_type': request_type})

            # shield() prevents this await from being cancelled again.
            response_content = await asyncio.shield(gemini_task)

            # Build the response (local import avoids a circular import).
            from app.utils.response import create_response
            response = create_response(chat_request, response_content)

            # Deliberately not cached; returned directly.
            return response
        else:
            # Task already finished; just take its result.
            response_content = gemini_task.result()

            # Build the response.
            from app.utils.response import create_response
            response = create_response(chat_request, response_content)

            # Deliberately not cached; returned directly.
            return response

    except HTTPException as e:
        if e.status_code == status.HTTP_408_REQUEST_TIMEOUT:
            # Client connection dropped: log with 408 and abort retries upstream.
            extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model,
                         'status_code': 408, 'error_message': '客户端连接中断'}
            log('error', "客户端连接中断,终止后续重试", extra=extra_log)
            raise
        else:
            raise
app/api/request_handlers.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ from typing import Literal
4
+ from fastapi import HTTPException, Request, status
5
+ from fastapi.responses import StreamingResponse
6
+ from app.models import ChatCompletionRequest
7
+ from app.services import GeminiClient
8
+ from app.utils import protect_from_abuse, handle_gemini_error, handle_api_error
9
+ from .logging_utils import log
10
+ from .stream_handlers import process_stream_request
11
+ from .nonstream_handlers import process_nonstream_request
12
+
13
# Top-level request dispatcher
async def process_request(
    chat_request: ChatCompletionRequest,
    http_request: Request,
    request_type: Literal['stream', 'non-stream'],
    key_manager,
    response_cache_manager,
    active_requests_manager,
    safety_settings,
    safety_settings_g2,
    api_call_stats,
    FAKE_STREAMING,
    FAKE_STREAMING_INTERVAL,
    MAX_REQUESTS_PER_MINUTE,
    MAX_REQUESTS_PER_DAY_PER_IP,
    cache_key: str = None,
    client_ip: str = None
):
    """Main entry for handling an API request (streaming or not).

    Validates the request, then rotates through the configured API keys
    (outer loop) with up to 3 server-error retries per key (inner loop),
    dispatching to process_stream_request / process_nonstream_request.
    When every key fails, returns an SSE error stream (for streaming
    requests) or raises HTTP 500 (for non-streaming).
    """
    # NOTE(review): module-level mutable state shared across requests —
    # racy under concurrency; confirm this is intentional.
    global current_api_key

    # Basic pre-request checks: rate limiting, then model validation.
    protect_from_abuse(
        http_request, MAX_REQUESTS_PER_MINUTE, MAX_REQUESTS_PER_DAY_PER_IP)
    if chat_request.model not in GeminiClient.AVAILABLE_MODELS:
        error_msg = "无效的模型"
        extra_log = {'request_type': request_type, 'model': chat_request.model, 'status_code': 400, 'error_message': error_msg}
        log('error', error_msg, extra=extra_log)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail=error_msg)

    # Reset the per-request set of already-tried keys.
    key_manager.reset_tried_keys_for_request()

    # Convert OpenAI-style messages to Gemini format.
    # NOTE(review): convert_messages is called with the class itself as the
    # first (self) argument — confirm it is intended as an unbound call.
    contents, system_instruction = GeminiClient.convert_messages(
        GeminiClient, chat_request.messages)

    # One attempt per available API key (at least one attempt).
    retry_attempts = len(key_manager.api_keys) if key_manager.api_keys else 1

    # Outer loop: rotate API keys.
    for attempt in range(1, retry_attempts + 1):
        # Fetch the next usable key.
        current_api_key = key_manager.get_available_key()

        # No key available: abandon further attempts.
        if current_api_key is None:
            log('warning', "没有可用的 API 密钥,跳过本次尝试",
                extra={'request_type': request_type, 'model': chat_request.model, 'status_code': 'N/A'})
            break

        # Log which key this attempt uses (first 8 chars only).
        log('info', f"第 {attempt}/{retry_attempts} 次尝试 ... 使用密钥: {current_api_key[:8]}...",
            extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})

        # Inner loop: up to 3 retries for transient server errors.
        server_error_retries = 3
        for server_retry in range(1, server_error_retries + 1):
            try:
                # Dispatch by request type.
                if chat_request.stream:
                    try:
                        return await process_stream_request(
                            chat_request,
                            http_request,
                            contents,
                            system_instruction,
                            current_api_key,
                            key_manager,
                            safety_settings,
                            safety_settings_g2,
                            api_call_stats,
                            FAKE_STREAMING,
                            FAKE_STREAMING_INTERVAL
                        )
                    except Exception as e:
                        # Record the streaming failure but do not answer the
                        # client yet — re-raise so the outer handler can
                        # classify the error and possibly switch keys.
                        error_detail = handle_gemini_error(e, current_api_key, key_manager)
                        log('error', f"流式请求失败: {error_detail}",
                            extra={'key': current_api_key[:8], 'request_type': 'stream', 'model': chat_request.model})
                        # Propagate instead of returning an error response.
                        raise
                else:
                    return await process_nonstream_request(
                        chat_request,
                        http_request,
                        request_type,
                        contents,
                        system_instruction,
                        current_api_key,
                        response_cache_manager,
                        active_requests_manager,
                        safety_settings,
                        safety_settings_g2,
                        api_call_stats,
                        cache_key,
                        client_ip
                    )
            except HTTPException as e:
                if e.status_code == status.HTTP_408_REQUEST_TIMEOUT:
                    # Client gone: no point retrying with other keys.
                    log('error', "客户端连接中断",
                        extra={'key': current_api_key[:8], 'request_type': request_type,
                               'model': chat_request.model, 'status_code': 408})
                    raise
                else:
                    raise
            except Exception as e:
                # Classify the failure with the shared error handler; it
                # decides whether to retry, switch keys, and/or drop cache.
                error_result = await handle_api_error(
                    e,
                    current_api_key,
                    key_manager,
                    request_type,
                    chat_request.model,
                    server_retry - 1
                )

                # Drop the cached entry if the handler asked for it.
                if error_result.get('remove_cache', False) and cache_key and cache_key in response_cache_manager.cache:
                    log('info', f"因API错误,删除缓存: {cache_key[:8]}...",
                        extra={'cache_operation': 'remove-on-error', 'request_type': request_type})
                    del response_cache_manager.cache[cache_key]

                if error_result.get('should_retry', False):
                    # Transient server error: retry same key (any backoff
                    # wait already happened inside handle_api_error).
                    continue
                elif error_result.get('should_switch_key', False) and attempt < retry_attempts:
                    # Break the inner loop to rotate to the next key.
                    log('info', f"API密钥 {current_api_key[:8]}... 失败,准备尝试下一个密钥",
                        extra={'key': current_api_key[:8], 'request_type': request_type})
                    break
                else:
                    # Unrecoverable error or retry budget exhausted.
                    break

    # All keys / retries failed.
    msg = "所有API密钥均请求失败,请稍后重试"
    log('error', "API key 替换失败,所有API key都已尝试,请重新配置或稍后重试", extra={'key': 'N/A', 'request_type': 'switch_key', 'status_code': 'N/A'})

    # Streaming clients get the error as an SSE stream rather than an HTTP error.
    if chat_request.stream:
        async def error_generator():
            error_json = json.dumps({'error': {'message': msg, 'type': 'api_error'}})
            yield f"data: {error_json}\n\n"
            yield "data: [DONE]\n\n"

        return StreamingResponse(error_generator(), media_type="text/event-stream")
    else:
        # Non-streaming clients get a standard HTTP 500.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=msg)
app/api/routes.py CHANGED
@@ -1,33 +1,25 @@
1
  from fastapi import APIRouter, HTTPException, Request, Depends, status
2
  from fastapi.responses import JSONResponse, StreamingResponse
3
  from app.models import ChatCompletionRequest, ChatCompletionResponse, ErrorResponse, ModelList
4
- from app.services import GeminiClient, ResponseWrapper
5
  from app.utils import (
6
- handle_gemini_error,
7
- protect_from_abuse,
8
- APIKeyManager,
9
- test_api_key,
10
- format_log_message,
11
- log_manager,
12
  generate_cache_key,
13
  cache_response,
14
  create_chat_response,
15
- create_error_response,
16
- handle_api_error,
17
- update_api_call_stats
18
  )
19
- import json
20
- import asyncio
21
- import time
22
- import logging
23
- import random
24
- from typing import Literal
25
  from app.config.settings import (
26
  api_call_stats,
27
  BLOCKED_MODELS
28
  )
29
- # 获取logger
30
- logger = logging.getLogger("my_logger")
 
 
 
 
 
 
31
 
32
  # 创建路由器
33
  router = APIRouter()
@@ -76,23 +68,9 @@ def init_router(
76
  MAX_REQUESTS_PER_MINUTE = _max_requests_per_minute
77
  MAX_REQUESTS_PER_DAY_PER_IP = _max_requests_per_day_per_ip
78
 
79
- # 日志记录函数
80
- def log(level: str, message: str, **extra):
81
- """简化日志记录的统一函数"""
82
- msg = format_log_message(level.upper(), message, extra=extra)
83
- getattr(logger, level.lower())(msg)
84
-
85
- # 密码验证依赖
86
- async def verify_password(request: Request):
87
- if PASSWORD:
88
- auth_header = request.headers.get("Authorization")
89
- if not auth_header or not auth_header.startswith("Bearer "):
90
- raise HTTPException(
91
- status_code=401, detail="Unauthorized: Missing or invalid token")
92
- token = auth_header.split(" ")[1]
93
- if token != PASSWORD:
94
- raise HTTPException(
95
- status_code=401, detail="Unauthorized: Invalid token")
96
 
97
  # API路由
98
  @router.get("/v1/models", response_model=ModelList)
@@ -102,13 +80,27 @@ def list_models():
102
  return ModelList(data=[{"id": model, "object": "model", "created": 1678888888, "owned_by": "organization-owner"} for model in filtered_models])
103
 
104
  @router.post("/v1/chat/completions", response_model=ChatCompletionResponse)
105
- async def chat_completions(request: ChatCompletionRequest, http_request: Request, _: None = Depends(verify_password)):
106
  # 获取客户端IP
107
  client_ip = http_request.client.host if http_request.client else "unknown"
108
 
109
  # 流式请求直接处理,不使用缓存
110
  if request.stream:
111
- return await process_request(request, http_request, "stream")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
  # 生成完整缓存键 - 用于精确匹配
114
  cache_key = generate_cache_key(request)
@@ -191,7 +183,23 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
191
 
192
  # 创建请求处理任务
193
  process_task = asyncio.create_task(
194
- process_request(request, http_request, "non-stream", cache_key=cache_key, client_ip=client_ip)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  )
196
 
197
  # 将任务添加到活跃请求池
@@ -213,706 +221,4 @@ async def chat_completions(request: ChatCompletionRequest, http_request: Request
213
  return cached_response
214
 
215
  # 重新抛出异常
216
- raise
217
-
218
- # 请求处理函数
219
- async def process_request(chat_request: ChatCompletionRequest, http_request: Request, request_type: Literal['stream', 'non-stream'], cache_key: str = None, client_ip: str = None):
220
- """处理API请求的主函数,根据需要处理流式或非流式请求"""
221
- global current_api_key
222
-
223
- # 请求前基本检查
224
- protect_from_abuse(
225
- http_request, MAX_REQUESTS_PER_MINUTE, MAX_REQUESTS_PER_DAY_PER_IP)
226
- if chat_request.model not in GeminiClient.AVAILABLE_MODELS:
227
- error_msg = "无效的模型"
228
- extra_log = {'request_type': request_type, 'model': chat_request.model, 'status_code': 400, 'error_message': error_msg}
229
- log('error', error_msg, extra=extra_log)
230
- raise HTTPException(
231
- status_code=status.HTTP_400_BAD_REQUEST, detail=error_msg)
232
-
233
- # 重置已尝试的密钥
234
- key_manager.reset_tried_keys_for_request()
235
-
236
- # 转换消息格式
237
- contents, system_instruction = GeminiClient.convert_messages(
238
- GeminiClient, chat_request.messages)
239
-
240
- # 设置重试次数(使用可用API密钥数量作为最大重试次数)
241
- retry_attempts = len(key_manager.api_keys) if key_manager.api_keys else 1
242
-
243
- # 尝试使用不同API密钥
244
- for attempt in range(1, retry_attempts + 1):
245
- # 获取下一个密钥
246
- current_api_key = key_manager.get_available_key()
247
-
248
- # 检查API密钥是否可用
249
- if current_api_key is None:
250
- log('warning', "没有可用的 API 密钥,跳过本次尝试",
251
- extra={'request_type': request_type, 'model': chat_request.model, 'status_code': 'N/A'})
252
- break
253
-
254
- # 记录当前尝试的密钥信息
255
- log('info', f"第 {attempt}/{retry_attempts} 次尝试 ... 使用密钥: {current_api_key[:8]}...",
256
- extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
257
-
258
- # 服务器错误重试逻辑
259
- server_error_retries = 3
260
- for server_retry in range(1, server_error_retries + 1):
261
- try:
262
- # 根据请求类型分别处理
263
- if chat_request.stream:
264
- try:
265
- return await process_stream_request(
266
- chat_request,
267
- http_request,
268
- contents,
269
- system_instruction,
270
- current_api_key
271
- )
272
- except Exception as e:
273
- # 捕获流式请求的异常,但不立即返回错误
274
- # 记录错误并继续尝试下一个API密钥
275
- error_detail = handle_gemini_error(e, current_api_key, key_manager)
276
- log('error', f"流式请求失败: {error_detail}",
277
- extra={'key': current_api_key[:8], 'request_type': 'stream', 'model': chat_request.model})
278
- # 不返回错误,而是抛出异常让外层循环处理
279
- raise
280
- else:
281
- return await process_nonstream_request(
282
- chat_request,
283
- http_request,
284
- request_type,
285
- contents,
286
- system_instruction,
287
- current_api_key,
288
- cache_key,
289
- client_ip
290
- )
291
- except HTTPException as e:
292
- if e.status_code == status.HTTP_408_REQUEST_TIMEOUT:
293
- log('error', "客户端连接中断",
294
- extra={'key': current_api_key[:8], 'request_type': request_type,
295
- 'model': chat_request.model, 'status_code': 408})
296
- raise
297
- else:
298
- raise
299
- except Exception as e:
300
- # 使用统一的API错误处理函数
301
- error_result = await handle_api_error(
302
- e,
303
- current_api_key,
304
- key_manager,
305
- request_type,
306
- chat_request.model,
307
- server_retry - 1
308
- )
309
-
310
- # 如果需要删除缓存,清除缓存
311
- if error_result.get('remove_cache', False) and cache_key and cache_key in response_cache_manager.cache:
312
- log('info', f"因API错误,删除缓存: {cache_key[:8]}...",
313
- extra={'cache_operation': 'remove-on-error', 'request_type': request_type})
314
- del response_cache_manager.cache[cache_key]
315
-
316
- if error_result.get('should_retry', False):
317
- # 服务器错误需要重试(等待已在handle_api_error中完成)
318
- continue
319
- elif error_result.get('should_switch_key', False) and attempt < retry_attempts:
320
- # 跳出服务器错误重试循环,获取下一个可用密钥
321
- log('info', f"API密钥 {current_api_key[:8]}... 失败,准备尝试下一个密钥",
322
- extra={'key': current_api_key[:8], 'request_type': request_type})
323
- break
324
- else:
325
- # 无法处理的错误或已达到重试上限
326
- break
327
-
328
- # 如果所有尝试都失败
329
- msg = "所有API密钥均请求失败,请稍后重试"
330
- log('error', "API key 替换失败,所有API key都已尝试,请重新配置或稍后重试", extra={'key': 'N/A', 'request_type': 'switch_key', 'status_code': 'N/A'})
331
-
332
- # 对于流式请求,创建一个特殊的StreamingResponse返回错误
333
- if chat_request.stream:
334
- async def error_generator():
335
- error_json = json.dumps({'error': {'message': msg, 'type': 'api_error'}})
336
- yield f"data: {error_json}\n\n"
337
- yield "data: [DONE]\n\n"
338
-
339
- return StreamingResponse(error_generator(), media_type="text/event-stream")
340
- else:
341
- # 非流式请求使用标准HTTP异常
342
- raise HTTPException(
343
- status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=msg)
344
-
345
- # 流式请求处理函数
346
- async def process_stream_request(
347
- chat_request: ChatCompletionRequest,
348
- http_request: Request,
349
- contents,
350
- system_instruction,
351
- current_api_key: str
352
- ) -> StreamingResponse:
353
- """处理流式API请求"""
354
-
355
- # 创建一个直接流式响应的生成器函数
356
- async def stream_response_generator():
357
- # 如果启用了假流式模式,使用随机遍历API密钥的方式
358
- if FAKE_STREAMING:
359
- # 创建一个队列用于在任务之间传递数据
360
- queue = asyncio.Queue()
361
- keep_alive_task = None
362
- api_request_task = None
363
-
364
- try:
365
- # 创建一个保持连接的任务,持续发送换行符
366
- async def keep_alive_sender():
367
- try:
368
- # 创建一个Gemini客户端用于发送保持连接的换行符
369
- keep_alive_client = GeminiClient(current_api_key)
370
-
371
- # 启动保持连接的生成器
372
- keep_alive_generator = keep_alive_client.stream_chat(
373
- chat_request,
374
- contents,
375
- safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
376
- system_instruction
377
- )
378
-
379
- # 持续发送换行符直到被取消
380
- async for line in keep_alive_generator:
381
- if line == "\n":
382
- # 将换行符格式化为SSE格式
383
- formatted_chunk = {
384
- "id": "chatcmpl-keepalive",
385
- "object": "chat.completion.chunk",
386
- "created": int(time.time()),
387
- "model": chat_request.model,
388
- "choices": [{"delta": {"content": ""}, "index": 0, "finish_reason": None}]
389
- }
390
- # 将格式化的换行符放入队列
391
- await queue.put(f"data: {json.dumps(formatted_chunk)}\n\n")
392
- except asyncio.CancelledError:
393
- log('info', "保持连接任务被取消",
394
- extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
395
- raise
396
- except Exception as e:
397
- log('error', f"保持连接任务出错: {str(e)}",
398
- extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
399
- # 将错误放入队列
400
- await queue.put(None)
401
- raise
402
-
403
- # 创建一个任务来随机遍历API密钥并请求内容
404
- async def api_request_handler():
405
- success = False
406
- try:
407
- # 重置已尝试的密钥
408
- key_manager.reset_tried_keys_for_request()
409
-
410
- # 获取可用的API密钥
411
- available_keys = key_manager.api_keys.copy()
412
- random.shuffle(available_keys) # 随机打乱密钥顺序
413
-
414
- # 遍历所有API密钥尝试获取响应
415
- for attempt, api_key in enumerate(available_keys, 1):
416
- try:
417
- log('info', f"假流式模式: 尝试API密钥 {api_key[:8]}... ({attempt}/{len(available_keys)})",
418
- extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
419
-
420
- # 创建一个新的客户端使用当前API密钥
421
- non_stream_client = GeminiClient(api_key)
422
-
423
- # 使用非流式方式请求内容
424
- response_content = await asyncio.to_thread(
425
- non_stream_client.complete_chat,
426
- chat_request,
427
- contents,
428
- safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
429
- system_instruction
430
- )
431
-
432
- # 检查响应是否有效
433
- if response_content and response_content.text:
434
- log('info', f"假流式模式: API密钥 {api_key[:8]}... 成功获取响应",
435
- extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
436
-
437
- # 将完整响应分割成小块,模拟流式返回
438
- full_text = response_content.text
439
- chunk_size = max(len(full_text) // 10, 1) # 至少分成10块,每块至少1个字符
440
-
441
- for i in range(0, len(full_text), chunk_size):
442
- chunk = full_text[i:i+chunk_size]
443
- formatted_chunk = {
444
- "id": "chatcmpl-someid",
445
- "object": "chat.completion.chunk",
446
- "created": int(time.time()),
447
- "model": chat_request.model,
448
- "choices": [{"delta": {"role": "assistant", "content": chunk}, "index": 0, "finish_reason": None}]
449
- }
450
- # 将格式化的内容块放入队列
451
- await queue.put(f"data: {json.dumps(formatted_chunk)}\n\n")
452
-
453
- success = True
454
- # 更新API调用统计
455
- from app.utils.stats import update_api_call_stats
456
- update_api_call_stats(api_call_stats,api_key)
457
- break # 成功获取响应,退出循环
458
- else:
459
- log('warning', f"假流式模式: API密钥 {api_key[:8]}... 返回空响应",
460
- extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
461
- except Exception as e:
462
- error_detail = handle_gemini_error(e, api_key, key_manager)
463
- log('error', f"假流式模式: API密钥 {api_key[:8]}... 请求失败: {error_detail}",
464
- extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
465
- # 继续尝试下一个API密钥
466
-
467
- # 如果所有API密钥都尝试失败
468
- if not success:
469
- error_msg = "所有API密钥均请求失败,请稍后重试"
470
- log('error', error_msg,
471
- extra={'key': 'ALL', 'request_type': 'fake-stream', 'model': chat_request.model})
472
-
473
- # 添加错误信息到队列
474
- error_json = {
475
- "id": "chatcmpl-error",
476
- "object": "chat.completion.chunk",
477
- "created": int(time.time()),
478
- "model": chat_request.model,
479
- "choices": [{"delta": {"content": f"\n\n[错误: {error_msg}]"}, "index": 0, "finish_reason": "error"}]
480
- }
481
- await queue.put(f"data: {json.dumps(error_json)}\n\n")
482
-
483
- # 添加完成标记到队列
484
- await queue.put("data: [DONE]\n\n")
485
- # 添加None表示队列结束
486
- await queue.put(None)
487
-
488
- except asyncio.CancelledError:
489
- log('info', "API请求任务被取消",
490
- extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
491
- # 添加None表示队列结束
492
- await queue.put(None)
493
- raise
494
- except Exception as e:
495
- log('error', f"API请求任务出错: {str(e)}",
496
- extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
497
- # 添加错误信息到队列
498
- error_json = {
499
- "id": "chatcmpl-error",
500
- "object": "chat.completion.chunk",
501
- "created": int(time.time()),
502
- "model": chat_request.model,
503
- "choices": [{"delta": {"content": f"\n\n[错误: {str(e)}]"}, "index": 0, "finish_reason": "error"}]
504
- }
505
- await queue.put(f"data: {json.dumps(error_json)}\n\n")
506
- await queue.put("data: [DONE]\n\n")
507
- # 添加None表示队列结束
508
- await queue.put(None)
509
- raise
510
-
511
- # 启动保持连接的任务
512
- keep_alive_task = asyncio.create_task(keep_alive_sender())
513
- # 启动API请求任务
514
- api_request_task = asyncio.create_task(api_request_handler())
515
-
516
- # 从队列中获取数据并发送给客户端
517
- while True:
518
- chunk = await queue.get()
519
- if chunk is None: # None表示队列结束
520
- break
521
- yield chunk
522
-
523
- # 如果API请求任务已完成,取消保持连接任务
524
- if api_request_task.done() and not keep_alive_task.done():
525
- keep_alive_task.cancel()
526
-
527
- except asyncio.CancelledError:
528
- log('info', "流式响应生成器被取消",
529
- extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
530
- # 取消所有任务
531
- if keep_alive_task and not keep_alive_task.done():
532
- keep_alive_task.cancel()
533
- if api_request_task and not api_request_task.done():
534
- api_request_task.cancel()
535
- except Exception as e:
536
- log('error', f"流式响应生成器出错: {str(e)}",
537
- extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
538
- # 取消所有任务
539
- if keep_alive_task and not keep_alive_task.done():
540
- keep_alive_task.cancel()
541
- if api_request_task and not api_request_task.done():
542
- api_request_task.cancel()
543
- # 发送错误信息给客户端
544
- error_json = {
545
- "id": "chatcmpl-error",
546
- "object": "chat.completion.chunk",
547
- "created": int(time.time()),
548
- "model": chat_request.model,
549
- "choices": [{"delta": {"content": f"\n\n[错误: {str(e)}]"}, "index": 0, "finish_reason": "error"}]
550
- }
551
- yield f"data: {json.dumps(error_json)}\n\n"
552
- yield "data: [DONE]\n\n"
553
- finally:
554
- # 确保所有任务都被取消
555
- if keep_alive_task and not keep_alive_task.done():
556
- keep_alive_task.cancel()
557
- if api_request_task and not api_request_task.done():
558
- api_request_task.cancel()
559
- else:
560
- # 原始流式请求处理逻辑
561
- gemini_client = GeminiClient(current_api_key)
562
- success = False
563
-
564
- try:
565
- # 直接迭代生成器并发送响应块
566
- async for chunk in gemini_client.stream_chat(
567
- chat_request,
568
- contents,
569
- safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
570
- system_instruction
571
- ):
572
- # 空字符串跳过
573
- if not chunk:
574
- continue
575
-
576
- formatted_chunk = {
577
- "id": "chatcmpl-someid",
578
- "object": "chat.completion.chunk",
579
- "created": int(time.time()),
580
- "model": chat_request.model,
581
- "choices": [{"delta": {"role": "assistant", "content": chunk}, "index": 0, "finish_reason": None}]
582
- }
583
- success = True # 只要有一个chunk成功,就标记为成功
584
- yield f"data: {json.dumps(formatted_chunk)}\n\n"
585
-
586
- # 如果成功获取到响应,更新API调用统计
587
- if success:
588
- from app.utils.stats import update_api_call_stats
589
- update_api_call_stats(api_call_stats, current_api_key)
590
-
591
- yield "data: [DONE]\n\n"
592
-
593
- except asyncio.CancelledError:
594
- extra_log_cancel = {'key': current_api_key[:8], 'request_type': 'stream', 'model': chat_request.model, 'error_message': '客户端已断开连接'}
595
- log('info', "客户端连接已中断", extra=extra_log_cancel)
596
- except Exception as e:
597
- error_detail = handle_gemini_error(e, current_api_key, key_manager)
598
- log('error', f"流式请求失败: {error_detail}",
599
- extra={'key': current_api_key[:8], 'request_type': 'stream', 'model': chat_request.model})
600
- # 发送错误信息给客户端
601
- error_json = {
602
- "id": "chatcmpl-error",
603
- "object": "chat.completion.chunk",
604
- "created": int(time.time()),
605
- "model": chat_request.model,
606
- "choices": [{"delta": {"content": f"\n\n[错误: {error_detail}]"}, "index": 0, "finish_reason": "error"}]
607
- }
608
- yield f"data: {json.dumps(error_json)}\n\n"
609
- yield "data: [DONE]\n\n"
610
- # 重新抛出异常,这样process_request可以捕获它
611
- raise e
612
-
613
- return StreamingResponse(stream_response_generator(), media_type="text/event-stream")
614
-
615
- # Gemini完成请求函数
616
- async def run_gemini_completion(
617
- gemini_client,
618
- chat_request: ChatCompletionRequest,
619
- contents,
620
- system_instruction,
621
- request_type: str,
622
- current_api_key: str
623
- ):
624
- """运行Gemini非流式请求"""
625
- # 记录函数调用状态
626
- run_fn = run_gemini_completion
627
-
628
- try:
629
- # 创建一个不会被客户端断开影响的任务
630
- response_future = asyncio.create_task(
631
- asyncio.to_thread(
632
- gemini_client.complete_chat,
633
- chat_request,
634
- contents,
635
- safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
636
- system_instruction
637
- )
638
- )
639
-
640
- # 使用shield防止任务被外部取消
641
- response_content = await asyncio.shield(response_future)
642
-
643
- # 只在第一次调用时记录完成日志
644
- if not hasattr(run_fn, 'logged_complete'):
645
- log('info', "非流式请求成功完成", extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
646
- run_fn.logged_complete = True
647
- return response_content
648
- except asyncio.CancelledError:
649
- # 即使任务被取消,我们也确保正在进行的API请求能够完成
650
- if 'response_future' in locals() and not response_future.done():
651
- try:
652
- # 使用shield确保任务不被取消,并等待它完成
653
- response_content = await asyncio.shield(response_future)
654
- log('info', "API请求在客户端断开后完成", extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
655
- return response_content
656
- except Exception as e:
657
- extra_log_gemini_cancel = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message': f'API请求在客户端断开后失败: {str(e)}'}
658
- log('info', "API调用因客户端断开而失败", extra=extra_log_gemini_cancel)
659
- raise
660
-
661
- # 如果任务尚未开始或已经失败,记录日志
662
- extra_log_gemini_cancel = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message': '客户端断开导致API调用取消'}
663
- log('info', "API调用因客户端断开而取消", extra=extra_log_gemini_cancel)
664
- raise
665
-
666
- # 客户端断开检测函数
667
- async def check_client_disconnect(http_request: Request, current_api_key: str, request_type: str, model: str):
668
- """检查客户端是否断开连接"""
669
- while True:
670
- if await http_request.is_disconnected():
671
- extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': model, 'error_message': '检测到客户端断开连接'}
672
- log('info', "客户端连接已中断,等待API请求完成", extra=extra_log)
673
- return True
674
- await asyncio.sleep(0.5)
675
-
676
- # 客户端断开处理函数
677
- async def handle_client_disconnect(
678
- gemini_task: asyncio.Task,
679
- chat_request: ChatCompletionRequest,
680
- request_type: str,
681
- current_api_key: str,
682
- cache_key: str = None,
683
- client_ip: str = None
684
- ):
685
- try:
686
- # 等待API任务完成,使用shield防止它被取消
687
- response_content = await asyncio.shield(gemini_task)
688
-
689
- # 检查响应文本是否为空
690
- if response_content is None or response_content.text == "":
691
- if response_content is None:
692
- log('info', "客户端断开后API任务返回None",
693
- extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
694
- else:
695
- extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'status_code': 204}
696
- log('info', "客户端断开后Gemini API 返回空响应", extra=extra_log)
697
-
698
- # 删除任何现有缓存,因为响应为空
699
- if cache_key and cache_key in response_cache_manager.cache:
700
- log('info', f"因空响应,删除缓存: {cache_key[:8]}...",
701
- extra={'cache_operation': 'remove-on-empty', 'request_type': request_type})
702
- del response_cache_manager.cache[cache_key]
703
-
704
- # 返回错误响应而不是None
705
- return create_error_response(chat_request.model, "AI未返回任何内容,请重试")
706
-
707
- # 首先检查是否有现有缓存
708
- cached_response, cache_hit = response_cache_manager.get(cache_key)
709
- if cache_hit:
710
- log('info', f"客户端断开但找到已存在缓存,将删除: {cache_key[:8]}...",
711
- extra={'cache_operation': 'disconnect-found-cache', 'request_type': request_type})
712
-
713
- # 安全删除缓存
714
- if cache_key in response_cache_manager.cache:
715
- del response_cache_manager.cache[cache_key]
716
-
717
- # 不返回缓存,而是创建新响应并缓存
718
-
719
- # 创建新响应
720
- from app.utils.response import create_response
721
- response = create_response(chat_request, response_content)
722
-
723
- # 客户端已断开,此响应不会实际发送,可以考虑将其缓存以供后续使用
724
- # 如果确实需要缓存,则可以取消下面的注释
725
- # cache_response(response, cache_key, client_ip)
726
-
727
- return response
728
- except asyncio.CancelledError:
729
- # 对于取消异常,仍然尝试继续完成任务
730
- log('info', "客户端断开后任务被取消,但我们仍会尝试完成",
731
- extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
732
-
733
- # 检查任务是否已经完成
734
- if gemini_task.done() and not gemini_task.cancelled():
735
- try:
736
- response_content = gemini_task.result()
737
-
738
- # 首先检查是否有现有缓存
739
- cached_response, cache_hit = response_cache_manager.get(cache_key)
740
- if cache_hit:
741
- log('info', f"任务被取消但找到已存在缓存,将删除: {cache_key[:8]}...",
742
- extra={'cache_operation': 'cancel-found-cache', 'request_type': request_type})
743
-
744
- # 安全删除缓存
745
- if cache_key in response_cache_manager.cache:
746
- del response_cache_manager.cache[cache_key]
747
-
748
- # 创建但不缓存响应
749
- from app.utils.response import create_response
750
- response = create_response(chat_request, response_content)
751
- return response
752
- except Exception as inner_e:
753
- log('error', f"客户端断开后从已完成任务获取结果失败: {str(inner_e)}",
754
- extra={'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model})
755
-
756
- # 删除缓存,因为出现错误
757
- if cache_key and cache_key in response_cache_manager.cache:
758
- log('info', f"因任务获取结果失败,删除缓存: {cache_key[:8]}...",
759
- extra={'cache_operation': 'remove-on-error', 'request_type': request_type})
760
- del response_cache_manager.cache[cache_key]
761
-
762
- # 创建错误响应而不是返回None
763
- return create_error_response(chat_request.model, "请求处理过程中发生错误,请重试")
764
- except Exception as e:
765
- # 处理API任务异常
766
- error_msg = str(e)
767
- extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message': error_msg}
768
- log('error', f"客户端断开后处理API响应时出错: {error_msg}", extra=extra_log)
769
-
770
- # 删除缓存,因为出现错误
771
- if cache_key and cache_key in response_cache_manager.cache:
772
- log('info', f"因API响应错误,删除缓存: {cache_key[:8]}...",
773
- extra={'cache_operation': 'remove-on-error', 'request_type': request_type})
774
- del response_cache_manager.cache[cache_key]
775
-
776
- # 创建错误响应而不是返回None
777
- return create_error_response(chat_request.model, f"请求处理错误: {error_msg}")
778
-
779
- # 非流式请求处理函数
780
- async def process_nonstream_request(
781
- chat_request: ChatCompletionRequest,
782
- http_request: Request,
783
- request_type: str,
784
- contents,
785
- system_instruction,
786
- current_api_key: str,
787
- cache_key: str = None,
788
- client_ip: str = None
789
- ):
790
- """处理非流式API请求"""
791
- gemini_client = GeminiClient(current_api_key)
792
-
793
- # 创建任务
794
- gemini_task = asyncio.create_task(
795
- run_gemini_completion(
796
- gemini_client,
797
- chat_request,
798
- contents,
799
- system_instruction,
800
- request_type,
801
- current_api_key
802
- )
803
- )
804
-
805
- disconnect_task = asyncio.create_task(
806
- check_client_disconnect(
807
- http_request,
808
- current_api_key,
809
- request_type,
810
- chat_request.model
811
- )
812
- )
813
-
814
- try:
815
- # 先等待看是否API任务先完成,或者客户端先断开连接
816
- done, pending = await asyncio.wait(
817
- [gemini_task, disconnect_task],
818
- return_when=asyncio.FIRST_COMPLETED
819
- )
820
-
821
- if disconnect_task in done:
822
- # 客户端已断开连接,但我们仍继续完成API请求以便缓存结果
823
- return await handle_client_disconnect(
824
- gemini_task,
825
- chat_request,
826
- request_type,
827
- current_api_key,
828
- cache_key,
829
- client_ip
830
- )
831
- else:
832
- # API任务先完成,取消断开检测任务
833
- disconnect_task.cancel()
834
-
835
- # 获取响应内容
836
- response_content = await gemini_task
837
-
838
- # 检查缓存是否已经存在,如果存在则不再创建新缓存
839
- cached_response, cache_hit = response_cache_manager.get(cache_key)
840
- if cache_hit:
841
- log('info', f"缓存已存在,直接返回: {cache_key[:8]}...",
842
- extra={'cache_operation': 'use-existing', 'request_type': request_type})
843
-
844
- # 安全删除缓存
845
- if cache_key in response_cache_manager.cache:
846
- del response_cache_manager.cache[cache_key]
847
- log('info', f"缓存使用后已删除: {cache_key[:8]}...",
848
- extra={'cache_operation': 'used-and-removed', 'request_type': request_type})
849
-
850
- return cached_response
851
-
852
- # 创建响应
853
- from app.utils.response import create_response
854
- response = create_response(chat_request, response_content)
855
-
856
- # 缓存响应
857
- cache_response(response, cache_key, client_ip, response_cache_manager, update_api_call_stats, api_key=current_api_key)
858
-
859
- # 立即删除缓存,确保只能使用一次
860
- if cache_key and cache_key in response_cache_manager.cache:
861
- del response_cache_manager.cache[cache_key]
862
- log('info', f"缓存创建后立即删除: {cache_key[:8]}...",
863
- extra={'cache_operation': 'store-and-remove', 'request_type': request_type})
864
-
865
- # 返回响应
866
- return response
867
-
868
- except asyncio.CancelledError:
869
- extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model, 'error_message':"请求被取消"}
870
- log('info', "请求取消", extra=extra_log)
871
-
872
- # 在请求被取消时先检查缓存中是否已有结果
873
- cached_response, cache_hit = response_cache_manager.get(cache_key)
874
- if cache_hit:
875
- log('info', f"请求取消但找到有效缓存,使用缓存响应: {cache_key[:8]}...",
876
- extra={'cache_operation': 'use-cache-on-cancel', 'request_type': request_type})
877
-
878
- # 安全删除缓存
879
- if cache_key in response_cache_manager.cache:
880
- del response_cache_manager.cache[cache_key]
881
- log('info', f"缓存使用后已删除: {cache_key[:8]}...",
882
- extra={'cache_operation': 'used-and-removed', 'request_type': request_type})
883
-
884
- return cached_response
885
-
886
- # 尝试完成正在进行的API请求
887
- if not gemini_task.done():
888
- log('info', "请求取消但API请求尚未完成,继续等待...",
889
- extra={'key': current_api_key[:8], 'request_type': request_type})
890
-
891
- # 使用shield确保任务不会被取消
892
- response_content = await asyncio.shield(gemini_task)
893
-
894
- # 创建响应
895
- from app.utils.response import create_response
896
- response = create_response(chat_request, response_content)
897
-
898
- # 不缓存这个响应,直接返回
899
- return response
900
- else:
901
- # 任务已完成,获取结果
902
- response_content = gemini_task.result()
903
-
904
- # 创建响应
905
- from app.utils.response import create_response
906
- response = create_response(chat_request, response_content)
907
-
908
- # 不缓存这个响应,直接返回
909
- return response
910
-
911
- except HTTPException as e:
912
- if e.status_code == status.HTTP_408_REQUEST_TIMEOUT:
913
- extra_log = {'key': current_api_key[:8], 'request_type': request_type, 'model': chat_request.model,
914
- 'status_code': 408, 'error_message': '客户端连接中断'}
915
- log('error', "客户端连接中断,终止后续重试", extra=extra_log)
916
- raise
917
- else:
918
- raise
 
1
  from fastapi import APIRouter, HTTPException, Request, Depends, status
2
  from fastapi.responses import JSONResponse, StreamingResponse
3
  from app.models import ChatCompletionRequest, ChatCompletionResponse, ErrorResponse, ModelList
4
+ from app.services import GeminiClient
5
  from app.utils import (
 
 
 
 
 
 
6
  generate_cache_key,
7
  cache_response,
8
  create_chat_response,
9
+ create_error_response
 
 
10
  )
 
 
 
 
 
 
11
  from app.config.settings import (
12
  api_call_stats,
13
  BLOCKED_MODELS
14
  )
15
+ import asyncio
16
+ import time
17
+ import logging
18
+
19
+ # 导入拆分后的模块
20
+ from .auth import verify_password
21
+ from .logging_utils import log
22
+ from .request_handlers import process_request
23
 
24
  # 创建路由器
25
  router = APIRouter()
 
68
  MAX_REQUESTS_PER_MINUTE = _max_requests_per_minute
69
  MAX_REQUESTS_PER_DAY_PER_IP = _max_requests_per_day_per_ip
70
 
71
+ # 自定义密码验证依赖
72
+ async def custom_verify_password(request: Request):
73
+ await verify_password(request, PASSWORD)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  # API路由
76
  @router.get("/v1/models", response_model=ModelList)
 
80
  return ModelList(data=[{"id": model, "object": "model", "created": 1678888888, "owned_by": "organization-owner"} for model in filtered_models])
81
 
82
  @router.post("/v1/chat/completions", response_model=ChatCompletionResponse)
83
+ async def chat_completions(request: ChatCompletionRequest, http_request: Request, _: None = Depends(custom_verify_password)):
84
  # 获取客户端IP
85
  client_ip = http_request.client.host if http_request.client else "unknown"
86
 
87
  # 流式请求直接处理,不使用缓存
88
  if request.stream:
89
+ return await process_request(
90
+ request,
91
+ http_request,
92
+ "stream",
93
+ key_manager,
94
+ response_cache_manager,
95
+ active_requests_manager,
96
+ safety_settings,
97
+ safety_settings_g2,
98
+ api_call_stats,
99
+ FAKE_STREAMING,
100
+ FAKE_STREAMING_INTERVAL,
101
+ MAX_REQUESTS_PER_MINUTE,
102
+ MAX_REQUESTS_PER_DAY_PER_IP
103
+ )
104
 
105
  # 生成完整缓存键 - 用于精确匹配
106
  cache_key = generate_cache_key(request)
 
183
 
184
  # 创建请求处理任务
185
  process_task = asyncio.create_task(
186
+ process_request(
187
+ request,
188
+ http_request,
189
+ "non-stream",
190
+ key_manager,
191
+ response_cache_manager,
192
+ active_requests_manager,
193
+ safety_settings,
194
+ safety_settings_g2,
195
+ api_call_stats,
196
+ FAKE_STREAMING,
197
+ FAKE_STREAMING_INTERVAL,
198
+ MAX_REQUESTS_PER_MINUTE,
199
+ MAX_REQUESTS_PER_DAY_PER_IP,
200
+ cache_key,
201
+ client_ip
202
+ )
203
  )
204
 
205
  # 将任务添加到活跃请求池
 
221
  return cached_response
222
 
223
  # 重新抛出异常
224
+ raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/api/stream_handlers.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import time
4
+ import random
5
+ from fastapi import Request
6
+ from fastapi.responses import StreamingResponse
7
+ from app.models import ChatCompletionRequest
8
+ from app.services import GeminiClient
9
+ from app.utils import handle_gemini_error, update_api_call_stats
10
+ from .logging_utils import log
11
+
12
+ # 流式请求处理函数
13
+ async def process_stream_request(
14
+ chat_request: ChatCompletionRequest,
15
+ http_request: Request,
16
+ contents,
17
+ system_instruction,
18
+ current_api_key: str,
19
+ key_manager,
20
+ safety_settings,
21
+ safety_settings_g2,
22
+ api_call_stats,
23
+ FAKE_STREAMING,
24
+ FAKE_STREAMING_INTERVAL
25
+ ) -> StreamingResponse:
26
+ """处理流式API请求"""
27
+
28
+ # 创建一个直接流式响应的生成器函数
29
+ async def stream_response_generator():
30
+ # 如果启用了假流式模式,使用随机遍历API密钥的方式
31
+ if FAKE_STREAMING:
32
+ # 创建一个队列用于在任务之间传递数据
33
+ queue = asyncio.Queue()
34
+ keep_alive_task = None
35
+ api_request_task = None
36
+
37
+ try:
38
+ # 创建一个保持连接的任务,持续发送换行符
39
+ async def keep_alive_sender():
40
+ try:
41
+ # 创建一个Gemini客户端用于发送保持连接的换行符
42
+ keep_alive_client = GeminiClient(current_api_key)
43
+
44
+ # 启动保持连接的生成器
45
+ keep_alive_generator = keep_alive_client.stream_chat(
46
+ chat_request,
47
+ contents,
48
+ safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
49
+ system_instruction
50
+ )
51
+
52
+ # 持续发送换行符直到被取消
53
+ async for line in keep_alive_generator:
54
+ if line == "\n":
55
+ # 将换行符格式化为SSE格式
56
+ formatted_chunk = {
57
+ "id": "chatcmpl-keepalive",
58
+ "object": "chat.completion.chunk",
59
+ "created": int(time.time()),
60
+ "model": chat_request.model,
61
+ "choices": [{"delta": {"content": ""}, "index": 0, "finish_reason": None}]
62
+ }
63
+ # 将格式化的换行符放入队列
64
+ await queue.put(f"data: {json.dumps(formatted_chunk)}\n\n")
65
+ except asyncio.CancelledError:
66
+ # log('info', "保持连接任务被取消",
67
+ # extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
68
+ raise
69
+ except Exception as e:
70
+ log('error', f"保持连接任务出错: {str(e)}",
71
+ extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
72
+ # 将错误放入队列
73
+ await queue.put(None)
74
+ raise
75
+
76
+ # 创建一个任务来随机遍历API密钥并请求内容
77
+ async def api_request_handler():
78
+ success = False
79
+ try:
80
+ # 重置已尝试的密钥
81
+ key_manager.reset_tried_keys_for_request()
82
+
83
+ # 获取可用的API密钥
84
+ available_keys = key_manager.api_keys.copy()
85
+ random.shuffle(available_keys) # 随机打乱密钥顺序
86
+
87
+ # 遍历所有API密钥尝试获取响应
88
+ for attempt, api_key in enumerate(available_keys, 1):
89
+ try:
90
+ log('info', f"假流式模式: 尝试API密钥 {api_key[:8]}... ({attempt}/{len(available_keys)})",
91
+ extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
92
+
93
+ # 创建一个新的客户端使用当前API密钥
94
+ non_stream_client = GeminiClient(api_key)
95
+
96
+ # 使用非流式方式请求内容
97
+ response_content = await asyncio.to_thread(
98
+ non_stream_client.complete_chat,
99
+ chat_request,
100
+ contents,
101
+ safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
102
+ system_instruction
103
+ )
104
+
105
+ # 检查响应是否有效
106
+ if response_content and response_content.text:
107
+ log('info', f"假流式模式: API密钥 {api_key[:8]}... 成功获取响应",
108
+ extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
109
+
110
+ # 将完整响应分割成小块,模拟流式返回
111
+ full_text = response_content.text
112
+ chunk_size = max(len(full_text) // 10, 1) # 至少分成10块,每块至少1个字符
113
+
114
+ for i in range(0, len(full_text), chunk_size):
115
+ chunk = full_text[i:i+chunk_size]
116
+ formatted_chunk = {
117
+ "id": "chatcmpl-someid",
118
+ "object": "chat.completion.chunk",
119
+ "created": int(time.time()),
120
+ "model": chat_request.model,
121
+ "choices": [{"delta": {"role": "assistant", "content": chunk}, "index": 0, "finish_reason": None}]
122
+ }
123
+ # 将格式化的内容块放入队列
124
+ await queue.put(f"data: {json.dumps(formatted_chunk)}\n\n")
125
+
126
+ success = True
127
+ # 更新API调用统计
128
+ update_api_call_stats(api_call_stats, api_key)
129
+ break # 成功获取响应,退出循环
130
+ else:
131
+ log('warning', f"假流式模式: API密钥 {api_key[:8]}... 返回空响应",
132
+ extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
133
+ except Exception as e:
134
+ error_detail = handle_gemini_error(e, api_key, key_manager)
135
+ log('error', f"假流式模式: API密钥 {api_key[:8]}... 请求失败: {error_detail}",
136
+ extra={'key': api_key[:8], 'request_type': 'fake-stream', 'model': chat_request.model})
137
+ # 继续尝试下一个API密钥
138
+
139
+ # 如果所有API密钥都尝试失败
140
+ if not success:
141
+ error_msg = "所有API密钥均请求失败,请稍后重试"
142
+ log('error', error_msg,
143
+ extra={'key': 'ALL', 'request_type': 'fake-stream', 'model': chat_request.model})
144
+
145
+ # 添加错误信息到队列
146
+ error_json = {
147
+ "id": "chatcmpl-error",
148
+ "object": "chat.completion.chunk",
149
+ "created": int(time.time()),
150
+ "model": chat_request.model,
151
+ "choices": [{"delta": {"content": f"\n\n[错误: {error_msg}]"}, "index": 0, "finish_reason": "error"}]
152
+ }
153
+ await queue.put(f"data: {json.dumps(error_json)}\n\n")
154
+
155
+ # 添加完成标记到队列
156
+ await queue.put("data: [DONE]\n\n")
157
+ # 添加None表示队列结束
158
+ await queue.put(None)
159
+
160
+ except asyncio.CancelledError:
161
+ log('info', "API请求任务被取消",
162
+ extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
163
+ # 添加None表示队列结束
164
+ await queue.put(None)
165
+ raise
166
+ except Exception as e:
167
+ log('error', f"API请求任务出错: {str(e)}",
168
+ extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
169
+ # 添加错误信息到队列
170
+ error_json = {
171
+ "id": "chatcmpl-error",
172
+ "object": "chat.completion.chunk",
173
+ "created": int(time.time()),
174
+ "model": chat_request.model,
175
+ "choices": [{"delta": {"content": f"\n\n[错误: {str(e)}]"}, "index": 0, "finish_reason": "error"}]
176
+ }
177
+ await queue.put(f"data: {json.dumps(error_json)}\n\n")
178
+ await queue.put("data: [DONE]\n\n")
179
+ # 添加None表示队列结束
180
+ await queue.put(None)
181
+ raise
182
+
183
+ # 启动保持连接的任务
184
+ keep_alive_task = asyncio.create_task(keep_alive_sender())
185
+ # 启动API请求任务
186
+ api_request_task = asyncio.create_task(api_request_handler())
187
+
188
+ # 从队列中获取数据并发送给客户端
189
+ while True:
190
+ chunk = await queue.get()
191
+ if chunk is None: # None表示队列结束
192
+ break
193
+ yield chunk
194
+
195
+ # 如果API请求任务已完成,取消保持连接任务
196
+ if api_request_task.done() and not keep_alive_task.done():
197
+ keep_alive_task.cancel()
198
+
199
+ except asyncio.CancelledError:
200
+ log('info', "流式响应生成器被取消",
201
+ extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
202
+ # 取消所有任务
203
+ if keep_alive_task and not keep_alive_task.done():
204
+ keep_alive_task.cancel()
205
+ if api_request_task and not api_request_task.done():
206
+ api_request_task.cancel()
207
+ except Exception as e:
208
+ log('error', f"流式响应生成器出错: {str(e)}",
209
+ extra={'key': current_api_key[:8], 'request_type': 'fake-stream'})
210
+ # 取消所有任务
211
+ if keep_alive_task and not keep_alive_task.done():
212
+ keep_alive_task.cancel()
213
+ if api_request_task and not api_request_task.done():
214
+ api_request_task.cancel()
215
+ # 发送错误信息给客户端
216
+ error_json = {
217
+ "id": "chatcmpl-error",
218
+ "object": "chat.completion.chunk",
219
+ "created": int(time.time()),
220
+ "model": chat_request.model,
221
+ "choices": [{"delta": {"content": f"\n\n[错误: {str(e)}]"}, "index": 0, "finish_reason": "error"}]
222
+ }
223
+ yield f"data: {json.dumps(error_json)}\n\n"
224
+ yield "data: [DONE]\n\n"
225
+ finally:
226
+ # 确保所有任务都被取消
227
+ if keep_alive_task and not keep_alive_task.done():
228
+ keep_alive_task.cancel()
229
+ if api_request_task and not api_request_task.done():
230
+ api_request_task.cancel()
231
+ else:
232
+ # 原始流式请求处理逻辑
233
+ gemini_client = GeminiClient(current_api_key)
234
+ success = False
235
+
236
+ try:
237
+ # 直接迭代生成器并发送响应块
238
+ async for chunk in gemini_client.stream_chat(
239
+ chat_request,
240
+ contents,
241
+ safety_settings_g2 if 'gemini-2.0-flash-exp' in chat_request.model else safety_settings,
242
+ system_instruction
243
+ ):
244
+ # 空字符串跳过
245
+ if not chunk:
246
+ continue
247
+
248
+ formatted_chunk = {
249
+ "id": "chatcmpl-someid",
250
+ "object": "chat.completion.chunk",
251
+ "created": int(time.time()),
252
+ "model": chat_request.model,
253
+ "choices": [{"delta": {"role": "assistant", "content": chunk}, "index": 0, "finish_reason": None}]
254
+ }
255
+ success = True # 只要有一个chunk成功,就标记为成功
256
+ yield f"data: {json.dumps(formatted_chunk)}\n\n"
257
+
258
+ # 如果成功获取到响应,更新API调用统计
259
+ if success:
260
+ update_api_call_stats(api_call_stats, current_api_key)
261
+
262
+ yield "data: [DONE]\n\n"
263
+
264
+ except asyncio.CancelledError:
265
+ extra_log_cancel = {'key': current_api_key[:8], 'request_type': 'stream', 'model': chat_request.model, 'error_message': '客户端已断开连接'}
266
+ log('info', "客户端连接已中断", extra=extra_log_cancel)
267
+ except Exception as e:
268
+ error_detail = handle_gemini_error(e, current_api_key, key_manager)
269
+ log('error', f"流式请求失败: {error_detail}",
270
+ extra={'key': current_api_key[:8], 'request_type': 'stream', 'model': chat_request.model})
271
+ # 发送错误信息给客户端
272
+ error_json = {
273
+ "id": "chatcmpl-error",
274
+ "object": "chat.completion.chunk",
275
+ "created": int(time.time()),
276
+ "model": chat_request.model,
277
+ "choices": [{"delta": {"content": f"\n\n[错误: {error_detail}]"}, "index": 0, "finish_reason": "error"}]
278
+ }
279
+ yield f"data: {json.dumps(error_json)}\n\n"
280
+ yield "data: [DONE]\n\n"
281
+ # 重新抛出异常,这样process_request可以捕获它
282
+ raise e
283
+
284
+ return StreamingResponse(stream_response_generator(), media_type="text/event-stream")
app/config/safety.py CHANGED
@@ -1,49 +1,49 @@
1
- # 安全设置配置
2
-
3
- # Gemini 1.0 安全设置
4
- SAFETY_SETTINGS = [
5
- {
6
- "category": "HARM_CATEGORY_HARASSMENT",
7
- "threshold": "BLOCK_NONE"
8
- },
9
- {
10
- "category": "HARM_CATEGORY_HATE_SPEECH",
11
- "threshold": "BLOCK_NONE"
12
- },
13
- {
14
- "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
15
- "threshold": "BLOCK_NONE"
16
- },
17
- {
18
- "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
19
- "threshold": "BLOCK_NONE"
20
- },
21
- {
22
- "category": 'HARM_CATEGORY_CIVIC_INTEGRITY',
23
- "threshold": 'BLOCK_NONE'
24
- }
25
- ]
26
-
27
- # Gemini 2.0 安全设置
28
- SAFETY_SETTINGS_G2 = [
29
- {
30
- "category": "HARM_CATEGORY_HARASSMENT",
31
- "threshold": "BLOCK_NONE"
32
- },
33
- {
34
- "category": "HARM_CATEGORY_HATE_SPEECH",
35
- "threshold": "BLOCK_NONE"
36
- },
37
- {
38
- "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
39
- "threshold": "BLOCK_NONE"
40
- },
41
- {
42
- "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
43
- "threshold": "BLOCK_NONE"
44
- },
45
- {
46
- "category": 'HARM_CATEGORY_CIVIC_INTEGRITY',
47
- "threshold": 'BLOCK_NONE'
48
- }
49
  ]
 
1
+ # 安全设置配置
2
+
3
+ # Gemini 1.0 安全设置
4
+ SAFETY_SETTINGS = [
5
+ {
6
+ "category": "HARM_CATEGORY_HARASSMENT",
7
+ "threshold": "BLOCK_NONE"
8
+ },
9
+ {
10
+ "category": "HARM_CATEGORY_HATE_SPEECH",
11
+ "threshold": "BLOCK_NONE"
12
+ },
13
+ {
14
+ "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
15
+ "threshold": "BLOCK_NONE"
16
+ },
17
+ {
18
+ "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
19
+ "threshold": "BLOCK_NONE"
20
+ },
21
+ {
22
+ "category": 'HARM_CATEGORY_CIVIC_INTEGRITY',
23
+ "threshold": 'BLOCK_NONE'
24
+ }
25
+ ]
26
+
27
+ # Gemini 2.0 安全设置
28
+ SAFETY_SETTINGS_G2 = [
29
+ {
30
+ "category": "HARM_CATEGORY_HARASSMENT",
31
+ "threshold": "OFF"
32
+ },
33
+ {
34
+ "category": "HARM_CATEGORY_HATE_SPEECH",
35
+ "threshold": "OFF"
36
+ },
37
+ {
38
+ "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
39
+ "threshold": "OFF"
40
+ },
41
+ {
42
+ "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
43
+ "threshold": "OFF"
44
+ },
45
+ {
46
+ "category": 'HARM_CATEGORY_CIVIC_INTEGRITY',
47
+ "threshold": 'OFF'
48
+ }
49
  ]
app/config/settings.py CHANGED
@@ -8,6 +8,10 @@ BASE_DIR = pathlib.Path(__file__).parent.parent
8
 
9
  # 流式响应配置
10
  FAKE_STREAMING = os.environ.get("FAKE_STREAMING", "true").lower() in ["true", "1", "yes"]
 
 
 
 
11
  # 假流式请求的空内容返回间隔(秒)
12
  FAKE_STREAMING_INTERVAL = float(os.environ.get("FAKE_STREAMING_INTERVAL", "1"))
13
 
@@ -67,4 +71,4 @@ DEFAULT_BLOCKED_MODELS = []
67
  # 环境变量格式应为逗号分隔的模型名称字符串
68
  BLOCKED_MODELS = os.environ.get("BLOCKED_MODELS", ",".join(DEFAULT_BLOCKED_MODELS))
69
  # 将字符串转换为列表
70
- BLOCKED_MODELS = [model.strip() for model in BLOCKED_MODELS.split(",") if model.strip()]
 
8
 
9
  # 流式响应配置
10
  FAKE_STREAMING = os.environ.get("FAKE_STREAMING", "true").lower() in ["true", "1", "yes"]
11
+
12
+ #随机字符串
13
+ RANDOM_STRING = os.environ.get("RANDOM_STRING", "true").lower() in ["true", "1", "yes"]
14
+ RANDOM_STRING_LENGTH = int(os.environ.get("RANDOM_STRING_LENGTH", "25"))
15
  # 假流式请求的空内容返回间隔(秒)
16
  FAKE_STREAMING_INTERVAL = float(os.environ.get("FAKE_STREAMING_INTERVAL", "1"))
17
 
 
71
  # 环境变量格式应为逗号分隔的模型名称字符串
72
  BLOCKED_MODELS = os.environ.get("BLOCKED_MODELS", ",".join(DEFAULT_BLOCKED_MODELS))
73
  # 将字符串转换为列表
74
+ BLOCKED_MODELS = [model.strip() for model in BLOCKED_MODELS.split(",") if model.strip()]
app/main.py CHANGED
@@ -1,292 +1,292 @@
1
- from fastapi import FastAPI, HTTPException, Request, status
2
- from fastapi.responses import JSONResponse, HTMLResponse
3
- from fastapi.staticfiles import StaticFiles
4
- from fastapi.templating import Jinja2Templates
5
- from app.models import ErrorResponse
6
- from app.services import GeminiClient
7
- from app.utils import (
8
- APIKeyManager,
9
- test_api_key,
10
- format_log_message,
11
- log_manager,
12
- ResponseCacheManager,
13
- ActiveRequestsManager,
14
- clean_expired_stats,
15
- update_api_call_stats,
16
- check_version,
17
- schedule_cache_cleanup,
18
- handle_exception,
19
- log
20
- )
21
- from app.api import router, init_router, dashboard_router, init_dashboard_router
22
- from app.config.settings import (
23
- FAKE_STREAMING,
24
- FAKE_STREAMING_INTERVAL,
25
- PASSWORD,
26
- MAX_REQUESTS_PER_MINUTE,
27
- MAX_REQUESTS_PER_DAY_PER_IP,
28
- RETRY_DELAY,
29
- MAX_RETRY_DELAY,
30
- CACHE_EXPIRY_TIME,
31
- MAX_CACHE_ENTRIES,
32
- REMOVE_CACHE_AFTER_USE,
33
- REQUEST_HISTORY_EXPIRY_TIME,
34
- ENABLE_RECONNECT_DETECTION,
35
- api_call_stats,
36
- client_request_history,
37
- local_version,
38
- remote_version,
39
- has_update,
40
- API_KEY_DAILY_LIMIT
41
- )
42
- from app.config.safety import SAFETY_SETTINGS, SAFETY_SETTINGS_G2
43
- import os
44
- import json
45
- import asyncio
46
- import time
47
- import logging
48
- from datetime import datetime, timedelta
49
- import sys
50
- import pathlib
51
-
52
- # 设置模板目录
53
- BASE_DIR = pathlib.Path(__file__).parent
54
- templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
55
-
56
- app = FastAPI()
57
-
58
- # --------------- 全局实例 ---------------
59
-
60
- # 初始化API密钥管理器
61
- key_manager = APIKeyManager()
62
- current_api_key = key_manager.get_available_key()
63
-
64
- # 创建全局缓存字典,将作为缓存管理器的内部存储
65
- response_cache = {}
66
-
67
- # 初始化缓存管理器,使用全局字典作为存储
68
- response_cache_manager = ResponseCacheManager(
69
- expiry_time=CACHE_EXPIRY_TIME,
70
- max_entries=MAX_CACHE_ENTRIES,
71
- remove_after_use=REMOVE_CACHE_AFTER_USE,
72
- cache_dict=response_cache
73
- )
74
-
75
- # 活跃请求池 - 将作为活跃请求管理器的内部存储
76
- active_requests_pool = {}
77
-
78
- # 初始化活跃请求管理器
79
- active_requests_manager = ActiveRequestsManager(requests_pool=active_requests_pool)
80
-
81
- # --------------- 工具函数 ---------------
82
-
83
- def switch_api_key():
84
- global current_api_key
85
- key = key_manager.get_available_key() # get_available_key 会处理栈的逻辑
86
- if key:
87
- current_api_key = key
88
- log('info', f"API key 替换为 → {current_api_key[:8]}...", extra={'key': current_api_key[:8], 'request_type': 'switch_key'})
89
- else:
90
- log('error', "API key 替换失败,所有API key都已尝试,请重新配置或稍后重试", extra={'key': 'N/A', 'request_type': 'switch_key', 'status_code': 'N/A'})
91
-
92
- async def check_keys():
93
- available_keys = []
94
- for key in key_manager.api_keys:
95
- is_valid = await test_api_key(key)
96
- status_msg = "有效" if is_valid else "无效"
97
- log('info', f"API Key {key[:10]}... {status_msg}.")
98
- if is_valid:
99
- available_keys.append(key)
100
- if not available_keys:
101
- log('error', "没有可用的 API 密钥!", extra={'key': 'N/A', 'request_type': 'startup', 'status_code': 'N/A'})
102
- return available_keys
103
-
104
- # 设置全局异常处理
105
- sys.excepthook = handle_exception
106
-
107
- # --------------- 事件处理 ---------------
108
-
109
- @app.on_event("startup")
110
- async def startup_event():
111
- log('info', "Starting Gemini API proxy...")
112
-
113
- # 启动缓存清理定时任务
114
- schedule_cache_cleanup(response_cache_manager, active_requests_manager)
115
-
116
- # 检查版本
117
- await check_version()
118
-
119
- available_keys = await check_keys()
120
- if available_keys:
121
- key_manager.api_keys = available_keys
122
- key_manager._reset_key_stack() # 启动时也确保创建随机栈
123
- key_manager.show_all_keys()
124
- log('info', f"可用 API 密钥数量:{len(key_manager.api_keys)}")
125
- log('info', f"最大重试次数设置为:{len(key_manager.api_keys)}")
126
- if key_manager.api_keys:
127
- all_models = await GeminiClient.list_available_models(key_manager.api_keys[0])
128
- GeminiClient.AVAILABLE_MODELS = [model.replace(
129
- "models/", "") for model in all_models]
130
- log('info', "Available models loaded.")
131
-
132
- # 初始化路由器
133
- init_router(
134
- key_manager,
135
- response_cache_manager,
136
- active_requests_manager,
137
- SAFETY_SETTINGS,
138
- SAFETY_SETTINGS_G2,
139
- current_api_key,
140
- FAKE_STREAMING,
141
- FAKE_STREAMING_INTERVAL,
142
- PASSWORD,
143
- MAX_REQUESTS_PER_MINUTE,
144
- MAX_REQUESTS_PER_DAY_PER_IP
145
- )
146
-
147
- # 初始化仪表盘路由器
148
- init_dashboard_router(
149
- key_manager,
150
- response_cache_manager,
151
- active_requests_manager
152
- )
153
-
154
- # --------------- 异常处理 ---------------
155
-
156
- @app.exception_handler(Exception)
157
- async def global_exception_handler(request: Request, exc: Exception):
158
- from app.utils import translate_error
159
- error_message = translate_error(str(exc))
160
- extra_log_unhandled_exception = {'status_code': 500, 'error_message': error_message}
161
- log('error', f"Unhandled exception: {error_message}", extra=extra_log_unhandled_exception)
162
- return JSONResponse(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, content=ErrorResponse(message=str(exc), type="internal_error").dict())
163
-
164
- # --------------- 路由 ---------------
165
-
166
- # 包含API路由
167
- app.include_router(router)
168
- app.include_router(dashboard_router)
169
-
170
- @app.get("/", response_class=HTMLResponse)
171
- async def root(request: Request):
172
- # 先清理过期数据,确保统计数据是最新的
173
- clean_expired_stats(api_call_stats)
174
- response_cache_manager.clean_expired() # 使用管理器清理缓存
175
- active_requests_manager.clean_completed() # 使用管理器清理活跃请求
176
- # 获取当前统计数据
177
- now = datetime.now()
178
-
179
- # 计算过去24小时的调用总数
180
- last_24h_calls = sum(api_call_stats['last_24h']['total'].values())
181
-
182
- # 计算过去一小时内的调用总数
183
- one_hour_ago = now - timedelta(hours=1)
184
- hourly_calls = 0
185
- for hour_key, count in api_call_stats['hourly']['total'].items():
186
- try:
187
- hour_time = datetime.strptime(hour_key, '%Y-%m-%d %H:00')
188
- if hour_time >= one_hour_ago:
189
- hourly_calls += count
190
- except ValueError:
191
- continue
192
-
193
- # 计算过去一分钟内的调用总数
194
- one_minute_ago = now - timedelta(minutes=1)
195
- minute_calls = 0
196
- for minute_key, count in api_call_stats['minute']['total'].items():
197
- try:
198
- minute_time = datetime.strptime(minute_key, '%Y-%m-%d %H:%M')
199
- if minute_time >= one_minute_ago:
200
- minute_calls += count
201
- except ValueError:
202
- continue
203
-
204
- # 获取最近的日志
205
- recent_logs = log_manager.get_recent_logs(50) # 获取最近50条日志
206
-
207
- # 获取缓存统计
208
- total_cache = len(response_cache_manager.cache)
209
- valid_cache = sum(1 for _, data in response_cache_manager.cache.items()
210
- if time.time() < data.get('expiry_time', 0))
211
- cache_by_model = {}
212
-
213
- # 分析缓存数据
214
- for _, cache_data in response_cache_manager.cache.items():
215
- if time.time() < cache_data.get('expiry_time', 0):
216
- # 按模型统计缓存
217
- model = cache_data.get('response', {}).model
218
- if model:
219
- if model in cache_by_model:
220
- cache_by_model[model] += 1
221
- else:
222
- cache_by_model[model] = 1
223
-
224
- # 获取请求历史统计
225
- history_count = len(client_request_history)
226
-
227
- # 获取活跃请求统计
228
- active_count = len(active_requests_manager.active_requests)
229
- active_done = sum(1 for task in active_requests_manager.active_requests.values() if task.done())
230
- active_pending = active_count - active_done
231
-
232
- # 获取API密钥使用统计
233
- api_key_stats = []
234
- for api_key in key_manager.api_keys:
235
- # 获取API密钥前8位作为标识
236
- api_key_id = api_key[:8]
237
-
238
- # 计算24小时内的调用次数
239
- calls_24h = 0
240
- if 'by_endpoint' in api_call_stats['last_24h'] and api_key in api_call_stats['last_24h']['by_endpoint']:
241
- calls_24h = sum(api_call_stats['last_24h']['by_endpoint'][api_key].values())
242
-
243
- # 计算使用百分比
244
- usage_percent = (calls_24h / API_KEY_DAILY_LIMIT) * 100 if API_KEY_DAILY_LIMIT > 0 else 0
245
-
246
- # 添加到结果列表
247
- api_key_stats.append({
248
- 'api_key': api_key_id,
249
- 'calls_24h': calls_24h,
250
- 'limit': API_KEY_DAILY_LIMIT,
251
- 'usage_percent': round(usage_percent, 2)
252
- })
253
-
254
- # 按使用百分比降序排序
255
- api_key_stats.sort(key=lambda x: x['usage_percent'], reverse=True)
256
-
257
- # 准备模板上下文
258
- context = {
259
- "key_count": len(key_manager.api_keys),
260
- "model_count": len(GeminiClient.AVAILABLE_MODELS),
261
- "retry_count": len(key_manager.api_keys),
262
- "last_24h_calls": last_24h_calls,
263
- "hourly_calls": hourly_calls,
264
- "minute_calls": minute_calls,
265
- "max_requests_per_minute": MAX_REQUESTS_PER_MINUTE,
266
- "max_requests_per_day_per_ip": MAX_REQUESTS_PER_DAY_PER_IP,
267
- "current_time": datetime.now().strftime('%H:%M:%S'),
268
- "logs": recent_logs,
269
- # 添加版本信息
270
- "local_version": local_version,
271
- "remote_version": remote_version,
272
- "has_update": has_update,
273
- # 添加缓存信息
274
- "cache_entries": total_cache,
275
- "valid_cache": valid_cache,
276
- "expired_cache": total_cache - valid_cache,
277
- "cache_expiry_time": CACHE_EXPIRY_TIME,
278
- "max_cache_entries": MAX_CACHE_ENTRIES,
279
- "cache_by_model": cache_by_model,
280
- "request_history_count": history_count,
281
- "enable_reconnect_detection": ENABLE_RECONNECT_DETECTION,
282
- "remove_cache_after_use": REMOVE_CACHE_AFTER_USE,
283
- # 添加活跃请求池信息
284
- "active_count": active_count,
285
- "active_done": active_done,
286
- "active_pending": active_pending,
287
- # 添加API密钥统计
288
- "api_key_stats": api_key_stats,
289
- }
290
-
291
- # 使用Jinja2模板引擎正确渲染HTML
292
  return templates.TemplateResponse("index.html", {"request": request, **context})
 
1
+ from fastapi import FastAPI, HTTPException, Request, status
2
+ from fastapi.responses import JSONResponse, HTMLResponse
3
+ from fastapi.staticfiles import StaticFiles
4
+ from fastapi.templating import Jinja2Templates
5
+ from app.models import ErrorResponse
6
+ from app.services import GeminiClient
7
+ from app.utils import (
8
+ APIKeyManager,
9
+ test_api_key,
10
+ format_log_message,
11
+ log_manager,
12
+ ResponseCacheManager,
13
+ ActiveRequestsManager,
14
+ clean_expired_stats,
15
+ update_api_call_stats,
16
+ check_version,
17
+ schedule_cache_cleanup,
18
+ handle_exception,
19
+ log
20
+ )
21
+ from app.api import router, init_router, dashboard_router, init_dashboard_router
22
+ from app.config.settings import (
23
+ FAKE_STREAMING,
24
+ FAKE_STREAMING_INTERVAL,
25
+ PASSWORD,
26
+ MAX_REQUESTS_PER_MINUTE,
27
+ MAX_REQUESTS_PER_DAY_PER_IP,
28
+ RETRY_DELAY,
29
+ MAX_RETRY_DELAY,
30
+ CACHE_EXPIRY_TIME,
31
+ MAX_CACHE_ENTRIES,
32
+ REMOVE_CACHE_AFTER_USE,
33
+ REQUEST_HISTORY_EXPIRY_TIME,
34
+ ENABLE_RECONNECT_DETECTION,
35
+ api_call_stats,
36
+ client_request_history,
37
+ local_version,
38
+ remote_version,
39
+ has_update,
40
+ API_KEY_DAILY_LIMIT
41
+ )
42
+ from app.config.safety import SAFETY_SETTINGS, SAFETY_SETTINGS_G2
43
+ import os
44
+ import json
45
+ import asyncio
46
+ import time
47
+ import logging
48
+ from datetime import datetime, timedelta
49
+ import sys
50
+ import pathlib
51
+
52
+ # 设置模板目录
53
+ BASE_DIR = pathlib.Path(__file__).parent
54
+ templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
55
+
56
+ app = FastAPI()
57
+
58
+ # --------------- 全局实例 ---------------
59
+
60
+ # 初始化API密钥管理器
61
+ key_manager = APIKeyManager()
62
+ current_api_key = key_manager.get_available_key()
63
+
64
+ # 创建全局缓存字典,将作为缓存管理器的内部存储
65
+ response_cache = {}
66
+
67
+ # 初始化缓存管理器,使用全局字典作为存储
68
+ response_cache_manager = ResponseCacheManager(
69
+ expiry_time=CACHE_EXPIRY_TIME,
70
+ max_entries=MAX_CACHE_ENTRIES,
71
+ remove_after_use=REMOVE_CACHE_AFTER_USE,
72
+ cache_dict=response_cache
73
+ )
74
+
75
+ # 活跃请求池 - 将作为活跃请求管理器的内部存储
76
+ active_requests_pool = {}
77
+
78
+ # 初始化活跃请求管理器
79
+ active_requests_manager = ActiveRequestsManager(requests_pool=active_requests_pool)
80
+
81
+ # --------------- 工具函数 ---------------
82
+
83
+ def switch_api_key():
84
+ global current_api_key
85
+ key = key_manager.get_available_key() # get_available_key 会处理栈的逻辑
86
+ if key:
87
+ current_api_key = key
88
+ log('info', f"API key 替换为 → {current_api_key[:8]}...", extra={'key': current_api_key[:8], 'request_type': 'switch_key'})
89
+ else:
90
+ log('error', "API key 替换失败,所有API key都已尝试,请重新配置或稍后重试", extra={'key': 'N/A', 'request_type': 'switch_key', 'status_code': 'N/A'})
91
+
92
+ async def check_keys():
93
+ available_keys = []
94
+ for key in key_manager.api_keys:
95
+ is_valid = await test_api_key(key)
96
+ status_msg = "有效" if is_valid else "无效"
97
+ log('info', f"API Key {key[:10]}... {status_msg}.")
98
+ if is_valid:
99
+ available_keys.append(key)
100
+ if not available_keys:
101
+ log('error', "没有可用的 API 密钥!", extra={'key': 'N/A', 'request_type': 'startup', 'status_code': 'N/A'})
102
+ return available_keys
103
+
104
+ # 设置全局异常处理
105
+ sys.excepthook = handle_exception
106
+
107
+ # --------------- 事件处理 ---------------
108
+
109
+ @app.on_event("startup")
110
+ async def startup_event():
111
+ log('info', "Starting Gemini API proxy...")
112
+
113
+ # 启动缓存清理定时任务
114
+ schedule_cache_cleanup(response_cache_manager, active_requests_manager)
115
+
116
+ # 检查版本
117
+ await check_version()
118
+
119
+ available_keys = await check_keys()
120
+ if available_keys:
121
+ key_manager.api_keys = available_keys
122
+ key_manager._reset_key_stack() # 启动时也确保创建随机栈
123
+ key_manager.show_all_keys()
124
+ log('info', f"可用 API 密钥数量:{len(key_manager.api_keys)}")
125
+ log('info', f"最大重试次数设置为:{len(key_manager.api_keys)}")
126
+ if key_manager.api_keys:
127
+ all_models = await GeminiClient.list_available_models(key_manager.api_keys[0])
128
+ GeminiClient.AVAILABLE_MODELS = [model.replace(
129
+ "models/", "") for model in all_models]
130
+ log('info', "Available models loaded.")
131
+
132
+ # 初始化路由器
133
+ init_router(
134
+ key_manager,
135
+ response_cache_manager,
136
+ active_requests_manager,
137
+ SAFETY_SETTINGS,
138
+ SAFETY_SETTINGS_G2,
139
+ current_api_key,
140
+ FAKE_STREAMING,
141
+ FAKE_STREAMING_INTERVAL,
142
+ PASSWORD,
143
+ MAX_REQUESTS_PER_MINUTE,
144
+ MAX_REQUESTS_PER_DAY_PER_IP
145
+ )
146
+
147
+ # 初始化仪表盘路由器
148
+ init_dashboard_router(
149
+ key_manager,
150
+ response_cache_manager,
151
+ active_requests_manager
152
+ )
153
+
154
+ # --------------- 异常处理 ---------------
155
+
156
+ @app.exception_handler(Exception)
157
+ async def global_exception_handler(request: Request, exc: Exception):
158
+ from app.utils import translate_error
159
+ error_message = translate_error(str(exc))
160
+ extra_log_unhandled_exception = {'status_code': 500, 'error_message': error_message}
161
+ log('error', f"Unhandled exception: {error_message}", extra=extra_log_unhandled_exception)
162
+ return JSONResponse(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, content=ErrorResponse(message=str(exc), type="internal_error").dict())
163
+
164
+ # --------------- 路由 ---------------
165
+
166
+ # 包含API路由
167
+ app.include_router(router)
168
+ app.include_router(dashboard_router)
169
+
170
+ @app.get("/", response_class=HTMLResponse)
171
+ async def root(request: Request):
172
+ # 先清理过期数据,确保统计数据是最新的
173
+ clean_expired_stats(api_call_stats)
174
+ response_cache_manager.clean_expired() # 使用管理器清理缓存
175
+ active_requests_manager.clean_completed() # 使用管理器清理活跃请求
176
+ # 获取当前统计数据
177
+ now = datetime.now()
178
+
179
+ # 计算过去24小时的调用总数
180
+ last_24h_calls = sum(api_call_stats['last_24h']['total'].values())
181
+
182
+ # 计算过去一小时内的调用总数
183
+ one_hour_ago = now - timedelta(hours=1)
184
+ hourly_calls = 0
185
+ for hour_key, count in api_call_stats['hourly']['total'].items():
186
+ try:
187
+ hour_time = datetime.strptime(hour_key, '%Y-%m-%d %H:00')
188
+ if hour_time >= one_hour_ago:
189
+ hourly_calls += count
190
+ except ValueError:
191
+ continue
192
+
193
+ # 计算过去一分钟内的调用总数
194
+ one_minute_ago = now - timedelta(minutes=1)
195
+ minute_calls = 0
196
+ for minute_key, count in api_call_stats['minute']['total'].items():
197
+ try:
198
+ minute_time = datetime.strptime(minute_key, '%Y-%m-%d %H:%M')
199
+ if minute_time >= one_minute_ago:
200
+ minute_calls += count
201
+ except ValueError:
202
+ continue
203
+
204
+ # 获取最近的日志
205
+ recent_logs = log_manager.get_recent_logs(50) # 获取最近50条日志
206
+
207
+ # 获取缓存统计
208
+ total_cache = len(response_cache_manager.cache)
209
+ valid_cache = sum(1 for _, data in response_cache_manager.cache.items()
210
+ if time.time() < data.get('expiry_time', 0))
211
+ cache_by_model = {}
212
+
213
+ # 分析缓存数据
214
+ for _, cache_data in response_cache_manager.cache.items():
215
+ if time.time() < cache_data.get('expiry_time', 0):
216
+ # 按模型统计缓存
217
+ model = cache_data.get('response', {}).model
218
+ if model:
219
+ if model in cache_by_model:
220
+ cache_by_model[model] += 1
221
+ else:
222
+ cache_by_model[model] = 1
223
+
224
+ # 获取请求历史统计
225
+ history_count = len(client_request_history)
226
+
227
+ # 获取活跃请求统计
228
+ active_count = len(active_requests_manager.active_requests)
229
+ active_done = sum(1 for task in active_requests_manager.active_requests.values() if task.done())
230
+ active_pending = active_count - active_done
231
+
232
+ # 获取API密钥使用统计
233
+ api_key_stats = []
234
+ for api_key in key_manager.api_keys:
235
+ # 获取API密钥前8位作为标识
236
+ api_key_id = api_key[:8]
237
+
238
+ # 计算24小时内的调用次数
239
+ calls_24h = 0
240
+ if 'by_endpoint' in api_call_stats['last_24h'] and api_key in api_call_stats['last_24h']['by_endpoint']:
241
+ calls_24h = sum(api_call_stats['last_24h']['by_endpoint'][api_key].values())
242
+
243
+ # 计算使用百分比
244
+ usage_percent = (calls_24h / API_KEY_DAILY_LIMIT) * 100 if API_KEY_DAILY_LIMIT > 0 else 0
245
+
246
+ # 添加到结果列表
247
+ api_key_stats.append({
248
+ 'api_key': api_key_id,
249
+ 'calls_24h': calls_24h,
250
+ 'limit': API_KEY_DAILY_LIMIT,
251
+ 'usage_percent': round(usage_percent, 2)
252
+ })
253
+
254
+ # 按使用百分比降序排序
255
+ api_key_stats.sort(key=lambda x: x['usage_percent'], reverse=True)
256
+
257
+ # 准备模板上下文
258
+ context = {
259
+ "key_count": len(key_manager.api_keys),
260
+ "model_count": len(GeminiClient.AVAILABLE_MODELS),
261
+ "retry_count": len(key_manager.api_keys),
262
+ "last_24h_calls": last_24h_calls,
263
+ "hourly_calls": hourly_calls,
264
+ "minute_calls": minute_calls,
265
+ "max_requests_per_minute": MAX_REQUESTS_PER_MINUTE,
266
+ "max_requests_per_day_per_ip": MAX_REQUESTS_PER_DAY_PER_IP,
267
+ "current_time": datetime.now().strftime('%H:%M:%S'),
268
+ "logs": recent_logs,
269
+ # 添加版本信息
270
+ "local_version": local_version,
271
+ "remote_version": remote_version,
272
+ "has_update": has_update,
273
+ # 添加缓存信息
274
+ "cache_entries": total_cache,
275
+ "valid_cache": valid_cache,
276
+ "expired_cache": total_cache - valid_cache,
277
+ "cache_expiry_time": CACHE_EXPIRY_TIME,
278
+ "max_cache_entries": MAX_CACHE_ENTRIES,
279
+ "cache_by_model": cache_by_model,
280
+ "request_history_count": history_count,
281
+ "enable_reconnect_detection": ENABLE_RECONNECT_DETECTION,
282
+ "remove_cache_after_use": REMOVE_CACHE_AFTER_USE,
283
+ # 添加活跃请求池信息
284
+ "active_count": active_count,
285
+ "active_done": active_done,
286
+ "active_pending": active_pending,
287
+ # 添加API密钥统计
288
+ "api_key_stats": api_key_stats,
289
+ }
290
+
291
+ # 使用Jinja2模板引擎正确渲染HTML
292
  return templates.TemplateResponse("index.html", {"request": request, **context})
app/services/gemini.py CHANGED
@@ -1,346 +1,361 @@
1
- import requests
2
- import json
3
- import os
4
- import asyncio
5
- import time
6
- from app.models import ChatCompletionRequest, Message
7
- from dataclasses import dataclass
8
- from typing import Optional, Dict, Any, List
9
- import httpx
10
- import logging
11
- from app.utils import format_log_message
12
-
13
- logger = logging.getLogger('my_logger')
14
-
15
- # 是否启用假流式请求 默认启用
16
- FAKE_STREAMING = os.environ.get("FAKE_STREAMING", "true").lower() in ["true", "1", "yes"]
17
- # 假流式请求的空内容返回间隔(秒)
18
- FAKE_STREAMING_INTERVAL = float(os.environ.get("FAKE_STREAMING_INTERVAL", "1"))
19
-
20
- @dataclass
21
- class GeneratedText:
22
- text: str
23
- finish_reason: Optional[str] = None
24
-
25
-
26
- class ResponseWrapper:
27
- def __init__(self, data: Dict[Any, Any]): # 正确的初始化方法名
28
- self._data = data
29
- self._text = self._extract_text()
30
- self._finish_reason = self._extract_finish_reason()
31
- self._prompt_token_count = self._extract_prompt_token_count()
32
- self._candidates_token_count = self._extract_candidates_token_count()
33
- self._total_token_count = self._extract_total_token_count()
34
- self._thoughts = self._extract_thoughts()
35
- self._json_dumps = json.dumps(self._data, indent=4, ensure_ascii=False)
36
-
37
- def _extract_thoughts(self) -> Optional[str]:
38
- try:
39
- for part in self._data['candidates'][0]['content']['parts']:
40
- if 'thought' in part:
41
- return part['text']
42
- return ""
43
- except (KeyError, IndexError):
44
- return ""
45
-
46
- def _extract_text(self) -> str:
47
- try:
48
- for part in self._data['candidates'][0]['content']['parts']:
49
- if 'thought' not in part:
50
- return part['text']
51
- return ""
52
- except (KeyError, IndexError):
53
- return ""
54
-
55
- def _extract_finish_reason(self) -> Optional[str]:
56
- try:
57
- return self._data['candidates'][0].get('finishReason')
58
- except (KeyError, IndexError):
59
- return None
60
-
61
- def _extract_prompt_token_count(self) -> Optional[int]:
62
- try:
63
- return self._data['usageMetadata'].get('promptTokenCount')
64
- except (KeyError):
65
- return None
66
-
67
- def _extract_candidates_token_count(self) -> Optional[int]:
68
- try:
69
- return self._data['usageMetadata'].get('candidatesTokenCount')
70
- except (KeyError):
71
- return None
72
-
73
- def _extract_total_token_count(self) -> Optional[int]:
74
- try:
75
- return self._data['usageMetadata'].get('totalTokenCount')
76
- except (KeyError):
77
- return None
78
-
79
- @property
80
- def text(self) -> str:
81
- return self._text
82
-
83
- @property
84
- def finish_reason(self) -> Optional[str]:
85
- return self._finish_reason
86
-
87
- @property
88
- def prompt_token_count(self) -> Optional[int]:
89
- return self._prompt_token_count
90
-
91
- @property
92
- def candidates_token_count(self) -> Optional[int]:
93
- return self._candidates_token_count
94
-
95
- @property
96
- def total_token_count(self) -> Optional[int]:
97
- return self._total_token_count
98
-
99
- @property
100
- def thoughts(self) -> Optional[str]:
101
- return self._thoughts
102
-
103
- @property
104
- def json_dumps(self) -> str:
105
- return self._json_dumps
106
-
107
-
108
- class GeminiClient:
109
-
110
- AVAILABLE_MODELS = []
111
- EXTRA_MODELS = os.environ.get("EXTRA_MODELS", "").split(",")
112
-
113
- def __init__(self, api_key: str):
114
- self.api_key = api_key
115
-
116
- async def stream_chat(self, request: ChatCompletionRequest, contents, safety_settings, system_instruction):
117
- extra_log = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'N/A'}
118
- log_msg = format_log_message('INFO', "流式请求开始", extra=extra_log)
119
- logger.info(log_msg)
120
-
121
- # 检查是否启用假流式请求
122
- if FAKE_STREAMING:
123
- log_msg = format_log_message('INFO', "使用假流式请求模式(发送换行符保持连接)", extra=extra_log)
124
- logger.info(log_msg)
125
-
126
- try:
127
- # 这个方法不再直接使用self.api_key,而是由main.py提供API密钥列表和管理
128
- # 在这里,我们只负责持续发送换行符,直到main.py那边获取到响应
129
-
130
- # 持续发送换行符,直到外部取消此生成器
131
- start_time = time.time()
132
- while True:
133
- # 发送换行符作为保活消息
134
- yield "\n"
135
- # 等待一段时间
136
- await asyncio.sleep(FAKE_STREAMING_INTERVAL)
137
-
138
- # 如果等待时间过长(超过300秒),防止无限等待
139
- if time.time() - start_time > 300:
140
- log_msg = format_log_message('WARNING', "假流式请求等待时间过长,强制结束", extra=extra_log)
141
- logger.warning(log_msg)
142
- # 抛出超时异常,让外部处理
143
- error_msg = "假流式请求等待时间过长,所有API密钥均已尝试"
144
- extra_log_timeout = {'key': self.api_key[:8], 'request_type': 'fake-stream', 'model': request.model, 'status_code': 'TIMEOUT', 'error_message': error_msg}
145
- log_msg = format_log_message('ERROR', error_msg, extra=extra_log_timeout)
146
- logger.error(log_msg)
147
- raise TimeoutError(error_msg)
148
-
149
- except Exception as e:
150
- if not isinstance(e, asyncio.CancelledError): # 忽略取消异常的日志记录
151
- error_msg = f"假流式处理期间发生错误: {str(e)}"
152
- extra_log_error = {'key': self.api_key[:8], 'request_type': 'fake-stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
153
- log_msg = format_log_message('ERROR', error_msg, extra=extra_log_error)
154
- logger.error(log_msg)
155
- raise e
156
- finally:
157
- log_msg = format_log_message('INFO', "假流式请求结束", extra=extra_log)
158
- logger.info(log_msg)
159
- else:
160
- # 原始流式请求处理逻辑
161
- api_version = "v1beta" # 统一使用 v1beta
162
- url = f"https://generativelanguage.googleapis.com/{api_version}/models/{request.model}:streamGenerateContent?key={self.api_key}&alt=sse"
163
- headers = {
164
- "Content-Type": "application/json",
165
- }
166
- data = {
167
- "contents": contents,
168
- "generationConfig": {
169
- "temperature": request.temperature,
170
- "maxOutputTokens": request.max_tokens,
171
- },
172
- "safetySettings": safety_settings,
173
- }
174
- if system_instruction:
175
- data["system_instruction"] = system_instruction
176
-
177
- async with httpx.AsyncClient() as client:
178
- async with client.stream("POST", url, headers=headers, json=data, timeout=600) as response:
179
- buffer = b""
180
- try:
181
- async for line in response.aiter_lines():
182
- if not line.strip():
183
- continue
184
- if line.startswith("data: "):
185
- line = line[len("data: "):]
186
- buffer += line.encode('utf-8')
187
- try:
188
- data = json.loads(buffer.decode('utf-8'))
189
- buffer = b""
190
- if 'candidates' in data and data['candidates']:
191
- candidate = data['candidates'][0]
192
- if 'content' in candidate:
193
- content = candidate['content']
194
- if 'parts' in content and content['parts']:
195
- parts = content['parts']
196
- text = ""
197
- for part in parts:
198
- if 'text' in part:
199
- text += part['text']
200
- if text:
201
- yield text
202
-
203
- if candidate.get("finishReason") and candidate.get("finishReason") != "STOP":
204
- error_msg = f"模型的响应被截断: {candidate.get('finishReason')}"
205
- extra_log_error = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
206
- log_msg = format_log_message('WARNING', error_msg, extra=extra_log_error)
207
- logger.warning(log_msg)
208
- raise ValueError(error_msg)
209
-
210
- if 'safetyRatings' in candidate:
211
- for rating in candidate['safetyRatings']:
212
- if rating['probability'] == 'HIGH':
213
- error_msg = f"模型的响应被截断: {rating['category']}"
214
- extra_log_safety = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
215
- log_msg = format_log_message('WARNING', error_msg, extra=extra_log_safety)
216
- logger.warning(log_msg)
217
- raise ValueError(error_msg)
218
- except json.JSONDecodeError:
219
- continue
220
- except Exception as e:
221
- error_msg = f"流式处理期间发生错误: {str(e)}"
222
- extra_log_stream_error = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
223
- log_msg = format_log_message('ERROR', error_msg, extra=extra_log_stream_error)
224
- logger.error(log_msg)
225
- raise e
226
- except Exception as e:
227
- raise e
228
- finally:
229
- log_msg = format_log_message('INFO', "流式请求结束", extra=extra_log)
230
- logger.info(log_msg)
231
-
232
- def complete_chat(self, request: ChatCompletionRequest, contents, safety_settings, system_instruction):
233
- extra_log = {'key': self.api_key[:8], 'request_type': 'non-stream', 'model': request.model, 'status_code': 'N/A'}
234
- log_msg = format_log_message('INFO', "非流式请求开始", extra=extra_log)
235
- logger.info(log_msg)
236
-
237
- api_version = "v1beta" # 统一使用 v1beta
238
- url = f"https://generativelanguage.googleapis.com/{api_version}/models/{request.model}:generateContent?key={self.api_key}"
239
- headers = {
240
- "Content-Type": "application/json",
241
- }
242
- data = {
243
- "contents": contents,
244
- "generationConfig": {
245
- "temperature": request.temperature,
246
- "maxOutputTokens": request.max_tokens,
247
- },
248
- "safetySettings": safety_settings,
249
- }
250
- if system_instruction:
251
- data["system_instruction"] = system_instruction
252
-
253
- try:
254
- response = requests.post(url, headers=headers, json=data)
255
- response.raise_for_status()
256
-
257
- log_msg = format_log_message('INFO', "非流式请求成功完成", extra=extra_log)
258
- logger.info(log_msg)
259
-
260
- return ResponseWrapper(response.json())
261
- except Exception as e:
262
- raise
263
-
264
- def convert_messages(self, messages, use_system_prompt=False):
265
- gemini_history = []
266
- errors = []
267
- system_instruction_text = ""
268
- is_system_phase = use_system_prompt
269
- for i, message in enumerate(messages):
270
- role = message.role
271
- content = message.content
272
-
273
- if isinstance(content, str):
274
- if is_system_phase and role == 'system':
275
- if system_instruction_text:
276
- system_instruction_text += "\n" + content
277
- else:
278
- system_instruction_text = content
279
- else:
280
- is_system_phase = False
281
-
282
- if role in ['user', 'system']:
283
- role_to_use = 'user'
284
- elif role == 'assistant':
285
- role_to_use = 'model'
286
- else:
287
- errors.append(f"Invalid role: {role}")
288
- continue
289
-
290
- if gemini_history and gemini_history[-1]['role'] == role_to_use:
291
- gemini_history[-1]['parts'].append({"text": content})
292
- else:
293
- gemini_history.append(
294
- {"role": role_to_use, "parts": [{"text": content}]})
295
- elif isinstance(content, list):
296
- parts = []
297
- for item in content:
298
- if item.get('type') == 'text':
299
- parts.append({"text": item.get('text')})
300
- elif item.get('type') == 'image_url':
301
- image_data = item.get('image_url', {}).get('url', '')
302
- if image_data.startswith('data:image/'):
303
- try:
304
- mime_type, base64_data = image_data.split(';')[0].split(':')[1], image_data.split(',')[1]
305
- parts.append({
306
- "inline_data": {
307
- "mime_type": mime_type,
308
- "data": base64_data
309
- }
310
- })
311
- except (IndexError, ValueError):
312
- errors.append(
313
- f"Invalid data URI for image: {image_data}")
314
- else:
315
- errors.append(
316
- f"Invalid image URL format for item: {item}")
317
-
318
- if parts:
319
- if role in ['user', 'system']:
320
- role_to_use = 'user'
321
- elif role == 'assistant':
322
- role_to_use = 'model'
323
- else:
324
- errors.append(f"Invalid role: {role}")
325
- continue
326
- if gemini_history and gemini_history[-1]['role'] == role_to_use:
327
- gemini_history[-1]['parts'].extend(parts)
328
- else:
329
- gemini_history.append(
330
- {"role": role_to_use, "parts": parts})
331
- if errors:
332
- return errors
333
- else:
334
- return gemini_history, {"parts": [{"text": system_instruction_text}]}
335
-
336
    @staticmethod
    async def list_available_models(api_key) -> list:
        """Fetch the model names available to *api_key* from the Gemini API.

        Extends the result with the EXTRA_MODELS configured on the class.
        Raises httpx.HTTPStatusError for non-2xx responses.
        """
        url = "https://generativelanguage.googleapis.com/v1beta/models?key={}".format(
            api_key)
        async with httpx.AsyncClient() as client:
            response = await client.get(url)
            response.raise_for_status()
            data = response.json()
            models = [model["name"] for model in data.get("models", [])]
            models.extend(GeminiClient.EXTRA_MODELS)
            return models
 
1
+ import requests
2
+ import json
3
+ import os
4
+ import asyncio
5
+ import time
6
+ from app.models import ChatCompletionRequest, Message
7
+ from dataclasses import dataclass
8
+ from typing import Optional, Dict, Any, List
9
+ import httpx
10
+ import logging
11
+ import secrets
12
+ import string
13
+ from app.utils import format_log_message
14
+ from app.config.settings import (
15
+ RANDOM_STRING,
16
+ RANDOM_STRING_LENGTH
17
+ )
18
+
19
def generate_secure_random_string(length):
    """Return a cryptographically secure random alphanumeric string.

    Built from ``secrets.choice`` over ASCII letters and digits, so the
    result is suitable for unpredictable tokens/identifiers.
    """
    alphabet = string.ascii_letters + string.digits
    return "".join(secrets.choice(alphabet) for _ in range(length))
23
+
24
logger = logging.getLogger('my_logger')

# Whether fake-streaming mode is enabled (default: enabled). In this mode the
# stream endpoint only emits keep-alive newlines while the real request is
# performed elsewhere.
FAKE_STREAMING = os.environ.get("FAKE_STREAMING", "true").lower() in ["true", "1", "yes"]
# Interval (seconds) between empty keep-alive chunks in fake-streaming mode.
FAKE_STREAMING_INTERVAL = float(os.environ.get("FAKE_STREAMING_INTERVAL", "1"))
30
+
31
@dataclass
class GeneratedText:
    """A chunk of model-generated text with an optional finish reason."""
    # The generated text content.
    text: str
    # Why generation stopped (e.g. "STOP"), if known.
    finish_reason: Optional[str] = None
35
+
36
+
37
class ResponseWrapper:
    """Typed accessor over a raw Gemini ``generateContent`` JSON response.

    All fields are extracted eagerly at construction time and exposed as
    read-only properties. Missing or partial payloads degrade to ``""`` /
    ``None`` instead of raising.
    """

    def __init__(self, data: Dict[Any, Any]):
        self._data = data
        self._text = self._extract_text()
        self._finish_reason = self._extract_finish_reason()
        self._prompt_token_count = self._extract_prompt_token_count()
        self._candidates_token_count = self._extract_candidates_token_count()
        self._total_token_count = self._extract_total_token_count()
        self._thoughts = self._extract_thoughts()
        # Pretty-printed raw payload, handy for logging/debugging.
        self._json_dumps = json.dumps(self._data, indent=4, ensure_ascii=False)

    def _extract_thoughts(self) -> Optional[str]:
        """Concatenate the text of every "thought" part of the first candidate.

        Fix: the previous version returned only the first thought part,
        silently dropping any subsequent ones.
        """
        try:
            return "".join(
                part.get('text', '')
                for part in self._data['candidates'][0]['content']['parts']
                if 'thought' in part
            )
        except (KeyError, IndexError):
            return ""

    def _extract_text(self) -> str:
        """Concatenate the text of every non-"thought" part of the first candidate.

        Fix: the previous version returned only the first text part, dropping
        the rest of a multi-part response. Parts without a 'text' key (e.g.
        function calls) contribute nothing instead of aborting extraction.
        """
        try:
            return "".join(
                part.get('text', '')
                for part in self._data['candidates'][0]['content']['parts']
                if 'thought' not in part
            )
        except (KeyError, IndexError):
            return ""

    def _extract_finish_reason(self) -> Optional[str]:
        """Return candidates[0].finishReason, or None when absent."""
        try:
            return self._data['candidates'][0].get('finishReason')
        except (KeyError, IndexError):
            return None

    def _extract_prompt_token_count(self) -> Optional[int]:
        """Return usageMetadata.promptTokenCount, or None when absent."""
        try:
            return self._data['usageMetadata'].get('promptTokenCount')
        except KeyError:
            return None

    def _extract_candidates_token_count(self) -> Optional[int]:
        """Return usageMetadata.candidatesTokenCount, or None when absent."""
        try:
            return self._data['usageMetadata'].get('candidatesTokenCount')
        except KeyError:
            return None

    def _extract_total_token_count(self) -> Optional[int]:
        """Return usageMetadata.totalTokenCount, or None when absent."""
        try:
            return self._data['usageMetadata'].get('totalTokenCount')
        except KeyError:
            return None

    @property
    def text(self) -> str:
        return self._text

    @property
    def finish_reason(self) -> Optional[str]:
        return self._finish_reason

    @property
    def prompt_token_count(self) -> Optional[int]:
        return self._prompt_token_count

    @property
    def candidates_token_count(self) -> Optional[int]:
        return self._candidates_token_count

    @property
    def total_token_count(self) -> Optional[int]:
        return self._total_token_count

    @property
    def thoughts(self) -> Optional[str]:
        return self._thoughts

    @property
    def json_dumps(self) -> str:
        return self._json_dumps
117
+
118
+
119
class GeminiClient:
    """Thin async client for the Google Gemini ``generateContent`` API.

    One instance wraps a single API key; key rotation and retry across keys
    are managed by the caller (main.py).
    """

    AVAILABLE_MODELS = []
    # Extra model names injected via the EXTRA_MODELS env var (comma separated).
    # Fix: blank entries are filtered out so an unset/empty variable no longer
    # contributes an "" model name to list_available_models().
    EXTRA_MODELS = [m.strip() for m in os.environ.get("EXTRA_MODELS", "").split(",") if m.strip()]

    def __init__(self, api_key: str):
        self.api_key = api_key

    async def stream_chat(self, request: ChatCompletionRequest, contents, safety_settings, system_instruction):
        """Yield response text chunks for a streaming chat request.

        When FAKE_STREAMING is enabled, this generator only emits newline
        keep-alive chunks while the real (non-stream) request is performed by
        the caller; otherwise it proxies Gemini's SSE stream and yields the
        text of each parsed chunk.

        Raises:
            TimeoutError: fake-streaming keep-alive exceeded 300 seconds.
            ValueError: the model response was truncated or safety-blocked.
        """
        extra_log = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'N/A'}
        log_msg = format_log_message('INFO', "流式请求开始", extra=extra_log)
        logger.info(log_msg)

        if FAKE_STREAMING:
            log_msg = format_log_message('INFO', "使用假流式请求模式(发送换行符保持连接)", extra=extra_log)
            logger.info(log_msg)
            try:
                # The real API call is issued elsewhere (main.py owns the key
                # pool); here we only keep the HTTP connection alive with
                # newline chunks until the caller cancels this generator.
                start_time = time.time()
                while True:
                    yield "\n"
                    await asyncio.sleep(FAKE_STREAMING_INTERVAL)

                    # Guard against waiting forever (hard cap at 300s).
                    if time.time() - start_time > 300:
                        log_msg = format_log_message('WARNING', "假流式请求等待时间过长,强制结束", extra=extra_log)
                        logger.warning(log_msg)
                        error_msg = "假流式请求等待时间过长,所有API密钥均已尝试"
                        extra_log_timeout = {'key': self.api_key[:8], 'request_type': 'fake-stream', 'model': request.model, 'status_code': 'TIMEOUT', 'error_message': error_msg}
                        log_msg = format_log_message('ERROR', error_msg, extra=extra_log_timeout)
                        logger.error(log_msg)
                        raise TimeoutError(error_msg)
            except Exception as e:
                # CancelledError is the normal way the caller stops us; don't log it.
                if not isinstance(e, asyncio.CancelledError):
                    error_msg = f"假流式处理期间发生错误: {str(e)}"
                    extra_log_error = {'key': self.api_key[:8], 'request_type': 'fake-stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
                    log_msg = format_log_message('ERROR', error_msg, extra=extra_log_error)
                    logger.error(log_msg)
                raise e
            finally:
                log_msg = format_log_message('INFO', "假流式请求结束", extra=extra_log)
                logger.info(log_msg)
        else:
            # Real SSE streaming against the Gemini endpoint.
            # "think" models are only served by the v1alpha API surface.
            api_version = "v1alpha" if "think" in request.model else "v1beta"
            url = f"https://generativelanguage.googleapis.com/{api_version}/models/{request.model}:streamGenerateContent?key={self.api_key}&alt=sse"
            headers = {
                "Content-Type": "application/json",
            }
            data = {
                "contents": contents,
                "generationConfig": {
                    "temperature": request.temperature,
                    "maxOutputTokens": request.max_tokens,
                },
                "safetySettings": safety_settings,
            }
            if system_instruction:
                data["system_instruction"] = system_instruction

            try:
                async with httpx.AsyncClient() as client:
                    async with client.stream("POST", url, headers=headers, json=data, timeout=600) as response:
                        buffer = b""
                        try:
                            async for line in response.aiter_lines():
                                if not line.strip():
                                    continue
                                if line.startswith("data: "):
                                    line = line[len("data: "):]
                                buffer += line.encode('utf-8')
                                try:
                                    # Renamed from `data` to avoid shadowing the
                                    # request payload built above.
                                    chunk = json.loads(buffer.decode('utf-8'))
                                except json.JSONDecodeError:
                                    # Incomplete JSON: keep buffering until it parses.
                                    continue
                                buffer = b""
                                if 'candidates' in chunk and chunk['candidates']:
                                    candidate = chunk['candidates'][0]
                                    if 'content' in candidate:
                                        content = candidate['content']
                                        if 'parts' in content and content['parts']:
                                            text = ""
                                            for part in content['parts']:
                                                if 'text' in part:
                                                    text += part['text']
                                            if text:
                                                yield text

                                    # Any finish reason other than STOP means the
                                    # response was cut off — surface it as an error.
                                    if candidate.get("finishReason") and candidate.get("finishReason") != "STOP":
                                        error_msg = f"模型的响应被截断: {candidate.get('finishReason')}"
                                        extra_log_error = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
                                        log_msg = format_log_message('WARNING', error_msg, extra=extra_log_error)
                                        logger.warning(log_msg)
                                        raise ValueError(error_msg)

                                    if 'safetyRatings' in candidate:
                                        for rating in candidate['safetyRatings']:
                                            if rating['probability'] == 'HIGH':
                                                error_msg = f"模型的响应被截断: {rating['category']}"
                                                extra_log_safety = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
                                                log_msg = format_log_message('WARNING', error_msg, extra=extra_log_safety)
                                                logger.warning(log_msg)
                                                raise ValueError(error_msg)
                        except Exception as e:
                            error_msg = f"流式处理期间发生错误: {str(e)}"
                            extra_log_stream_error = {'key': self.api_key[:8], 'request_type': 'stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
                            log_msg = format_log_message('ERROR', error_msg, extra=extra_log_stream_error)
                            logger.error(log_msg)
                            raise e
            finally:
                # Always mark the end of the stream, success or failure.
                log_msg = format_log_message('INFO', "流式请求结束", extra=extra_log)
                logger.info(log_msg)

    def complete_chat(self, request: ChatCompletionRequest, contents, safety_settings, system_instruction):
        """Perform a blocking (non-streaming) ``generateContent`` call.

        Returns:
            ResponseWrapper around the parsed JSON response.

        Raises:
            requests.HTTPError / requests.RequestException on HTTP failure,
            requests.Timeout after 600s.
        """
        extra_log = {'key': self.api_key[:8], 'request_type': 'non-stream', 'model': request.model, 'status_code': 'N/A'}
        log_msg = format_log_message('INFO', "非流式请求开始", extra=extra_log)
        logger.info(log_msg)

        # "think" models are only served by the v1alpha API surface.
        api_version = "v1alpha" if "think" in request.model else "v1beta"
        url = f"https://generativelanguage.googleapis.com/{api_version}/models/{request.model}:generateContent?key={self.api_key}"
        headers = {
            "Content-Type": "application/json",
        }
        data = {
            "contents": contents,
            "generationConfig": {
                "temperature": request.temperature,
                "maxOutputTokens": request.max_tokens,
            },
            "safetySettings": safety_settings,
        }
        if system_instruction:
            data["system_instruction"] = system_instruction

        try:
            # Fix: the original call had no timeout and could hang forever;
            # 600s matches the streaming path's timeout.
            response = requests.post(url, headers=headers, json=data, timeout=600)
            response.raise_for_status()

            log_msg = format_log_message('INFO', "非流式请求成功完成", extra=extra_log)
            logger.info(log_msg)

            return ResponseWrapper(response.json())
        except Exception as e:
            # Fix: log the failure before propagating instead of a silent re-raise.
            error_msg = f"非流式请求失败: {str(e)}"
            extra_log_error = {'key': self.api_key[:8], 'request_type': 'non-stream', 'model': request.model, 'status_code': 'ERROR', 'error_message': error_msg}
            log_msg = format_log_message('ERROR', error_msg, extra=extra_log_error)
            logger.error(log_msg)
            raise

    def convert_messages(self, messages, use_system_prompt=False):
        """Convert OpenAI-style chat messages into Gemini ``contents``.

        Args:
            messages: iterable of objects with ``.role`` and ``.content``
                (str or a list of text/image_url items).
            use_system_prompt: when True, leading 'system' messages are folded
                into the system instruction instead of the history.

        Returns:
            (gemini_history, system_instruction) on success, or a list of
            error strings if any message was invalid.
        """
        gemini_history = []
        errors = []
        system_instruction_text = ""
        # While in the "system phase", consecutive leading system messages are
        # merged into the system instruction; the first non-system message ends it.
        is_system_phase = use_system_prompt
        for message in messages:
            role = message.role
            content = message.content
            if isinstance(content, str):
                if is_system_phase and role == 'system':
                    if system_instruction_text:
                        system_instruction_text += "\n" + content
                    else:
                        system_instruction_text = content
                else:
                    is_system_phase = False

                    if role in ['user', 'system']:
                        role_to_use = 'user'
                    elif role == 'assistant':
                        role_to_use = 'model'
                    else:
                        errors.append(f"Invalid role: {role}")
                        continue

                    # Gemini expects alternating roles; merge consecutive
                    # same-role turns into one entry's parts.
                    if gemini_history and gemini_history[-1]['role'] == role_to_use:
                        gemini_history[-1]['parts'].append({"text": content})
                    else:
                        gemini_history.append(
                            {"role": role_to_use, "parts": [{"text": content}]})
            elif isinstance(content, list):
                parts = []
                for item in content:
                    if item.get('type') == 'text':
                        parts.append({"text": item.get('text')})
                    elif item.get('type') == 'image_url':
                        image_data = item.get('image_url', {}).get('url', '')
                        if image_data.startswith('data:image/'):
                            # data URI shape: "data:<mime>;base64,<payload>"
                            try:
                                mime_type, base64_data = image_data.split(';')[0].split(':')[1], image_data.split(',')[1]
                                parts.append({
                                    "inline_data": {
                                        "mime_type": mime_type,
                                        "data": base64_data
                                    }
                                })
                            except (IndexError, ValueError):
                                errors.append(
                                    f"Invalid data URI for image: {image_data}")
                        else:
                            errors.append(
                                f"Invalid image URL format for item: {item}")

                if parts:
                    if role in ['user', 'system']:
                        role_to_use = 'user'
                    elif role == 'assistant':
                        role_to_use = 'model'
                    else:
                        errors.append(f"Invalid role: {role}")
                        continue
                    if gemini_history and gemini_history[-1]['role'] == role_to_use:
                        gemini_history[-1]['parts'].extend(parts)
                    else:
                        gemini_history.append(
                            {"role": role_to_use, "parts": parts})
        if errors:
            return errors
        if RANDOM_STRING:
            # Inject random decoy messages near both ends of the history
            # (anti-fingerprinting / prompt disguise feature).
            gemini_history.insert(1, {'role': 'user', 'parts': [{'text': generate_secure_random_string(RANDOM_STRING_LENGTH)}]})
            gemini_history.insert(len(gemini_history) - 1, {'role': 'user', 'parts': [{'text': generate_secure_random_string(RANDOM_STRING_LENGTH)}]})
            log_msg = format_log_message('INFO', "伪装消息成功")
            logger.info(log_msg)
        return gemini_history, {"parts": [{"text": system_instruction_text}]}

    @staticmethod
    async def list_available_models(api_key) -> list:
        """Fetch the model names available to *api_key*, plus EXTRA_MODELS.

        Raises httpx.HTTPStatusError for non-2xx responses.
        """
        url = "https://generativelanguage.googleapis.com/v1beta/models?key={}".format(
            api_key)
        async with httpx.AsyncClient() as client:
            response = await client.get(url)
            response.raise_for_status()
            data = response.json()
            models = [model["name"] for model in data.get("models", [])]
            models.extend(GeminiClient.EXTRA_MODELS)
            return models