| | from fastapi import APIRouter, HTTPException, Request, Depends, status |
| | from fastapi.responses import JSONResponse, StreamingResponse |
| | from app.models import ChatCompletionRequest, ChatCompletionResponse, ErrorResponse, ModelList |
| | from app.services import GeminiClient |
| | from app.utils import generate_cache_key |
| |
|
| | from app.config.settings import ( |
| | api_call_stats, |
| | BLOCKED_MODELS |
| | ) |
| | import asyncio |
| | import time |
| | from app.utils.logging import log |
| |
|
| | |
| | from .auth import verify_password |
| | from .request_handlers import process_request |
| |
|
| | |
# Router that the endpoints defined in this module are registered on.
router = APIRouter()
| |
|
| | |
# Module-level state. Every name below is None until init_router() stores the
# real value in it.

# Manager objects forwarded to process_request.
key_manager = response_cache_manager = active_requests_manager = None

# The two safety-settings values forwarded to process_request.
safety_settings = safety_settings_g2 = None

# API key value supplied to init_router.
current_api_key = None

# Fake-streaming options forwarded to process_request.
FAKE_STREAMING = FAKE_STREAMING_INTERVAL = None

# Password that custom_verify_password hands to verify_password.
PASSWORD = None

# Request limits forwarded to process_request.
MAX_REQUESTS_PER_MINUTE = MAX_REQUESTS_PER_DAY_PER_IP = None
| |
|
| | |
def init_router(
    _key_manager,
    _response_cache_manager,
    _active_requests_manager,
    _safety_settings,
    _safety_settings_g2,
    _current_api_key,
    _fake_streaming,
    _fake_streaming_interval,
    _password,
    _max_requests_per_minute,
    _max_requests_per_day_per_ip
):
    """Store the supplied dependencies and settings in this module's globals.

    Each argument is assigned, unchanged, to the module-level variable it
    corresponds to: the manager, safety-settings and API-key arguments go to
    the lower-case globals of the same name (without the leading underscore),
    and the fake-streaming, password and request-limit arguments go to the
    upper-case globals.
    """
    global key_manager, response_cache_manager, active_requests_manager
    global safety_settings, safety_settings_g2, current_api_key
    global FAKE_STREAMING, FAKE_STREAMING_INTERVAL
    global PASSWORD, MAX_REQUESTS_PER_MINUTE, MAX_REQUESTS_PER_DAY_PER_IP

    # Manager objects.
    key_manager, response_cache_manager, active_requests_manager = (
        _key_manager,
        _response_cache_manager,
        _active_requests_manager,
    )

    # Safety settings and the API key value.
    safety_settings, safety_settings_g2, current_api_key = (
        _safety_settings,
        _safety_settings_g2,
        _current_api_key,
    )

    # Fake-streaming options.
    FAKE_STREAMING, FAKE_STREAMING_INTERVAL = (
        _fake_streaming,
        _fake_streaming_interval,
    )

    # Password and request limits.
    PASSWORD, MAX_REQUESTS_PER_MINUTE, MAX_REQUESTS_PER_DAY_PER_IP = (
        _password,
        _max_requests_per_minute,
        _max_requests_per_day_per_ip,
    )
| |
|
| | |
async def custom_verify_password(request: Request):
    """Run verify_password for the request using the module-level PASSWORD.

    Used as the FastAPI dependency of the chat-completions endpoint. The
    password is read at call time, so the value stored by init_router is the
    one that is used. Whatever verify_password returns is discarded.
    """
    configured_password = PASSWORD
    await verify_password(request, configured_password)
| |
|
| | |
@router.get("/v1/models", response_model=ModelList)
def list_models():
    """List the models in GeminiClient.AVAILABLE_MODELS that are not blocked.

    Returns:
        A ModelList with one entry per model that is not in BLOCKED_MODELS,
        in the order of GeminiClient.AVAILABLE_MODELS.
    """
    log('info', "Received request to list models", extra={'request_type': 'list_models', 'status_code': 200})

    entries = []
    for model_id in GeminiClient.AVAILABLE_MODELS:
        if model_id in BLOCKED_MODELS:
            continue
        entries.append({"id": model_id, "object": "model", "created": 1678888888, "owned_by": "organization-owner"})

    return ModelList(data=entries)
| |
|
@router.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(request: ChatCompletionRequest, http_request: Request, _: None = Depends(custom_verify_password)):
    """Handle a chat-completion request, streaming or non-streaming.

    Streaming requests are handed straight to ``process_request``.
    Non-streaming requests go through three stages:

    1. If the response cache has an entry for the request's cache key, that
       entry is returned and deleted from the cache.
    2. If a task for the same cache key is still running in the active-request
       pool, wait up to 180 seconds for it and reuse its outcome (the cached
       response, or a new ``ChatCompletionResponse`` built from its result).
    3. Otherwise start a new ``process_request`` task, register it in the
       pool under ``cache:<cache_key>`` and await it.

    Args:
        request: The parsed chat-completion request body.
        http_request: The incoming HTTP request; used for the client address
            and passed on to ``process_request``.
        _: Placeholder for the ``custom_verify_password`` dependency.

    Returns:
        The value returned by ``process_request``, a cached response, or a
        ``ChatCompletionResponse`` built from an already-running task's result.

    Raises:
        asyncio.CancelledError: If this handler is cancelled while waiting
            for an already-running task.
        Exception: Anything raised by the awaited in-flight task or by the
            new processing task when no cached response can be used instead.
    """
    client_ip = http_request.client.host if http_request.client else "unknown"

    # Arguments common to the streaming and non-streaming process_request calls.
    shared_args = (
        key_manager,
        response_cache_manager,
        active_requests_manager,
        safety_settings,
        safety_settings_g2,
        api_call_stats,
        FAKE_STREAMING,
        FAKE_STREAMING_INTERVAL,
        MAX_REQUESTS_PER_MINUTE,
        MAX_REQUESTS_PER_DAY_PER_IP,
    )

    if request.stream:
        return await process_request(request, http_request, "stream", *shared_args)

    cache_key = generate_cache_key(request)

    log('info', f"请求缓存键: {cache_key[:8]}...",
        extra={'cache_key': cache_key[:8], 'request_type': 'non-stream'})

    # Stage 1: exact cache hit.
    cached_response, cache_hit = response_cache_manager.get(cache_key)
    if cache_hit:
        log('info', f"精确缓存命中: {cache_key[:8]}...",
            extra={'cache_operation': 'hit', 'request_type': 'non-stream'})

        # Drop the pool entries registered for this cache key.
        active_requests_manager.remove_by_prefix(f"cache:{cache_key}")

        # A cached response is handed out once and then deleted.
        if cache_key in response_cache_manager.cache:
            del response_cache_manager.cache[cache_key]
            log('info', f"缓存使用后已删除: {cache_key[:8]}...",
                extra={'cache_operation': 'used-and-removed', 'request_type': 'non-stream'})

        return cached_response

    pool_key = f"cache:{cache_key}"

    # Stage 2: an identical request is already being processed.
    active_task = active_requests_manager.get(pool_key)
    if active_task and not active_task.done():
        log('info', "发现相同请求的进行中任务",
            extra={'request_type': 'non-stream', 'model': request.model})

        try:
            # shield(): wait_for() cancels whatever it awaits when the timeout
            # expires (or when this handler is cancelled). Without the shield
            # a waiting duplicate would kill the task that the original
            # request is still awaiting.
            await asyncio.wait_for(asyncio.shield(active_task), timeout=180)

            # Prefer the cached response if the finished task left one behind.
            cached_response, cache_hit = response_cache_manager.get(cache_key)
            if cache_hit:
                if cache_key in response_cache_manager.cache:
                    del response_cache_manager.cache[cache_key]
                    log('info', f"使用已完成任务的缓存后删除: {cache_key[:8]}...",
                        extra={'cache_operation': 'used-and-removed', 'request_type': 'non-stream'})

                return cached_response

            # No cache entry: build a response with a fresh id and timestamp
            # from the finished task's result.
            if active_task.done() and not active_task.cancelled():
                result = active_task.result()
                if result:
                    return ChatCompletionResponse(
                        id=f"chatcmpl-{int(time.time()*1000)}",
                        object="chat.completion",
                        created=int(time.time()),
                        model=result.model,
                        choices=result.choices
                    )
        except (asyncio.TimeoutError, asyncio.CancelledError) as e:
            if isinstance(e, asyncio.CancelledError) and not active_task.cancelled():
                # The in-flight task was not cancelled, so the cancellation
                # targeted this handler itself: propagate it instead of
                # starting a new upstream request.
                raise

            error_type = "超时" if isinstance(e, asyncio.TimeoutError) else "被取消"
            log('warning', f"等待已有任务{error_type}: {pool_key}",
                extra={'request_type': 'non-stream', 'model': request.model})

            # Only drop the pool entry once the task is really finished; a
            # task that is still running keeps its entry.
            if active_task.done() or active_task.cancelled():
                active_requests_manager.remove(pool_key)
                log('info', f"已从活跃请求池移除{error_type}任务: {pool_key}",
                    extra={'request_type': 'non-stream'})

    # Stage 3: process the request in a new task and register it in the pool
    # so identical requests can find it.
    process_task = asyncio.create_task(
        process_request(request, http_request, "non-stream", *shared_args, cache_key, client_ip)
    )
    active_requests_manager.add(pool_key, process_task)

    try:
        return await process_task
    except Exception:
        # The task failed: drop its pool entry.
        active_requests_manager.remove(pool_key)

        # Fall back to a cached response if one is available.
        cached_response, cache_hit = response_cache_manager.get(cache_key)
        if cache_hit:
            log('info', f"任务失败但找到缓存,使用缓存结果: {cache_key[:8]}...",
                extra={'request_type': 'non-stream', 'model': request.model})
            return cached_response

        raise