Spaces:

devme
/

aqpa

Paused

App Files Files Community

devme committed on Nov 23, 2025

Commit

bc43157

verified ·

1 Parent(s): e51ffd6

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -472

app.py DELETED Viewed

@@ -1,472 +0,0 @@
-import json
-import os
-import traceback
-import uuid
-import time
-import asyncio
-import importlib.util
-from pathlib import Path
-from typing import Dict, Optional, List, Any, AsyncGenerator, Tuple
-from contextlib import asynccontextmanager
-from fastapi import FastAPI, Depends, HTTPException, Header
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import StreamingResponse, RedirectResponse
-from dotenv import load_dotenv
-import httpx
-import hashlib
-from utils import get_proxies, create_proxy_mounts
-# ------------------------------------------------------------------------------
-# Bootstrap
-# ------------------------------------------------------------------------------
-# Directory that contains this file; the .env file and the dynamically
-# loaded sibling modules are resolved relative to it.
-BASE_DIR = Path(__file__).resolve().parent
-# Read settings from the .env file that sits next to this module.
-load_dotenv(BASE_DIR / ".env")
-app = FastAPI(title="v2 OpenAI-compatible Server (Amazon Q Backend)")
-# CORS for simple testing in browser
-# NOTE(review): every origin, method and header is allowed here — confirm
-# this is intended for anything beyond local testing.
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-# ------------------------------------------------------------------------------
-# Dynamic import of replicate.py to avoid package __init__ needs
-# ------------------------------------------------------------------------------
-def _load_replicate_module():
-    """Load ``replicate.py`` from BASE_DIR as a standalone module.
-
-    The file is loaded by path under the module name ``v2_replicate`` so it
-    does not have to live inside an importable package.
-
-    Returns:
-        The executed module object.
-
-    Raises:
-        ImportError: If no import spec or loader can be built for the file.
-    """
-    mod_path = BASE_DIR / "replicate.py"
-    spec = importlib.util.spec_from_file_location("v2_replicate", str(mod_path))
-    # Validate before first use: previously the check ran only after ``spec``
-    # had already been handed to module_from_spec, and it was an ``assert``,
-    # which disappears under ``python -O``.
-    if spec is None or spec.loader is None:
-        raise ImportError(f"Cannot load module from {mod_path}")
-    module = importlib.util.module_from_spec(spec)
-    spec.loader.exec_module(module)
-    return module
-_replicate = _load_replicate_module()
-send_chat_request = _replicate.send_chat_request
-# ------------------------------------------------------------------------------
-# Dynamic import of Claude modules
-# ------------------------------------------------------------------------------
-def _load_claude_modules():
-    """Load the three Claude helper modules that sit next to this file.
-
-    ``claude_types.py``, ``claude_converter.py`` and ``claude_stream.py`` are
-    executed by path, in that order.
-
-    Returns:
-        A ``(types_module, converter_module, stream_module)`` tuple.
-    """
-    import sys
-
-    def _exec_from_file(module_name, file_name):
-        # Build a spec for the file, create the module and run its body.
-        file_spec = importlib.util.spec_from_file_location(module_name, str(BASE_DIR / file_name))
-        loaded = importlib.util.module_from_spec(file_spec)
-        file_spec.loader.exec_module(loaded)
-        return loaded
-
-    types_module = _exec_from_file("v2_claude_types", "claude_types.py")
-    # Register the types module under "v2.claude_types" before the converter
-    # module body is executed.
-    sys.modules["v2.claude_types"] = types_module
-    converter_module = _exec_from_file("v2_claude_converter", "claude_converter.py")
-    stream_module = _exec_from_file("v2_claude_stream", "claude_stream.py")
-    return types_module, converter_module, stream_module
-_claude_types, _claude_converter, _claude_stream = _load_claude_modules()
-ClaudeRequest = _claude_types.ClaudeRequest
-convert_claude_to_amazonq_request = _claude_converter.convert_claude_to_amazonq_request
-ClaudeStreamHandler = _claude_stream.ClaudeStreamHandler
-# ------------------------------------------------------------------------------
-# Global HTTP Client
-# ------------------------------------------------------------------------------
-GLOBAL_CLIENT: Optional[httpx.AsyncClient] = None
-async def _init_global_client():
-    """Create the shared ``httpx.AsyncClient`` and store it in GLOBAL_CLIENT.
-
-    Proxy mounts from ``create_proxy_mounts()`` are passed to the client only
-    when that call returns something truthy.
-    """
-    global GLOBAL_CLIENT
-    proxy_mounts = create_proxy_mounts()
-    client_options: Dict[str, Any] = {
-        # Connection-pool limits for concurrent streaming requests.
-        "limits": httpx.Limits(
-            max_keepalive_connections=60,  # connections kept alive in the pool
-            max_connections=60,            # cap on total connections
-            keepalive_expiry=30.0,         # idle connections released after 30 s
-        ),
-        # Longer timeouts because responses are streamed.
-        "timeout": httpx.Timeout(
-            connect=30.0,  # connect timeout; leaves room for the TLS handshake
-            read=300.0,    # read timeout; streamed responses need longer
-            write=30.0,    # write timeout
-            pool=10.0,     # max wait to obtain a connection from the pool
-        ),
-    }
-    # Only hand over ``mounts`` when proxies are actually configured.
-    if proxy_mounts:
-        client_options["mounts"] = proxy_mounts
-    GLOBAL_CLIENT = httpx.AsyncClient(**client_options)
-async def _close_global_client():
-    """Close the shared HTTP client, if one exists, and reset GLOBAL_CLIENT."""
-    global GLOBAL_CLIENT
-    if not GLOBAL_CLIENT:
-        return
-    await GLOBAL_CLIENT.aclose()
-    GLOBAL_CLIENT = None
-# ------------------------------------------------------------------------------
-# Token cache and management
-# ------------------------------------------------------------------------------
-# In-memory cache: {hash: {accessToken, refreshToken, clientId, clientSecret, lastRefresh}}
-TOKEN_MAP: Dict[str, Dict[str, Any]] = {}
-def _sha256(text: str) -> str:
-    """Return the hexadecimal SHA-256 digest of ``text``."""
-    digest = hashlib.sha256()
-    digest.update(text.encode())
-    return digest.hexdigest()
-def _parse_bearer_token(bearer_token: str) -> Tuple[str, str, str]:
-    """Split a ``clientId:clientSecret:refreshToken`` credential string.
-
-    Only the first two colons act as separators, so colons inside the refresh
-    token are preserved. Missing trailing fields come back as empty strings.
-
-    Returns:
-        A ``(client_id, client_secret, refresh_token)`` tuple.
-    """
-    fields = bearer_token.split(":", 2)
-    # Pad to exactly three fields so the unpacking below always succeeds.
-    fields += [""] * (3 - len(fields))
-    client_id, client_secret, refresh_token = fields
-    return client_id, client_secret, refresh_token
-async def _handle_token_refresh(client_id: str, client_secret: str, refresh_token: str) -> Optional[str]:
-    """Exchange a refresh token for a new access token at TOKEN_URL.
-
-    The shared GLOBAL_CLIENT is used when it exists; otherwise a temporary
-    client with a 60 second timeout is created for this single call.
-
-    Returns:
-        The ``accessToken`` field of the JSON response, or ``None`` when the
-        request fails (the error is printed together with a traceback).
-    """
-    body = {
-        "grantType": "refresh_token",
-        "clientId": client_id,
-        "clientSecret": client_secret,
-        "refreshToken": refresh_token,
-    }
-
-    async def _exchange(http_client):
-        # POST the refresh request and return the decoded JSON body.
-        response = await http_client.post(TOKEN_URL, headers=_oidc_headers(), json=body)
-        response.raise_for_status()
-        return response.json()
-
-    try:
-        shared_client = GLOBAL_CLIENT
-        if shared_client:
-            token_response = await _exchange(shared_client)
-        else:
-            async with httpx.AsyncClient(timeout=60.0) as fallback_client:
-                token_response = await _exchange(fallback_client)
-        return token_response.get("accessToken")
-    except httpx.HTTPStatusError as e:
-        print(f"Token refresh HTTP error: {e.response.status_code} - {e.response.text}")
-        traceback.print_exc()
-        return None
-    except Exception as e:
-        print(f"Token refresh error: {e}")
-        traceback.print_exc()
-        return None
-# ------------------------------------------------------------------------------
-# Global token refresher
-# ------------------------------------------------------------------------------
-async def _global_token_refresher():
-    """Background loop that refreshes every cached token each 45 minutes.
-
-    Runs forever. A failure for one entry is printed and does not stop the
-    cycle; an unexpected failure of the cycle itself is printed and followed
-    by a 60 second pause before the loop continues.
-    """
-    cycle_seconds = 45 * 60  # 45 minutes
-    while True:
-        try:
-            await asyncio.sleep(cycle_seconds)
-            if TOKEN_MAP:
-                print(f"[Token Refresher] Starting token refresh cycle...")
-                refreshed = 0
-                # Iterate over a snapshot of the cache entries.
-                for cache_key, entry in list(TOKEN_MAP.items()):
-                    try:
-                        fresh_token = await _handle_token_refresh(
-                            entry["clientId"],
-                            entry["clientSecret"],
-                            entry["refreshToken"],
-                        )
-                        if not fresh_token:
-                            print(f"[Token Refresher] Failed to refresh token for hash: {cache_key[:8]}...")
-                            continue
-                        TOKEN_MAP[cache_key]["accessToken"] = fresh_token
-                        TOKEN_MAP[cache_key]["lastRefresh"] = time.time()
-                        refreshed += 1
-                    except Exception as e:
-                        print(f"[Token Refresher] Exception refreshing token: {e}")
-                        traceback.print_exc()
-                print(f"[Token Refresher] Refreshed {refreshed}/{len(TOKEN_MAP)} tokens")
-        except Exception:
-            traceback.print_exc()
-            await asyncio.sleep(60)  # wait one minute after a failure, then retry
-# ------------------------------------------------------------------------------
-# Token refresh (OIDC)
-# ------------------------------------------------------------------------------
-OIDC_BASE = "https://oidc.us-east-1.amazonaws.com"
-TOKEN_URL = f"{OIDC_BASE}/token"
-def _oidc_headers() -> Dict[str, str]:
-    """Build the request headers sent to the OIDC token endpoint.
-
-    A new random UUID is generated for ``amz-sdk-invocation-id`` on every
-    call; all other values are fixed.
-    """
-    invocation_id = str(uuid.uuid4())
-    headers: Dict[str, str] = {}
-    headers["content-type"] = "application/json"
-    headers["user-agent"] = "aws-sdk-rust/1.3.9 os/windows lang/rust/1.87.0"
-    headers["x-amz-user-agent"] = "aws-sdk-rust/1.3.9 ua/2.1 api/ssooidc/1.88.0 os/windows lang/rust/1.87.0 m/E app/AmazonQ-For-CLI"
-    headers["amz-sdk-request"] = "attempt=1; max=3"
-    headers["amz-sdk-invocation-id"] = invocation_id
-    return headers
-# ------------------------------------------------------------------------------
-# Authentication middleware
-# ------------------------------------------------------------------------------
-async def auth_middleware(
-    authorization: Optional[str] = Header(default=None),
-    x_api_key: Optional[str] = Header(default=None, alias="x-api-key")
-) -> Dict[str, Any]:
-    """Resolve the caller's credentials into an account dictionary.
-
-    The credential is read from ``x-api-key`` first (Claude style) and, when
-    that is absent, from an ``Authorization: Bearer ...`` header (OpenAI
-    style). Its expected format is ``clientId:clientSecret:refreshToken``.
-
-    Returns:
-        A dict with ``accessToken``, ``clientId``, ``clientSecret`` and
-        ``refreshToken``.
-
-    Raises:
-        HTTPException: 401 when no credential is supplied, when it lacks one
-            of the three fields, or when no access token could be obtained.
-    """
-    account_fields = ("accessToken", "clientId", "clientSecret", "refreshToken")
-    bearer_prefix = "Bearer "
-
-    # x-api-key wins; fall back to the Authorization header.
-    token = x_api_key or None
-    if not token and authorization and authorization.startswith(bearer_prefix):
-        token = authorization[len(bearer_prefix):]
-    if not token:
-        raise HTTPException(status_code=401, detail="Missing authentication. Provide Authorization header or x-api-key")
-
-    # Cache entries are keyed by the SHA-256 of the raw credential string.
-    token_hash = _sha256(token)
-    if token_hash in TOKEN_MAP:
-        cached = TOKEN_MAP[token_hash]
-        return {field: cached[field] for field in account_fields}
-
-    # Cache miss: split the credential and validate its three fields.
-    client_id, client_secret, refresh_token = _parse_bearer_token(token)
-    if not (client_id and client_secret and refresh_token):
-        raise HTTPException(status_code=401, detail="Invalid token format. Expected: clientId:clientSecret:refreshToken")
-
-    # Obtain an access token for the new credential.
-    access_token = await _handle_token_refresh(client_id, client_secret, refresh_token)
-    if not access_token:
-        raise HTTPException(status_code=401, detail="Failed to refresh access token")
-
-    account = {
-        "accessToken": access_token,
-        "clientId": client_id,
-        "clientSecret": client_secret,
-        "refreshToken": refresh_token,
-    }
-    # Store it in the cache together with the refresh timestamp.
-    TOKEN_MAP[token_hash] = {
-        "accessToken": access_token,
-        "refreshToken": refresh_token,
-        "clientId": client_id,
-        "clientSecret": client_secret,
-        "lastRefresh": time.time(),
-    }
-    return account
-# ------------------------------------------------------------------------------
-# Dependencies
-# ------------------------------------------------------------------------------
-async def require_account(
-    authorization: Optional[str] = Header(default=None),
-    x_api_key: Optional[str] = Header(default=None, alias="x-api-key")
-) -> Dict[str, Any]:
-    """FastAPI dependency that forwards both headers to ``auth_middleware``.
-
-    Returns:
-        The account dict produced by ``auth_middleware``.
-    """
-    account = await auth_middleware(authorization=authorization, x_api_key=x_api_key)
-    return account
-# ------------------------------------------------------------------------------
-# Root endpoint
-# ------------------------------------------------------------------------------
-@app.get("/")
-async def root():
-    """Redirect requests for the site root to a fixed external URL."""
-    target_url = "https://www.bilibili.com/video/BV1SMH5zfEwe/?spm_id_from=333.337.search-card.all.click&vd_source=1f3b8eb28230105c578a443fa6481550"
-    return RedirectResponse(url=target_url)
-# ------------------------------------------------------------------------------
-# Claude Messages API endpoint
-# ------------------------------------------------------------------------------
-def _apply_claude_event(data, blocks, result):
-    """Fold one decoded stream event into the message being accumulated.
-
-    Args:
-        data: The decoded JSON payload of a single ``data:`` line.
-        blocks: Content blocks collected so far, positioned by event index;
-            mutated in place.
-        result: Dict with ``usage`` and ``stop_reason`` keys; mutated in place.
-
-    Raises:
-        KeyError, IndexError, TypeError, AttributeError: When the event does
-            not have the expected structure. The caller skips such events.
-    """
-    dtype = data.get("type")
-    idx = data.get("index", 0)
-    if dtype == "content_block_start":
-        # Grow the list so the block can be stored at its declared index.
-        while len(blocks) <= idx:
-            blocks.append(None)
-        blocks[idx] = data.get("content_block")
-    elif dtype == "content_block_delta":
-        block = blocks[idx]
-        if not block:
-            return
-        delta = data.get("delta", {})
-        delta_type = delta.get("type")
-        if delta_type == "text_delta":
-            block["text"] += delta.get("text", "")
-        elif delta_type == "input_json_delta":
-            # Tool input arrives as JSON fragments; keep the raw text until
-            # the block is closed and the whole document can be parsed.
-            block["partial_json"] = block.get("partial_json", "") + delta.get("partial_json", "")
-    elif dtype == "content_block_stop":
-        block = blocks[idx]
-        if block and block["type"] == "tool_use" and "partial_json" in block:
-            raw_input = block.pop("partial_json")
-            try:
-                block["input"] = json.loads(raw_input)
-            except (TypeError, ValueError):
-                # Unparseable tool input: leave the block's "input" untouched.
-                pass
-    elif dtype == "message_delta":
-        result["usage"] = data.get("usage", result["usage"])
-        result["stop_reason"] = data.get("delta", {}).get("stop_reason")
-@app.post("/v1/messages")
-async def claude_messages(req: ClaudeRequest, account: Dict[str, Any] = Depends(require_account)):
-    """Claude-compatible messages endpoint.
-
-    The request is converted to an Amazon Q payload and always sent upstream
-    in streaming mode. With ``req.stream`` set the converted events are
-    streamed back as ``text/event-stream``; otherwise the stream is consumed
-    here and folded into one message object.
-
-    Raises:
-        HTTPException: 400 when the request cannot be converted; 502 when no
-            access token is available or upstream returns no event stream.
-    """
-    # 1. Convert request
-    try:
-        aq_request = convert_claude_to_amazonq_request(req)
-    except Exception as e:
-        traceback.print_exc()
-        raise HTTPException(status_code=400, detail=f"Request conversion failed: {str(e)}")
-    # 2. Send upstream - always stream from upstream to get full event details
-    access = account.get("accessToken")
-    if not access:
-        raise HTTPException(status_code=502, detail="Access token unavailable")
-    # We call with stream=True to get the event iterator
-    _, _, tracker, event_iter = await send_chat_request(
-        access_token=access,
-        messages=[],
-        model=req.model,
-        stream=True,
-        client=GLOBAL_CLIENT,
-        raw_payload=aq_request
-    )
-    if not event_iter:
-        raise HTTPException(status_code=502, detail="No event stream returned")
-    # Input tokens are not estimated yet, so 0 is passed to the handler.
-    input_tokens = 0
-    handler = ClaudeStreamHandler(model=req.model, input_tokens=input_tokens)
-    async def event_generator():
-        # Translate each upstream event into SSE text, then emit the
-        # handler's closing events.
-        async for event_type, payload in event_iter:
-            async for sse in handler.handle_event(event_type, payload):
-                yield sse
-        async for sse in handler.finish():
-            yield sse
-    if req.stream:
-        return StreamingResponse(event_generator(), media_type="text/event-stream")
-    # Non-streaming: rebuild a single message from the SSE text.
-    blocks: List[Any] = []
-    result: Dict[str, Any] = {
-        "usage": {"input_tokens": 0, "output_tokens": 0},
-        "stop_reason": None,
-    }
-    async for sse_chunk in event_generator():
-        # NOTE(review): a chunk may carry an "event:" line ahead of its
-        # "data:" line, so every line is inspected — confirm against the
-        # exact text ClaudeStreamHandler yields.
-        for line in sse_chunk.splitlines():
-            if not line.startswith("data: "):
-                continue
-            data_str = line[6:].strip()
-            if data_str == "[DONE]":
-                continue
-            try:
-                _apply_claude_event(json.loads(data_str), blocks, result)
-            except (ValueError, KeyError, IndexError, TypeError, AttributeError) as exc:
-                # Best effort: one malformed event must not fail the request.
-                print(f"Skipping malformed stream event: {exc}")
-    return {
-        "id": f"msg_{uuid.uuid4()}",
-        "type": "message",
-        "role": "assistant",
-        "model": req.model,
-        "content": [c for c in blocks if c is not None],
-        "stop_reason": result["stop_reason"],
-        "stop_sequence": None,
-        "usage": result["usage"]
-    }
-# ------------------------------------------------------------------------------
-# Startup / Shutdown Events
-# ------------------------------------------------------------------------------
-# Reference to the background refresher task. asyncio.create_task() results
-# must be kept referenced, otherwise the task may be garbage collected before
-# it finishes.
-_TOKEN_REFRESHER_TASK: "Optional[asyncio.Task]" = None
-async def _startup():
-    """Create the shared HTTP client and start the token refresher task."""
-    global _TOKEN_REFRESHER_TASK
-    await _init_global_client()
-    _TOKEN_REFRESHER_TASK = asyncio.create_task(_global_token_refresher())
-async def _shutdown():
-    """Stop the token refresher task, then close the shared HTTP client."""
-    global _TOKEN_REFRESHER_TASK
-    task, _TOKEN_REFRESHER_TASK = _TOKEN_REFRESHER_TASK, None
-    if task is not None:
-        task.cancel()
-        # Wait for the cancellation to finish; return_exceptions=True keeps
-        # the task's CancelledError from propagating into shutdown.
-        await asyncio.gather(task, return_exceptions=True)
-    await _close_global_client()
-# Lifespan context manager wired to the actual startup/shutdown logic
-@asynccontextmanager
-async def lifespan(app_instance: FastAPI):
-    """Manage application lifecycle events.
-
-    Runs ``_startup()`` before the application starts serving and
-    ``_shutdown()`` once it stops.
-
-    Args:
-        app_instance: The application this lifespan is attached to (unused).
-    """
-    await _startup()
-    try:
-        yield
-    finally:
-        # Clean up even when an exception or cancellation is thrown into the
-        # context while the application is running.
-        await _shutdown()
-# Attach the lifespan handler to the app
-app.router.lifespan_context = lifespan
-# ------------------------------------------------------------------------------
-# Support for running this file directly
-# ------------------------------------------------------------------------------
-if __name__ == "__main__":
-    import uvicorn
-    # Serve on all interfaces; the port comes from $PORT and defaults to 8000.
-    listen_port = int(os.environ.get("PORT", "8000"))
-    uvicorn.run(app, host="0.0.0.0", port=listen_port, log_level="info")