devme committed on
Commit
7ea1434
·
verified ·
1 Parent(s): 10c515f

Upload 10 files

Browse files
Files changed (10) hide show
  1. Dockerfile +12 -0
  2. app.py +452 -0
  3. claude_converter.py +386 -0
  4. claude_parser.py +222 -0
  5. claude_stream.py +145 -0
  6. claude_types.py +20 -0
  7. config.py +40 -0
  8. replicate.py +199 -0
  9. requirements.txt +5 -0
  10. utils.py +53 -0
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ COPY requirements.txt .
6
+ RUN pip install --no-cache-dir -r requirements.txt
7
+
8
+ COPY . .
9
+
10
+ EXPOSE 8000
11
+
12
+ CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,452 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import traceback
4
+ import uuid
5
+ import time
6
+ import asyncio
7
+ import importlib.util
8
+ from pathlib import Path
9
+ from typing import Dict, Optional, List, Any, AsyncGenerator, Tuple
10
+
11
+ from contextlib import asynccontextmanager
12
+ from fastapi import FastAPI, Depends, HTTPException, Header
13
+ from fastapi.middleware.cors import CORSMiddleware
14
+ from fastapi.responses import StreamingResponse
15
+ from dotenv import load_dotenv
16
+ import httpx
17
+ import hashlib
18
+
19
+ from utils import get_proxies, create_proxy_mounts
20
+
21
+ # ------------------------------------------------------------------------------
22
+ # Bootstrap
23
+ # ------------------------------------------------------------------------------
24
+
25
+ BASE_DIR = Path(__file__).resolve().parent
26
+
27
+ load_dotenv(BASE_DIR / ".env")
28
+
29
+ app = FastAPI(title="v2 OpenAI-compatible Server (Amazon Q Backend)")
30
+
31
+ # CORS for simple testing in browser
32
+ app.add_middleware(
33
+ CORSMiddleware,
34
+ allow_origins=["*"],
35
+ allow_methods=["*"],
36
+ allow_headers=["*"],
37
+ )
38
+
39
+ # ------------------------------------------------------------------------------
40
+ # Dynamic import of replicate.py to avoid package __init__ needs
41
+ # ------------------------------------------------------------------------------
42
+
43
+ def _load_replicate_module():
44
+ mod_path = BASE_DIR / "replicate.py"
45
+ spec = importlib.util.spec_from_file_location("v2_replicate", str(mod_path))
46
+ module = importlib.util.module_from_spec(spec) # type: ignore[arg-type]
47
+ assert spec is not None and spec.loader is not None
48
+ spec.loader.exec_module(module) # type: ignore[attr-defined]
49
+ return module
50
+
51
+ _replicate = _load_replicate_module()
52
+ send_chat_request = _replicate.send_chat_request
53
+
54
+ # ------------------------------------------------------------------------------
55
+ # Dynamic import of Claude modules
56
+ # ------------------------------------------------------------------------------
57
+
58
+ def _load_claude_modules():
59
+ # claude_types
60
+ spec_types = importlib.util.spec_from_file_location("v2_claude_types", str(BASE_DIR / "claude_types.py"))
61
+ mod_types = importlib.util.module_from_spec(spec_types)
62
+ spec_types.loader.exec_module(mod_types)
63
+
64
+ # claude_converter
65
+ spec_conv = importlib.util.spec_from_file_location("v2_claude_converter", str(BASE_DIR / "claude_converter.py"))
66
+ mod_conv = importlib.util.module_from_spec(spec_conv)
67
+
68
+ import sys
69
+ sys.modules["v2.claude_types"] = mod_types
70
+
71
+ spec_conv.loader.exec_module(mod_conv)
72
+
73
+ # claude_stream
74
+ spec_stream = importlib.util.spec_from_file_location("v2_claude_stream", str(BASE_DIR / "claude_stream.py"))
75
+ mod_stream = importlib.util.module_from_spec(spec_stream)
76
+ spec_stream.loader.exec_module(mod_stream)
77
+
78
+ return mod_types, mod_conv, mod_stream
79
+
80
+ _claude_types, _claude_converter, _claude_stream = _load_claude_modules()
81
+ ClaudeRequest = _claude_types.ClaudeRequest
82
+ convert_claude_to_amazonq_request = _claude_converter.convert_claude_to_amazonq_request
83
+ ClaudeStreamHandler = _claude_stream.ClaudeStreamHandler
84
+
85
+ # ------------------------------------------------------------------------------
86
+ # Global HTTP Client
87
+ # ------------------------------------------------------------------------------
88
+
89
+ GLOBAL_CLIENT: Optional[httpx.AsyncClient] = None
90
+
91
+ async def _init_global_client():
92
+ global GLOBAL_CLIENT
93
+ mounts = create_proxy_mounts()
94
+ # Increased limits for high concurrency with streaming
95
+ # max_connections: 总连接数上限
96
+ # max_keepalive_connections: 保持活跃的连接数
97
+ # keepalive_expiry: 连接保持时间
98
+ limits = httpx.Limits(
99
+ max_keepalive_connections=60,
100
+ max_connections=60, # 提高到500以支持更高并发
101
+ keepalive_expiry=30.0 # 30秒后释放空闲连接
102
+ )
103
+ # 为流式响应设置更长的超时
104
+ timeout = httpx.Timeout(
105
+ connect=30.0, # 连接超时,TLS 握手需要足够时间
106
+ read=300.0, # 读取超时(流式响应需要更长时间)
107
+ write=30.0, # 写入超时
108
+ pool=10.0 # 从连接池获取连接的超时时间
109
+ )
110
+ # 只在有代理时才传递 mounts 参数
111
+ if mounts:
112
+ GLOBAL_CLIENT = httpx.AsyncClient(mounts=mounts, timeout=timeout, limits=limits)
113
+ else:
114
+ GLOBAL_CLIENT = httpx.AsyncClient(timeout=timeout, limits=limits)
115
+
116
+ async def _close_global_client():
117
+ global GLOBAL_CLIENT
118
+ if GLOBAL_CLIENT:
119
+ await GLOBAL_CLIENT.aclose()
120
+ GLOBAL_CLIENT = None
121
+
122
+ # ------------------------------------------------------------------------------
123
+ # Token 缓存和管理
124
+ # ------------------------------------------------------------------------------
125
+
126
+ # 内存缓存: {hash: {accessToken, refreshToken, clientId, clientSecret, lastRefresh}}
127
+ TOKEN_MAP: Dict[str, Dict[str, Any]] = {}
128
+
129
+ def _sha256(text: str) -> str:
130
+ """计算 SHA256 哈希"""
131
+ return hashlib.sha256(text.encode()).hexdigest()
132
+
133
+ def _parse_bearer_token(bearer_token: str) -> Tuple[str, str, str]:
134
+ """
135
+ 解析 Bearer token: clientId:clientSecret:refreshToken
136
+ 重要: refreshToken 中可能包含冒号,所以要正确处理
137
+ """
138
+ temp_array = bearer_token.split(":")
139
+ client_id = temp_array[0] if len(temp_array) > 0 else ""
140
+ client_secret = temp_array[1] if len(temp_array) > 1 else ""
141
+ refresh_token = ":".join(temp_array[2:]) if len(temp_array) > 2 else ""
142
+ return client_id, client_secret, refresh_token
143
+
144
+ async def _handle_token_refresh(client_id: str, client_secret: str, refresh_token: str) -> Optional[str]:
145
+ """刷新 access token"""
146
+ payload = {
147
+ "grantType": "refresh_token",
148
+ "clientId": client_id,
149
+ "clientSecret": client_secret,
150
+ "refreshToken": refresh_token,
151
+ }
152
+
153
+ try:
154
+ client = GLOBAL_CLIENT
155
+ if not client:
156
+ async with httpx.AsyncClient(timeout=60.0) as temp_client:
157
+ r = await temp_client.post(TOKEN_URL, headers=_oidc_headers(), json=payload)
158
+ r.raise_for_status()
159
+ data = r.json()
160
+ else:
161
+ r = await client.post(TOKEN_URL, headers=_oidc_headers(), json=payload)
162
+ r.raise_for_status()
163
+ data = r.json()
164
+
165
+ return data.get("accessToken")
166
+ except httpx.HTTPStatusError as e:
167
+ print(f"Token refresh HTTP error: {e.response.status_code} - {e.response.text}")
168
+ traceback.print_exc()
169
+ return None
170
+ except Exception as e:
171
+ print(f"Token refresh error: {e}")
172
+ traceback.print_exc()
173
+ return None
174
+
175
+ # ------------------------------------------------------------------------------
176
+ # 全局 Token 刷新器
177
+ # ------------------------------------------------------------------------------
178
+
179
+ async def _global_token_refresher():
180
+ """全局刷新器: 每 45 分钟刷新所有缓存的 token"""
181
+ while True:
182
+ try:
183
+ await asyncio.sleep(45 * 60) # 45 minutes
184
+ if not TOKEN_MAP:
185
+ continue
186
+ print(f"[Token Refresher] Starting token refresh cycle...")
187
+ refresh_count = 0
188
+ for hash_key, token_data in list(TOKEN_MAP.items()):
189
+ try:
190
+ new_token = await _handle_token_refresh(
191
+ token_data["clientId"],
192
+ token_data["clientSecret"],
193
+ token_data["refreshToken"]
194
+ )
195
+ if new_token:
196
+ TOKEN_MAP[hash_key]["accessToken"] = new_token
197
+ TOKEN_MAP[hash_key]["lastRefresh"] = time.time()
198
+ refresh_count += 1
199
+ else:
200
+ print(f"[Token Refresher] Failed to refresh token for hash: {hash_key[:8]}...")
201
+ except Exception as e:
202
+ print(f"[Token Refresher] Exception refreshing token: {e}")
203
+ traceback.print_exc()
204
+ print(f"[Token Refresher] Refreshed {refresh_count}/{len(TOKEN_MAP)} tokens")
205
+ except Exception:
206
+ traceback.print_exc()
207
+ await asyncio.sleep(60) # 发生异常时等待 1 分钟后重试
208
+
209
+ # ------------------------------------------------------------------------------
210
+ # Token refresh (OIDC)
211
+ # ------------------------------------------------------------------------------
212
+
213
+ OIDC_BASE = "https://oidc.us-east-1.amazonaws.com"
214
+ TOKEN_URL = f"{OIDC_BASE}/token"
215
+
216
+ def _oidc_headers() -> Dict[str, str]:
217
+ return {
218
+ "content-type": "application/json",
219
+ "user-agent": "aws-sdk-rust/1.3.9 os/windows lang/rust/1.87.0",
220
+ "x-amz-user-agent": "aws-sdk-rust/1.3.9 ua/2.1 api/ssooidc/1.88.0 os/windows lang/rust/1.87.0 m/E app/AmazonQ-For-CLI",
221
+ "amz-sdk-request": "attempt=1; max=3",
222
+ "amz-sdk-invocation-id": str(uuid.uuid4()),
223
+ }
224
+
225
+ # ------------------------------------------------------------------------------
226
+ # 认证中间件
227
+ # ------------------------------------------------------------------------------
228
+
229
+ async def auth_middleware(authorization: Optional[str] = Header(default=None)) -> Dict[str, Any]:
230
+ """
231
+ 认证中间件: 解析 Bearer token 并返回账户信息
232
+ Bearer token 格式: clientId:clientSecret:refreshToken
233
+ """
234
+ if not authorization or not authorization.startswith("Bearer "):
235
+ raise HTTPException(status_code=401, detail="Missing or invalid Authorization header")
236
+
237
+ bearer_token = authorization[7:] # 移除 "Bearer " 前缀
238
+ token_hash = _sha256(bearer_token)
239
+
240
+ # 检查缓存
241
+ if token_hash in TOKEN_MAP:
242
+ return {
243
+ "accessToken": TOKEN_MAP[token_hash]["accessToken"],
244
+ "clientId": TOKEN_MAP[token_hash]["clientId"],
245
+ "clientSecret": TOKEN_MAP[token_hash]["clientSecret"],
246
+ "refreshToken": TOKEN_MAP[token_hash]["refreshToken"],
247
+ }
248
+
249
+ # 解析 bearer token
250
+ client_id, client_secret, refresh_token = _parse_bearer_token(bearer_token)
251
+
252
+ if not client_id or not client_secret or not refresh_token:
253
+ raise HTTPException(status_code=401, detail="Invalid token format. Expected: clientId:clientSecret:refreshToken")
254
+
255
+ # 刷新 token
256
+ access_token = await _handle_token_refresh(client_id, client_secret, refresh_token)
257
+ if not access_token:
258
+ raise HTTPException(status_code=401, detail="Failed to refresh access token")
259
+
260
+ # 缓存
261
+ TOKEN_MAP[token_hash] = {
262
+ "accessToken": access_token,
263
+ "refreshToken": refresh_token,
264
+ "clientId": client_id,
265
+ "clientSecret": client_secret,
266
+ "lastRefresh": time.time()
267
+ }
268
+
269
+ return {
270
+ "accessToken": access_token,
271
+ "clientId": client_id,
272
+ "clientSecret": client_secret,
273
+ "refreshToken": refresh_token,
274
+ }
275
+
276
+ # ------------------------------------------------------------------------------
277
+ # Dependencies
278
+ # ------------------------------------------------------------------------------
279
+
280
+ async def require_account(authorization: Optional[str] = Header(default=None)) -> Dict[str, Any]:
281
+ return await auth_middleware(authorization)
282
+
283
+ # ------------------------------------------------------------------------------
284
+ # Claude Messages API endpoint
285
+ # ------------------------------------------------------------------------------
286
+
287
+ @app.post("/v1/messages")
288
+ async def claude_messages(req: ClaudeRequest, account: Dict[str, Any] = Depends(require_account)):
289
+ """
290
+ Claude-compatible messages endpoint.
291
+ """
292
+ # 1. Convert request
293
+ try:
294
+ aq_request = convert_claude_to_amazonq_request(req)
295
+ except Exception as e:
296
+ traceback.print_exc()
297
+ raise HTTPException(status_code=400, detail=f"Request conversion failed: {str(e)}")
298
+
299
+ # 2. Send upstream - always stream from upstream to get full event details
300
+ try:
301
+ access = account.get("accessToken")
302
+ if not access:
303
+ raise HTTPException(status_code=502, detail="Access token unavailable")
304
+
305
+ # We call with stream=True to get the event iterator
306
+ _, _, tracker, event_iter = await send_chat_request(
307
+ access_token=access,
308
+ messages=[],
309
+ model=req.model,
310
+ stream=True,
311
+ client=GLOBAL_CLIENT,
312
+ raw_payload=aq_request
313
+ )
314
+
315
+ if not event_iter:
316
+ raise HTTPException(status_code=502, detail="No event stream returned")
317
+
318
+ # Handler
319
+ # Estimate input tokens (simple count or 0)
320
+ # For now 0 or simple len
321
+ input_tokens = 0
322
+ handler = ClaudeStreamHandler(model=req.model, input_tokens=input_tokens)
323
+
324
+ async def event_generator():
325
+ try:
326
+ async for event_type, payload in event_iter:
327
+ async for sse in handler.handle_event(event_type, payload):
328
+ yield sse
329
+ async for sse in handler.finish():
330
+ yield sse
331
+ except GeneratorExit:
332
+ # Client disconnected
333
+ raise
334
+ except Exception:
335
+ raise
336
+
337
+ if req.stream:
338
+ return StreamingResponse(event_generator(), media_type="text/event-stream")
339
+ else:
340
+ # Accumulate for non-streaming
341
+ # This is a bit complex because we need to reconstruct the full response object
342
+ # For now, let's just support streaming as it's the main use case for Claude Code
343
+ # But to be nice, let's try to support non-streaming by consuming the generator
344
+
345
+ content_blocks = []
346
+ usage = {"input_tokens": 0, "output_tokens": 0}
347
+ stop_reason = None
348
+
349
+ # We need to parse the SSE strings back to objects... inefficient but works
350
+ # Or we could refactor handler to yield objects.
351
+ # For now, let's just raise error for non-streaming or implement basic text
352
+ # Claude Code uses streaming.
353
+
354
+ # Let's implement a basic accumulator from the SSE stream
355
+ final_content = []
356
+
357
+ async for sse_line in event_generator():
358
+ if sse_line.startswith("data: "):
359
+ data_str = sse_line[6:].strip()
360
+ if data_str == "[DONE]": continue
361
+ try:
362
+ data = json.loads(data_str)
363
+ dtype = data.get("type")
364
+ if dtype == "content_block_start":
365
+ idx = data.get("index", 0)
366
+ while len(final_content) <= idx:
367
+ final_content.append(None)
368
+ final_content[idx] = data.get("content_block")
369
+ elif dtype == "content_block_delta":
370
+ idx = data.get("index", 0)
371
+ delta = data.get("delta", {})
372
+ if final_content[idx]:
373
+ if delta.get("type") == "text_delta":
374
+ final_content[idx]["text"] += delta.get("text", "")
375
+ elif delta.get("type") == "input_json_delta":
376
+ # We need to accumulate partial json
377
+ # But wait, content_block for tool_use has 'input' as dict?
378
+ # No, in start it is empty.
379
+ # We need to track partial json string
380
+ if "partial_json" not in final_content[idx]:
381
+ final_content[idx]["partial_json"] = ""
382
+ final_content[idx]["partial_json"] += delta.get("partial_json", "")
383
+ elif dtype == "content_block_stop":
384
+ idx = data.get("index", 0)
385
+ # If tool use, parse json
386
+ if final_content[idx] and final_content[idx]["type"] == "tool_use":
387
+ if "partial_json" in final_content[idx]:
388
+ try:
389
+ final_content[idx]["input"] = json.loads(final_content[idx]["partial_json"])
390
+ except:
391
+ pass
392
+ del final_content[idx]["partial_json"]
393
+ elif dtype == "message_delta":
394
+ usage = data.get("usage", usage)
395
+ stop_reason = data.get("delta", {}).get("stop_reason")
396
+ except:
397
+ pass
398
+
399
+ return {
400
+ "id": f"msg_{uuid.uuid4()}",
401
+ "type": "message",
402
+ "role": "assistant",
403
+ "model": req.model,
404
+ "content": [c for c in final_content if c is not None],
405
+ "stop_reason": stop_reason,
406
+ "stop_sequence": None,
407
+ "usage": usage
408
+ }
409
+
410
+ except Exception as e:
411
+ raise
412
+
413
+ # ------------------------------------------------------------------------------
414
+ # Startup / Shutdown Events
415
+ # ------------------------------------------------------------------------------
416
+
417
+ async def _startup():
418
+ """初始化全局客户端和启动后台任务"""
419
+ await _init_global_client()
420
+ asyncio.create_task(_global_token_refresher())
421
+
422
+ async def _shutdown():
423
+ """清理资源"""
424
+ await _close_global_client()
425
+
426
+ # 更新 lifespan 上下文管理器使用实际的启动/关闭逻辑
427
+ @asynccontextmanager
428
+ async def lifespan(app_instance: FastAPI):
429
+ """
430
+ 管理应用生命周期事件
431
+ 启动时初始化数据库和后台任务,关闭时清理资源
432
+ """
433
+ await _startup()
434
+ yield
435
+ await _shutdown()
436
+
437
+ # 将 lifespan 设置到 app
438
+ app.router.lifespan_context = lifespan
439
+
440
+ # ------------------------------------------------------------------------------
441
+ # 直接运行支持
442
+ # ------------------------------------------------------------------------------
443
+
444
+ if __name__ == "__main__":
445
+ import uvicorn
446
+ port = int(os.getenv("PORT", "8000"))
447
+ uvicorn.run(
448
+ app,
449
+ host="0.0.0.0",
450
+ port=port,
451
+ log_level="info"
452
+ )
claude_converter.py ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import uuid
3
+ from datetime import datetime
4
+ from typing import List, Dict, Any, Optional, Union
5
+
6
+ try:
7
+ from .claude_types import ClaudeRequest, ClaudeMessage, ClaudeTool
8
+ except ImportError:
9
+ # Fallback for dynamic loading where relative import might fail
10
+ # We assume claude_types is available in sys.modules or we can import it directly if in same dir
11
+ import sys
12
+ if "v2.claude_types" in sys.modules:
13
+ from v2.claude_types import ClaudeRequest, ClaudeMessage, ClaudeTool
14
+ else:
15
+ # Try absolute import assuming v2 is in path or current dir
16
+ try:
17
+ from claude_types import ClaudeRequest, ClaudeMessage, ClaudeTool
18
+ except ImportError:
19
+ # Last resort: if loaded via importlib in app.py, we might need to rely on app.py injecting it
20
+ # But app.py loads this module.
21
+ pass
22
+
23
+ def get_current_timestamp() -> str:
24
+ """Get current timestamp in Amazon Q format."""
25
+ now = datetime.now().astimezone()
26
+ weekday = now.strftime("%A")
27
+ iso_time = now.isoformat(timespec='milliseconds')
28
+ return f"{weekday}, {iso_time}"
29
+
30
+ def map_model_name(claude_model: str) -> str:
31
+ """Map Claude model name to Amazon Q model ID."""
32
+ model_lower = claude_model.lower()
33
+ if model_lower.startswith("claude-sonnet-4.5") or model_lower.startswith("claude-sonnet-4-5"):
34
+ return "claude-sonnet-4.5"
35
+ return "claude-sonnet-4"
36
+
37
+ def extract_text_from_content(content: Union[str, List[Dict[str, Any]]]) -> str:
38
+ """Extract text from Claude content."""
39
+ if isinstance(content, str):
40
+ return content
41
+ elif isinstance(content, list):
42
+ parts = []
43
+ for block in content:
44
+ if isinstance(block, dict):
45
+ if block.get("type") == "text":
46
+ parts.append(block.get("text", ""))
47
+ return "\n".join(parts)
48
+ return ""
49
+
50
+ def process_tool_result_block(block: Dict[str, Any], tool_results: List[Dict[str, Any]]) -> None:
51
+ """
52
+ 处理单个 tool_result 块,提取内容并添加到 tool_results 列表
53
+
54
+ Args:
55
+ block: tool_result 类型的内容块
56
+ tool_results: 用于存储处理结果的列表
57
+ """
58
+ tool_use_id = block.get("tool_use_id")
59
+ raw_c = block.get("content", [])
60
+
61
+ aq_content = []
62
+ if isinstance(raw_c, str):
63
+ aq_content = [{"text": raw_c}]
64
+ elif isinstance(raw_c, list):
65
+ for item in raw_c:
66
+ if isinstance(item, dict):
67
+ if item.get("type") == "text":
68
+ aq_content.append({"text": item.get("text", "")})
69
+ elif "text" in item:
70
+ aq_content.append({"text": item["text"]})
71
+ elif isinstance(item, str):
72
+ aq_content.append({"text": item})
73
+
74
+ if not any(i.get("text", "").strip() for i in aq_content):
75
+ aq_content = [{"text": "Tool use was cancelled by the user"}]
76
+
77
+ # Merge if exists
78
+ existing = next((r for r in tool_results if r["toolUseId"] == tool_use_id), None)
79
+ if existing:
80
+ existing["content"].extend(aq_content)
81
+ else:
82
+ tool_results.append({
83
+ "toolUseId": tool_use_id,
84
+ "content": aq_content,
85
+ "status": block.get("status", "success")
86
+ })
87
+
88
+ def extract_images_from_content(content: Union[str, List[Dict[str, Any]]]) -> Optional[List[Dict[str, Any]]]:
89
+ """Extract images from Claude content and convert to Amazon Q format."""
90
+ if not isinstance(content, list):
91
+ return None
92
+
93
+ images = []
94
+ for block in content:
95
+ if isinstance(block, dict) and block.get("type") == "image":
96
+ source = block.get("source", {})
97
+ if source.get("type") == "base64":
98
+ media_type = source.get("media_type", "image/png")
99
+ fmt = media_type.split("/")[-1] if "/" in media_type else "png"
100
+ images.append({
101
+ "format": fmt,
102
+ "source": {
103
+ "bytes": source.get("data", "")
104
+ }
105
+ })
106
+ return images if images else None
107
+
108
+ def convert_tool(tool: ClaudeTool) -> Dict[str, Any]:
109
+ """Convert Claude tool to Amazon Q tool."""
110
+ desc = tool.description or ""
111
+ if len(desc) > 10240:
112
+ desc = desc[:10100] + "\n\n...(Full description provided in TOOL DOCUMENTATION section)"
113
+
114
+ return {
115
+ "toolSpecification": {
116
+ "name": tool.name,
117
+ "description": desc,
118
+ "inputSchema": {"json": tool.input_schema}
119
+ }
120
+ }
121
+
122
+ def merge_user_messages(messages: List[Dict[str, Any]]) -> Dict[str, Any]:
123
+ """Merge consecutive user messages, keeping only the last 2 messages' images."""
124
+ if not messages:
125
+ return {}
126
+
127
+ all_contents = []
128
+ base_context = None
129
+ base_origin = None
130
+ base_model = None
131
+ all_images = []
132
+
133
+ for msg in messages:
134
+ content = msg.get("content", "")
135
+ if base_context is None:
136
+ base_context = msg.get("userInputMessageContext", {})
137
+ if base_origin is None:
138
+ base_origin = msg.get("origin", "CLI")
139
+ if base_model is None:
140
+ base_model = msg.get("modelId")
141
+
142
+ if content:
143
+ all_contents.append(content)
144
+
145
+ # Collect images from each message
146
+ msg_images = msg.get("images")
147
+ if msg_images:
148
+ all_images.append(msg_images)
149
+
150
+ result = {
151
+ "content": "\n\n".join(all_contents),
152
+ "userInputMessageContext": base_context or {},
153
+ "origin": base_origin or "CLI",
154
+ "modelId": base_model
155
+ }
156
+
157
+ # Only keep images from the last 2 messages that have images
158
+ if all_images:
159
+ kept_images = []
160
+ for img_list in all_images[-2:]: # Take last 2 messages' images
161
+ kept_images.extend(img_list)
162
+ if kept_images:
163
+ result["images"] = kept_images
164
+
165
+ return result
166
+
167
+ def process_history(messages: List[ClaudeMessage]) -> List[Dict[str, Any]]:
168
+ """Process history messages to match Amazon Q format (alternating user/assistant)."""
169
+ history = []
170
+ seen_tool_use_ids = set()
171
+
172
+ raw_history = []
173
+
174
+ # First pass: convert individual messages
175
+ for msg in messages:
176
+ if msg.role == "user":
177
+ content = msg.content
178
+ text_content = ""
179
+ tool_results = None
180
+ images = extract_images_from_content(content)
181
+
182
+ if isinstance(content, list):
183
+ text_parts = []
184
+ for block in content:
185
+ if isinstance(block, dict):
186
+ btype = block.get("type")
187
+ if btype == "text":
188
+ text_parts.append(block.get("text", ""))
189
+ elif btype == "tool_result":
190
+ if tool_results is None:
191
+ tool_results = []
192
+ process_tool_result_block(block, tool_results)
193
+ text_content = "\n".join(text_parts)
194
+ else:
195
+ text_content = extract_text_from_content(content)
196
+
197
+ user_ctx = {
198
+ "envState": {
199
+ "operatingSystem": "macos",
200
+ "currentWorkingDirectory": "/"
201
+ }
202
+ }
203
+ if tool_results:
204
+ user_ctx["toolResults"] = tool_results
205
+
206
+ u_msg = {
207
+ "content": text_content,
208
+ "userInputMessageContext": user_ctx,
209
+ "origin": "CLI"
210
+ }
211
+ if images:
212
+ u_msg["images"] = images
213
+
214
+ raw_history.append({"userInputMessage": u_msg})
215
+
216
+ elif msg.role == "assistant":
217
+ content = msg.content
218
+ text_content = extract_text_from_content(content)
219
+
220
+ entry = {
221
+ "assistantResponseMessage": {
222
+ "messageId": str(uuid.uuid4()),
223
+ "content": text_content
224
+ }
225
+ }
226
+
227
+ if isinstance(content, list):
228
+ tool_uses = []
229
+ for block in content:
230
+ if isinstance(block, dict) and block.get("type") == "tool_use":
231
+ tid = block.get("id")
232
+ if tid and tid not in seen_tool_use_ids:
233
+ seen_tool_use_ids.add(tid)
234
+ tool_uses.append({
235
+ "toolUseId": tid,
236
+ "name": block.get("name"),
237
+ "input": block.get("input", {})
238
+ })
239
+ if tool_uses:
240
+ entry["assistantResponseMessage"]["toolUses"] = tool_uses
241
+
242
+ raw_history.append(entry)
243
+
244
+ # Second pass: merge consecutive user messages
245
+ pending_user_msgs = []
246
+ for item in raw_history:
247
+ if "userInputMessage" in item:
248
+ pending_user_msgs.append(item["userInputMessage"])
249
+ elif "assistantResponseMessage" in item:
250
+ if pending_user_msgs:
251
+ merged = merge_user_messages(pending_user_msgs)
252
+ history.append({"userInputMessage": merged})
253
+ pending_user_msgs = []
254
+ history.append(item)
255
+
256
+ if pending_user_msgs:
257
+ merged = merge_user_messages(pending_user_msgs)
258
+ history.append({"userInputMessage": merged})
259
+
260
+ return history
261
+
262
+ def convert_claude_to_amazonq_request(req: ClaudeRequest, conversation_id: Optional[str] = None) -> Dict[str, Any]:
263
+ """Convert ClaudeRequest to Amazon Q request body."""
264
+ if conversation_id is None:
265
+ conversation_id = str(uuid.uuid4())
266
+
267
+ # 1. Tools
268
+ aq_tools = []
269
+ long_desc_tools = []
270
+ if req.tools:
271
+ for t in req.tools:
272
+ if t.description and len(t.description) > 10240:
273
+ long_desc_tools.append({"name": t.name, "full_description": t.description})
274
+ aq_tools.append(convert_tool(t))
275
+
276
+ # 2. Current Message (last user message)
277
+ last_msg = req.messages[-1] if req.messages else None
278
+ prompt_content = ""
279
+ tool_results = None
280
+ has_tool_result = False
281
+ images = None
282
+
283
+ if last_msg and last_msg.role == "user":
284
+ content = last_msg.content
285
+ images = extract_images_from_content(content)
286
+
287
+ if isinstance(content, list):
288
+ text_parts = []
289
+ for block in content:
290
+ if isinstance(block, dict):
291
+ btype = block.get("type")
292
+ if btype == "text":
293
+ text_parts.append(block.get("text", ""))
294
+ elif btype == "tool_result":
295
+ has_tool_result = True
296
+ if tool_results is None:
297
+ tool_results = []
298
+ process_tool_result_block(block, tool_results)
299
+ prompt_content = "\n".join(text_parts)
300
+ else:
301
+ prompt_content = extract_text_from_content(content)
302
+
303
+ # 3. Context
304
+ user_ctx = {
305
+ "envState": {
306
+ "operatingSystem": "macos",
307
+ "currentWorkingDirectory": "/"
308
+ }
309
+ }
310
+ if aq_tools:
311
+ user_ctx["tools"] = aq_tools
312
+ if tool_results:
313
+ user_ctx["toolResults"] = tool_results
314
+
315
+ # 4. Format Content
316
+ formatted_content = ""
317
+ if has_tool_result and not prompt_content:
318
+ formatted_content = ""
319
+ else:
320
+ formatted_content = (
321
+ "--- CONTEXT ENTRY BEGIN ---\n"
322
+ f"Current time: {get_current_timestamp()}\n"
323
+ "--- CONTEXT ENTRY END ---\n\n"
324
+ "--- USER MESSAGE BEGIN ---\n"
325
+ f"{prompt_content}\n"
326
+ "--- USER MESSAGE END ---"
327
+ )
328
+
329
+ if long_desc_tools:
330
+ docs = []
331
+ for info in long_desc_tools:
332
+ docs.append(f"Tool: {info['name']}\nFull Description:\n{info['full_description']}\n")
333
+ formatted_content = (
334
+ "--- TOOL DOCUMENTATION BEGIN ---\n"
335
+ f"{''.join(docs)}"
336
+ "--- TOOL DOCUMENTATION END ---\n\n"
337
+ f"{formatted_content}"
338
+ )
339
+
340
+ if req.system and formatted_content:
341
+ sys_text = ""
342
+ if isinstance(req.system, str):
343
+ sys_text = req.system
344
+ elif isinstance(req.system, list):
345
+ parts = []
346
+ for b in req.system:
347
+ if isinstance(b, dict) and b.get("type") == "text":
348
+ parts.append(b.get("text", ""))
349
+ sys_text = "\n".join(parts)
350
+
351
+ if sys_text:
352
+ formatted_content = (
353
+ "--- SYSTEM PROMPT BEGIN ---\n"
354
+ f"{sys_text}\n"
355
+ "--- SYSTEM PROMPT END ---\n\n"
356
+ f"{formatted_content}"
357
+ )
358
+
359
+ # 5. Model
360
+ model_id = map_model_name(req.model)
361
+
362
+ # 6. User Input Message
363
+ user_input_msg = {
364
+ "content": formatted_content,
365
+ "userInputMessageContext": user_ctx,
366
+ "origin": "CLI",
367
+ "modelId": model_id
368
+ }
369
+ if images:
370
+ user_input_msg["images"] = images
371
+
372
+ # 7. History
373
+ history_msgs = req.messages[:-1] if len(req.messages) > 1 else []
374
+ aq_history = process_history(history_msgs)
375
+
376
+ # 8. Final Body
377
+ return {
378
+ "conversationState": {
379
+ "conversationId": conversation_id,
380
+ "history": aq_history,
381
+ "currentMessage": {
382
+ "userInputMessage": user_input_msg
383
+ },
384
+ "chatTriggerType": "MANUAL"
385
+ }
386
+ }
claude_parser.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import struct
3
+ import logging
4
+ from typing import Optional, Dict, Any, AsyncIterator
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
class EventStreamParser:
    """AWS Event Stream binary format parser (v2 style).

    Frame layout (all big-endian): 4-byte total length, 4-byte headers
    length, 4-byte prelude CRC, headers, payload, 4-byte message CRC.
    CRCs are NOT verified here — frames are trusted from the upstream.
    """

    @staticmethod
    def parse_headers(headers_data: bytes) -> Dict[str, str]:
        """Parse event stream headers.

        Each header is: 1-byte name length, name, 1-byte value type,
        2-byte value length (>H), value. Value type 7 is a UTF-8 string;
        any other type is kept as raw bytes. Truncated data ends parsing
        early and returns whatever was decoded so far.
        """
        headers = {}
        offset = 0

        while offset < len(headers_data):
            name_length = headers_data[offset]
            offset += 1

            if offset + name_length > len(headers_data):
                break
            name = headers_data[offset:offset + name_length].decode('utf-8')
            offset += name_length

            if offset >= len(headers_data):
                break
            value_type = headers_data[offset]
            offset += 1

            if offset + 2 > len(headers_data):
                break
            value_length = struct.unpack('>H', headers_data[offset:offset + 2])[0]
            offset += 2

            if offset + value_length > len(headers_data):
                break

            if value_type == 7:
                # Type 7 is the string header type in the event-stream encoding.
                value = headers_data[offset:offset + value_length].decode('utf-8')
            else:
                value = headers_data[offset:offset + value_length]

            offset += value_length
            headers[name] = value

        return headers

    @staticmethod
    def parse_message(data: bytes) -> Optional[Dict[str, Any]]:
        """Parse a single Event Stream message.

        Returns a dict with 'headers', 'payload' (JSON-decoded when
        possible, raw bytes otherwise) and 'total_length', or None when
        the buffer is too short or malformed.
        """
        try:
            if len(data) < 16:
                # Minimum frame: 12-byte prelude + 4-byte message CRC.
                return None

            total_length = struct.unpack('>I', data[0:4])[0]
            headers_length = struct.unpack('>I', data[4:8])[0]

            if len(data) < total_length:
                logger.warning(f"Incomplete message: expected {total_length} bytes, got {len(data)}")
                return None

            # Headers start right after the 12-byte prelude (lengths + CRC).
            headers_data = data[12:12 + headers_length]
            headers = EventStreamParser.parse_headers(headers_data)

            payload_start = 12 + headers_length
            payload_end = total_length - 4  # strip trailing message CRC
            payload_data = data[payload_start:payload_end]

            payload = None
            if payload_data:
                try:
                    payload = json.loads(payload_data.decode('utf-8'))
                except (json.JSONDecodeError, UnicodeDecodeError):
                    # Non-JSON payloads are surfaced as raw bytes.
                    payload = payload_data

            return {
                'headers': headers,
                'payload': payload,
                'total_length': total_length
            }

        except Exception as e:
            logger.error(f"Failed to parse message: {e}", exc_info=True)
            return None

    @staticmethod
    async def parse_stream(byte_stream: AsyncIterator[bytes]) -> AsyncIterator[Dict[str, Any]]:
        """Parse a byte stream and yield complete event messages.

        Buffers incoming chunks and emits one parsed message per complete
        frame; partial frames are kept until more bytes arrive.
        """
        buffer = bytearray()

        async for chunk in byte_stream:
            buffer.extend(chunk)

            while len(buffer) >= 12:
                try:
                    total_length = struct.unpack('>I', buffer[0:4])[0]
                except struct.error:
                    break

                # BUGFIX: a corrupt frame declaring total_length < 16 would
                # never consume any bytes from the buffer, spinning this
                # loop forever. Drop the unusable buffer and stop parsing.
                if total_length < 16:
                    logger.error(f"Corrupt frame: declared length {total_length} < 16; dropping buffer")
                    buffer.clear()
                    break

                if len(buffer) < total_length:
                    break

                message_data = bytes(buffer[:total_length])
                buffer = buffer[total_length:]

                message = EventStreamParser.parse_message(message_data)
                if message:
                    yield message
112
+
113
def extract_event_info(message: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Normalize a parsed Event Stream message into a flat event record.

    Header names may appear with or without the leading ':' depending on
    the producer, so both spellings are accepted for each field.
    """
    hdrs = message.get('headers', {})

    def pick(field: str):
        # Prefer the ':'-prefixed spelling, fall back to the bare name.
        return hdrs.get(f':{field}') or hdrs.get(field)

    return {
        'event_type': pick('event-type'),
        'content_type': pick('content-type'),
        'message_type': pick('message-type'),
        'payload': message.get('payload'),
    }
128
+
129
+ def _sse_format(event_type: str, data: Dict[str, Any]) -> str:
130
+ """Format SSE event."""
131
+ json_data = json.dumps(data, ensure_ascii=False)
132
+ return f"event: {event_type}\ndata: {json_data}\n\n"
133
+
134
+ def build_message_start(conversation_id: str, model: str = "claude-sonnet-4.5", input_tokens: int = 0) -> str:
135
+ """Build message_start SSE event."""
136
+ data = {
137
+ "type": "message_start",
138
+ "message": {
139
+ "id": conversation_id,
140
+ "type": "message",
141
+ "role": "assistant",
142
+ "content": [],
143
+ "model": model,
144
+ "stop_reason": None,
145
+ "stop_sequence": None,
146
+ "usage": {"input_tokens": input_tokens, "output_tokens": 0}
147
+ }
148
+ }
149
+ return _sse_format("message_start", data)
150
+
151
+ def build_content_block_start(index: int, block_type: str = "text") -> str:
152
+ """Build content_block_start SSE event."""
153
+ data = {
154
+ "type": "content_block_start",
155
+ "index": index,
156
+ "content_block": {"type": block_type, "text": ""} if block_type == "text" else {"type": block_type}
157
+ }
158
+ return _sse_format("content_block_start", data)
159
+
160
+ def build_content_block_delta(index: int, text: str) -> str:
161
+ """Build content_block_delta SSE event (text)."""
162
+ data = {
163
+ "type": "content_block_delta",
164
+ "index": index,
165
+ "delta": {"type": "text_delta", "text": text}
166
+ }
167
+ return _sse_format("content_block_delta", data)
168
+
169
+ def build_content_block_stop(index: int) -> str:
170
+ """Build content_block_stop SSE event."""
171
+ data = {
172
+ "type": "content_block_stop",
173
+ "index": index
174
+ }
175
+ return _sse_format("content_block_stop", data)
176
+
177
+ def build_ping() -> str:
178
+ """Build ping SSE event."""
179
+ data = {"type": "ping"}
180
+ return _sse_format("ping", data)
181
+
182
+ def build_message_stop(input_tokens: int, output_tokens: int, stop_reason: Optional[str] = None) -> str:
183
+ """Build message_delta and message_stop SSE events."""
184
+ delta_data = {
185
+ "type": "message_delta",
186
+ "delta": {"stop_reason": stop_reason or "end_turn", "stop_sequence": None},
187
+ "usage": {"output_tokens": output_tokens}
188
+ }
189
+ delta_event = _sse_format("message_delta", delta_data)
190
+
191
+ stop_data = {
192
+ "type": "message_stop"
193
+ }
194
+ stop_event = _sse_format("message_stop", stop_data)
195
+
196
+ return delta_event + stop_event
197
+
198
+ def build_tool_use_start(index: int, tool_use_id: str, tool_name: str) -> str:
199
+ """Build tool_use content_block_start SSE event."""
200
+ data = {
201
+ "type": "content_block_start",
202
+ "index": index,
203
+ "content_block": {
204
+ "type": "tool_use",
205
+ "id": tool_use_id,
206
+ "name": tool_name,
207
+ "input": {}
208
+ }
209
+ }
210
+ return _sse_format("content_block_start", data)
211
+
212
+ def build_tool_use_input_delta(index: int, input_json_delta: str) -> str:
213
+ """Build tool_use input_json_delta SSE event."""
214
+ data = {
215
+ "type": "content_block_delta",
216
+ "index": index,
217
+ "delta": {
218
+ "type": "input_json_delta",
219
+ "partial_json": input_json_delta
220
+ }
221
+ }
222
+ return _sse_format("content_block_delta", data)
claude_stream.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ from pathlib import Path
4
+ from typing import AsyncGenerator, Optional, Dict, Any, List, Set
5
+
6
+ from utils import load_module
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+ _parser = load_module("v2_claude_parser", "claude_parser.py")
11
+ build_message_start = _parser.build_message_start
12
+ build_content_block_start = _parser.build_content_block_start
13
+ build_content_block_delta = _parser.build_content_block_delta
14
+ build_content_block_stop = _parser.build_content_block_stop
15
+ build_ping = _parser.build_ping
16
+ build_message_stop = _parser.build_message_stop
17
+ build_tool_use_start = _parser.build_tool_use_start
18
+ build_tool_use_input_delta = _parser.build_tool_use_input_delta
19
+
20
class ClaudeStreamHandler:
    """Translates Amazon Q streaming events into Claude-style SSE events.

    Maintains a small state machine over content blocks: text blocks fed by
    assistantResponseEvent and tool_use blocks fed by toolUseEvent, emitted
    as message_start / content_block_* / message_delta / message_stop.
    """

    def __init__(self, model: str, input_tokens: int = 0):
        self.model = model
        self.input_tokens = input_tokens
        self.response_buffer: List[str] = []          # accumulated assistant text
        self.content_block_index: int = -1            # index of the current SSE content block
        self.content_block_started: bool = False      # a block has been opened and not closed
        self.content_block_start_sent: bool = False   # content_block_start emitted for current block
        self.content_block_stop_sent: bool = False    # content_block_stop emitted for current block
        self.message_start_sent: bool = False
        self.conversation_id: Optional[str] = None

        # Tool use state
        self.current_tool_use: Optional[Dict[str, Any]] = None
        self.tool_input_buffer: List[str] = []
        self.tool_use_id: Optional[str] = None
        self.tool_name: Optional[str] = None
        self._processed_tool_use_ids: Set[str] = set()
        self.all_tool_inputs: List[str] = []          # full input JSON per finished tool call

    def _mark_block_closed(self) -> None:
        """Reset open-block flags after emitting content_block_stop.

        BUGFIX: previously content_block_start_sent stayed True after a block
        was closed, so text arriving after a tool_use block was emitted
        without a new content_block_start and at an already-stopped index.
        """
        self.content_block_stop_sent = True
        self.content_block_start_sent = False
        self.content_block_started = False

    async def handle_event(self, event_type: str, payload: Dict[str, Any]) -> AsyncGenerator[str, None]:
        """Process a single Amazon Q event and yield Claude SSE event strings."""

        # 1. Message Start (initial-response)
        if event_type == "initial-response":
            if not self.message_start_sent:
                conv_id = payload.get('conversationId', self.conversation_id or 'unknown')
                self.conversation_id = conv_id
                yield build_message_start(conv_id, self.model, self.input_tokens)
                self.message_start_sent = True
                yield build_ping()

        # 2. Content Block Delta (assistantResponseEvent)
        elif event_type == "assistantResponseEvent":
            content = payload.get("content", "")

            # Close any open tool use block before emitting text
            if self.current_tool_use and not self.content_block_stop_sent:
                yield build_content_block_stop(self.content_block_index)
                self._mark_block_closed()
                self.current_tool_use = None

            # Start a fresh text block if none is currently open
            if not self.content_block_start_sent:
                self.content_block_index += 1
                yield build_content_block_start(self.content_block_index, "text")
                self.content_block_start_sent = True
                self.content_block_stop_sent = False
                self.content_block_started = True

            # Send delta
            if content:
                self.response_buffer.append(content)
                yield build_content_block_delta(self.content_block_index, content)

        # 3. Tool Use (toolUseEvent)
        elif event_type == "toolUseEvent":
            tool_use_id = payload.get("toolUseId")
            tool_name = payload.get("name")
            tool_input = payload.get("input", {})
            is_stop = payload.get("stop", False)

            # Start new tool use
            if tool_use_id and tool_name and not self.current_tool_use:
                # Close a preceding open text block first
                if self.content_block_start_sent and not self.content_block_stop_sent:
                    yield build_content_block_stop(self.content_block_index)
                    self._mark_block_closed()

                self._processed_tool_use_ids.add(tool_use_id)
                self.content_block_index += 1
                yield build_tool_use_start(self.content_block_index, tool_use_id, tool_name)

                self.content_block_started = True
                self.current_tool_use = {"toolUseId": tool_use_id, "name": tool_name}
                self.tool_use_id = tool_use_id
                self.tool_name = tool_name
                self.tool_input_buffer = []
                self.content_block_stop_sent = False
                self.content_block_start_sent = True

            # Accumulate input fragments (string fragments pass through as-is,
            # dict fragments are serialized to JSON)
            if self.current_tool_use and tool_input:
                if isinstance(tool_input, str):
                    fragment = tool_input
                else:
                    fragment = json.dumps(tool_input, ensure_ascii=False)
                self.tool_input_buffer.append(fragment)
                yield build_tool_use_input_delta(self.content_block_index, fragment)

            # Stop tool use
            if is_stop and self.current_tool_use:
                self.all_tool_inputs.append("".join(self.tool_input_buffer))
                yield build_content_block_stop(self.content_block_index)
                self._mark_block_closed()
                self.current_tool_use = None
                self.tool_use_id = None
                self.tool_name = None
                self.tool_input_buffer = []

        # 4. Assistant Response End (assistantResponseEnd)
        elif event_type == "assistantResponseEnd":
            if self.content_block_started and not self.content_block_stop_sent:
                yield build_content_block_stop(self.content_block_index)
                self._mark_block_closed()

    async def finish(self) -> AsyncGenerator[str, None]:
        """Emit the closing message_delta/message_stop after the upstream ends."""
        # Ensure the last block is closed
        if self.content_block_started and not self.content_block_stop_sent:
            yield build_content_block_stop(self.content_block_index)
            self._mark_block_closed()

        full_text = "".join(self.response_buffer)
        full_tool_input = "".join(self.all_tool_inputs)
        # Rough token estimate: ~4 characters per token, minimum 1.
        output_tokens = max(1, (len(full_text) + len(full_tool_input)) // 4)

        yield build_message_stop(self.input_tokens, output_tokens, "end_turn")
claude_types.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional, Union, Dict, Any, Literal
2
+ from pydantic import BaseModel
3
+
4
class ClaudeMessage(BaseModel):
    """One turn in a Claude conversation."""
    # Message author role (e.g. "user"/"assistant") — not constrained by this model.
    role: str
    # Either plain text or a list of Claude content-block dicts
    # (text / image / tool_use / tool_result shapes).
    content: Union[str, List[Dict[str, Any]]]
7
+
8
class ClaudeTool(BaseModel):
    """Tool definition as sent in a Claude request's `tools` array."""
    name: str
    # Optional human-readable description; defaults to empty string.
    description: Optional[str] = ""
    # JSON Schema describing the tool's input parameters.
    input_schema: Dict[str, Any]
12
+
13
class ClaudeRequest(BaseModel):
    """Body of a Claude /v1/messages-style request."""
    model: str
    messages: List[ClaudeMessage]
    # Generation cap; defaults to 8192 when the client omits it.
    max_tokens: int = 8192
    temperature: Optional[float] = None
    tools: Optional[List[ClaudeTool]] = None
    # True -> server-sent-events streaming response.
    stream: bool = False
    # System prompt: plain string or a list of {"type": "text", ...} blocks.
    system: Optional[Union[str, List[Dict[str, Any]]]] = None
config.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Amazon Q API 配置文件
3
+ 包含请求模板和默认配置
4
+ """
5
+
6
+ # Amazon Q API 端点
7
+ AMAZONQ_API_URL = "https://q.us-east-1.amazonaws.com/"
8
+
9
+ # 默认请求头模板
10
+ DEFAULT_HEADERS = {
11
+ "content-type": "application/x-amz-json-1.0",
12
+ "x-amz-target": "AmazonCodeWhispererStreamingService.GenerateAssistantResponse",
13
+ "user-agent": "aws-sdk-rust/1.3.9 ua/2.1 api/codewhispererstreaming/0.1.11582 os/windows lang/rust/1.87.0 md/appVersion-1.19.4 app/AmazonQ-For-CLI",
14
+ "x-amz-user-agent": "aws-sdk-rust/1.3.9 ua/2.1 api/codewhispererstreaming/0.1.11582 os/windows lang/rust/1.87.0 m/F app/AmazonQ-For-CLI",
15
+ "x-amzn-codewhisperer-optout": "false",
16
+ "amz-sdk-request": "attempt=1; max=3"
17
+ }
18
+
19
+ # 默认请求体模板(仅作为结构参考,实际使用时会被 raw_payload 替换)
20
+ DEFAULT_BODY_TEMPLATE = {
21
+ "conversationState": {
22
+ "conversationId": "", # 运行时动态生成
23
+ "history": [],
24
+ "currentMessage": {
25
+ "userInputMessage": {
26
+ "content": "",
27
+ "userInputMessageContext": {
28
+ "envState": {
29
+ "operatingSystem": "windows",
30
+ "currentWorkingDirectory": ""
31
+ },
32
+ "tools": []
33
+ },
34
+ "origin": "CLI",
35
+ "modelId": "claude-sonnet-4"
36
+ }
37
+ },
38
+ "chatTriggerType": "MANUAL"
39
+ }
40
+ }
replicate.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import uuid
3
+ import asyncio
4
+ from typing import Dict, Optional, Tuple, List, AsyncGenerator, Any
5
+ import httpx
6
+
7
+ from utils import get_proxies, load_module, create_proxy_mounts
8
+ from config import AMAZONQ_API_URL, DEFAULT_HEADERS
9
+
10
+ try:
11
+ _parser = load_module("v2_claude_parser", "claude_parser.py")
12
+ EventStreamParser = _parser.EventStreamParser
13
+ extract_event_info = _parser.extract_event_info
14
+ except Exception as e:
15
+ print(f"Warning: Failed to load claude_parser: {e}")
16
+ EventStreamParser = None
17
+ extract_event_info = None
18
+
19
class StreamTracker:
    """Records whether a wrapped async stream produced any truthy item."""

    def __init__(self):
        # Flipped to True the first time a non-empty item passes through.
        self.has_content = False

    async def track(self, gen: AsyncGenerator[str, None]) -> AsyncGenerator[str, None]:
        """Pass *gen* through unchanged while noting truthy items."""
        async for chunk in gen:
            self.has_content = self.has_content or bool(chunk)
            yield chunk
28
+
29
def load_template() -> Tuple[str, Dict[str, str]]:
    """
    Load the Amazon Q API request template.

    Returns:
        (url, headers): the API endpoint URL and a copy of the default
        request headers (copied so callers may mutate them freely).
    """
    return AMAZONQ_API_URL, DEFAULT_HEADERS.copy()
37
+
38
+ def _merge_headers(as_log: Dict[str, str], bearer_token: str) -> Dict[str, str]:
39
+ headers = dict(as_log)
40
+ for k in list(headers.keys()):
41
+ kl = k.lower()
42
+ if kl in ("content-length","host","connection","transfer-encoding"):
43
+ headers.pop(k, None)
44
+ def set_header(name: str, value: str):
45
+ for key in list(headers.keys()):
46
+ if key.lower() == name.lower():
47
+ del headers[key]
48
+ headers[name] = value
49
+ set_header("Authorization", f"Bearer {bearer_token}")
50
+ set_header("amz-sdk-invocation-id", str(uuid.uuid4()))
51
+ return headers
52
+
53
async def send_chat_request(
    access_token: str,
    messages: List[Dict[str, Any]],
    model: Optional[str] = None,
    stream: bool = False,
    timeout: Tuple[int,int] = (30,300),
    client: Optional[httpx.AsyncClient] = None,
    raw_payload: Dict[str, Any] = None
) -> Tuple[Optional[str], Optional[AsyncGenerator[str, None]], StreamTracker, Optional[AsyncGenerator[Any, None]]]:
    """
    Send a chat request to the Amazon Q API.

    Args:
        access_token: Amazon Q access token (sent as a Bearer token)
        messages: message list (deprecated; use raw_payload)
        model: model name (deprecated; use raw_payload)
        stream: whether to return a streaming event generator
        timeout: (write, read) timeout configuration in seconds
        client: optional shared HTTP client; a local one is created (and
            closed by this function) when omitted
        raw_payload: request body already converted from the Claude API
            shape (required)

    Returns:
        (None, None, tracker, events) where *events* is an async generator
        of (event_type, payload) tuples when stream=True, else None.

    Raises:
        ValueError: when raw_payload is missing
        httpx.HTTPError: when the upstream returns a >=400 status
    """
    if raw_payload is None:
        raise ValueError("raw_payload is required")

    url, headers_from_log = load_template()
    headers_from_log["amz-sdk-invocation-id"] = str(uuid.uuid4())

    # Use raw payload (for Claude API)
    body_json = raw_payload
    # Ensure conversationId is set if missing
    if "conversationState" in body_json and "conversationId" not in body_json["conversationState"]:
        body_json["conversationState"]["conversationId"] = str(uuid.uuid4())

    payload_str = json.dumps(body_json, ensure_ascii=False)
    headers = _merge_headers(headers_from_log, access_token)

    # local_client marks ownership: only clients created here are closed here.
    local_client = False
    if client is None:
        local_client = True
        mounts = create_proxy_mounts()
        # Longer connect timeout to avoid TLS handshake timeouts.
        timeout_config = httpx.Timeout(connect=60.0, read=timeout[1], write=timeout[0], pool=10.0)
        # Only pass the mounts argument when a proxy is configured.
        if mounts:
            client = httpx.AsyncClient(mounts=mounts, timeout=timeout_config)
        else:
            client = httpx.AsyncClient(timeout=timeout_config)

    # Use manual request sending to control stream lifetime
    req = client.build_request("POST", url, headers=headers, content=payload_str)

    resp = None
    try:
        resp = await client.send(req, stream=True)

        if resp.status_code >= 400:
            try:
                await resp.read()
                err = resp.text
            except Exception:
                err = f"HTTP {resp.status_code}"
            await resp.aclose()
            if local_client:
                await client.aclose()
            raise httpx.HTTPError(f"Upstream error {resp.status_code}: {err}")

        tracker = StreamTracker()

        # Track if the response has been consumed to avoid double-close
        response_consumed = False

        async def _iter_events() -> AsyncGenerator[Any, None]:
            # Decode the binary event stream into (event_type, payload) tuples.
            nonlocal response_consumed
            try:
                # Use EventStreamParser from claude_parser.py
                async def byte_gen():
                    async for chunk in resp.aiter_bytes():
                        if chunk:
                            yield chunk

                async for message in EventStreamParser.parse_stream(byte_gen()):
                    event_info = extract_event_info(message)
                    if event_info:
                        event_type = event_info.get('event_type')
                        payload = event_info.get('payload')
                        if event_type and payload:
                            yield (event_type, payload)
            except Exception:
                # Swallow upstream errors only once some content was already
                # delivered; otherwise surface the failure to the caller.
                if not tracker.has_content:
                    raise
            finally:
                response_consumed = True
                await resp.aclose()
                if local_client:
                    await client.aclose()

        if stream:
            # Wrap generator to ensure cleanup on early termination
            async def _safe_iter_events():
                try:
                    # Safety net: hard 300-second cap on the whole stream.
                    async with asyncio.timeout(300):
                        async for item in _iter_events():
                            yield item
                except asyncio.TimeoutError:
                    # Force-close everything on timeout.
                    if resp and not resp.is_closed:
                        await resp.aclose()
                    if local_client and client:
                        await client.aclose()
                    raise
                except GeneratorExit:
                    # Generator was closed without being fully consumed
                    # Ensure cleanup happens even if finally block wasn't reached
                    if resp and not resp.is_closed:
                        await resp.aclose()
                    if local_client and client:
                        await client.aclose()
                    raise
                except Exception:
                    # Any exception should also trigger cleanup
                    if resp and not resp.is_closed:
                        await resp.aclose()
                    if local_client and client:
                        await client.aclose()
                    raise
            return None, None, tracker, _safe_iter_events()
        else:
            # Non-streaming: consume all events
            try:
                async for _ in _iter_events():
                    pass
            finally:
                # Ensure response is closed even if iteration is incomplete
                if not response_consumed and resp:
                    await resp.aclose()
                if local_client:
                    await client.aclose()
            return None, None, tracker, None

    except Exception:
        # Critical: close response on any exception before generators are created
        if resp and not resp.is_closed:
            await resp.aclose()
        if local_client and client:
            await client.aclose()
        raise
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi==0.115.5
2
+ uvicorn[standard]==0.32.0
3
+ pydantic==2.9.2
4
+ python-dotenv==1.0.1
5
+ httpx==0.28.1
utils.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """公共工具函数"""
2
+ import os
3
+ import importlib.util
4
+ import httpx
5
+ from pathlib import Path
6
+ from typing import Dict, Optional
7
+
8
+
9
def get_proxies() -> Optional[Dict[str, str]]:
    """Read proxy settings from the HTTP_PROXY environment variable.

    Returns a {"http": ..., "https": ...} mapping using the same proxy for
    both schemes, or None when HTTP_PROXY is unset or blank.
    """
    configured = os.getenv("HTTP_PROXY", "").strip()
    return {"http": configured, "https": configured} if configured else None
18
+
19
+
20
def load_module(module_name: str, file_name: str):
    """
    Dynamically load a sibling module by file name.

    Args:
        module_name: name to register the module under
        file_name: file name, relative to this file's directory

    Returns:
        the executed module object
    """
    module_path = Path(__file__).resolve().parent / file_name
    spec = importlib.util.spec_from_file_location(module_name, str(module_path))
    loaded = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(loaded)
    return loaded
36
+
37
+
38
def create_proxy_mounts() -> Optional[Dict[str, httpx.AsyncHTTPTransport]]:
    """
    Build httpx mount transports for the configured proxy.

    Returns:
        a mounts dict routing both http:// and https:// traffic through the
        proxy, or None when no proxy is configured.
    """
    proxy_settings = get_proxies()
    if not proxy_settings:
        return None

    proxy_url = proxy_settings.get("https") or proxy_settings.get("http")
    if not proxy_url:
        return None

    return {
        "https://": httpx.AsyncHTTPTransport(proxy=proxy_url),
        "http://": httpx.AsyncHTTPTransport(proxy=proxy_url),
    }