Upload proxy_handler.py
proxy_handler.py  CHANGED  (+321 -103)
@@ -1,12 +1,13 @@
 """
-Proxy handler for Z.AI API requests
+Proxy handler for Z.AI API requests
 """
+
 import json
 import logging
+import re
 import time
 import uuid
 from typing import AsyncGenerator, Dict, Any, Optional
-
 import httpx
 from fastapi import HTTPException
 from fastapi.responses import StreamingResponse
@@ -24,7 +25,6 @@ logger = logging.getLogger(__name__)

 class ProxyHandler:
     def __init__(self):
-        # 60-second timeout for the connection to Z.AI
         self.client = httpx.AsyncClient(timeout=60.0)

     async def __aenter__(self):
@@ -33,137 +33,355 @@ class ProxyHandler:
     async def __aexit__(self, exc_type, exc_val, exc_tb):
         await self.client.aclose()

-    # --------- Text preprocessing ---------
     def transform_content(self, content: str) -> str:
-        """
-        Convert the HTML / THINK tags returned by Z.AI into plain text according to the project settings
-        """
+        """Transform content by replacing HTML tags and optionally removing think tags"""
         if not content:
             return content
-
-
+
+        logger.debug(f"SHOW_THINK_TAGS setting: {settings.SHOW_THINK_TAGS}")
+
+        # Optionally remove thinking content based on configuration
         if not settings.SHOW_THINK_TAGS:
-
+            logger.debug("Removing thinking content from response")
+            original_length = len(content)
+
+            # Remove <details> blocks (thinking content) - handle both closed and unclosed tags
+            # First try to remove complete <details>...</details> blocks
+            content = re.sub(
+                r"<details[^>]*>.*?</details>", "", content, flags=re.DOTALL
+            )
+
+            # Then remove any remaining <details> opening tags and everything after them until we hit answer content
+            # Look for pattern: <details...><summary>...</summary>...content... and remove the thinking part
+            content = re.sub(
+                r"<details[^>]*>.*?(?=\s*[A-Z]|\s*\d|\s*$)",
+                "",
+                content,
+                flags=re.DOTALL,
+            )
+
+            content = content.strip()
+
+            logger.debug(
+                f"Content length after removing thinking content: {original_length} -> {len(content)}"
+            )
+        else:
+            logger.debug("Keeping thinking content, converting to <think> tags")
+
+            # Replace <details> with <think>
+            content = re.sub(r"<details[^>]*>", "<think>", content)
+            content = content.replace("</details>", "</think>")
+
+            # Remove <summary> tags and their content
+            content = re.sub(r"<summary>.*?</summary>", "", content, flags=re.DOTALL)
+
+            # If there's no closing </think>, add it at the end of thinking content
+            if "<think>" in content and "</think>" not in content:
+                # Find where thinking ends and answer begins
+                think_start = content.find("<think>")
+                if think_start != -1:
+                    # Look for the start of the actual answer (usually starts with a capital letter or number)
+                    answer_match = re.search(r"\n\s*[A-Z0-9]", content[think_start:])
+                    if answer_match:
+                        insert_pos = think_start + answer_match.start()
+                        content = (
+                            content[:insert_pos] + "</think>\n" + content[insert_pos:]
+                        )
+                    else:
+                        content += "</think>"
+
         return content.strip()

-
-
-        """
-        OpenAI API-compatible proxy entry point
-        """
+    async def proxy_request(self, request: ChatCompletionRequest) -> Dict[str, Any]:
+        """Proxy request to Z.AI API"""
         cookie = await cookie_manager.get_next_cookie()
         if not cookie:
             raise HTTPException(status_code=503, detail="No available cookies")

-        #
+        # Transform model name
         target_model = (
             settings.UPSTREAM_MODEL
             if request.model == settings.MODEL_NAME
             else request.model
         )

-        #
-        is_streaming = (
+        # Determine if this should be a streaming response
+        is_streaming = (
+            request.stream if request.stream is not None else settings.DEFAULT_STREAM
+        )
+
+        # Validate parameter compatibility
+        if is_streaming and not settings.SHOW_THINK_TAGS:
+            logger.warning("SHOW_THINK_TAGS=false is ignored for streaming responses")
+
+        # Prepare request data
+        request_data = request.model_dump(exclude_none=True)
+        request_data["model"] = target_model
+
+        # Build request data based on actual Z.AI format from zai-messages.md
+        request_data = {
+            "stream": True,  # Always request streaming from Z.AI for processing
+            "model": target_model,
+            "messages": request_data["messages"],
+            "background_tasks": {"title_generation": True, "tags_generation": True},
+            "chat_id": str(uuid.uuid4()),
+            "features": {
+                "image_generation": False,
+                "code_interpreter": False,
+                "web_search": False,
+                "auto_web_search": False,
+            },
+            "id": str(uuid.uuid4()),
+            "mcp_servers": ["deep-web-search"],
+            "model_item": {"id": target_model, "name": "GLM-4.5", "owned_by": "openai"},
+            "params": {},
+            "tool_servers": [],
+            "variables": {
+                "{{USER_NAME}}": "User",
+                "{{USER_LOCATION}}": "Unknown",
+                "{{CURRENT_DATETIME}}": "2025-08-04 16:46:56",
+            },
+        }
+
+        logger.debug(f"Sending request data: {request_data}")
+
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {cookie}",
+            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
+            "Accept": "application/json, text/event-stream",
+            "Accept-Language": "zh-CN",
+            "sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"',
+            "sec-ch-ua-mobile": "?0",
+            "sec-ch-ua-platform": '"macOS"',
+            "x-fe-version": "prod-fe-1.0.53",
+            "Origin": "https://chat.z.ai",
+            "Referer": "https://chat.z.ai/c/069723d5-060b-404f-992c-4705f1554c4c",
+        }
+
+        try:
+            response = await self.client.post(
+                settings.UPSTREAM_URL, json=request_data, headers=headers
+            )
+
+            if response.status_code == 401:
+                await cookie_manager.mark_cookie_failed(cookie)
+                raise HTTPException(status_code=401, detail="Invalid authentication")
+
+            if response.status_code != 200:
+                raise HTTPException(
+                    status_code=response.status_code,
+                    detail=f"Upstream error: {response.text}",
+                )
+
+            await cookie_manager.mark_cookie_success(cookie)
+            return {"response": response, "cookie": cookie}
+
+        except httpx.RequestError as e:
+            logger.error(f"Request error: {e}")
+            logger.error(f"Request error type: {type(e).__name__}")
+            logger.error(f"Request URL: {settings.UPSTREAM_URL}")
+            logger.error(f"Request timeout: {self.client.timeout}")
+            await cookie_manager.mark_cookie_failed(cookie)
+            raise HTTPException(
+                status_code=503, detail=f"Upstream service unavailable: {str(e)}"
+            )
+
+    async def process_streaming_response(
+        self, response: httpx.Response
+    ) -> AsyncGenerator[Dict[str, Any], None]:
+        """Process streaming response from Z.AI"""
+        buffer = ""
+
+        async for chunk in response.aiter_text():
+            buffer += chunk
+            lines = buffer.split("\n")
+            buffer = lines[-1]  # Keep incomplete line in buffer
+
+            for line in lines[:-1]:
+                line = line.strip()
+                if not line.startswith("data: "):
+                    continue
+
+                payload = line[6:].strip()
+                if payload == "[DONE]":
+                    return
+
+                try:
+                    parsed = json.loads(payload)
+
+                    # Check for errors first
+                    if parsed.get("error") or (parsed.get("data", {}).get("error")):
+                        error_detail = (
+                            parsed.get("error", {}).get("detail")
+                            or parsed.get("data", {}).get("error", {}).get("detail")
+                            or "Unknown error from upstream"
+                        )
+                        logger.error(f"Upstream error: {error_detail}")
+                        raise HTTPException(
+                            status_code=400, detail=f"Upstream error: {error_detail}"
+                        )
+
+                    # Transform the response
+                    if parsed.get("data"):
+                        # Remove unwanted fields
+                        parsed["data"].pop("edit_index", None)
+                        parsed["data"].pop("edit_content", None)
+
+                        # Note: We don't transform delta_content here because <think> tags
+                        # might span multiple chunks. We'll transform the final aggregated content.
+
+                    yield parsed
+
+                except json.JSONDecodeError:
+                    continue  # Skip non-JSON lines
+
+    async def stream_response(
+        self, response: httpx.Response, model: str
+    ) -> AsyncGenerator[str, None]:
+        """Generate OpenAI-compatible streaming response"""
+        try:
+            async for parsed in self.process_streaming_response(response):
+                # Get the incremental content
+                delta_content = parsed.get("data", {}).get("delta_content", "")
+
+                # Decide whether to filter out thinking content based on the setting
+                if not settings.SHOW_THINK_TAGS:
+                    # Only emit content during the answer phase
+                    phase = parsed.get("data", {}).get("phase", "")
+                    if phase != "answer":
+                        continue
+
+                # Emit only when there is content
+                if delta_content:
+                    # Build an OpenAI-format chunk
+                    chunk = {
+                        "id": parsed.get("data", {}).get("id", f"chatcmpl-{uuid.uuid4()}"),
+                        "object": "chat.completion.chunk",
+                        "created": int(time.time()),
+                        "model": model,
+                        "choices": [
+                            {
+                                "index": 0,
+                                "delta": {"content": delta_content},
+                                "finish_reason": None,
+                            }
+                        ],
+                    }
+
+                    yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
+
+            # Send the completion marker
+            final_chunk = {
+                "id": f"chatcmpl-{uuid.uuid4()}",
+                "object": "chat.completion.chunk",
+                "created": int(time.time()),
+                "model": model,
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": {},
+                        "finish_reason": "stop",
+                    }
+                ],
+            }
+            yield f"data: {json.dumps(final_chunk, ensure_ascii=False)}\n\n"
+            yield "data: [DONE]\n\n"
+
+        except Exception as e:
+            logger.error(f"Error in stream_response: {e}")
+            # Send an error message
+            error_chunk = {
+                "id": f"chatcmpl-{uuid.uuid4()}",
+                "object": "chat.completion.chunk",
+                "created": int(time.time()),
+                "model": model,
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": {"content": f"Error: {str(e)}"},
+                        "finish_reason": "stop",
+                    }
+                ],
+            }
+            yield f"data: {json.dumps(error_chunk, ensure_ascii=False)}\n\n"
+            yield "data: [DONE]\n\n"
+
+    async def handle_chat_completion(self, request: ChatCompletionRequest):
+        """Handle chat completion request"""
+        proxy_result = await self.proxy_request(request)
+        response = proxy_result["response"]
+
+        # Determine final streaming mode
+        is_streaming = (
             request.stream if request.stream is not None else settings.DEFAULT_STREAM
         )

-        # Fetch data from Z.AI, streamed or all at once
         if is_streaming:
-            #
+            # For streaming responses, SHOW_THINK_TAGS setting is ignored
             return StreamingResponse(
-                self.stream_response(
+                self.stream_response(response, request.model),
                 media_type="text/event-stream",
                 headers={
                     "Cache-Control": "no-cache",
                     "Connection": "keep-alive",
-
+                    "X-Accel-Buffering": "no",  # useful for nginx
                 },
             )
         else:
-            #
-
-            return ChatCompletionResponse(
-                id=f"chatcmpl-{uuid.uuid4()}",
-                created=int(time.time()),
-                model=target_model,
-                choices=[
-                    {
-                        "index": 0,
-                        "message": {"role": "assistant", "content": content},
-                        "finish_reason": "stop",
-                    }
-                ],
-            )
+            # For non-streaming responses, SHOW_THINK_TAGS setting applies
+            return await self.non_stream_response(response, request.model)

-
-
-
-
-
-
-
-
-        Fetch the complete reply from Z.AI and return the transformed text
-        """
-        resp = await self.client.post(
-            settings.ZAI_ENDPOINT,
-            headers={"Cookie": cookie},
-            json=request.model_dump(exclude_none=True),
-        )
-        resp.raise_for_status()
-        data = resp.json()
-        return self.transform_content(data["choices"][0]["message"]["content"])
+    async def non_stream_response(
+        self, response: httpx.Response, model: str
+    ) -> ChatCompletionResponse:
+        """Generate non-streaming response"""
+        chunks = []
+        async for parsed in self.process_streaming_response(response):
+            chunks.append(parsed)
+            logger.debug(f"Received chunk: {parsed}")  # Debug log

-
-
-        self,
-        request: ChatCompletionRequest,
-        target_model: str,
-        cookie: str,
-    ) -> AsyncGenerator[str, None]:
-        """
-        Convert the Z.AI stream into OpenAI SSE chunks in real time
-        """
-        # Call the Z.AI streaming endpoint (assumed to support HTTP chunking)
-        async with self.client.stream(
-            "POST",
-            settings.ZAI_STREAM_ENDPOINT,
-            headers={"Cookie": cookie},
-            json=request.model_dump(exclude_none=True),
-        ) as resp:
-            resp.raise_for_status()
-            async for line in resp.aiter_lines():
-                if not line:
-                    continue
-                # Each line from Z.AI may already be JSON; parse it according to its format
-                try:
-                    raw = json.loads(line)
-                except json.JSONDecodeError:
-                    logger.debug("skip non-json line from Z.AI: %s", line)
-                    continue
+        if not chunks:
+            raise HTTPException(status_code=500, detail="No response from upstream")

-
-
-                if delta_text == "":
-                    continue
+        logger.info(f"Total chunks received: {len(chunks)}")
+        logger.debug(f"First chunk structure: {chunks[0] if chunks else 'None'}")

-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Aggregate content based on SHOW_THINK_TAGS setting
+        if settings.SHOW_THINK_TAGS:
+            # Include all content
+            full_content = "".join(
+                chunk.get("data", {}).get("delta_content", "") for chunk in chunks
+            )
+        else:
+            # Only include answer phase content
+            full_content = "".join(
+                chunk.get("data", {}).get("delta_content", "")
+                for chunk in chunks
+                if chunk.get("data", {}).get("phase") == "answer"
+            )
+
+        logger.info(f"Aggregated content length: {len(full_content)}")
+        logger.debug(
+            f"Full aggregated content: {full_content}"
+        )  # Show full content for debugging

-
-
+        # Apply content transformation (including think tag filtering)
+        transformed_content = self.transform_content(full_content)

-
-
+        logger.info(f"Transformed content length: {len(transformed_content)}")
+        logger.debug(f"Transformed content: {transformed_content[:200]}...")
+
+        # Create OpenAI-compatible response
+        return ChatCompletionResponse(
+            id=chunks[0].get("data", {}).get("id", "chatcmpl-unknown"),
+            created=int(time.time()),
+            model=model,
+            choices=[
+                {
+                    "index": 0,
+                    "message": {"role": "assistant", "content": transformed_content},
+                    "finish_reason": "stop",
+                }
+            ],
+        )
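
For orientation, here is a minimal sketch of how this handler might be mounted in a FastAPI app. The module paths (app.proxy_handler, app.models), the /v1/chat/completions route, and the shutdown hook are assumptions for illustration, not part of this commit; ChatCompletionRequest, settings, and cookie_manager come from elsewhere in the repository.

    # Hypothetical wiring; module paths and route are assumed, not from this commit.
    from fastapi import FastAPI

    from app.proxy_handler import ProxyHandler      # this file (assumed module path)
    from app.models import ChatCompletionRequest    # request model used above (assumed module path)

    app = FastAPI()
    handler = ProxyHandler()


    @app.post("/v1/chat/completions")
    async def chat_completions(request: ChatCompletionRequest):
        # Returns either a StreamingResponse (SSE) or a ChatCompletionResponse,
        # depending on request.stream and settings.DEFAULT_STREAM.
        return await handler.handle_chat_completion(request)


    @app.on_event("shutdown")
    async def shutdown() -> None:
        # Close the shared httpx.AsyncClient held by the handler.
        await handler.client.aclose()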
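
And a small illustration of what the new transform_content is meant to do with Z.AI's <details> thinking blocks; the sample input is invented, and the expected outputs follow from the regexes above.

    # Illustration only: expected behavior of transform_content on an invented sample.
    raw = (
        '<details type="reasoning"><summary>Thought for 3s</summary>'
        "Let me check the math.</details>\n"
        "The answer is 42."
    )

    # With SHOW_THINK_TAGS=false, the complete <details>...</details> block is
    # stripped by the first re.sub, leaving:
    #     "The answer is 42."
    #
    # With SHOW_THINK_TAGS=true, <details> becomes <think>, </details> becomes
    # </think>, and the <summary> header is dropped:
    #     "<think>Let me check the math.</think>\nThe answer is 42."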