Spaces: Paused

Upload proxy_handler.py
Browse files

proxy_handler.py (+34 -121)
CHANGED
@@ -6,7 +6,6 @@ import json
 import logging
 import re
 import time
-import uuid
 from typing import AsyncGenerator, Dict, Any, Optional
 import httpx
 from fastapi import HTTPException
@@ -39,8 +38,6 @@ class ProxyHandler:
             return content

         logger.debug(f"SHOW_THINK_TAGS setting: {settings.SHOW_THINK_TAGS}")
-        logger.debug(f"Original content length: {len(content)}")
-        logger.debug(f"Original content preview: {content[:200]}...")

         # Optionally remove thinking content based on configuration
         if not settings.SHOW_THINK_TAGS:
@@ -92,8 +89,6 @@ class ProxyHandler:
         else:
             content += "</think>"

-        logger.debug(f"Final transformed content length: {len(content)}")
-        logger.debug(f"Final transformed content preview: {content[:200]}...")
         return content.strip()

     async def proxy_request(self, request: ChatCompletionRequest) -> Dict[str, Any]:
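For context, the tag rewrite that transform_content performs looks roughly like the sketch below. This is illustrative, not the file's exact code; it is pieced together from the <details>/<summary>-to-<think> replacements visible in the removed stream_response further down, and the helper name is invented.

import re

def rewrite_think_tags(content: str, show_think_tags: bool) -> str:
    # Hypothetical helper mirroring transform_content's tag handling.
    if not show_think_tags:
        # Hide reasoning entirely: drop the whole <details> block.
        return re.sub(r"<details[^>]*>.*?</details>", "", content, flags=re.DOTALL).strip()
    # Keep reasoning, normalized to <think>...</think>.
    content = content.replace("<details", "<think").replace("</details>", "</think>")
    content = re.sub(r"<summary>.*?</summary>", "", content, flags=re.DOTALL)
    return content.strip()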
@@ -114,10 +109,17 @@ class ProxyHandler:
             request.stream if request.stream is not None else settings.DEFAULT_STREAM
         )

+        # Validate parameter compatibility
+        if is_streaming and not settings.SHOW_THINK_TAGS:
+            logger.warning("SHOW_THINK_TAGS=false is ignored for streaming responses")
+
         # Prepare request data
         request_data = request.model_dump(exclude_none=True)
+        request_data["model"] = target_model

         # Build request data based on actual Z.AI format from zai-messages.md
+        import uuid
+
         request_data = {
             "stream": True,  # Always request streaming from Z.AI for processing
             "model": target_model,
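The local import uuid hints at how the rest of this payload is filled in. A hypothetical continuation; every field beyond "stream" and "model" here is an assumption for illustration, since the real shape comes from zai-messages.md:

import uuid

request_data = {
    "stream": True,  # Always request streaming from Z.AI for processing
    "model": target_model,
    # Assumed fields below, invented for illustration only.
    "chat_id": str(uuid.uuid4()),
    "id": str(uuid.uuid4()),
    "messages": [m.model_dump() for m in request.messages],
}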
@@ -208,7 +210,6 @@ class ProxyHandler:

             try:
                 parsed = json.loads(payload)
-                logger.debug(f"Parsed chunk: {parsed}")

                 # Check for errors first
                 if parsed.get("error") or (parsed.get("data", {}).get("error")):
@@ -228,107 +229,16 @@ class ProxyHandler:
                 parsed["data"].pop("edit_index", None)
                 parsed["data"].pop("edit_content", None)

+                # Note: We don't transform delta_content here because <think> tags
+                # might span multiple chunks. We'll transform the final aggregated content.
+
                 yield parsed

-            except json.JSONDecodeError as e:
-                logger.debug(f"Failed to parse JSON: {line}, error: {e}")
+            except json.JSONDecodeError:
                 continue  # Skip non-JSON lines

-    async def stream_response(
-        self, response: httpx.Response, model: str
-    ) -> AsyncGenerator[str, None]:
-        """Generate OpenAI-compatible streaming response"""
-        try:
-            chunk_id = f"chatcmpl-{uuid.uuid4()}"
-
-            async for parsed in self.process_streaming_response(response):
-                # Get the incremental content
-                delta_content = parsed.get("data", {}).get("delta_content", "")
-                phase = parsed.get("data", {}).get("phase", "")
-
-                logger.debug(f"Processing chunk - phase: {phase}, content: {delta_content[:50]}...")
-
-                # Skip chunks with no content
-                if not delta_content:
-                    continue
-
-                # Decide whether to emit based on the SHOW_THINK_TAGS setting
-                should_output = True
-
-                if not settings.SHOW_THINK_TAGS:
-                    # When think tags are hidden, only emit answer-phase content
-                    if phase != "answer":
-                        logger.debug(f"Skipping non-answer phase: {phase}")
-                        should_output = False
-                else:
-                    # When think tags are shown, transform the thinking content
-                    if phase == "thinking":
-                        # Convert <details> to <think>
-                        delta_content = delta_content.replace("<details", "<think")
-                        delta_content = delta_content.replace("</details>", "</think>")
-                        # Strip <summary> tags
-                        delta_content = re.sub(r"<summary>.*?</summary>", "", delta_content, flags=re.DOTALL)
-
-                if should_output and delta_content:
-                    # Build an OpenAI-format chunk
-                    chunk = {
-                        "id": chunk_id,
-                        "object": "chat.completion.chunk",
-                        "created": int(time.time()),
-                        "model": model,
-                        "choices": [
-                            {
-                                "index": 0,
-                                "delta": {"content": delta_content},
-                                "finish_reason": None,
-                            }
-                        ],
-                    }
-
-                    chunk_json = json.dumps(chunk, ensure_ascii=False)
-                    logger.debug(f"Yielding chunk: {chunk_json}")
-                    yield f"data: {chunk_json}\n\n"
-
-            # Send the completion marker
-            final_chunk = {
-                "id": chunk_id,
-                "object": "chat.completion.chunk",
-                "created": int(time.time()),
-                "model": model,
-                "choices": [
-                    {
-                        "index": 0,
-                        "delta": {},
-                        "finish_reason": "stop",
-                    }
-                ],
-            }
-            yield f"data: {json.dumps(final_chunk, ensure_ascii=False)}\n\n"
-            yield "data: [DONE]\n\n"
-
-        except Exception as e:
-            logger.error(f"Error in stream_response: {e}")
-            # Send an error message
-            error_chunk = {
-                "id": f"chatcmpl-{uuid.uuid4()}",
-                "object": "chat.completion.chunk",
-                "created": int(time.time()),
-                "model": model,
-                "choices": [
-                    {
-                        "index": 0,
-                        "delta": {"content": f"Error: {str(e)}"},
-                        "finish_reason": "stop",
-                    }
-                ],
-            }
-            yield f"data: {json.dumps(error_chunk, ensure_ascii=False)}\n\n"
-            yield "data: [DONE]\n\n"
-
     async def handle_chat_completion(self, request: ChatCompletionRequest):
         """Handle chat completion request"""
-        logger.info(f"Handling chat completion request - stream: {request.stream}, SHOW_THINK_TAGS: {settings.SHOW_THINK_TAGS}")
-
         proxy_result = await self.proxy_request(request)
         response = proxy_result["response"]

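The except json.JSONDecodeError path above only makes sense given how process_streaming_response reads the upstream stream. A minimal sketch of that read loop, assuming upstream emits "data: {json}" SSE lines over httpx (the helper name is illustrative):

import json
import httpx

async def iter_sse_json(response: httpx.Response):
    # Yield each parsed "data: {...}" payload; skip keep-alives and noise.
    async for line in response.aiter_lines():
        if not line.startswith("data:"):
            continue
        payload = line[len("data:"):].strip()
        try:
            yield json.loads(payload)
        except json.JSONDecodeError:
            continue  # Skip non-JSON lines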
@@ -337,23 +247,25 @@ class ProxyHandler:
             request.stream if request.stream is not None else settings.DEFAULT_STREAM
         )

-        logger.info(f"Final streaming mode: {is_streaming}")
-
         if is_streaming:
-            # For streaming responses
+            # For streaming responses, SHOW_THINK_TAGS setting is ignored
             return StreamingResponse(
                 self.stream_response(response, request.model),
                 media_type="text/event-stream",
                 headers={
                     "Cache-Control": "no-cache",
                     "Connection": "keep-alive",
-                    "X-Accel-Buffering": "no",  # Useful for nginx
                 },
             )
         else:
             # For non-streaming responses, SHOW_THINK_TAGS setting applies
             return await self.non_stream_response(response, request.model)

+    async def stream_response(
+        self, response: httpx.Response, model: str
+    ) -> AsyncGenerator[str, None]:
+        """Generate streaming response"""
+        async for parsed in self.process_streaming_response(response):
+            yield f"data: {json.dumps(parsed)}\n\n"
+        yield "data: [DONE]\n\n"
+
     async def non_stream_response(
         self, response: httpx.Response, model: str
     ) -> ChatCompletionResponse:
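Note what this simplification changes on the wire: the new stream_response forwards Z.AI's parsed chunks verbatim instead of re-wrapping them as OpenAI chat.completion.chunk objects, so streaming clients now receive {"data": {"delta_content": ..., "phase": ...}} payloads. A quick probe, where the URL and model name are placeholders:

import asyncio
import httpx

async def probe_stream() -> None:
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream(
            "POST",
            "http://localhost:8000/v1/chat/completions",  # placeholder URL
            json={
                "model": "glm-4.5",  # placeholder model name
                "stream": True,
                "messages": [{"role": "user", "content": "hi"}],
            },
        ) as resp:
            async for line in resp.aiter_lines():
                if line.startswith("data:"):
                    print(line)

asyncio.run(probe_stream())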
@@ -361,7 +273,7 @@ class ProxyHandler:
         chunks = []
         async for parsed in self.process_streaming_response(response):
             chunks.append(parsed)
-            logger.debug(f"Received chunk: {parsed}")
+            logger.debug(f"Received chunk: {parsed}")  # Debug log

         if not chunks:
             raise HTTPException(status_code=500, detail="No response from upstream")
@@ -370,28 +282,29 @@ class ProxyHandler:
         logger.debug(f"First chunk structure: {chunks[0] if chunks else 'None'}")

         # Aggregate content based on SHOW_THINK_TAGS setting
-            full_content += delta_content
+        if settings.SHOW_THINK_TAGS:
+            # Include all content
+            full_content = "".join(
+                chunk.get("data", {}).get("delta_content", "") for chunk in chunks
+            )
+        else:
+            # Only include answer phase content
+            full_content = "".join(
+                chunk.get("data", {}).get("delta_content", "")
+                for chunk in chunks
+                if chunk.get("data", {}).get("phase") == "answer"
+            )

         logger.info(f"Aggregated content length: {len(full_content)}")
-        logger.debug(f"Full aggregated content: {full_content}")
+        logger.debug(
+            f"Full aggregated content: {full_content}"
+        )  # Show full content for debugging

         # Apply content transformation (including think tag filtering)
         transformed_content = self.transform_content(full_content)

         logger.info(f"Transformed content length: {len(transformed_content)}")
-        logger.debug(f"Transformed content preview: {transformed_content[:200]}...")
+        logger.debug(f"Transformed content: {transformed_content[:200]}...")

         # Create OpenAI-compatible response
         return ChatCompletionResponse(
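The join-based aggregation above is easy to sanity-check in isolation with made-up chunks in the shape this handler expects from Z.AI:

# Fabricated sample chunks for a standalone check; phases and content are
# illustrative, matching the {"data": {"phase", "delta_content"}} shape above.
chunks = [
    {"data": {"phase": "thinking", "delta_content": "<details><summary>t</summary>reasoning"}},
    {"data": {"phase": "answer", "delta_content": "Hello"}},
    {"data": {"phase": "answer", "delta_content": ", world"}},
]

# Mirrors the SHOW_THINK_TAGS=false branch: answer-phase content only.
answer_only = "".join(
    c.get("data", {}).get("delta_content", "")
    for c in chunks
    if c.get("data", {}).get("phase") == "answer"
)
assert answer_only == "Hello, world"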