Spaces:

bluewinliang
/

zai2api

Paused

App Files Files Community

bluewinliang commited on Aug 7, 2025

Commit

6d2594a

verified ·

1 Parent(s): e20a984

Upload proxy_handler.py

Browse files

Files changed (1) hide show

proxy_handler.py +32 -34

proxy_handler.py CHANGED Viewed

@@ -27,6 +27,28 @@ class ProxyHandler:
         if not self.client.is_closed:
             await self.client.aclose()
     # ... Methods _serialize_msgs, _prep_upstream remain the same ...
     def _serialize_msgs(self, msgs) -> list:
         out = []
@@ -43,8 +65,7 @@ class ProxyHandler:
         body = { "stream": True, "model": model, "messages": self._serialize_msgs(req.messages), "background_tasks": {"title_generation": True, "tags_generation": True}, "chat_id": str(uuid.uuid4()), "features": {"image_generation": False, "code_interpreter": False, "web_search": False, "auto_web_search": False, "enable_thinking": True,}, "id": str(uuid.uuid4()), "mcp_servers": ["deep-web-search"], "model_item": {"id": model, "name": "GLM-4.5", "owned_by": "openai"}, "params": {}, "tool_servers": [], "variables": {"{{USER_NAME}}": "User", "{{USER_LOCATION}}": "Unknown", "{{CURRENT_DATETIME}}": time.strftime("%Y-%m-%d %H:%M:%S"),},}
         headers = { "Content-Type": "application/json", "Authorization": f"Bearer {ck}", "User-Agent": ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"), "Accept": "application/json, text/event-stream", "Accept-Language": "zh-CN", "sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"', "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"macOS"', "x-fe-version": "prod-fe-1.0.53", "Origin": "https://chat.z.ai", "Referer": "https://chat.z.ai/",}
         return body, headers, ck
-    # The stream_proxy_response from v6 remains the same, as it was working correctly.
     async def stream_proxy_response(self, req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
         ck = None
         try:
@@ -59,10 +80,11 @@ class ProxyHandler:
                     if not think_open:
                         yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]})}\n\n"
                         think_open = True
-                    # In stream, we clean as we go
                     cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
-                    cleaned_text = re.sub(r'<[^>]+>', '', cleaned_text)
-                    cleaned_text = re.sub(r'^\s*>\s*', '', cleaned_text, flags=re.MULTILINE)
                     if cleaned_text:
                         yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': cleaned_text}, 'finish_reason': None}]})}\n\n"
                 elif content_type == "answer":
@@ -108,15 +130,11 @@ class ProxyHandler:
         except Exception:
             logger.exception("Stream error"); raise
-    # ---------- non-stream (REVISED CLEANUP LOGIC) ----------
     async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
         ck = None
         try:
             body, headers, ck = await self._prep_upstream(req)
-            raw_thinking_parts = []
-            raw_answer_parts = []
-            phase_cur = None
             async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
                 if resp.status_code != 200:
                     await cookie_manager.mark_cookie_failed(ck); error_detail = await resp.text()
@@ -140,8 +158,6 @@ class ProxyHandler:
                         content = dat.get("delta_content") or dat.get("edit_content")
                         if not content: continue
-                        content = re.sub(r'<glm_block.*?</glm_block>', '', content, flags=re.DOTALL)
                         match = re.search(r'(.*</details>)(.*)', content, flags=re.DOTALL)
                         if match:
                             thinking_part, answer_part = match.groups()
@@ -155,30 +171,13 @@ class ProxyHandler:
                     else: continue
                     break
-            # Final assembly and cleaning
             final_ans_text = ''.join(raw_answer_parts)
             final_content = final_ans_text
             if settings.SHOW_THINK_TAGS and raw_thinking_parts:
-                full_think_text = ''.join(raw_thinking_parts)
-                # --- START OF FINAL FIX ---
-                # 1. Remove all HTML-like tags first. This gets rid of <details>, <summary>, etc.
-                cleaned_text = re.sub(r'<[^>]+>', '', full_think_text)
-                # 2. Remove specific known metadata patterns that are not standard HTML.
-                # This pattern matches 'true" duration="...">' and "Thought for ... seconds"
-                cleaned_text = re.sub(r'true" duration="\d+">\s*Thought for \d+ seconds', '', cleaned_text)
-                # 3. Remove leading markdown quote symbols and trim whitespace.
-                cleaned_text = re.sub(r'^\s*>\s*', '', cleaned_text, flags=re.MULTILINE).strip()
-                # 4. Remove any remaining "Thinking..." headers.
-                cleaned_text = cleaned_text.replace("Thinking…", "").strip()
-                # --- END OF FINAL FIX ---
-                if cleaned_text:
-                    final_content = f"<think>{cleaned_text}</think>{final_ans_text}"
             return ChatCompletionResponse(
                 id=f"chatcmpl-{uuid.uuid4().hex[:29]}", created=int(time.time()), model=req.model,
@@ -187,7 +186,6 @@ class ProxyHandler:
         except Exception:
             logger.exception("Non-stream processing failed"); raise
-    # ---------- FastAPI entry ----------
     async def handle_chat_completion(self, req: ChatCompletionRequest):
         stream = bool(req.stream) if req.stream is not None else settings.DEFAULT_STREAM
         if stream:

         if not self.client.is_closed:
             await self.client.aclose()
+    # --- START OF FINAL FIX ---
+    # NEW: Unified thinking content cleaner function
+    def _clean_thinking_content(self, text: str) -> str:
+        """
+        A robust cleaner for the raw thinking content string.
+        """
+        if not text:
+            return ""
+        # 1. Remove tool call blocks first
+        cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
+        # 2. Remove all HTML-like tags. This gets rid of <details>, <summary>, etc.
+        cleaned_text = re.sub(r'<[^>]+>', '', cleaned_text)
+        # 3. Remove specific known metadata patterns that are not standard HTML.
+        cleaned_text = re.sub(r'true" duration="\d+">\s*Thought for \d+ seconds', '', cleaned_text)
+        # 4. Remove leading markdown quote symbols
+        cleaned_text = re.sub(r'^\s*>\s*', '', cleaned_text, flags=re.MULTILINE)
+        # 5. Remove any "Thinking..." headers.
+        cleaned_text = cleaned_text.replace("Thinking…", "")
+        # 6. Final strip to clean up any residual whitespace.
+        return cleaned_text.strip()
+    # --- END OF FINAL FIX ---
     # ... Methods _serialize_msgs, _prep_upstream remain the same ...
     def _serialize_msgs(self, msgs) -> list:
         out = []
         body = { "stream": True, "model": model, "messages": self._serialize_msgs(req.messages), "background_tasks": {"title_generation": True, "tags_generation": True}, "chat_id": str(uuid.uuid4()), "features": {"image_generation": False, "code_interpreter": False, "web_search": False, "auto_web_search": False, "enable_thinking": True,}, "id": str(uuid.uuid4()), "mcp_servers": ["deep-web-search"], "model_item": {"id": model, "name": "GLM-4.5", "owned_by": "openai"}, "params": {}, "tool_servers": [], "variables": {"{{USER_NAME}}": "User", "{{USER_LOCATION}}": "Unknown", "{{CURRENT_DATETIME}}": time.strftime("%Y-%m-%d %H:%M:%S"),},}
         headers = { "Content-Type": "application/json", "Authorization": f"Bearer {ck}", "User-Agent": ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"), "Accept": "application/json, text/event-stream", "Accept-Language": "zh-CN", "sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"', "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"macOS"', "x-fe-version": "prod-fe-1.0.53", "Origin": "https://chat.z.ai", "Referer": "https://chat.z.ai/",}
         return body, headers, ck
     async def stream_proxy_response(self, req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
         ck = None
         try:
                     if not think_open:
                         yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]})}\n\n"
                         think_open = True
+                    # In stream, we clean as we go, but we don't strip the final result
+                    # as it might be part of a larger thought. We use a simpler clean here.
                     cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
+                    cleaned_text = cleaned_text.replace("Thinking…", "") # Remove header early
                     if cleaned_text:
                         yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': cleaned_text}, 'finish_reason': None}]})}\n\n"
                 elif content_type == "answer":
         except Exception:
             logger.exception("Stream error"); raise
     async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
         ck = None
         try:
             body, headers, ck = await self._prep_upstream(req)
+            raw_thinking_parts = []; raw_answer_parts = []; phase_cur = None
             async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
                 if resp.status_code != 200:
                     await cookie_manager.mark_cookie_failed(ck); error_detail = await resp.text()
                         content = dat.get("delta_content") or dat.get("edit_content")
                         if not content: continue
                         match = re.search(r'(.*</details>)(.*)', content, flags=re.DOTALL)
                         if match:
                             thinking_part, answer_part = match.groups()
                     else: continue
                     break
             final_ans_text = ''.join(raw_answer_parts)
             final_content = final_ans_text
             if settings.SHOW_THINK_TAGS and raw_thinking_parts:
+                # Use the new unified cleaner function
+                cleaned_think_text = self._clean_thinking_content(''.join(raw_thinking_parts))
+                if cleaned_think_text:
+                    final_content = f"<think>{cleaned_think_text}</think>{final_ans_text}"
             return ChatCompletionResponse(
                 id=f"chatcmpl-{uuid.uuid4().hex[:29]}", created=int(time.time()), model=req.model,
         except Exception:
             logger.exception("Non-stream processing failed"); raise
     async def handle_chat_completion(self, req: ChatCompletionRequest):
         stream = bool(req.stream) if req.stream is not None else settings.DEFAULT_STREAM
         if stream: