bluewinliang committed on
Commit
be9067e
·
verified ·
1 Parent(s): 827f3c3

Upload proxy_handler.py

Browse files
Files changed (1) hide show
  1. proxy_handler.py +83 -100
proxy_handler.py CHANGED
@@ -26,65 +26,50 @@ class ProxyHandler:
26
  if not self.client.is_closed:
27
  await self.client.aclose()
28
 
29
- def _clean_thinking_content(self, text: str) -> str:
30
  """
31
- Aggressively cleans raw thinking content strings based on observed patterns
32
- from the Z.AI API, inspired by a reference Cloudflare implementation.
33
- Removes tool calls, specific HTML-like tags, and other metadata while preserving
34
- the core thought process content.
35
  """
36
  if not text:
37
  return ""
38
-
39
- cleaned_text = text
40
-
41
- # 1. Remove entire blocks where the content is also unwanted metadata.
42
- # e.g., <summary>Thinking...</summary> or <glm_block>...</glm_block>
43
- cleaned_text = re.sub(r'<summary>.*?</summary>', '', cleaned_text, flags=re.DOTALL)
44
  cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', cleaned_text, flags=re.DOTALL)
45
 
46
- # 2. Remove specific structural tags, but keep the content between them.
47
- # Inspired by the reference implementation's targeted replaces.
48
- # e.g., <details> content </details> becomes just 'content'
49
- cleaned_text = cleaned_text.replace("</thinking>", "")
50
- cleaned_text = cleaned_text.replace("<Full>", "")
51
- cleaned_text = cleaned_text.replace("</Full>", "")
52
- # This regex handles <details>, <details open>, and </details>
53
- cleaned_text = re.sub(r'</?details[^>]*>', '', cleaned_text)
54
-
55
- # 3. Handle markdown blockquotes.
56
- cleaned_text = re.sub(r'^\s*>\s*(?!>)', '', cleaned_text, flags=re.MULTILINE)
57
-
58
- # 4. Remove other known text artifacts.
59
- cleaned_text = cleaned_text.replace("Thinking…", "")
60
 
61
- # 5. Final strip to clean up residual whitespace from removed elements.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  return cleaned_text.strip()
63
 
64
- def _clean_answer_content(self, text: str) -> str:
65
- """
66
- Cleans unwanted tags from answer content.
67
- Does NOT strip whitespace to preserve markdown in streams.
68
- """
69
- if not text:
70
- return ""
71
- # Remove tool call blocks
72
- cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
73
- # Remove any residual details/summary blocks that might leak into the answer
74
- cleaned_text = re.sub(r'<details[^>]*>.*?</details>', '', cleaned_text, flags=re.DOTALL)
75
- cleaned_text = re.sub(r'<summary>.*?</summary>', '', cleaned_text, flags=re.DOTALL)
76
- return cleaned_text
77
-
78
- def _serialize_msgs(self, msgs) -> list:
79
- """Converts message objects to a list of dictionaries."""
80
- out = []
81
- for m in msgs:
82
- if hasattr(m, "dict"): out.append(m.dict())
83
- elif hasattr(m, "model_dump"): out.append(m.model_dump())
84
- elif isinstance(m, dict): out.append(m)
85
- else: out.append({"role": getattr(m, "role", "user"), "content": getattr(m, "content", str(m))})
86
- return out
87
-
88
  async def _prep_upstream(self, req: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str]:
89
  """Prepares the request body, headers, and cookie for the upstream API."""
90
  ck = await cookie_manager.get_next_cookie()
@@ -102,6 +87,7 @@ class ProxyHandler:
102
  think_open = False
103
  yielded_think_buffer = ""
104
  current_raw_thinking = ""
 
105
 
106
  async def yield_delta(content_type: str, text: str):
107
  nonlocal think_open, yielded_think_buffer
@@ -141,39 +127,41 @@ class ProxyHandler:
141
 
142
  payload_str = line[6:]
143
  if payload_str == '[DONE]':
144
- if think_open:
145
- yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
146
- yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {}, 'finish_reason': 'stop'}]})}\n\n";
147
- yield "data: [DONE]\n\n";
148
- return
149
  try:
150
  dat = json.loads(payload_str).get("data", {})
151
- except (json.JSONDecodeError, AttributeError):
152
- continue
153
 
154
- # --- START OF REFACTORED LOGIC ---
155
- phase = dat.get("phase")
156
- content_chunk = dat.get("delta_content") or dat.get("edit_content")
157
-
158
- if not content_chunk:
 
159
  continue
160
 
161
- if phase == "thinking":
162
- # Accumulate raw thinking content. `edit_content` replaces the buffer.
163
- if dat.get("edit_content") is not None:
164
- current_raw_thinking = content_chunk
165
- else:
166
- current_raw_thinking += content_chunk
167
- # Yield the processed delta of the accumulated thinking content
168
- async for item in yield_delta("thinking", current_raw_thinking):
169
- yield item
170
 
171
- elif phase == "answer":
172
- # Directly yield the answer chunk for processing
173
- async for item in yield_delta("answer", content_chunk):
 
 
 
 
 
 
 
 
 
 
 
174
  yield item
175
- # --- END OF REFACTORED LOGIC ---
176
-
177
  except Exception:
178
  logger.exception("Stream error"); raise
179
 
@@ -202,32 +190,27 @@ class ProxyHandler:
202
  dat = json.loads(payload_str).get("data", {})
203
  except (json.JSONDecodeError, AttributeError): continue
204
 
205
- # Use the more robust phase-based logic for non-stream as well
206
- phase = dat.get("phase")
207
- content_chunk = dat.get("delta_content") or dat.get("edit_content")
208
-
209
- if not content_chunk:
210
  continue
211
 
212
- if phase == "thinking":
213
- answer_started = False # Ensure we are in thinking mode
214
- if dat.get("edit_content") is not None:
215
- current_raw_thinking = content_chunk
216
- else:
217
- current_raw_thinking += content_chunk
218
- last_thinking_content = current_raw_thinking
219
 
220
- elif phase == "answer":
221
- if not answer_started:
222
- # First answer chunk might contain leftover thinking part, clean it.
223
- cleaned_chunk = self._clean_answer_content(content_chunk)
224
- if cleaned_chunk:
225
- raw_answer_parts.append(cleaned_chunk)
226
- answer_started = True
227
- else:
228
- raw_answer_parts.append(content_chunk)
229
- else:
230
- continue
231
  break
232
 
233
  full_answer = ''.join(raw_answer_parts)
@@ -252,4 +235,4 @@ class ProxyHandler:
252
  if stream:
253
  return StreamingResponse(self.stream_proxy_response(req), media_type="text/event-stream",
254
  headers={"Cache-Control": "no-cache", "Connection": "keep-alive"})
255
-
 
26
  if not self.client.is_closed:
27
  await self.client.aclose()
28
 
29
+ def _clean_thinking_content(self, text: str) -> str:
30
  """
31
+ Aggressively cleans raw thinking content strings.
32
+ Removes tool calls, HTML-like tags, and other metadata.
33
+ Based on the Cloudflare Workers cleanThinkingContent implementation.
 
34
  """
35
  if not text:
36
  return ""
37
+
38
+ # Remove <summary>...</summary> tags and content
39
+ cleaned_text = re.sub(r'<summary>.*?</summary>', '', text, flags=re.DOTALL)
40
+
41
+ # Remove tool call blocks
 
42
  cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', cleaned_text, flags=re.DOTALL)
43
 
44
+ # Clean up specific thinking-related tags
45
+ cleaned_text = cleaned_text.replace('</thinking>', '')
46
+ cleaned_text = cleaned_text.replace('<Full>', '')
47
+ cleaned_text = cleaned_text.replace('</Full>', '')
48
+
49
+ # Remove details tags (both opening and closing)
50
+ cleaned_text = re.sub(r'<details[^>]*>', '', cleaned_text)
51
+ cleaned_text = re.sub(r'</details>', '', cleaned_text)
52
+
53
+ # Remove other HTML-like tags
54
+ cleaned_text = re.sub(r'<[^>]+>', '', cleaned_text)
 
 
 
55
 
56
+ # Remove metadata patterns
57
+ cleaned_text = re.sub(r'true"\s+duration="\d+"[^>]*>\s*Thought for \d+ seconds', '', cleaned_text)
58
+
59
+ # Handle quote symbols (按 Cloudflare Workers 逻辑)
60
+ # First handle leading "> "
61
+ if cleaned_text.startswith('> '):
62
+ cleaned_text = cleaned_text[2:]
63
+ # Then handle "\n> " patterns (but preserve ">>" for nested quotes)
64
+ cleaned_text = re.sub(r'\n>\s+(?!>)', '\n', cleaned_text)
65
+
66
+ # Remove "Thinking..." headers (various encodings)
67
+ cleaned_text = cleaned_text.replace("Thinking…", "")
68
+ cleaned_text = cleaned_text.replace("Thinking...", "")
69
+
70
+ # Final trim
71
  return cleaned_text.strip()
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  async def _prep_upstream(self, req: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str]:
74
  """Prepares the request body, headers, and cookie for the upstream API."""
75
  ck = await cookie_manager.get_next_cookie()
 
87
  think_open = False
88
  yielded_think_buffer = ""
89
  current_raw_thinking = ""
90
+ answer_started = False
91
 
92
  async def yield_delta(content_type: str, text: str):
93
  nonlocal think_open, yielded_think_buffer
 
127
 
128
  payload_str = line[6:]
129
  if payload_str == '[DONE]':
130
+ if think_open: yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
131
+ yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {}, 'finish_reason': 'stop'}]})}\n\n"; yield "data: [DONE]\n\n"; return
 
 
 
132
  try:
133
  dat = json.loads(payload_str).get("data", {})
134
+ except (json.JSONDecodeError, AttributeError): continue
 
135
 
136
+ if answer_started:
137
+ content = dat.get("delta_content") or dat.get("edit_content")
138
+ # FIX: Expanded to a proper indented block
139
+ if content:
140
+ async for item in yield_delta("answer", content):
141
+ yield item
142
  continue
143
 
144
+ content_chunk = dat.get("edit_content") or dat.get("delta_content") or ""
145
+ if dat.get("edit_content") is not None:
146
+ current_raw_thinking = content_chunk
147
+ else:
148
+ current_raw_thinking += content_chunk
 
 
 
 
149
 
150
+ match = re.search(r'(.*</details>)(.*)', current_raw_thinking, flags=re.DOTALL)
151
+ if match:
152
+ thinking_part, answer_part = match.groups()
153
+ # FIX: Expanded to a proper indented block
154
+ async for item in yield_delta("thinking", thinking_part):
155
+ yield item
156
+ # FIX: Expanded to a proper indented block
157
+ if answer_part:
158
+ async for item in yield_delta("answer", answer_part):
159
+ yield item
160
+ answer_started = True
161
+ else:
162
+ # FIX: Expanded to a proper indented block
163
+ async for item in yield_delta("thinking", current_raw_thinking):
164
  yield item
 
 
165
  except Exception:
166
  logger.exception("Stream error"); raise
167
 
 
190
  dat = json.loads(payload_str).get("data", {})
191
  except (json.JSONDecodeError, AttributeError): continue
192
 
193
+ if answer_started:
194
+ content = dat.get("delta_content") or dat.get("edit_content")
195
+ if content: raw_answer_parts.append(content)
 
 
196
  continue
197
 
198
+ content_chunk = dat.get("edit_content") or dat.get("delta_content")
199
+ if not content_chunk: continue
200
+
201
+ if dat.get("edit_content") is not None:
202
+ current_raw_thinking = content_chunk
203
+ else:
204
+ current_raw_thinking += content_chunk
205
 
206
+ match = re.search(r'(.*</details>)(.*)', current_raw_thinking, flags=re.DOTALL)
207
+ if match:
208
+ last_thinking_content, answer_part = match.groups()
209
+ if answer_part: raw_answer_parts.append(answer_part)
210
+ answer_started = True
211
+ else:
212
+ last_thinking_content = current_raw_thinking
213
+ else: continue
 
 
 
214
  break
215
 
216
  full_answer = ''.join(raw_answer_parts)
 
235
  if stream:
236
  return StreamingResponse(self.stream_proxy_response(req), media_type="text/event-stream",
237
  headers={"Cache-Control": "no-cache", "Connection": "keep-alive"})
238
+ return await self.non_stream_proxy_response(req)