Spaces:

bluewinliang
/

zai2api

Paused

App Files Files Community

bluewinliang committed on Sep 30, 2025

Commit

d58e410

verified ·

1 Parent(s): af689c4

Update proxy_handler.py

Browse files

Files changed (1) hide show

proxy_handler.py +29 -12

proxy_handler.py CHANGED Viewed

@@ -28,23 +28,40 @@ class ProxyHandler:
     def _clean_thinking_content(self, text: str) -> str:
         """
-        Aggressively cleans raw thinking content strings.
-        Removes tool calls, HTML-like tags, and other metadata.
         """
         if not text:
             return ""
-        # Remove tool call blocks
-        cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
-        # Remove all other HTML-like tags
-        cleaned_text = re.sub(r'<[^>]+>', '', cleaned_text)
-        # Remove specific metadata patterns
-        # FIX: Made regex more flexible to handle additional attributes before the closing '>'
-        cleaned_text = re.sub(r'true" duration="\d+"[^>]*>\s*Thought for \d+ seconds', '', cleaned_text)
-        # Remove only first-level markdown quotes, preserving sub-quotes like '>>'
         cleaned_text = re.sub(r'^\s*>\s*(?!>)', '', cleaned_text, flags=re.MULTILINE)
-        # Remove "Thinking..." headers
         cleaned_text = cleaned_text.replace("Thinking…", "")
-        # Final strip to clean up residual whitespace
         return cleaned_text.strip()
     def _clean_answer_content(self, text: str) -> str:

     def _clean_thinking_content(self, text: str) -> str:
         """
+        Aggressively cleans raw thinking content strings based on observed patterns
+        from the Z.AI API, inspired by a reference Cloudflare implementation.
+        Removes tool calls, specific HTML-like tags, and other metadata while preserving
+        the core thought process content.
         """
         if not text:
             return ""
+        cleaned_text = text
+        # 1. Remove entire blocks where the content is also unwanted metadata.
+        # e.g., <summary>Thinking...</summary> or <glm_block>...</glm_block>
+        cleaned_text = re.sub(r'<summary>.*?</summary>', '', cleaned_text, flags=re.DOTALL)
+        cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', cleaned_text, flags=re.DOTALL)
+        # 2. Remove specific structural tags, but keep the content between them.
+        # Inspired by the reference implementation's targeted replaces.
+        # e.g., <details> content </details> becomes just 'content'
+        cleaned_text = cleaned_text.replace("</thinking>", "")
+        cleaned_text = cleaned_text.replace("<Full>", "")
+        cleaned_text = cleaned_text.replace("</Full>", "")
+        # This regex handles <details>, <details open>, and </details>
+        cleaned_text = re.sub(r'</?details[^>]*>', '', cleaned_text)
+        # 3. Handle markdown blockquotes.
+        # The reference implementation removes "> " at the start of lines.
+        # The original Python implementation is more robust and correctly
+        # preserves multi-level quotes (e.g., '>>'). We'll keep it.
         cleaned_text = re.sub(r'^\s*>\s*(?!>)', '', cleaned_text, flags=re.MULTILINE)
+        # 4. Remove other known text artifacts.
         cleaned_text = cleaned_text.replace("Thinking…", "")
+        # 5. Final strip to clean up residual whitespace from removed elements.
         return cleaned_text.strip()
     def _clean_answer_content(self, text: str) -> str: