Spaces:

bluewinliang
/

zai2api

Paused

App Files Files Community

bluewinliang committed on Oct 1, 2025

Commit

56e545a

verified ·

1 Parent(s): c394f8c

Update proxy_handler.py

Browse files

Files changed (1) hide show

proxy_handler.py +37 -30

proxy_handler.py CHANGED Viewed

@@ -29,36 +29,35 @@ class ProxyHandler:
     def _clean_thinking_content(self, text: str) -> str:
         """
         Aggressively cleans raw thinking content strings based on observed patterns
-        from the Z.AI API, inspired by a reference Cloudflare implementation.
-        Removes tool calls, specific HTML-like tags, and other metadata while preserving
-        the core thought process content.
         """
         if not text:
             return ""
         cleaned_text = text
-        # 1. Remove entire blocks where the content is also unwanted metadata.
-        # e.g., <summary>Thinking...</summary> or <glm_block>...</glm_block>
         cleaned_text = re.sub(r'<summary>.*?</summary>', '', cleaned_text, flags=re.DOTALL)
         cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', cleaned_text, flags=re.DOTALL)
-        # 2. Remove specific structural tags, but keep the content between them.
-        # Inspired by the reference implementation's targeted replaces.
-        # e.g., <details> content </details> becomes just 'content'
         cleaned_text = cleaned_text.replace("</thinking>", "")
         cleaned_text = cleaned_text.replace("<Full>", "")
         cleaned_text = cleaned_text.replace("</Full>", "")
         # This regex handles <details>, <details open>, and </details>
         cleaned_text = re.sub(r'</?details[^>]*>', '', cleaned_text)
-        # 3. Handle markdown blockquotes.
         cleaned_text = re.sub(r'^\s*>\s*(?!>)', '', cleaned_text, flags=re.MULTILINE)
-        # 4. Remove other known text artifacts.
         cleaned_text = cleaned_text.replace("Thinking…", "")
-        # 5. Final strip to clean up residual whitespace from removed elements.
         return cleaned_text.strip()
     def _clean_answer_content(self, text: str) -> str:
@@ -102,6 +101,8 @@ class ProxyHandler:
             think_open = False
             yielded_think_buffer = ""
             current_raw_thinking = ""
             async def yield_delta(content_type: str, text: str):
                 nonlocal think_open, yielded_think_buffer
@@ -151,7 +152,6 @@ class ProxyHandler:
                         except (json.JSONDecodeError, AttributeError):
                             continue
-                        # --- START OF REFACTORED LOGIC ---
                         phase = dat.get("phase")
                         content_chunk = dat.get("delta_content") or dat.get("edit_content")
@@ -159,21 +159,27 @@ class ProxyHandler:
                             continue
                         if phase == "thinking":
-                            # Accumulate raw thinking content. `edit_content` replaces the buffer.
                             if dat.get("edit_content") is not None:
                                 current_raw_thinking = content_chunk
                             else:
                                 current_raw_thinking += content_chunk
-                            # Yield the processed delta of the accumulated thinking content
                             async for item in yield_delta("thinking", current_raw_thinking):
                                 yield item
                         elif phase == "answer":
-                            # Directly yield the answer chunk for processing
-                            async for item in yield_delta("answer", content_chunk):
-                                yield item
-                        # --- END OF REFACTORED LOGIC ---
         except Exception:
             logger.exception("Stream error"); raise
@@ -190,7 +196,7 @@ class ProxyHandler:
                 await cookie_manager.mark_cookie_success(ck)
                 current_raw_thinking = ""
-                answer_started = False
                 async for raw in resp.aiter_text():
                     for line in raw.strip().split('\n'):
@@ -202,7 +208,6 @@ class ProxyHandler:
                             dat = json.loads(payload_str).get("data", {})
                         except (json.JSONDecodeError, AttributeError): continue
-                        # Use the more robust phase-based logic for non-stream as well
                         phase = dat.get("phase")
                         content_chunk = dat.get("delta_content") or dat.get("edit_content")
@@ -210,7 +215,6 @@ class ProxyHandler:
                             continue
                         if phase == "thinking":
-                            answer_started = False # Ensure we are in thinking mode
                             if dat.get("edit_content") is not None:
                                 current_raw_thinking = content_chunk
                             else:
@@ -218,19 +222,22 @@ class ProxyHandler:
                             last_thinking_content = current_raw_thinking
                         elif phase == "answer":
-                            if not answer_started:
-                                # First answer chunk might contain leftover thinking part, clean it.
-                                cleaned_chunk = self._clean_answer_content(content_chunk)
-                                if cleaned_chunk:
-                                    raw_answer_parts.append(cleaned_chunk)
-                                answer_started = True
-                            else:
-                                raw_answer_parts.append(content_chunk)
                     else:
                         continue
                     break
             full_answer = ''.join(raw_answer_parts)
             cleaned_ans_text = self._clean_answer_content(full_answer).strip()
             final_content = cleaned_ans_text

     def _clean_thinking_content(self, text: str) -> str:
         """
         Aggressively cleans raw thinking content strings based on observed patterns
+        from the Z.AI API.
         """
         if not text:
             return ""
         cleaned_text = text
+        # 1. Remove specific unwanted blocks like tool calls and summaries.
         cleaned_text = re.sub(r'<summary>.*?</summary>', '', cleaned_text, flags=re.DOTALL)
         cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', cleaned_text, flags=re.DOTALL)
+        # 2. **FIX**: Remove tag-like metadata containing `duration` attribute.
+        # This handles the reported issue: `true" duration="0" ... >`
+        cleaned_text = re.sub(r'<[^>]*duration="[^"]*"[^>]*>', '', cleaned_text)
+        # 3. Remove specific structural tags, but keep the content between them.
         cleaned_text = cleaned_text.replace("</thinking>", "")
         cleaned_text = cleaned_text.replace("<Full>", "")
         cleaned_text = cleaned_text.replace("</Full>", "")
         # This regex handles <details>, <details open>, and </details>
         cleaned_text = re.sub(r'</?details[^>]*>', '', cleaned_text)
+        # 4. Handle markdown blockquotes, preserving multi-level ones.
         cleaned_text = re.sub(r'^\s*>\s*(?!>)', '', cleaned_text, flags=re.MULTILINE)
+        # 5. Remove other known text artifacts.
         cleaned_text = cleaned_text.replace("Thinking…", "")
+        # 6. Final strip to clean up residual whitespace.
         return cleaned_text.strip()
     def _clean_answer_content(self, text: str) -> str:
             think_open = False
             yielded_think_buffer = ""
             current_raw_thinking = ""
+            # **FIX**: State to handle the transition from thinking to answer
+            is_first_answer_chunk = True
             async def yield_delta(content_type: str, text: str):
                 nonlocal think_open, yielded_think_buffer
                         except (json.JSONDecodeError, AttributeError):
                             continue
                         phase = dat.get("phase")
                         content_chunk = dat.get("delta_content") or dat.get("edit_content")
                             continue
                         if phase == "thinking":
                             if dat.get("edit_content") is not None:
                                 current_raw_thinking = content_chunk
                             else:
                                 current_raw_thinking += content_chunk
                             async for item in yield_delta("thinking", current_raw_thinking):
                                 yield item
                         elif phase == "answer":
+                            content_to_process = content_chunk
+                            # **FIX**: Special handling for the first answer chunk
+                            if is_first_answer_chunk:
+                                # The first answer chunk often contains leftover thinking content.
+                                # We split by '</details>' and only use the part after it.
+                                if '</details>' in content_to_process:
+                                    parts = content_to_process.split('</details>', 1)
+                                    content_to_process = parts[1] if len(parts) > 1 else ""
+                                is_first_answer_chunk = False
+                            if content_to_process:
+                                async for item in yield_delta("answer", content_to_process):
+                                    yield item
         except Exception:
             logger.exception("Stream error"); raise
                 await cookie_manager.mark_cookie_success(ck)
                 current_raw_thinking = ""
+                is_first_answer_chunk = True
                 async for raw in resp.aiter_text():
                     for line in raw.strip().split('\n'):
                             dat = json.loads(payload_str).get("data", {})
                         except (json.JSONDecodeError, AttributeError): continue
                         phase = dat.get("phase")
                         content_chunk = dat.get("delta_content") or dat.get("edit_content")
                             continue
                         if phase == "thinking":
                             if dat.get("edit_content") is not None:
                                 current_raw_thinking = content_chunk
                             else:
                             last_thinking_content = current_raw_thinking
                         elif phase == "answer":
+                            content_to_process = content_chunk
+                            # **FIX**: Apply same logic to non-stream mode
+                            if is_first_answer_chunk:
+                                if '</details>' in content_to_process:
+                                    parts = content_to_process.split('</details>', 1)
+                                    content_to_process = parts[1] if len(parts) > 1 else ""
+                                is_first_answer_chunk = False
+                            if content_to_process:
+                                raw_answer_parts.append(content_to_process)
                     else:
                         continue
                     break
             full_answer = ''.join(raw_answer_parts)
+            # The final cleaning is still useful for any other residual tags
             cleaned_ans_text = self._clean_answer_content(full_answer).strip()
             final_content = cleaned_ans_text