Spaces:

bluewinliang
/

zai2api

Paused

App Files Files Community

bluewinliang committed on Aug 12, 2025

Commit

15da475

verified ·

1 Parent(s): 4f7eefe

Upload proxy_handler.py

Browse files

Files changed (1) hide show

proxy_handler.py +45 -37

proxy_handler.py CHANGED Viewed

@@ -34,14 +34,14 @@ class ProxyHandler:
         """
         if not text:
             return ""
-        # Remove tool call blocks first
         cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
         # Remove all other HTML-like tags
         cleaned_text = re.sub(r'<[^>]+>', '', cleaned_text)
         # Remove specific metadata patterns
         cleaned_text = re.sub(r'true" duration="\d+">\s*Thought for \d+ seconds', '', cleaned_text)
-        # Remove leading markdown quote symbols
-        # cleaned_text = re.sub(r'^\s*>\s*', '', cleaned_text, flags=re.MULTILINE)
         # Remove "Thinking..." headers
         cleaned_text = cleaned_text.replace("Thinking…", "")
         # Final strip to clean up residual whitespace
@@ -56,7 +56,7 @@ class ProxyHandler:
             return ""
         # Remove only tool call blocks
         cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
-        return cleaned_text # <-- FIX: Removed .strip() to preserve whitespace in chunks
     def _serialize_msgs(self, msgs) -> list:
         """Converts message objects to a list of dictionaries."""
@@ -81,39 +81,43 @@ class ProxyHandler:
         ck = None
         try:
             body, headers, ck = await self._prep_upstream(req)
-            comp_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"; think_open = False; phase_cur = None
             async def yield_content(content_type: str, text: str):
-                nonlocal think_open
-                # Apply cleaning based on content type
-                cleaned_text = ""
-                if content_type == "thinking":
-                    cleaned_text = self._clean_thinking_content(text)
-                elif content_type == "answer":
-                    # Use the non-stripping cleaner for stream chunks
-                    cleaned_text = self._clean_answer_content(text)
-                if not cleaned_text and not (content_type == "answer" and text):
-                    # For answer, even if cleaning results in empty (e.g. only a glm_block),
-                    # the original might have been just whitespace, so we allow it to pass if text was present.
-                    # This logic is now simpler: if there's no text, don't yield.
-                    if not text: return
                 if content_type == "thinking" and settings.SHOW_THINK_TAGS:
                     if not think_open:
                         yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]})}\n\n"
                         think_open = True
-                    if cleaned_text:
-                        yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': cleaned_text}, 'finish_reason': None}]})}\n\n"
                 elif content_type == "answer":
                     if think_open:
                         yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
                         think_open = False
-                    # Yield the cleaned text, which now preserves crucial whitespace
-                    yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': cleaned_text}, 'finish_reason': None}]})}\n\n"
             async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
                 if resp.status_code != 200:
@@ -140,16 +144,17 @@ class ProxyHandler:
                         if new_phase: phase_cur = new_phase
                         if not phase_cur: continue
-                        content = dat.get("delta_content") or dat.get("edit_content")
                         if not content: continue
                         match = re.search(r'(.*</details>)(.*)', content, flags=re.DOTALL)
                         if match:
                             thinking_part, answer_part = match.groups()
-                            async for item in yield_content("thinking", thinking_part): yield item
-                            async for item in yield_content("answer", answer_part): yield item
                         else:
-                             async for item in yield_content(phase_cur, content): yield item
         except Exception:
             logger.exception("Stream error"); raise
@@ -157,7 +162,10 @@ class ProxyHandler:
         ck = None
         try:
             body, headers, ck = await self._prep_upstream(req)
-            raw_thinking_parts = []; raw_answer_parts = []; phase_cur = None
             async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
                 if resp.status_code != 200:
                     await cookie_manager.mark_cookie_failed(ck); error_detail = await resp.text()
@@ -179,17 +187,17 @@ class ProxyHandler:
                         if new_phase: phase_cur = new_phase
                         if not phase_cur: continue
-                        content = dat.get("delta_content") or dat.get("edit_content")
                         if not content: continue
                         match = re.search(r'(.*</details>)(.*)', content, flags=re.DOTALL)
                         if match:
                             thinking_part, answer_part = match.groups()
-                            raw_thinking_parts.append(thinking_part)
                             raw_answer_parts.append(answer_part)
                         else:
                             if phase_cur == "thinking":
-                                raw_thinking_parts.append(content)
                             elif phase_cur == "answer":
                                 raw_answer_parts.append(content)
                     else: continue
@@ -197,12 +205,12 @@ class ProxyHandler:
             # Clean the joined answer text, then strip the final result.
             full_answer = ''.join(raw_answer_parts)
-            cleaned_ans_text = self._clean_answer_content(full_answer).strip() # <-- FIX: Apply .strip() here
             final_content = cleaned_ans_text
-            if settings.SHOW_THINK_TAGS and raw_thinking_parts:
-                # Aggressively clean the thinking part.
-                cleaned_think_text = self._clean_thinking_content(''.join(raw_thinking_parts))
                 if cleaned_think_text:
                     final_content = f"<think>{cleaned_think_text}</think>{cleaned_ans_text}"

         """
         if not text:
             return ""
+        # Remove tool call blocks
         cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
         # Remove all other HTML-like tags
         cleaned_text = re.sub(r'<[^>]+>', '', cleaned_text)
         # Remove specific metadata patterns
         cleaned_text = re.sub(r'true" duration="\d+">\s*Thought for \d+ seconds', '', cleaned_text)
+        # FIX: Remove only first-level markdown quotes, preserving sub-quotes like '>>'
+        cleaned_text = re.sub(r'^\s*>\s*(?!>)', '', cleaned_text, flags=re.MULTILINE)
         # Remove "Thinking..." headers
         cleaned_text = cleaned_text.replace("Thinking…", "")
         # Final strip to clean up residual whitespace
             return ""
         # Remove only tool call blocks
         cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
+        return cleaned_text
     def _serialize_msgs(self, msgs) -> list:
         """Converts message objects to a list of dictionaries."""
         ck = None
         try:
             body, headers, ck = await self._prep_upstream(req)
+            comp_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
+            think_open = False
+            phase_cur = None
+            # FIX: Buffer to track streamed thinking content and prevent duplication.
+            streamed_think_buffer = ""
             async def yield_content(content_type: str, text: str):
+                nonlocal think_open, streamed_think_buffer
+                if not text: return
                 if content_type == "thinking" and settings.SHOW_THINK_TAGS:
                     if not think_open:
                         yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]})}\n\n"
                         think_open = True
+                    # Clean the full incoming text first.
+                    cleaned_full_text = self._clean_thinking_content(text)
+                    # Send only the new part of the content.
+                    if cleaned_full_text.startswith(streamed_think_buffer):
+                        delta_to_send = cleaned_full_text[len(streamed_think_buffer):]
+                        if delta_to_send:
+                            yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': delta_to_send}, 'finish_reason': None}]})}\n\n"
+                    else: # If content radically changes, send the whole new content.
+                        yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': cleaned_full_text}, 'finish_reason': None}]})}\n\n"
+                    # Update the buffer with the latest full content.
+                    streamed_think_buffer = cleaned_full_text
                 elif content_type == "answer":
                     if think_open:
                         yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
                         think_open = False
+                    cleaned_text = self._clean_answer_content(text)
+                    if cleaned_text:
+                        yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': cleaned_text}, 'finish_reason': None}]})}\n\n"
             async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
                 if resp.status_code != 200:
                         if new_phase: phase_cur = new_phase
                         if not phase_cur: continue
+                        # Use edit_content first as it's the complete version, fallback to delta.
+                        content = dat.get("edit_content") or dat.get("delta_content")
                         if not content: continue
                         match = re.search(r'(.*</details>)(.*)', content, flags=re.DOTALL)
                         if match:
                             thinking_part, answer_part = match.groups()
+                            await yield_content("thinking", thinking_part)
+                            await yield_content("answer", answer_part)
                         else:
+                             await yield_content(phase_cur, content)
         except Exception:
             logger.exception("Stream error"); raise
         ck = None
         try:
             body, headers, ck = await self._prep_upstream(req)
+            # FIX: Use single variables to store the latest content, preventing duplication.
+            last_thinking_content = ""
+            raw_answer_parts = []
+            phase_cur = None
             async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
                 if resp.status_code != 200:
                     await cookie_manager.mark_cookie_failed(ck); error_detail = await resp.text()
                         if new_phase: phase_cur = new_phase
                         if not phase_cur: continue
+                        content = dat.get("edit_content") or dat.get("delta_content")
                         if not content: continue
                         match = re.search(r'(.*</details>)(.*)', content, flags=re.DOTALL)
                         if match:
                             thinking_part, answer_part = match.groups()
+                            last_thinking_content = thinking_part
                             raw_answer_parts.append(answer_part)
                         else:
                             if phase_cur == "thinking":
+                                last_thinking_content = content # Overwrite with the latest
                             elif phase_cur == "answer":
                                 raw_answer_parts.append(content)
                     else: continue
             # Clean the joined answer text, then strip the final result.
             full_answer = ''.join(raw_answer_parts)
+            cleaned_ans_text = self._clean_answer_content(full_answer).strip()
             final_content = cleaned_ans_text
+            if settings.SHOW_THINK_TAGS and last_thinking_content:
+                # Aggressively clean the final thinking content.
+                cleaned_think_text = self._clean_thinking_content(last_thinking_content)
                 if cleaned_think_text:
                     final_content = f"<think>{cleaned_think_text}</think>{cleaned_ans_text}"