Spaces:

bluewinliang
/

zai2api

Paused

App Files Files Community

bluewinliang committed on Oct 1, 2025

Commit

c394f8c

verified ·

1 Parent(s): b9ed765

Update proxy_handler.py

Browse files

Files changed (1) hide show

proxy_handler.py +54 -52

proxy_handler.py CHANGED Viewed

@@ -53,9 +53,6 @@ class ProxyHandler:
         cleaned_text = re.sub(r'</?details[^>]*>', '', cleaned_text)
         # 3. Handle markdown blockquotes.
-        # The reference implementation removes "> " at the start of lines.
-        # The original Python implementation is more robust and correctly
-        # preserves multi-level quotes (e.g., '>>'). We'll keep it.
         cleaned_text = re.sub(r'^\s*>\s*(?!>)', '', cleaned_text, flags=re.MULTILINE)
         # 4. Remove other known text artifacts.
@@ -66,13 +63,16 @@ class ProxyHandler:
     def _clean_answer_content(self, text: str) -> str:
         """
-        Cleans only <glm_block> tags from answer content.
         Does NOT strip whitespace to preserve markdown in streams.
         """
         if not text:
             return ""
-        # Remove only tool call blocks
         cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
         return cleaned_text
     def _serialize_msgs(self, msgs) -> list:
@@ -102,7 +102,6 @@ class ProxyHandler:
             think_open = False
             yielded_think_buffer = ""
             current_raw_thinking = ""
-            answer_started = False
             async def yield_delta(content_type: str, text: str):
                 nonlocal think_open, yielded_think_buffer
@@ -142,41 +141,39 @@ class ProxyHandler:
                         payload_str = line[6:]
                         if payload_str == '[DONE]':
-                            if think_open: yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
-                            yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {}, 'finish_reason': 'stop'}]})}\n\n"; yield "data: [DONE]\n\n"; return
                         try:
                             dat = json.loads(payload_str).get("data", {})
-                        except (json.JSONDecodeError, AttributeError): continue
-                        if answer_started:
-                            content = dat.get("delta_content") or dat.get("edit_content")
-                            # FIX: Expanded to a proper indented block
-                            if content:
-                                async for item in yield_delta("answer", content):
-                                    yield item
                             continue
-                        content_chunk = dat.get("edit_content") or dat.get("delta_content") or ""
-                        if dat.get("edit_content") is not None:
-                            current_raw_thinking = content_chunk
-                        else:
-                            current_raw_thinking += content_chunk
-                        match = re.search(r'(.*</details>)(.*)', current_raw_thinking, flags=re.DOTALL)
-                        if match:
-                            thinking_part, answer_part = match.groups()
-                            # FIX: Expanded to a proper indented block
-                            async for item in yield_delta("thinking", thinking_part):
-                                yield item
-                            # FIX: Expanded to a proper indented block
-                            if answer_part:
-                                async for item in yield_delta("answer", answer_part):
-                                    yield item
-                            answer_started = True
-                        else:
-                            # FIX: Expanded to a proper indented block
                             async for item in yield_delta("thinking", current_raw_thinking):
                                 yield item
         except Exception:
             logger.exception("Stream error"); raise
@@ -205,27 +202,32 @@ class ProxyHandler:
                             dat = json.loads(payload_str).get("data", {})
                         except (json.JSONDecodeError, AttributeError): continue
-                        if answer_started:
-                            content = dat.get("delta_content") or dat.get("edit_content")
-                            if content: raw_answer_parts.append(content)
-                            continue
-                        content_chunk = dat.get("edit_content") or dat.get("delta_content")
-                        if not content_chunk: continue
-                        if dat.get("edit_content") is not None:
-                             current_raw_thinking = content_chunk
-                        else:
-                             current_raw_thinking += content_chunk
-                        match = re.search(r'(.*</details>)(.*)', current_raw_thinking, flags=re.DOTALL)
-                        if match:
-                            last_thinking_content, answer_part = match.groups()
-                            if answer_part: raw_answer_parts.append(answer_part)
-                            answer_started = True
-                        else:
                             last_thinking_content = current_raw_thinking
-                    else: continue
                     break
             full_answer = ''.join(raw_answer_parts)

         cleaned_text = re.sub(r'</?details[^>]*>', '', cleaned_text)
         # 3. Handle markdown blockquotes.
         cleaned_text = re.sub(r'^\s*>\s*(?!>)', '', cleaned_text, flags=re.MULTILINE)
         # 4. Remove other known text artifacts.
     def _clean_answer_content(self, text: str) -> str:
         """
+        Cleans unwanted tags from answer content.
         Does NOT strip whitespace to preserve markdown in streams.
         """
         if not text:
             return ""
+        # Remove tool call blocks
         cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
+        # Remove any residual details/summary blocks that might leak into the answer
+        cleaned_text = re.sub(r'<details[^>]*>.*?</details>', '', cleaned_text, flags=re.DOTALL)
+        cleaned_text = re.sub(r'<summary>.*?</summary>', '', cleaned_text, flags=re.DOTALL)
         return cleaned_text
     def _serialize_msgs(self, msgs) -> list:
             think_open = False
             yielded_think_buffer = ""
             current_raw_thinking = ""
             async def yield_delta(content_type: str, text: str):
                 nonlocal think_open, yielded_think_buffer
                         payload_str = line[6:]
                         if payload_str == '[DONE]':
+                            if think_open:
+                                yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
+                            yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {}, 'finish_reason': 'stop'}]})}\n\n";
+                            yield "data: [DONE]\n\n";
+                            return
                         try:
                             dat = json.loads(payload_str).get("data", {})
+                        except (json.JSONDecodeError, AttributeError):
+                            continue
+                        # --- START OF REFACTORED LOGIC ---
+                        phase = dat.get("phase")
+                        content_chunk = dat.get("delta_content") or dat.get("edit_content")
+                        if not content_chunk:
                             continue
+                        if phase == "thinking":
+                            # Accumulate raw thinking content. `edit_content` replaces the buffer.
+                            if dat.get("edit_content") is not None:
+                                current_raw_thinking = content_chunk
+                            else:
+                                current_raw_thinking += content_chunk
+                            # Yield the processed delta of the accumulated thinking content
                             async for item in yield_delta("thinking", current_raw_thinking):
                                 yield item
+                        elif phase == "answer":
+                            # Directly yield the answer chunk for processing
+                            async for item in yield_delta("answer", content_chunk):
+                                yield item
+                        # --- END OF REFACTORED LOGIC ---
         except Exception:
             logger.exception("Stream error"); raise
                             dat = json.loads(payload_str).get("data", {})
                         except (json.JSONDecodeError, AttributeError): continue
+                        # Use the more robust phase-based logic for non-stream as well
+                        phase = dat.get("phase")
+                        content_chunk = dat.get("delta_content") or dat.get("edit_content")
+                        if not content_chunk:
+                            continue
+                        if phase == "thinking":
+                            answer_started = False # Ensure we are in thinking mode
+                            if dat.get("edit_content") is not None:
+                                current_raw_thinking = content_chunk
+                            else:
+                                current_raw_thinking += content_chunk
                             last_thinking_content = current_raw_thinking
+                        elif phase == "answer":
+                            if not answer_started:
+                                # First answer chunk might contain leftover thinking part, clean it.
+                                cleaned_chunk = self._clean_answer_content(content_chunk)
+                                if cleaned_chunk:
+                                    raw_answer_parts.append(cleaned_chunk)
+                                answer_started = True
+                            else:
+                                raw_answer_parts.append(content_chunk)
+                    else:
+                        continue
                     break
             full_answer = ''.join(raw_answer_parts)