bluewinliang committed on
Commit
c394f8c
·
verified ·
1 Parent(s): b9ed765

Update proxy_handler.py

Browse files
Files changed (1) hide show
  1. proxy_handler.py +54 -52
proxy_handler.py CHANGED
@@ -53,9 +53,6 @@ class ProxyHandler:
53
  cleaned_text = re.sub(r'</?details[^>]*>', '', cleaned_text)
54
 
55
  # 3. Handle markdown blockquotes.
56
- # The reference implementation removes "> " at the start of lines.
57
- # The original Python implementation is more robust and correctly
58
- # preserves multi-level quotes (e.g., '>>'). We'll keep it.
59
  cleaned_text = re.sub(r'^\s*>\s*(?!>)', '', cleaned_text, flags=re.MULTILINE)
60
 
61
  # 4. Remove other known text artifacts.
@@ -66,13 +63,16 @@ class ProxyHandler:
66
 
67
  def _clean_answer_content(self, text: str) -> str:
68
  """
69
- Cleans only <glm_block> tags from answer content.
70
  Does NOT strip whitespace to preserve markdown in streams.
71
  """
72
  if not text:
73
  return ""
74
- # Remove only tool call blocks
75
  cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
 
 
 
76
  return cleaned_text
77
 
78
  def _serialize_msgs(self, msgs) -> list:
@@ -102,7 +102,6 @@ class ProxyHandler:
102
  think_open = False
103
  yielded_think_buffer = ""
104
  current_raw_thinking = ""
105
- answer_started = False
106
 
107
  async def yield_delta(content_type: str, text: str):
108
  nonlocal think_open, yielded_think_buffer
@@ -142,41 +141,39 @@ class ProxyHandler:
142
 
143
  payload_str = line[6:]
144
  if payload_str == '[DONE]':
145
- if think_open: yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
146
- yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {}, 'finish_reason': 'stop'}]})}\n\n"; yield "data: [DONE]\n\n"; return
 
 
 
147
  try:
148
  dat = json.loads(payload_str).get("data", {})
149
- except (json.JSONDecodeError, AttributeError): continue
 
150
 
151
- if answer_started:
152
- content = dat.get("delta_content") or dat.get("edit_content")
153
- # FIX: Expanded to a proper indented block
154
- if content:
155
- async for item in yield_delta("answer", content):
156
- yield item
157
  continue
158
 
159
- content_chunk = dat.get("edit_content") or dat.get("delta_content") or ""
160
- if dat.get("edit_content") is not None:
161
- current_raw_thinking = content_chunk
162
- else:
163
- current_raw_thinking += content_chunk
164
-
165
- match = re.search(r'(.*</details>)(.*)', current_raw_thinking, flags=re.DOTALL)
166
- if match:
167
- thinking_part, answer_part = match.groups()
168
- # FIX: Expanded to a proper indented block
169
- async for item in yield_delta("thinking", thinking_part):
170
- yield item
171
- # FIX: Expanded to a proper indented block
172
- if answer_part:
173
- async for item in yield_delta("answer", answer_part):
174
- yield item
175
- answer_started = True
176
- else:
177
- # FIX: Expanded to a proper indented block
178
  async for item in yield_delta("thinking", current_raw_thinking):
179
  yield item
 
 
 
 
 
 
 
180
  except Exception:
181
  logger.exception("Stream error"); raise
182
 
@@ -205,27 +202,32 @@ class ProxyHandler:
205
  dat = json.loads(payload_str).get("data", {})
206
  except (json.JSONDecodeError, AttributeError): continue
207
 
208
- if answer_started:
209
- content = dat.get("delta_content") or dat.get("edit_content")
210
- if content: raw_answer_parts.append(content)
211
- continue
212
 
213
- content_chunk = dat.get("edit_content") or dat.get("delta_content")
214
- if not content_chunk: continue
215
 
216
- if dat.get("edit_content") is not None:
217
- current_raw_thinking = content_chunk
218
- else:
219
- current_raw_thinking += content_chunk
220
-
221
- match = re.search(r'(.*</details>)(.*)', current_raw_thinking, flags=re.DOTALL)
222
- if match:
223
- last_thinking_content, answer_part = match.groups()
224
- if answer_part: raw_answer_parts.append(answer_part)
225
- answer_started = True
226
- else:
227
  last_thinking_content = current_raw_thinking
228
- else: continue
 
 
 
 
 
 
 
 
 
 
 
229
  break
230
 
231
  full_answer = ''.join(raw_answer_parts)
 
53
  cleaned_text = re.sub(r'</?details[^>]*>', '', cleaned_text)
54
 
55
  # 3. Handle markdown blockquotes.
 
 
 
56
  cleaned_text = re.sub(r'^\s*>\s*(?!>)', '', cleaned_text, flags=re.MULTILINE)
57
 
58
  # 4. Remove other known text artifacts.
 
63
 
64
  def _clean_answer_content(self, text: str) -> str:
65
  """
66
+ Cleans unwanted tags from answer content.
67
  Does NOT strip whitespace to preserve markdown in streams.
68
  """
69
  if not text:
70
  return ""
71
+ # Remove tool call blocks
72
  cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
73
+ # Remove any residual details/summary blocks that might leak into the answer
74
+ cleaned_text = re.sub(r'<details[^>]*>.*?</details>', '', cleaned_text, flags=re.DOTALL)
75
+ cleaned_text = re.sub(r'<summary>.*?</summary>', '', cleaned_text, flags=re.DOTALL)
76
  return cleaned_text
77
 
78
  def _serialize_msgs(self, msgs) -> list:
 
102
  think_open = False
103
  yielded_think_buffer = ""
104
  current_raw_thinking = ""
 
105
 
106
  async def yield_delta(content_type: str, text: str):
107
  nonlocal think_open, yielded_think_buffer
 
141
 
142
  payload_str = line[6:]
143
  if payload_str == '[DONE]':
144
+ if think_open:
145
+ yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
146
+ yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {}, 'finish_reason': 'stop'}]})}\n\n";
147
+ yield "data: [DONE]\n\n";
148
+ return
149
  try:
150
  dat = json.loads(payload_str).get("data", {})
151
+ except (json.JSONDecodeError, AttributeError):
152
+ continue
153
 
154
+ # --- START OF REFACTORED LOGIC ---
155
+ phase = dat.get("phase")
156
+ content_chunk = dat.get("delta_content") or dat.get("edit_content")
157
+
158
+ if not content_chunk:
 
159
  continue
160
 
161
+ if phase == "thinking":
162
+ # Accumulate raw thinking content. `edit_content` replaces the buffer.
163
+ if dat.get("edit_content") is not None:
164
+ current_raw_thinking = content_chunk
165
+ else:
166
+ current_raw_thinking += content_chunk
167
+ # Yield the processed delta of the accumulated thinking content
 
 
 
 
 
 
 
 
 
 
 
 
168
  async for item in yield_delta("thinking", current_raw_thinking):
169
  yield item
170
+
171
+ elif phase == "answer":
172
+ # Directly yield the answer chunk for processing
173
+ async for item in yield_delta("answer", content_chunk):
174
+ yield item
175
+ # --- END OF REFACTORED LOGIC ---
176
+
177
  except Exception:
178
  logger.exception("Stream error"); raise
179
 
 
202
  dat = json.loads(payload_str).get("data", {})
203
  except (json.JSONDecodeError, AttributeError): continue
204
 
205
+ # Use the more robust phase-based logic for non-stream as well
206
+ phase = dat.get("phase")
207
+ content_chunk = dat.get("delta_content") or dat.get("edit_content")
 
208
 
209
+ if not content_chunk:
210
+ continue
211
 
212
+ if phase == "thinking":
213
+ answer_started = False # Ensure we are in thinking mode
214
+ if dat.get("edit_content") is not None:
215
+ current_raw_thinking = content_chunk
216
+ else:
217
+ current_raw_thinking += content_chunk
 
 
 
 
 
218
  last_thinking_content = current_raw_thinking
219
+
220
+ elif phase == "answer":
221
+ if not answer_started:
222
+ # First answer chunk might contain leftover thinking part, clean it.
223
+ cleaned_chunk = self._clean_answer_content(content_chunk)
224
+ if cleaned_chunk:
225
+ raw_answer_parts.append(cleaned_chunk)
226
+ answer_started = True
227
+ else:
228
+ raw_answer_parts.append(content_chunk)
229
+ else:
230
+ continue
231
  break
232
 
233
  full_answer = ''.join(raw_answer_parts)