bluewinliang committed on
Commit
6d2594a
·
verified ·
1 Parent(s): e20a984

Upload proxy_handler.py

Browse files
Files changed (1) hide show
  1. proxy_handler.py +32 -34
proxy_handler.py CHANGED
@@ -27,6 +27,28 @@ class ProxyHandler:
27
  if not self.client.is_closed:
28
  await self.client.aclose()
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  # ... Methods _serialize_msgs, _prep_upstream remain the same ...
31
  def _serialize_msgs(self, msgs) -> list:
32
  out = []
@@ -43,8 +65,7 @@ class ProxyHandler:
43
  body = { "stream": True, "model": model, "messages": self._serialize_msgs(req.messages), "background_tasks": {"title_generation": True, "tags_generation": True}, "chat_id": str(uuid.uuid4()), "features": {"image_generation": False, "code_interpreter": False, "web_search": False, "auto_web_search": False, "enable_thinking": True,}, "id": str(uuid.uuid4()), "mcp_servers": ["deep-web-search"], "model_item": {"id": model, "name": "GLM-4.5", "owned_by": "openai"}, "params": {}, "tool_servers": [], "variables": {"{{USER_NAME}}": "User", "{{USER_LOCATION}}": "Unknown", "{{CURRENT_DATETIME}}": time.strftime("%Y-%m-%d %H:%M:%S"),},}
44
  headers = { "Content-Type": "application/json", "Authorization": f"Bearer {ck}", "User-Agent": ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"), "Accept": "application/json, text/event-stream", "Accept-Language": "zh-CN", "sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"', "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"macOS"', "x-fe-version": "prod-fe-1.0.53", "Origin": "https://chat.z.ai", "Referer": "https://chat.z.ai/",}
45
  return body, headers, ck
46
-
47
- # The stream_proxy_response from v6 remains the same, as it was working correctly.
48
  async def stream_proxy_response(self, req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
49
  ck = None
50
  try:
@@ -59,10 +80,11 @@ class ProxyHandler:
59
  if not think_open:
60
  yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]})}\n\n"
61
  think_open = True
62
- # In stream, we clean as we go
 
63
  cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
64
- cleaned_text = re.sub(r'<[^>]+>', '', cleaned_text)
65
- cleaned_text = re.sub(r'^\s*>\s*', '', cleaned_text, flags=re.MULTILINE)
66
  if cleaned_text:
67
  yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': cleaned_text}, 'finish_reason': None}]})}\n\n"
68
  elif content_type == "answer":
@@ -108,15 +130,11 @@ class ProxyHandler:
108
  except Exception:
109
  logger.exception("Stream error"); raise
110
 
111
- # ---------- non-stream (REVISED CLEANUP LOGIC) ----------
112
  async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
113
  ck = None
114
  try:
115
  body, headers, ck = await self._prep_upstream(req)
116
- raw_thinking_parts = []
117
- raw_answer_parts = []
118
- phase_cur = None
119
-
120
  async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
121
  if resp.status_code != 200:
122
  await cookie_manager.mark_cookie_failed(ck); error_detail = await resp.text()
@@ -140,8 +158,6 @@ class ProxyHandler:
140
  content = dat.get("delta_content") or dat.get("edit_content")
141
  if not content: continue
142
 
143
- content = re.sub(r'<glm_block.*?</glm_block>', '', content, flags=re.DOTALL)
144
-
145
  match = re.search(r'(.*</details>)(.*)', content, flags=re.DOTALL)
146
  if match:
147
  thinking_part, answer_part = match.groups()
@@ -155,30 +171,13 @@ class ProxyHandler:
155
  else: continue
156
  break
157
 
158
- # Final assembly and cleaning
159
  final_ans_text = ''.join(raw_answer_parts)
160
  final_content = final_ans_text
161
-
162
  if settings.SHOW_THINK_TAGS and raw_thinking_parts:
163
- full_think_text = ''.join(raw_thinking_parts)
164
-
165
- # --- START OF FINAL FIX ---
166
- # 1. Remove all HTML-like tags first. This gets rid of <details>, <summary>, etc.
167
- cleaned_text = re.sub(r'<[^>]+>', '', full_think_text)
168
-
169
- # 2. Remove specific known metadata patterns that are not standard HTML.
170
- # This pattern matches 'true" duration="...">' and "Thought for ... seconds"
171
- cleaned_text = re.sub(r'true" duration="\d+">\s*Thought for \d+ seconds', '', cleaned_text)
172
-
173
- # 3. Remove leading markdown quote symbols and trim whitespace.
174
- cleaned_text = re.sub(r'^\s*>\s*', '', cleaned_text, flags=re.MULTILINE).strip()
175
-
176
- # 4. Remove any remaining "Thinking..." headers.
177
- cleaned_text = cleaned_text.replace("Thinking…", "").strip()
178
- # --- END OF FINAL FIX ---
179
-
180
- if cleaned_text:
181
- final_content = f"<think>{cleaned_text}</think>{final_ans_text}"
182
 
183
  return ChatCompletionResponse(
184
  id=f"chatcmpl-{uuid.uuid4().hex[:29]}", created=int(time.time()), model=req.model,
@@ -187,7 +186,6 @@ class ProxyHandler:
187
  except Exception:
188
  logger.exception("Non-stream processing failed"); raise
189
 
190
- # ---------- FastAPI entry ----------
191
  async def handle_chat_completion(self, req: ChatCompletionRequest):
192
  stream = bool(req.stream) if req.stream is not None else settings.DEFAULT_STREAM
193
  if stream:
 
27
  if not self.client.is_closed:
28
  await self.client.aclose()
29
 
30
+ # --- START OF FINAL FIX ---
31
+ # NEW: Unified thinking content cleaner function
32
+ def _clean_thinking_content(self, text: str) -> str:
33
+ """
34
+ A robust cleaner for the raw thinking content string.
35
+ """
36
+ if not text:
37
+ return ""
38
+ # 1. Remove tool call blocks first
39
+ cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
40
+ # 2. Remove all HTML-like tags. This gets rid of <details>, <summary>, etc.
41
+ cleaned_text = re.sub(r'<[^>]+>', '', cleaned_text)
42
+ # 3. Remove specific known metadata patterns that are not standard HTML.
43
+ cleaned_text = re.sub(r'true" duration="\d+">\s*Thought for \d+ seconds', '', cleaned_text)
44
+ # 4. Remove leading markdown quote symbols
45
+ cleaned_text = re.sub(r'^\s*>\s*', '', cleaned_text, flags=re.MULTILINE)
46
+ # 5. Remove any "Thinking..." headers.
47
+ cleaned_text = cleaned_text.replace("Thinking…", "")
48
+ # 6. Final strip to clean up any residual whitespace.
49
+ return cleaned_text.strip()
50
+ # --- END OF FINAL FIX ---
51
+
52
  # ... Methods _serialize_msgs, _prep_upstream remain the same ...
53
  def _serialize_msgs(self, msgs) -> list:
54
  out = []
 
65
  body = { "stream": True, "model": model, "messages": self._serialize_msgs(req.messages), "background_tasks": {"title_generation": True, "tags_generation": True}, "chat_id": str(uuid.uuid4()), "features": {"image_generation": False, "code_interpreter": False, "web_search": False, "auto_web_search": False, "enable_thinking": True,}, "id": str(uuid.uuid4()), "mcp_servers": ["deep-web-search"], "model_item": {"id": model, "name": "GLM-4.5", "owned_by": "openai"}, "params": {}, "tool_servers": [], "variables": {"{{USER_NAME}}": "User", "{{USER_LOCATION}}": "Unknown", "{{CURRENT_DATETIME}}": time.strftime("%Y-%m-%d %H:%M:%S"),},}
66
  headers = { "Content-Type": "application/json", "Authorization": f"Bearer {ck}", "User-Agent": ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"), "Accept": "application/json, text/event-stream", "Accept-Language": "zh-CN", "sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"', "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"macOS"', "x-fe-version": "prod-fe-1.0.53", "Origin": "https://chat.z.ai", "Referer": "https://chat.z.ai/",}
67
  return body, headers, ck
68
+
 
69
  async def stream_proxy_response(self, req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
70
  ck = None
71
  try:
 
80
  if not think_open:
81
  yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]})}\n\n"
82
  think_open = True
83
+ # In stream, we clean as we go, but we don't strip the final result
84
+ # as it might be part of a larger thought. We use a simpler clean here.
85
  cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
86
+ cleaned_text = cleaned_text.replace("Thinking…", "") # Remove header early
87
+
88
  if cleaned_text:
89
  yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': cleaned_text}, 'finish_reason': None}]})}\n\n"
90
  elif content_type == "answer":
 
130
  except Exception:
131
  logger.exception("Stream error"); raise
132
 
 
133
  async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
134
  ck = None
135
  try:
136
  body, headers, ck = await self._prep_upstream(req)
137
+ raw_thinking_parts = []; raw_answer_parts = []; phase_cur = None
 
 
 
138
  async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
139
  if resp.status_code != 200:
140
  await cookie_manager.mark_cookie_failed(ck); error_detail = await resp.text()
 
158
  content = dat.get("delta_content") or dat.get("edit_content")
159
  if not content: continue
160
 
 
 
161
  match = re.search(r'(.*</details>)(.*)', content, flags=re.DOTALL)
162
  if match:
163
  thinking_part, answer_part = match.groups()
 
171
  else: continue
172
  break
173
 
 
174
  final_ans_text = ''.join(raw_answer_parts)
175
  final_content = final_ans_text
 
176
  if settings.SHOW_THINK_TAGS and raw_thinking_parts:
177
+ # Use the new unified cleaner function
178
+ cleaned_think_text = self._clean_thinking_content(''.join(raw_thinking_parts))
179
+ if cleaned_think_text:
180
+ final_content = f"<think>{cleaned_think_text}</think>{final_ans_text}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
  return ChatCompletionResponse(
183
  id=f"chatcmpl-{uuid.uuid4().hex[:29]}", created=int(time.time()), model=req.model,
 
186
  except Exception:
187
  logger.exception("Non-stream processing failed"); raise
188
 
 
189
  async def handle_chat_completion(self, req: ChatCompletionRequest):
190
  stream = bool(req.stream) if req.stream is not None else settings.DEFAULT_STREAM
191
  if stream: