bluewinliang commited on
Commit
3b1d19a
·
verified ·
1 Parent(s): cb0a58f

Upload proxy_handler.py

Browse files
Files changed (1) hide show
  1. proxy_handler.py +93 -67
proxy_handler.py CHANGED
@@ -2,7 +2,7 @@
2
  Proxy handler for Z.AI API requests
3
  """
4
  import json, logging, re, time, uuid
5
- from typing import AsyncGenerator, Dict, Any, Tuple
6
 
7
  import httpx
8
  from fastapi import HTTPException
@@ -27,8 +27,9 @@ class ProxyHandler:
27
  if not self.client.is_closed:
28
  await self.client.aclose()
29
 
30
- # --- Text utilities remain the same from the last version ---
31
- def _clean_thinking(self, s: str) -> str:
 
32
  if not s: return ""
33
  s = re.sub(r'<details[^>]*>.*?</details>', '', s, flags=re.DOTALL)
34
  s = re.sub(r'<summary[^>]*>.*?</summary>', '', s, flags=re.DOTALL)
@@ -36,16 +37,6 @@ class ProxyHandler:
36
  s = re.sub(r'^\s*>\s*', '', s, flags=re.MULTILINE)
37
  return s
38
 
39
- def _clean_answer(self, s: str, from_edit_content: bool = False) -> str:
40
- if not s: return ""
41
- if from_edit_content:
42
- last_details_pos = s.rfind('</details>')
43
- if last_details_pos != -1:
44
- s = s[last_details_pos + len('</details>'):]
45
- s = re.sub(r"<details[^>]*>.*?</details>", "", s, flags=re.DOTALL)
46
- # We keep the lstrip logic as it is, but we will handle yielding empty strings differently.
47
- return s.lstrip('\n\r')
48
-
49
  # ... Other methods like _serialize_msgs, _prep_upstream remain the same ...
50
  def _serialize_msgs(self, msgs) -> list:
51
  out = []
@@ -62,15 +53,54 @@ class ProxyHandler:
62
  body = { "stream": True, "model": model, "messages": self._serialize_msgs(req.messages), "background_tasks": {"title_generation": True, "tags_generation": True}, "chat_id": str(uuid.uuid4()), "features": {"image_generation": False, "code_interpreter": False, "web_search": False, "auto_web_search": False, "enable_thinking": True,}, "id": str(uuid.uuid4()), "mcp_servers": ["deep-web-search"], "model_item": {"id": model, "name": "GLM-4.5", "owned_by": "openai"}, "params": {}, "tool_servers": [], "variables": {"{{USER_NAME}}": "User", "{{USER_LOCATION}}": "Unknown", "{{CURRENT_DATETIME}}": time.strftime("%Y-%m-%d %H:%M:%S"),},}
63
  headers = { "Content-Type": "application/json", "Authorization": f"Bearer {ck}", "User-Agent": ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"), "Accept": "application/json, text/event-stream", "Accept-Language": "zh-CN", "sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"', "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"macOS"', "x-fe-version": "prod-fe-1.0.53", "Origin": "https://chat.z.ai", "Referer": "https://chat.z.ai/",}
64
  return body, headers, ck
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
- # ---------- stream (REVISED YIELD LOGIC) ----------
 
 
 
 
 
67
  async def stream_proxy_response(self, req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
68
  ck = None
69
  try:
70
  body, headers, ck = await self._prep_upstream(req)
71
  comp_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"; think_open = False; phase_cur = None
72
- # FIX: New flag to track if we've started sending content.
73
- has_yielded_content = False
74
 
75
  async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
76
  if resp.status_code != 200:
@@ -92,58 +122,52 @@ class ProxyHandler:
92
  dat = json.loads(payload_str).get("data", {})
93
  except (json.JSONDecodeError, AttributeError): continue
94
 
95
- is_edit = "edit_content" in dat
96
- content = dat.get("delta_content", "") or dat.get("edit_content", "")
97
  new_phase = dat.get("phase")
98
-
99
- is_transition = new_phase and new_phase != phase_cur
100
- if is_transition:
101
- if phase_cur == "thinking" and new_phase == "answer" and think_open and settings.SHOW_THINK_TAGS:
102
- close_payload = {'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]}
103
- yield f"data: {json.dumps(close_payload)}\n\n"
104
  think_open = False
105
  phase_cur = new_phase
106
 
107
- current_content_phase = phase_cur or new_phase
 
108
 
109
- text_to_yield = ""
110
- if current_content_phase == "thinking":
111
- if content and settings.SHOW_THINK_TAGS:
112
- if not think_open:
113
- open_payload = {'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]}
114
- yield f"data: {json.dumps(open_payload)}\n\n"
115
- has_yielded_content = True
116
- think_open = True
117
- text_to_yield = self._clean_thinking(content)
118
- elif current_content_phase == "answer":
119
- text_to_yield = self._clean_answer(content, from_edit_content=is_edit)
120
-
121
- # FIX: Revised yield logic.
122
- # We yield if there's content, OR if we've already started yielding and the content is an empty string
123
- # (which could be a meaningful newline that was stripped).
124
- if text_to_yield or (has_yielded_content and content is not None and text_to_yield == ""):
125
- content_payload = {"id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": req.model, "choices": [{"index": 0, "delta": {"content": text_to_yield}, "finish_reason": None}],}
126
- yield f"data: {json.dumps(content_payload)}\n\n"
127
- if text_to_yield: # Only set the flag if we actually sent non-empty content.
128
- has_yielded_content = True
129
 
130
  except Exception:
131
  logger.exception("Stream error"); raise
132
 
133
- # ---------- non-stream ----------
134
- # The non-stream version is less susceptible to this issue because it processes all content at once.
135
- # The existing logic should be sufficient. No changes needed here.
136
  async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
137
  ck = None
138
  try:
139
  body, headers, ck = await self._prep_upstream(req)
140
- full_content = []
 
141
  phase_cur = None
 
142
  async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
143
  if resp.status_code != 200:
144
  await cookie_manager.mark_cookie_failed(ck); error_detail = await resp.text()
145
  raise HTTPException(resp.status_code, f"Upstream error: {error_detail}")
146
  await cookie_manager.mark_cookie_success(ck)
 
147
  async for raw in resp.aiter_text():
148
  for line in raw.strip().split('\n'):
149
  line = line.strip()
@@ -153,29 +177,31 @@ class ProxyHandler:
153
  try:
154
  dat = json.loads(payload_str).get("data", {})
155
  except (json.JSONDecodeError, AttributeError): continue
156
- is_edit = "edit_content" in dat
157
- content = dat.get("delta_content") or dat.get("edit_content")
158
  new_phase = dat.get("phase")
159
- if new_phase: phase_cur = new_phase
160
- if content and phase_cur:
161
- full_content.append((phase_cur, content, is_edit))
 
 
 
 
 
 
 
 
162
  else: continue
163
  break
164
 
165
- think_buf = []
166
- answer_buf = []
167
- for phase, content, is_edit in full_content:
168
- if phase == "thinking":
169
- think_buf.append(self._clean_thinking(content))
170
- elif phase == "answer":
171
- answer_buf.append(self._clean_answer(content, from_edit_content=is_edit))
172
-
173
- ans_text = ''.join(answer_buf)
174
- final_content = ans_text
175
  if settings.SHOW_THINK_TAGS and think_buf:
176
- think_text = ''.join(think_buf).strip()
177
- if think_text:
178
- final_content = f"<think>{think_text}</think>{ans_text}"
 
179
 
180
  return ChatCompletionResponse(
181
  id=f"chatcmpl-{uuid.uuid4().hex[:29]}", created=int(time.time()), model=req.model,
 
2
  Proxy handler for Z.AI API requests
3
  """
4
  import json, logging, re, time, uuid
5
+ from typing import AsyncGenerator, Dict, Any, Tuple, List
6
 
7
  import httpx
8
  from fastapi import HTTPException
 
27
  if not self.client.is_closed:
28
  await self.client.aclose()
29
 
30
+ # --- Text utilities ---
31
+ # These are now simplified, as the main logic will handle parsing.
32
+ def _clean_html_and_quotes(self, s: str) -> str:
33
  if not s: return ""
34
  s = re.sub(r'<details[^>]*>.*?</details>', '', s, flags=re.DOTALL)
35
  s = re.sub(r'<summary[^>]*>.*?</summary>', '', s, flags=re.DOTALL)
 
37
  s = re.sub(r'^\s*>\s*', '', s, flags=re.MULTILINE)
38
  return s
39
 
 
 
 
 
 
 
 
 
 
 
40
  # ... Other methods like _serialize_msgs, _prep_upstream remain the same ...
41
  def _serialize_msgs(self, msgs) -> list:
42
  out = []
 
53
  body = { "stream": True, "model": model, "messages": self._serialize_msgs(req.messages), "background_tasks": {"title_generation": True, "tags_generation": True}, "chat_id": str(uuid.uuid4()), "features": {"image_generation": False, "code_interpreter": False, "web_search": False, "auto_web_search": False, "enable_thinking": True,}, "id": str(uuid.uuid4()), "mcp_servers": ["deep-web-search"], "model_item": {"id": model, "name": "GLM-4.5", "owned_by": "openai"}, "params": {}, "tool_servers": [], "variables": {"{{USER_NAME}}": "User", "{{USER_LOCATION}}": "Unknown", "{{CURRENT_DATETIME}}": time.strftime("%Y-%m-%d %H:%M:%S"),},}
54
  headers = { "Content-Type": "application/json", "Authorization": f"Bearer {ck}", "User-Agent": ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"), "Accept": "application/json, text/event-stream", "Accept-Language": "zh-CN", "sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"', "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"macOS"', "x-fe-version": "prod-fe-1.0.53", "Origin": "https://chat.z.ai", "Referer": "https://chat.z.ai/",}
55
  return body, headers, ck
56
+
57
+ # --- NEW: Unified Chunk Parser ---
58
+ def _parse_chunk(self, dat: Dict[str, Any]) -> List[Tuple[str, str]]:
59
+ """
60
+ Parses a single data chunk from Z.AI and returns a list of (type, content) tuples.
61
+ Type can be 'thinking' or 'answer'.
62
+ This function is the core of the new logic, designed to be robust.
63
+ """
64
+ if not dat:
65
+ return []
66
+
67
+ content = dat.get("delta_content")
68
+ edit_content = dat.get("edit_content")
69
+ phase = dat.get("phase")
70
+
71
+ # Case 1: Simple delta_content chunk
72
+ if content and not edit_content:
73
+ if phase in ["thinking", "answer"]:
74
+ return [(phase, content)]
75
+ return []
76
+
77
+ # Case 2: Complex edit_content chunk (the source of most problems)
78
+ if edit_content:
79
+ # edit_content can contain both thinking and answer parts.
80
+ # We split it by the last </details> tag.
81
+ parts = edit_content.rsplit('</details>', 1)
82
+
83
+ # This is a heuristic, but it's based on observed data.
84
+ # If split results in 2 parts, the first is thinking, the second is answer.
85
+ if len(parts) == 2:
86
+ thinking_part, answer_part = parts
87
+ return [("thinking", thinking_part + '</details>'), ("answer", answer_part)]
88
+ else:
89
+ # If no </details>, the whole thing is likely an answer or thinking chunk.
90
+ if phase in ["thinking", "answer"]:
91
+ return [(phase, edit_content)]
92
 
93
+ # Case 3: A chunk that only changes phase, without content.
94
+ # We don't need to do anything, the state will be updated in the main loop.
95
+
96
+ return []
97
+
98
+ # ---------- stream (REBUILT) ----------
99
  async def stream_proxy_response(self, req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
100
  ck = None
101
  try:
102
  body, headers, ck = await self._prep_upstream(req)
103
  comp_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"; think_open = False; phase_cur = None
 
 
104
 
105
  async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
106
  if resp.status_code != 200:
 
122
  dat = json.loads(payload_str).get("data", {})
123
  except (json.JSONDecodeError, AttributeError): continue
124
 
 
 
125
  new_phase = dat.get("phase")
126
+
127
+ # Handle phase transition side-effect
128
+ if new_phase and new_phase != phase_cur:
129
+ if phase_cur == "thinking" and think_open and settings.SHOW_THINK_TAGS:
130
+ yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
 
131
  think_open = False
132
  phase_cur = new_phase
133
 
134
+ # Use the unified parser
135
+ parsed_parts = self._parse_chunk(dat)
136
 
137
+ for part_type, part_content in parsed_parts:
138
+ text_to_yield = ""
139
+ if part_type == "thinking":
140
+ if settings.SHOW_THINK_TAGS:
141
+ if not think_open:
142
+ yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]})}\n\n"
143
+ think_open = True
144
+ text_to_yield = self._clean_html_and_quotes(part_content)
145
+ elif part_type == "answer":
146
+ # For answer, we pass it through raw to preserve all formatting.
147
+ text_to_yield = part_content
148
+
149
+ # Always yield, even if empty, to preserve timing and newlines.
150
+ # The check `if part_content is not None` is implicit in the loop.
151
+ yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': text_to_yield}, 'finish_reason': None}]})}\n\n"
 
 
 
 
 
152
 
153
  except Exception:
154
  logger.exception("Stream error"); raise
155
 
156
+ # ---------- non-stream (REBUILT) ----------
 
 
157
  async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
158
  ck = None
159
  try:
160
  body, headers, ck = await self._prep_upstream(req)
161
+ think_buf = []
162
+ answer_buf = []
163
  phase_cur = None
164
+
165
  async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
166
  if resp.status_code != 200:
167
  await cookie_manager.mark_cookie_failed(ck); error_detail = await resp.text()
168
  raise HTTPException(resp.status_code, f"Upstream error: {error_detail}")
169
  await cookie_manager.mark_cookie_success(ck)
170
+
171
  async for raw in resp.aiter_text():
172
  for line in raw.strip().split('\n'):
173
  line = line.strip()
 
177
  try:
178
  dat = json.loads(payload_str).get("data", {})
179
  except (json.JSONDecodeError, AttributeError): continue
180
+
 
181
  new_phase = dat.get("phase")
182
+ if new_phase:
183
+ phase_cur = new_phase
184
+
185
+ # Use the unified parser
186
+ parsed_parts = self._parse_chunk(dat)
187
+
188
+ for part_type, part_content in parsed_parts:
189
+ if part_type == "thinking":
190
+ think_buf.append(part_content)
191
+ elif part_type == "answer":
192
+ answer_buf.append(part_content)
193
  else: continue
194
  break
195
 
196
+ # Post-process collected buffers
197
+ final_ans_text = ''.join(answer_buf)
198
+ final_content = final_ans_text
199
+
 
 
 
 
 
 
200
  if settings.SHOW_THINK_TAGS and think_buf:
201
+ # Clean the thinking part at the very end
202
+ final_think_text = self._clean_html_and_quotes(''.join(think_buf)).strip()
203
+ if final_think_text:
204
+ final_content = f"<think>{final_think_text}</think>{final_ans_text}"
205
 
206
  return ChatCompletionResponse(
207
  id=f"chatcmpl-{uuid.uuid4().hex[:29]}", created=int(time.time()), model=req.model,