bluewinliang committed on
Commit
92a992c
·
verified ·
1 Parent(s): 9138579

Update proxy_handler.py

Browse files
Files changed (1) hide show
  1. proxy_handler.py +34 -53
proxy_handler.py CHANGED
@@ -58,26 +58,14 @@ class ProxyHandler:
58
  Returns:
59
  A dictionary with 'signature' and 'timestamp'.
60
  """
61
- # --- Updated logic from signature_generator.py ---
62
  timestamp_ms = self._get_timestamp_millis()
63
-
64
- # 1. Base64 encode the message content (t_payload)
65
  b64_encoded_t = base64.b64encode(t_payload.encode("utf-8")).decode("utf-8")
66
-
67
- # 2. Construct the message string for signing using the b64 encoded payload
68
  message_string = f"{e_payload}|{b64_encoded_t}|{timestamp_ms}"
69
-
70
- # 3. Calculate the time bucket 'n' (5-minute intervals)
71
  n = timestamp_ms // (5 * 60 * 1000)
72
-
73
- # 4. Intermediate key derivation
74
  msg1 = str(n).encode("utf-8")
75
  intermediate_key = hmac.new(self.primary_secret, msg1, hashlib.sha256).hexdigest()
76
-
77
- # 5. Final signature
78
  msg2 = message_string.encode("utf-8")
79
  final_signature = hmac.new(intermediate_key.encode("utf-8"), msg2, hashlib.sha256).hexdigest()
80
-
81
  return {"signature": final_signature, "timestamp": timestamp_ms}
82
 
83
  def _clean_thinking_content(self, text: str) -> str:
@@ -85,20 +73,24 @@ class ProxyHandler:
85
  cleaned_text = re.sub(r'<summary>.*?</summary>|<glm_block.*?</glm_block>|<[^>]*duration="[^"]*"[^>]*>', '', text, flags=re.DOTALL)
86
  cleaned_text = cleaned_text.replace("</thinking>", "").replace("<Full>", "").replace("</Full>", "")
87
  cleaned_text = re.sub(r'</?details[^>]*>', '', cleaned_text)
88
- # FIX: Corrected typo from re.MULTiline to re.MULTILINE
89
  cleaned_text = re.sub(r'^\s*>\s*(?!>)', '', cleaned_text, flags=re.MULTILINE)
90
  cleaned_text = cleaned_text.replace("Thinking…", "")
91
  return cleaned_text.strip()
92
 
93
  def _clean_answer_content(self, text: str) -> str:
94
  if not text: return ""
95
- cleaned_text = re.sub(r'<glm_block.*?</glm_block>|<details[^>]*>.*?</details>|<summary>.*?</summary>', '', text, flags=re.DOTALL)
 
 
 
 
 
 
96
  return cleaned_text
97
 
98
  def _serialize_msgs(self, msgs) -> list:
99
  out = []
100
  for m in msgs:
101
- # Adapting to Pydantic v1/v2 and dicts
102
  if hasattr(m, "dict"): out.append(m.dict())
103
  elif hasattr(m, "model_dump"): out.append(m.model_dump())
104
  elif isinstance(m, dict): out.append(m)
@@ -106,39 +98,26 @@ class ProxyHandler:
106
  return out
107
 
108
  async def _prep_upstream(self, req: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str, str]:
109
- """Prepares the request body, headers, cookie, and URL for the upstream API."""
110
  ck = await cookie_manager.get_next_cookie()
111
  if not ck: raise HTTPException(503, "No available cookies")
112
 
113
  model = settings.UPSTREAM_MODEL if req.model == settings.MODEL_NAME else req.model
114
- chat_id = str(uuid.uuid4())
115
- request_id = str(uuid.uuid4())
116
 
117
- # --- NEW Simplified Signature Payload Logic ---
118
- user_info = self._parse_jwt_token(ck)
119
- user_id = user_info.get("user_id", "")
120
- # The reference code uses a separate UUID for user_id in payload, let's follow that.
121
- # This seems strange, but let's replicate the reference code exactly.
122
  payload_user_id = str(uuid.uuid4())
123
  payload_request_id = str(uuid.uuid4())
124
  payload_timestamp = str(self._get_timestamp_millis())
125
 
126
- # e: The simplified payload for the signature
127
  e_payload = f"requestId,{payload_request_id},timestamp,{payload_timestamp},user_id,{payload_user_id}"
128
-
129
- # t: The last message content
130
  t_payload = ""
131
  if req.messages:
132
  last_message = req.messages[-1]
133
  if isinstance(last_message.content, str):
134
  t_payload = last_message.content
135
 
136
- # Generate the signature
137
  signature_data = self._generate_signature(e_payload, t_payload)
138
  signature = signature_data["signature"]
139
  signature_timestamp = signature_data["timestamp"]
140
 
141
- # The reference code sends these as URL parameters, not in the body.
142
  url_params = {
143
  "requestId": payload_request_id,
144
  "timestamp": payload_timestamp,
@@ -146,16 +125,14 @@ class ProxyHandler:
146
  "signature_timestamp": str(signature_timestamp)
147
  }
148
 
149
- # Construct URL with query parameters
150
- # Note: The reference code has a typo `f"{BASE_URL}/api/chat/completions"`, it should be `z.ai`
151
  final_url = httpx.URL(settings.UPSTREAM_URL).copy_with(params=url_params)
152
 
153
  body = {
154
  "stream": True,
155
  "model": model,
156
  "messages": self._serialize_msgs(req.messages),
157
- "chat_id": chat_id,
158
- "id": request_id,
159
  "features": {
160
  "image_generation": False,
161
  "web_search": False,
@@ -190,7 +167,9 @@ class ProxyHandler:
190
  think_open = False
191
  yielded_think_buffer = ""
192
  current_raw_thinking = ""
193
-
 
 
194
  async def yield_delta(content_type: str, text: str):
195
  nonlocal think_open, yielded_think_buffer
196
  if content_type == "thinking" and settings.SHOW_THINK_TAGS:
@@ -224,7 +203,6 @@ class ProxyHandler:
224
  line = line.strip()
225
  if not line.startswith('data: '): continue
226
  payload_str = line[6:]
227
- # The reference code has a special 'done' phase, but the original Z.AI uses [DONE]
228
  if payload_str == '[DONE]':
229
  if think_open:
230
  yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
@@ -238,34 +216,38 @@ class ProxyHandler:
238
  phase = dat.get("phase")
239
  content_chunk = dat.get("delta_content") or dat.get("edit_content")
240
  if not content_chunk:
241
- # Handle case where chunk is just usage info, etc.
242
- if phase == 'other' and dat.get('usage'):
243
- pass # In streaming, usage might come with the final chunk
244
- else:
245
- continue
246
 
247
  if phase == "thinking":
248
  current_raw_thinking = content_chunk if dat.get("edit_content") is not None else current_raw_thinking + content_chunk
249
  async for item in yield_delta("thinking", current_raw_thinking):
250
  yield item
251
  elif phase == "answer":
252
- # MODIFICATION: Removed the special handling for the first answer chunk.
253
- # All chunks are now processed uniformly.
254
- # The _clean_answer_content function, called within yield_delta,
255
- # is responsible for cleaning up any remaining tags.
256
- if content_chunk:
257
- async for item in yield_delta("answer", content_chunk):
 
 
 
 
 
 
 
 
 
 
 
258
  yield item
259
  except Exception:
260
  logger.exception("Stream error"); raise
261
 
262
  async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
263
- # This part of the code can be simplified as well, but let's focus on fixing the streaming first.
264
- # The logic will be almost identical to the streaming one.
265
  ck = None
266
  try:
267
  body, headers, ck, url = await self._prep_upstream(req)
268
- # For non-stream, set stream to False in the body
269
  body["stream"] = False
270
 
271
  async with self.client.post(url, json=body, headers=headers) as resp:
@@ -275,13 +257,9 @@ class ProxyHandler:
275
  raise HTTPException(resp.status_code, f"Upstream error: {error_detail}")
276
 
277
  await cookie_manager.mark_cookie_success(ck)
278
-
279
- # Z.AI non-stream response is a single JSON object
280
  response_data = resp.json()
281
-
282
- # We need to adapt Z.AI's response format to OpenAI's format
283
  final_content = ""
284
- finish_reason = "stop" # Default
285
 
286
  if "choices" in response_data and response_data["choices"]:
287
  first_choice = response_data["choices"][0]
@@ -290,6 +268,9 @@ class ProxyHandler:
290
  if "finish_reason" in first_choice:
291
  finish_reason = first_choice["finish_reason"]
292
 
 
 
 
293
  return ChatCompletionResponse(
294
  id=response_data.get("id", f"chatcmpl-{uuid.uuid4().hex[:29]}"),
295
  created=int(time.time()),
 
58
  Returns:
59
  A dictionary with 'signature' and 'timestamp'.
60
  """
 
61
  timestamp_ms = self._get_timestamp_millis()
 
 
62
  b64_encoded_t = base64.b64encode(t_payload.encode("utf-8")).decode("utf-8")
 
 
63
  message_string = f"{e_payload}|{b64_encoded_t}|{timestamp_ms}"
 
 
64
  n = timestamp_ms // (5 * 60 * 1000)
 
 
65
  msg1 = str(n).encode("utf-8")
66
  intermediate_key = hmac.new(self.primary_secret, msg1, hashlib.sha256).hexdigest()
 
 
67
  msg2 = message_string.encode("utf-8")
68
  final_signature = hmac.new(intermediate_key.encode("utf-8"), msg2, hashlib.sha256).hexdigest()
 
69
  return {"signature": final_signature, "timestamp": timestamp_ms}
70
 
71
  def _clean_thinking_content(self, text: str) -> str:
 
73
  cleaned_text = re.sub(r'<summary>.*?</summary>|<glm_block.*?</glm_block>|<[^>]*duration="[^"]*"[^>]*>', '', text, flags=re.DOTALL)
74
  cleaned_text = cleaned_text.replace("</thinking>", "").replace("<Full>", "").replace("</Full>", "")
75
  cleaned_text = re.sub(r'</?details[^>]*>', '', cleaned_text)
 
76
  cleaned_text = re.sub(r'^\s*>\s*(?!>)', '', cleaned_text, flags=re.MULTILINE)
77
  cleaned_text = cleaned_text.replace("Thinking…", "")
78
  return cleaned_text.strip()
79
 
80
  def _clean_answer_content(self, text: str) -> str:
81
  if not text: return ""
82
+ # Enhanced cleaning rules for the final answer
83
+ # Rule 1: Remove any complete <details> blocks, which contain citations.
84
+ cleaned_text = re.sub(r'<details[^>]*>.*?</details>', '', text, flags=re.DOTALL)
85
+ # Rule 2: Remove other known thinking-related tags.
86
+ cleaned_text = re.sub(r'<glm_block.*?</glm_block>|<summary>.*?</summary>', '', cleaned_text, flags=re.DOTALL)
87
+ # Rule 3: Remove leftover tag attributes like duration="", which can leak into the answer.
88
+ cleaned_text = re.sub(r'\s*duration="\d+"[^>]*>', '', cleaned_text)
89
  return cleaned_text
90
 
91
  def _serialize_msgs(self, msgs) -> list:
92
  out = []
93
  for m in msgs:
 
94
  if hasattr(m, "dict"): out.append(m.dict())
95
  elif hasattr(m, "model_dump"): out.append(m.model_dump())
96
  elif isinstance(m, dict): out.append(m)
 
98
  return out
99
 
100
  async def _prep_upstream(self, req: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str, str]:
 
101
  ck = await cookie_manager.get_next_cookie()
102
  if not ck: raise HTTPException(503, "No available cookies")
103
 
104
  model = settings.UPSTREAM_MODEL if req.model == settings.MODEL_NAME else req.model
 
 
105
 
 
 
 
 
 
106
  payload_user_id = str(uuid.uuid4())
107
  payload_request_id = str(uuid.uuid4())
108
  payload_timestamp = str(self._get_timestamp_millis())
109
 
 
110
  e_payload = f"requestId,{payload_request_id},timestamp,{payload_timestamp},user_id,{payload_user_id}"
 
 
111
  t_payload = ""
112
  if req.messages:
113
  last_message = req.messages[-1]
114
  if isinstance(last_message.content, str):
115
  t_payload = last_message.content
116
 
 
117
  signature_data = self._generate_signature(e_payload, t_payload)
118
  signature = signature_data["signature"]
119
  signature_timestamp = signature_data["timestamp"]
120
 
 
121
  url_params = {
122
  "requestId": payload_request_id,
123
  "timestamp": payload_timestamp,
 
125
  "signature_timestamp": str(signature_timestamp)
126
  }
127
 
 
 
128
  final_url = httpx.URL(settings.UPSTREAM_URL).copy_with(params=url_params)
129
 
130
  body = {
131
  "stream": True,
132
  "model": model,
133
  "messages": self._serialize_msgs(req.messages),
134
+ "chat_id": str(uuid.uuid4()),
135
+ "id": str(uuid.uuid4()),
136
  "features": {
137
  "image_generation": False,
138
  "web_search": False,
 
167
  think_open = False
168
  yielded_think_buffer = ""
169
  current_raw_thinking = ""
170
+ # FIX: Re-introduce the flag to handle the transition chunk correctly.
171
+ is_first_answer_chunk = True
172
+
173
  async def yield_delta(content_type: str, text: str):
174
  nonlocal think_open, yielded_think_buffer
175
  if content_type == "thinking" and settings.SHOW_THINK_TAGS:
 
203
  line = line.strip()
204
  if not line.startswith('data: '): continue
205
  payload_str = line[6:]
 
206
  if payload_str == '[DONE]':
207
  if think_open:
208
  yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
 
216
  phase = dat.get("phase")
217
  content_chunk = dat.get("delta_content") or dat.get("edit_content")
218
  if not content_chunk:
219
+ continue
 
 
 
 
220
 
221
  if phase == "thinking":
222
  current_raw_thinking = content_chunk if dat.get("edit_content") is not None else current_raw_thinking + content_chunk
223
  async for item in yield_delta("thinking", current_raw_thinking):
224
  yield item
225
  elif phase == "answer":
226
+ content_to_process = content_chunk
227
+ # FIX: Restore and improve special handling for the first answer chunk.
228
+ if is_first_answer_chunk:
229
+ # The first answer chunk often contains leftover closing tags from thinking.
230
+ # Find the last '</details>' tag and trim everything before and including it.
231
+ last_details_pos = content_to_process.rfind('</details>')
232
+ if last_details_pos != -1:
233
+ content_to_process = content_to_process[last_details_pos + len('</details>'):]
234
+
235
+ # Clean up any other potential leaked raw text from the thinking phase.
236
+ # This regex targets the `true" duration...` pattern.
237
+ content_to_process = re.sub(r'^[\s\S]*last_tool_call_name="">', '', content_to_process)
238
+
239
+ is_first_answer_chunk = False
240
+
241
+ if content_to_process:
242
+ async for item in yield_delta("answer", content_to_process):
243
  yield item
244
  except Exception:
245
  logger.exception("Stream error"); raise
246
 
247
  async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
 
 
248
  ck = None
249
  try:
250
  body, headers, ck, url = await self._prep_upstream(req)
 
251
  body["stream"] = False
252
 
253
  async with self.client.post(url, json=body, headers=headers) as resp:
 
257
  raise HTTPException(resp.status_code, f"Upstream error: {error_detail}")
258
 
259
  await cookie_manager.mark_cookie_success(ck)
 
 
260
  response_data = resp.json()
 
 
261
  final_content = ""
262
+ finish_reason = "stop"
263
 
264
  if "choices" in response_data and response_data["choices"]:
265
  first_choice = response_data["choices"][0]
 
268
  if "finish_reason" in first_choice:
269
  finish_reason = first_choice["finish_reason"]
270
 
271
+ # Apply cleaning to non-streamed responses as well
272
+ final_content = self._clean_answer_content(final_content)
273
+
274
  return ChatCompletionResponse(
275
  id=response_data.get("id", f"chatcmpl-{uuid.uuid4().hex[:29]}"),
276
  created=int(time.time()),