bluewinliang committed on
Commit
57d07dc
·
verified ·
1 Parent(s): 40459ae

Upload proxy_handler.py

Browse files
Files changed (1) hide show
  1. proxy_handler.py +57 -36
proxy_handler.py CHANGED
@@ -39,6 +39,8 @@ class ProxyHandler:
39
  return content
40
 
41
  logger.debug(f"SHOW_THINK_TAGS setting: {settings.SHOW_THINK_TAGS}")
 
 
42
 
43
  # Optionally remove thinking content based on configuration
44
  if not settings.SHOW_THINK_TAGS:
@@ -90,6 +92,8 @@ class ProxyHandler:
90
  else:
91
  content += "</think>"
92
 
 
 
93
  return content.strip()
94
 
95
  async def proxy_request(self, request: ChatCompletionRequest) -> Dict[str, Any]:
@@ -110,13 +114,8 @@ class ProxyHandler:
110
  request.stream if request.stream is not None else settings.DEFAULT_STREAM
111
  )
112
 
113
- # Validate parameter compatibility
114
- if is_streaming and not settings.SHOW_THINK_TAGS:
115
- logger.warning("SHOW_THINK_TAGS=false is ignored for streaming responses")
116
-
117
  # Prepare request data
118
  request_data = request.model_dump(exclude_none=True)
119
- request_data["model"] = target_model
120
 
121
  # Build request data based on actual Z.AI format from zai-messages.md
122
  request_data = {
@@ -209,6 +208,7 @@ class ProxyHandler:
209
 
210
  try:
211
  parsed = json.loads(payload)
 
212
 
213
  # Check for errors first
214
  if parsed.get("error") or (parsed.get("data", {}).get("error")):
@@ -228,12 +228,10 @@ class ProxyHandler:
228
  parsed["data"].pop("edit_index", None)
229
  parsed["data"].pop("edit_content", None)
230
 
231
- # Note: We don't transform delta_content here because <think> tags
232
- # might span multiple chunks. We'll transform the final aggregated content.
233
-
234
  yield parsed
235
 
236
- except json.JSONDecodeError:
 
237
  continue # Skip non-JSON lines
238
 
239
  async def stream_response(
@@ -241,22 +239,40 @@ class ProxyHandler:
241
  ) -> AsyncGenerator[str, None]:
242
  """Generate OpenAI-compatible streaming response"""
243
  try:
 
 
244
  async for parsed in self.process_streaming_response(response):
245
  # 取得增量內容
246
  delta_content = parsed.get("data", {}).get("delta_content", "")
 
 
 
 
 
 
 
 
 
 
247
 
248
- # 根據設定決定是否過濾思考內容
249
  if not settings.SHOW_THINK_TAGS:
250
- # 只在回答階段輸出內容
251
- phase = parsed.get("data", {}).get("phase", "")
252
  if phase != "answer":
253
- continue
 
 
 
 
 
 
 
 
 
254
 
255
- # 如果有內容才輸出
256
- if delta_content:
257
  # 建立 OpenAI 格式的 chunk
258
  chunk = {
259
- "id": parsed.get("data", {}).get("id", f"chatcmpl-{uuid.uuid4()}"),
260
  "object": "chat.completion.chunk",
261
  "created": int(time.time()),
262
  "model": model,
@@ -269,11 +285,13 @@ class ProxyHandler:
269
  ],
270
  }
271
 
272
- yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
 
 
273
 
274
  # 發送完成標記
275
  final_chunk = {
276
- "id": f"chatcmpl-{uuid.uuid4()}",
277
  "object": "chat.completion.chunk",
278
  "created": int(time.time()),
279
  "model": model,
@@ -309,6 +327,8 @@ class ProxyHandler:
309
 
310
  async def handle_chat_completion(self, request: ChatCompletionRequest):
311
  """Handle chat completion request"""
 
 
312
  proxy_result = await self.proxy_request(request)
313
  response = proxy_result["response"]
314
 
@@ -317,8 +337,10 @@ class ProxyHandler:
317
  request.stream if request.stream is not None else settings.DEFAULT_STREAM
318
  )
319
 
 
 
320
  if is_streaming:
321
- # For streaming responses, SHOW_THINK_TAGS setting is ignored
322
  return StreamingResponse(
323
  self.stream_response(response, request.model),
324
  media_type="text/event-stream",
@@ -339,7 +361,7 @@ class ProxyHandler:
339
  chunks = []
340
  async for parsed in self.process_streaming_response(response):
341
  chunks.append(parsed)
342
- logger.debug(f"Received chunk: {parsed}") # Debug log
343
 
344
  if not chunks:
345
  raise HTTPException(status_code=500, detail="No response from upstream")
@@ -348,29 +370,28 @@ class ProxyHandler:
348
  logger.debug(f"First chunk structure: {chunks[0] if chunks else 'None'}")
349
 
350
  # Aggregate content based on SHOW_THINK_TAGS setting
351
- if settings.SHOW_THINK_TAGS:
352
- # Include all content
353
- full_content = "".join(
354
- chunk.get("data", {}).get("delta_content", "") for chunk in chunks
355
- )
356
- else:
357
- # Only include answer phase content
358
- full_content = "".join(
359
- chunk.get("data", {}).get("delta_content", "")
360
- for chunk in chunks
361
- if chunk.get("data", {}).get("phase") == "answer"
362
- )
 
363
 
364
  logger.info(f"Aggregated content length: {len(full_content)}")
365
- logger.debug(
366
- f"Full aggregated content: {full_content}"
367
- ) # Show full content for debugging
368
 
369
  # Apply content transformation (including think tag filtering)
370
  transformed_content = self.transform_content(full_content)
371
 
372
  logger.info(f"Transformed content length: {len(transformed_content)}")
373
- logger.debug(f"Transformed content: {transformed_content[:200]}...")
374
 
375
  # Create OpenAI-compatible response
376
  return ChatCompletionResponse(
 
39
  return content
40
 
41
  logger.debug(f"SHOW_THINK_TAGS setting: {settings.SHOW_THINK_TAGS}")
42
+ logger.debug(f"Original content length: {len(content)}")
43
+ logger.debug(f"Original content preview: {content[:200]}...")
44
 
45
  # Optionally remove thinking content based on configuration
46
  if not settings.SHOW_THINK_TAGS:
 
92
  else:
93
  content += "</think>"
94
 
95
+ logger.debug(f"Final transformed content length: {len(content)}")
96
+ logger.debug(f"Final transformed content preview: {content[:200]}...")
97
  return content.strip()
98
 
99
  async def proxy_request(self, request: ChatCompletionRequest) -> Dict[str, Any]:
 
114
  request.stream if request.stream is not None else settings.DEFAULT_STREAM
115
  )
116
 
 
 
 
 
117
  # Prepare request data
118
  request_data = request.model_dump(exclude_none=True)
 
119
 
120
  # Build request data based on actual Z.AI format from zai-messages.md
121
  request_data = {
 
208
 
209
  try:
210
  parsed = json.loads(payload)
211
+ logger.debug(f"Parsed chunk: {parsed}")
212
 
213
  # Check for errors first
214
  if parsed.get("error") or (parsed.get("data", {}).get("error")):
 
228
  parsed["data"].pop("edit_index", None)
229
  parsed["data"].pop("edit_content", None)
230
 
 
 
 
231
  yield parsed
232
 
233
+ except json.JSONDecodeError as e:
234
+ logger.debug(f"Failed to parse JSON: {line}, error: {e}")
235
  continue # Skip non-JSON lines
236
 
237
  async def stream_response(
 
239
  ) -> AsyncGenerator[str, None]:
240
  """Generate OpenAI-compatible streaming response"""
241
  try:
242
+ chunk_id = f"chatcmpl-{uuid.uuid4()}"
243
+
244
  async for parsed in self.process_streaming_response(response):
245
  # 取得增量內容
246
  delta_content = parsed.get("data", {}).get("delta_content", "")
247
+ phase = parsed.get("data", {}).get("phase", "")
248
+
249
+ logger.debug(f"Processing chunk - phase: {phase}, content: {delta_content[:50]}...")
250
+
251
+ # 如果沒有內容就跳過
252
+ if not delta_content:
253
+ continue
254
+
255
+ # 根據 SHOW_THINK_TAGS 設定決定是否輸出
256
+ should_output = True
257
 
 
258
  if not settings.SHOW_THINK_TAGS:
259
+ # 如果不顯示思考標籤,只輸出答案階段的內容
 
260
  if phase != "answer":
261
+ logger.debug(f"Skipping non-answer phase: {phase}")
262
+ should_output = False
263
+ else:
264
+ # 如果顯示思考標籤,對思考內容進行轉換
265
+ if phase == "thinking":
266
+ # 將 <details> 轉換為 <think>
267
+ delta_content = delta_content.replace("<details", "<think")
268
+ delta_content = delta_content.replace("</details>", "</think>")
269
+ # 移除 <summary> 標籤
270
+ delta_content = re.sub(r"<summary>.*?</summary>", "", delta_content, flags=re.DOTALL)
271
 
272
+ if should_output and delta_content:
 
273
  # 建立 OpenAI 格式的 chunk
274
  chunk = {
275
+ "id": chunk_id,
276
  "object": "chat.completion.chunk",
277
  "created": int(time.time()),
278
  "model": model,
 
285
  ],
286
  }
287
 
288
+ chunk_json = json.dumps(chunk, ensure_ascii=False)
289
+ logger.debug(f"Yielding chunk: {chunk_json}")
290
+ yield f"data: {chunk_json}\n\n"
291
 
292
  # 發送完成標記
293
  final_chunk = {
294
+ "id": chunk_id,
295
  "object": "chat.completion.chunk",
296
  "created": int(time.time()),
297
  "model": model,
 
327
 
328
  async def handle_chat_completion(self, request: ChatCompletionRequest):
329
  """Handle chat completion request"""
330
+ logger.info(f"Handling chat completion request - stream: {request.stream}, SHOW_THINK_TAGS: {settings.SHOW_THINK_TAGS}")
331
+
332
  proxy_result = await self.proxy_request(request)
333
  response = proxy_result["response"]
334
 
 
337
  request.stream if request.stream is not None else settings.DEFAULT_STREAM
338
  )
339
 
340
+ logger.info(f"Final streaming mode: {is_streaming}")
341
+
342
  if is_streaming:
343
+ # For streaming responses
344
  return StreamingResponse(
345
  self.stream_response(response, request.model),
346
  media_type="text/event-stream",
 
361
  chunks = []
362
  async for parsed in self.process_streaming_response(response):
363
  chunks.append(parsed)
364
+ logger.debug(f"Received chunk: {parsed}")
365
 
366
  if not chunks:
367
  raise HTTPException(status_code=500, detail="No response from upstream")
 
370
  logger.debug(f"First chunk structure: {chunks[0] if chunks else 'None'}")
371
 
372
  # Aggregate content based on SHOW_THINK_TAGS setting
373
+ full_content = ""
374
+
375
+ for chunk in chunks:
376
+ delta_content = chunk.get("data", {}).get("delta_content", "")
377
+ phase = chunk.get("data", {}).get("phase", "")
378
+
379
+ if settings.SHOW_THINK_TAGS:
380
+ # Include all content
381
+ full_content += delta_content
382
+ else:
383
+ # Only include answer phase content
384
+ if phase == "answer":
385
+ full_content += delta_content
386
 
387
  logger.info(f"Aggregated content length: {len(full_content)}")
388
+ logger.debug(f"Full aggregated content preview: {full_content[:200]}...")
 
 
389
 
390
  # Apply content transformation (including think tag filtering)
391
  transformed_content = self.transform_content(full_content)
392
 
393
  logger.info(f"Transformed content length: {len(transformed_content)}")
394
+ logger.debug(f"Transformed content preview: {transformed_content[:200]}...")
395
 
396
  # Create OpenAI-compatible response
397
  return ChatCompletionResponse(