bluewinliang committed on
Commit
aabc532
·
verified ·
1 Parent(s): 01e508e

Upload proxy_handler.py

Browse files
Files changed (1) hide show
  1. proxy_handler.py +53 -62
proxy_handler.py CHANGED
@@ -70,20 +70,11 @@ class ProxyHandler:
70
  content = content.replace("</details>", "</think>")
71
  # Remove <summary> tags and their content
72
  content = re.sub(r"<summary>.*?</summary>", "", content, flags=re.DOTALL)
73
- # If there's no closing </think>, add it at the end of thinking content
74
- if "<think>" in content and "</think>" not in content:
75
- # Find where thinking ends and answer begins
76
- think_start = content.find("<think>")
77
- if think_start != -1:
78
- # Look for the start of the actual answer (capital/number/Chinese)
79
- answer_match = re.search(r"\n\s*[A-Z0-9\u4e00-\u9fff]", content[think_start:])
80
- if answer_match:
81
- insert_pos = think_start + answer_match.start()
82
- content = (
83
- content[:insert_pos] + "</think>\n" + content[insert_pos:]
84
- )
85
- else:
86
- content += "</think>"
87
 
88
  return content.strip()
89
 
@@ -265,7 +256,6 @@ class ProxyHandler:
265
  if chunk_data.startswith("data: ") and not chunk_data.startswith("data: [DONE]"):
266
  try:
267
  chunk_json = json.loads(chunk_data[6:])
268
- # 1. delta_content
269
  if chunk_json.get("choices", [{}])[0].get("delta", {}).get("content"):
270
  chunks.append(chunk_json["choices"][0]["delta"]["content"])
271
  except:
@@ -298,6 +288,7 @@ class ProxyHandler:
298
  # Generate a unique completion ID
299
  completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
300
  current_phase = None
 
301
 
302
  try:
303
  # Real-time streaming: process each chunk immediately as it arrives
@@ -308,7 +299,6 @@ class ProxyHandler:
308
  edit_content = data.get("edit_content", "")
309
  phase = data.get("phase", "")
310
 
311
- # Prefer delta_content; if empty use edit_content (重要修正)
312
  content_piece = delta_content if delta_content else edit_content
313
 
314
  # Track phase changes
@@ -316,24 +306,41 @@ class ProxyHandler:
316
  current_phase = phase
317
  logger.debug(f"Phase changed to: {phase}")
318
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  # Apply filtering based on SHOW_THINK_TAGS and phase
320
  should_send_content = True
321
  if not settings.SHOW_THINK_TAGS and phase == "thinking":
322
- # Skip thinking content when SHOW_THINK_TAGS=false
323
  should_send_content = False
324
- logger.debug(f"Skipping thinking content (SHOW_THINK_TAGS=false)")
325
 
326
  # Process and send content immediately if we should
327
  if content_piece and should_send_content:
328
  transformed_delta = content_piece
329
 
330
  if settings.SHOW_THINK_TAGS:
331
- # Simple tag replacement for streaming
332
  transformed_delta = re.sub(r'<details[^>]*>', '<think>', transformed_delta)
333
  transformed_delta = transformed_delta.replace('</details>', '</think>')
334
  transformed_delta = re.sub(r'<summary>.*?</summary>', '', transformed_delta, flags=re.DOTALL)
335
 
336
- # Create and send OpenAI-compatible chunk immediately
 
 
 
 
337
  openai_chunk = {
338
  "id": completion_id,
339
  "object": "chat.completion.chunk",
@@ -347,7 +354,6 @@ class ProxyHandler:
347
  "finish_reason": None
348
  }]
349
  }
350
- # Yield immediately for real-time streaming
351
  yield f"data: {json.dumps(openai_chunk)}\n\n"
352
  except Exception as e:
353
  logger.error(f"Error processing streaming chunk: {e}")
@@ -369,7 +375,6 @@ class ProxyHandler:
369
  yield "data: [DONE]\n\n"
370
  except Exception as e:
371
  logger.error(f"Streaming error: {e}")
372
- # Send error in OpenAI format
373
  error_chunk = {
374
  "error": {
375
  "message": str(e),
@@ -393,35 +398,20 @@ class ProxyHandler:
393
  logger.info(f"Total chunks received: {len(chunks)}")
394
  logger.debug(f"First chunk structure: {chunks[0] if chunks else 'None'}")
395
 
396
- # Aggregate content based on SHOW_THINK_TAGS setting
397
  if settings.SHOW_THINK_TAGS:
398
- # Include all content (delta_content + edit_content)
399
  full_content = "".join(
400
- (chunk.get("data", {}).get("delta_content", "")
401
- or chunk.get("data", {}).get("edit_content", ""))
402
- for chunk in chunks
403
  )
404
  else:
405
- # Only include answer phase content
406
  full_content = "".join(
407
- (chunk.get("data", {}).get("delta_content", "")
408
- or chunk.get("data", {}).get("edit_content", ""))
409
- for chunk in chunks
410
- if chunk.get("data", {}).get("phase") == "answer"
411
  )
412
 
413
- logger.info(f"Aggregated content length: {len(full_content)}")
414
- logger.debug(
415
- f"Full aggregated content: {full_content}"
416
- ) # Show full content for debugging
417
-
418
- # Apply content transformation (including think tag filtering)
419
  transformed_content = self.transform_content(full_content)
420
 
421
- logger.info(f"Transformed content length: {len(transformed_content)}")
422
- logger.debug(f"Transformed content: {transformed_content[:200]}...")
423
-
424
- # Create OpenAI-compatible response
425
  return ChatCompletionResponse(
426
  id=chunks[0].get("data", {}).get("id", "chatcmpl-unknown"),
427
  created=int(time.time()),
@@ -440,18 +430,15 @@ class ProxyHandler:
440
  import uuid
441
  import time
442
 
443
- # Get cookie
444
  cookie = await cookie_manager.get_next_cookie()
445
  if not cookie:
446
  raise HTTPException(status_code=503, detail="No valid authentication available")
447
 
448
- # Prepare request data
449
  request_data = request.model_dump(exclude_none=True)
450
- target_model = "0727-360B-API" # Map GLM-4.5 to Z.AI model
451
 
452
- # Build Z.AI request format
453
  request_data = {
454
- "stream": True, # Always request streaming from Z.AI
455
  "model": target_model,
456
  "messages": request_data["messages"],
457
  "background_tasks": {
@@ -497,9 +484,9 @@ class ProxyHandler:
497
 
498
  completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
499
  current_phase = None
 
500
 
501
  try:
502
- # Create a new client for this streaming request to avoid conflicts
503
  async with httpx.AsyncClient(
504
  timeout=httpx.Timeout(60.0, read=300.0),
505
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
@@ -521,14 +508,12 @@ class ProxyHandler:
521
 
522
  await cookie_manager.mark_cookie_success(cookie)
523
 
524
- # Process streaming response in real-time
525
  buffer = ""
526
  async for chunk in response.aiter_text(chunk_size=1024):
527
  if not chunk:
528
  continue
529
 
530
  buffer += chunk
531
- # Process complete lines immediately
532
  while "\n" in buffer:
533
  line, buffer = buffer.split("\n", 1)
534
  line = line.strip()
@@ -538,7 +523,6 @@ class ProxyHandler:
538
 
539
  payload = line[6:].strip()
540
  if payload == "[DONE]":
541
- # Send final chunk and done
542
  final_chunk = {
543
  "id": completion_id,
544
  "object": "chat.completion.chunk",
@@ -563,27 +547,35 @@ class ProxyHandler:
563
 
564
  content_piece = delta_content if delta_content else edit_content
565
 
566
- # Track phase changes
567
- if phase != current_phase:
568
- current_phase = phase
569
- logger.debug(f"Phase changed to: {phase}")
 
 
 
 
 
 
 
 
 
 
570
 
571
- # Apply filtering based on SHOW_THINK_TAGS and phase
572
  should_send_content = True
573
  if not settings.SHOW_THINK_TAGS and phase == "thinking":
574
  should_send_content = False
575
 
576
- # Process and send content immediately if we should
577
  if content_piece and should_send_content:
578
- # Minimal transformation for real-time streaming
579
  transformed_delta = content_piece
580
  if settings.SHOW_THINK_TAGS:
581
- # Simple tag replacement for streaming
582
  transformed_delta = re.sub(r'<details[^>]*>', '<think>', transformed_delta)
583
  transformed_delta = transformed_delta.replace('</details>', '</think>')
584
  transformed_delta = re.sub(r'<summary>.*?</summary>', '', transformed_delta, flags=re.DOTALL)
 
 
 
585
 
586
- # Create and send OpenAI-compatible chunk immediately
587
  openai_chunk = {
588
  "id": completion_id,
589
  "object": "chat.completion.chunk",
@@ -597,10 +589,9 @@ class ProxyHandler:
597
  "finish_reason": None
598
  }]
599
  }
600
- # Yield immediately for real-time streaming
601
  yield f"data: {json.dumps(openai_chunk)}\n\n"
602
  except json.JSONDecodeError:
603
- continue # Skip non-JSON lines
604
  except httpx.RequestError as e:
605
  logger.error(f"Streaming request error: {e}")
606
  await cookie_manager.mark_cookie_failed(cookie)
 
70
  content = content.replace("</details>", "</think>")
71
  # Remove <summary> tags and their content
72
  content = re.sub(r"<summary>.*?</summary>", "", content, flags=re.DOTALL)
73
+ # Auto-close <think> when a closing tag is missing
74
+ open_cnt = content.count("<think>")
75
+ close_cnt = content.count("</think>")
76
+ if open_cnt > close_cnt:
77
+ content += "</think>"
 
 
 
 
 
 
 
 
 
78
 
79
  return content.strip()
80
 
 
256
  if chunk_data.startswith("data: ") and not chunk_data.startswith("data: [DONE]"):
257
  try:
258
  chunk_json = json.loads(chunk_data[6:])
 
259
  if chunk_json.get("choices", [{}])[0].get("delta", {}).get("content"):
260
  chunks.append(chunk_json["choices"][0]["delta"]["content"])
261
  except:
 
288
  # Generate a unique completion ID
289
  completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
290
  current_phase = None
291
+ role_sent = False
292
 
293
  try:
294
  # Real-time streaming: process each chunk immediately as it arrives
 
299
  edit_content = data.get("edit_content", "")
300
  phase = data.get("phase", "")
301
 
 
302
  content_piece = delta_content if delta_content else edit_content
303
 
304
  # Track phase changes
 
306
  current_phase = phase
307
  logger.debug(f"Phase changed to: {phase}")
308
 
309
+ # Send role chunk once at beginning
310
+ if not role_sent:
311
+ role_sent = True
312
+ role_chunk = {
313
+ "id": completion_id,
314
+ "object": "chat.completion.chunk",
315
+ "created": int(time.time()),
316
+ "model": model,
317
+ "choices": [{
318
+ "index": 0,
319
+ "delta": {"role": "assistant"},
320
+ "finish_reason": None
321
+ }]
322
+ }
323
+ yield f"data: {json.dumps(role_chunk)}\n\n"
324
+
325
  # Apply filtering based on SHOW_THINK_TAGS and phase
326
  should_send_content = True
327
  if not settings.SHOW_THINK_TAGS and phase == "thinking":
 
328
  should_send_content = False
 
329
 
330
  # Process and send content immediately if we should
331
  if content_piece and should_send_content:
332
  transformed_delta = content_piece
333
 
334
  if settings.SHOW_THINK_TAGS:
 
335
  transformed_delta = re.sub(r'<details[^>]*>', '<think>', transformed_delta)
336
  transformed_delta = transformed_delta.replace('</details>', '</think>')
337
  transformed_delta = re.sub(r'<summary>.*?</summary>', '', transformed_delta, flags=re.DOTALL)
338
 
339
+ # Make sure <think> closed when phase switches
340
+ if settings.SHOW_THINK_TAGS and phase != "thinking" and transformed_delta and not transformed_delta.startswith("</think>"):
341
+ if transformed_delta.lstrip().startswith(">"):
342
+ transformed_delta = "</think>\n" + transformed_delta
343
+
344
  openai_chunk = {
345
  "id": completion_id,
346
  "object": "chat.completion.chunk",
 
354
  "finish_reason": None
355
  }]
356
  }
 
357
  yield f"data: {json.dumps(openai_chunk)}\n\n"
358
  except Exception as e:
359
  logger.error(f"Error processing streaming chunk: {e}")
 
375
  yield "data: [DONE]\n\n"
376
  except Exception as e:
377
  logger.error(f"Streaming error: {e}")
 
378
  error_chunk = {
379
  "error": {
380
  "message": str(e),
 
398
  logger.info(f"Total chunks received: {len(chunks)}")
399
  logger.debug(f"First chunk structure: {chunks[0] if chunks else 'None'}")
400
 
 
401
  if settings.SHOW_THINK_TAGS:
 
402
  full_content = "".join(
403
+ (c.get("data", {}).get("delta_content", "") or c.get("data", {}).get("edit_content", ""))
404
+ for c in chunks
 
405
  )
406
  else:
 
407
  full_content = "".join(
408
+ (c.get("data", {}).get("delta_content", "") or c.get("data", {}).get("edit_content", ""))
409
+ for c in chunks
410
+ if c.get("data", {}).get("phase") == "answer"
 
411
  )
412
 
 
 
 
 
 
 
413
  transformed_content = self.transform_content(full_content)
414
 
 
 
 
 
415
  return ChatCompletionResponse(
416
  id=chunks[0].get("data", {}).get("id", "chatcmpl-unknown"),
417
  created=int(time.time()),
 
430
  import uuid
431
  import time
432
 
 
433
  cookie = await cookie_manager.get_next_cookie()
434
  if not cookie:
435
  raise HTTPException(status_code=503, detail="No valid authentication available")
436
 
 
437
  request_data = request.model_dump(exclude_none=True)
438
+ target_model = "0727-360B-API"
439
 
 
440
  request_data = {
441
+ "stream": True,
442
  "model": target_model,
443
  "messages": request_data["messages"],
444
  "background_tasks": {
 
484
 
485
  completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
486
  current_phase = None
487
+ role_sent = False
488
 
489
  try:
 
490
  async with httpx.AsyncClient(
491
  timeout=httpx.Timeout(60.0, read=300.0),
492
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
 
508
 
509
  await cookie_manager.mark_cookie_success(cookie)
510
 
 
511
  buffer = ""
512
  async for chunk in response.aiter_text(chunk_size=1024):
513
  if not chunk:
514
  continue
515
 
516
  buffer += chunk
 
517
  while "\n" in buffer:
518
  line, buffer = buffer.split("\n", 1)
519
  line = line.strip()
 
523
 
524
  payload = line[6:].strip()
525
  if payload == "[DONE]":
 
526
  final_chunk = {
527
  "id": completion_id,
528
  "object": "chat.completion.chunk",
 
547
 
548
  content_piece = delta_content if delta_content else edit_content
549
 
550
+ if not role_sent:
551
+ role_sent = True
552
+ role_chunk = {
553
+ "id": completion_id,
554
+ "object": "chat.completion.chunk",
555
+ "created": int(time.time()),
556
+ "model": request.model,
557
+ "choices": [{
558
+ "index": 0,
559
+ "delta": {"role": "assistant"},
560
+ "finish_reason": None
561
+ }]
562
+ }
563
+ yield f"data: {json.dumps(role_chunk)}\n\n"
564
 
 
565
  should_send_content = True
566
  if not settings.SHOW_THINK_TAGS and phase == "thinking":
567
  should_send_content = False
568
 
 
569
  if content_piece and should_send_content:
 
570
  transformed_delta = content_piece
571
  if settings.SHOW_THINK_TAGS:
 
572
  transformed_delta = re.sub(r'<details[^>]*>', '<think>', transformed_delta)
573
  transformed_delta = transformed_delta.replace('</details>', '</think>')
574
  transformed_delta = re.sub(r'<summary>.*?</summary>', '', transformed_delta, flags=re.DOTALL)
575
+ if settings.SHOW_THINK_TAGS and phase != "thinking" and transformed_delta and not transformed_delta.startswith("</think>"):
576
+ if transformed_delta.lstrip().startswith(">") or transformed_delta[0].isalnum():
577
+ transformed_delta = "</think>\n" + transformed_delta
578
 
 
579
  openai_chunk = {
580
  "id": completion_id,
581
  "object": "chat.completion.chunk",
 
589
  "finish_reason": None
590
  }]
591
  }
 
592
  yield f"data: {json.dumps(openai_chunk)}\n\n"
593
  except json.JSONDecodeError:
594
+ continue
595
  except httpx.RequestError as e:
596
  logger.error(f"Streaming request error: {e}")
597
  await cookie_manager.mark_cookie_failed(cookie)