bluewinliang committed on
Commit
e1d2fea
·
verified ·
1 Parent(s): afc2d88

Upload proxy_handler.py

Browse files
Files changed (1) hide show
  1. proxy_handler.py +74 -77
proxy_handler.py CHANGED
@@ -43,7 +43,7 @@ class ProxyHandler:
43
  return content
44
 
45
  logger.debug(f"SHOW_THINK_TAGS setting: {settings.SHOW_THINK_TAGS}")
46
- logger.debug(f"Original content: {content[:200]}...")
47
 
48
  # Optionally remove thinking content based on configuration
49
  if not settings.SHOW_THINK_TAGS:
@@ -91,17 +91,9 @@ class ProxyHandler:
91
  think_start = content.find("<think>")
92
  if think_start != -1:
93
  # Look for where the actual answer begins
94
- # This is typically after the thinking content, marked by:
95
- # 1. A line that starts with normal text (not continuation of thinking)
96
- # 2. Often starts with a capital letter
97
- # 3. Might be after some whitespace/newlines
98
-
99
  search_content = content[think_start + 7:] # Skip "<think>"
100
 
101
  # Look for patterns that indicate the start of the answer:
102
- # - New paragraph with capital letter
103
- # - Numbered list
104
- # - Clear sentence structure
105
  patterns = [
106
  r'\n\n+([A-Z][^<\n]*)', # New paragraph starting with capital
107
  r'\n\n+(\d+\.)', # Numbered list
@@ -114,7 +106,7 @@ class ProxyHandler:
114
  match = re.search(pattern, search_content)
115
  if match:
116
  answer_start = think_start + 7 + match.start()
117
- logger.debug(f"Found answer start at position {answer_start} with pattern: {pattern}")
118
  break
119
 
120
  if answer_start:
@@ -128,7 +120,7 @@ class ProxyHandler:
128
  # Clean up multiple newlines and spacing
129
  content = re.sub(r'\n\s*\n\s*\n+', '\n\n', content)
130
 
131
- logger.debug(f"Final transformed content: {content[:200]}...")
132
  return content.strip()
133
 
134
  async def proxy_request(self, request: ChatCompletionRequest) -> Dict[str, Any]:
@@ -376,33 +368,38 @@ class ProxyHandler:
376
 
377
  # Process and send content immediately if we should
378
  if delta_content and should_send_content:
379
- # Minimal transformation for real-time streaming
380
  transformed_delta = delta_content
381
 
382
- if settings.SHOW_THINK_TAGS:
383
- # Simple tag replacement for streaming
384
- transformed_delta = re.sub(r'<details[^>]*>', '<think>', transformed_delta)
385
- transformed_delta = transformed_delta.replace('</details>', '</think>')
386
- # Remove summary tags for streaming
387
- transformed_delta = re.sub(r"<summary>.*?</summary>", "", transformed_delta, flags=re.DOTALL)
 
 
 
 
388
 
389
  # Create and send OpenAI-compatible chunk immediately
390
- openai_chunk = {
391
- "id": completion_id,
392
- "object": "chat.completion.chunk",
393
- "created": int(time.time()),
394
- "model": model,
395
- "choices": [{
396
- "index": 0,
397
- "delta": {
398
- "content": transformed_delta
399
- },
400
- "finish_reason": None
401
- }]
402
- }
403
-
404
- # Yield immediately for real-time streaming
405
- yield f"data: {json.dumps(openai_chunk)}\n\n"
 
406
 
407
  except Exception as e:
408
  logger.error(f"Error processing streaming chunk: {e}")
@@ -552,7 +549,6 @@ class ProxyHandler:
552
 
553
  completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
554
  current_phase = None
555
- collected_content = "" # For post-processing in non-streaming mode
556
 
557
  try:
558
  # Create a new client for this streaming request to avoid conflicts
@@ -596,21 +592,20 @@ class ProxyHandler:
596
 
597
  payload = line[6:].strip()
598
  if payload == "[DONE]":
599
- # For streaming mode, just send the final chunk and done
600
- if request.stream:
601
- final_chunk = {
602
- "id": completion_id,
603
- "object": "chat.completion.chunk",
604
- "created": int(time.time()),
605
- "model": request.model,
606
- "choices": [{
607
- "index": 0,
608
- "delta": {},
609
- "finish_reason": "stop"
610
- }]
611
- }
612
- yield f"data: {json.dumps(final_chunk)}\n\n"
613
- yield "data: [DONE]\n\n"
614
  return
615
 
616
  try:
@@ -624,44 +619,46 @@ class ProxyHandler:
624
  current_phase = phase
625
  logger.debug(f"Phase changed to: {phase}")
626
 
627
- # Collect content for potential post-processing
628
- if delta_content:
629
- collected_content += delta_content
630
-
631
  # Apply filtering based on SHOW_THINK_TAGS and phase
632
  should_send_content = True
633
 
634
  if not settings.SHOW_THINK_TAGS and phase == "thinking":
635
  should_send_content = False
636
 
637
- # Process and send content immediately if we should (for streaming)
638
- if delta_content and should_send_content and request.stream:
639
- # Minimal transformation for real-time streaming
640
  transformed_delta = delta_content
641
 
 
642
  if settings.SHOW_THINK_TAGS:
643
- # Simple tag replacement for streaming
644
- transformed_delta = re.sub(r'<details[^>]*>', '<think>', transformed_delta)
645
- transformed_delta = transformed_delta.replace('</details>', '</think>')
646
- transformed_delta = re.sub(r"<summary>.*?</summary>", "", transformed_delta, flags=re.DOTALL)
 
 
 
 
647
 
648
  # Create and send OpenAI-compatible chunk immediately
649
- openai_chunk = {
650
- "id": completion_id,
651
- "object": "chat.completion.chunk",
652
- "created": int(time.time()),
653
- "model": request.model,
654
- "choices": [{
655
- "index": 0,
656
- "delta": {
657
- "content": transformed_delta
658
- },
659
- "finish_reason": None
660
- }]
661
- }
662
-
663
- # Yield immediately for real-time streaming
664
- yield f"data: {json.dumps(openai_chunk)}\n\n"
 
665
 
666
  except json.JSONDecodeError:
667
  continue # Skip non-JSON lines
 
43
  return content
44
 
45
  logger.debug(f"SHOW_THINK_TAGS setting: {settings.SHOW_THINK_TAGS}")
46
+ logger.debug(f"Original content length: {len(content)}")
47
 
48
  # Optionally remove thinking content based on configuration
49
  if not settings.SHOW_THINK_TAGS:
 
91
  think_start = content.find("<think>")
92
  if think_start != -1:
93
  # Look for where the actual answer begins
 
 
 
 
 
94
  search_content = content[think_start + 7:] # Skip "<think>"
95
 
96
  # Look for patterns that indicate the start of the answer:
 
 
 
97
  patterns = [
98
  r'\n\n+([A-Z][^<\n]*)', # New paragraph starting with capital
99
  r'\n\n+(\d+\.)', # Numbered list
 
106
  match = re.search(pattern, search_content)
107
  if match:
108
  answer_start = think_start + 7 + match.start()
109
+ logger.debug(f"Found answer start at position {answer_start}")
110
  break
111
 
112
  if answer_start:
 
120
  # Clean up multiple newlines and spacing
121
  content = re.sub(r'\n\s*\n\s*\n+', '\n\n', content)
122
 
123
+ logger.debug(f"Final transformed content length: {len(content)}")
124
  return content.strip()
125
 
126
  async def proxy_request(self, request: ChatCompletionRequest) -> Dict[str, Any]:
 
368
 
369
  # Process and send content immediately if we should
370
  if delta_content and should_send_content:
371
+ # Minimal transformation for real-time streaming - NO TAG PROCESSING!
372
  transformed_delta = delta_content
373
 
374
+ # Only do basic replacements without complex regex for streaming
375
+ if settings.SHOW_THINK_TAGS and ('<details' in transformed_delta or '</details>' in transformed_delta):
376
+ # Simple string replacement only
377
+ if '<details' in transformed_delta:
378
+ transformed_delta = '<think>'
379
+ elif '</details>' in transformed_delta:
380
+ transformed_delta = '</think>'
381
+ # Remove summary content if present
382
+ if '<summary>' in transformed_delta or '</summary>' in transformed_delta:
383
+ transformed_delta = '' # Skip summary content entirely
384
 
385
  # Create and send OpenAI-compatible chunk immediately
386
+ if transformed_delta: # Only send if there's actual content
387
+ openai_chunk = {
388
+ "id": completion_id,
389
+ "object": "chat.completion.chunk",
390
+ "created": int(time.time()),
391
+ "model": model,
392
+ "choices": [{
393
+ "index": 0,
394
+ "delta": {
395
+ "content": transformed_delta
396
+ },
397
+ "finish_reason": None
398
+ }]
399
+ }
400
+
401
+ # Yield immediately for real-time streaming
402
+ yield f"data: {json.dumps(openai_chunk)}\n\n"
403
 
404
  except Exception as e:
405
  logger.error(f"Error processing streaming chunk: {e}")
 
549
 
550
  completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
551
  current_phase = None
 
552
 
553
  try:
554
  # Create a new client for this streaming request to avoid conflicts
 
592
 
593
  payload = line[6:].strip()
594
  if payload == "[DONE]":
595
+ # Send final chunk and done
596
+ final_chunk = {
597
+ "id": completion_id,
598
+ "object": "chat.completion.chunk",
599
+ "created": int(time.time()),
600
+ "model": request.model,
601
+ "choices": [{
602
+ "index": 0,
603
+ "delta": {},
604
+ "finish_reason": "stop"
605
+ }]
606
+ }
607
+ yield f"data: {json.dumps(final_chunk)}\n\n"
608
+ yield "data: [DONE]\n\n"
 
609
  return
610
 
611
  try:
 
619
  current_phase = phase
620
  logger.debug(f"Phase changed to: {phase}")
621
 
 
 
 
 
622
  # Apply filtering based on SHOW_THINK_TAGS and phase
623
  should_send_content = True
624
 
625
  if not settings.SHOW_THINK_TAGS and phase == "thinking":
626
  should_send_content = False
627
 
628
+ # Process and send content immediately if we should
629
+ if delta_content and should_send_content:
630
+ # CRITICAL FIX: For streaming, do MINIMAL processing
631
  transformed_delta = delta_content
632
 
633
+ # Only do safe replacements for streaming
634
  if settings.SHOW_THINK_TAGS:
635
+ # Check for complete tag patterns only
636
+ if transformed_delta == '<details>' or '<details ' in transformed_delta:
637
+ transformed_delta = '<think>'
638
+ elif transformed_delta == '</details>':
639
+ transformed_delta = '</think>'
640
+ elif '<summary>' in transformed_delta or '</summary>' in transformed_delta:
641
+ # Skip summary content entirely
642
+ continue
643
 
644
  # Create and send OpenAI-compatible chunk immediately
645
+ if transformed_delta: # Only send if there's content
646
+ openai_chunk = {
647
+ "id": completion_id,
648
+ "object": "chat.completion.chunk",
649
+ "created": int(time.time()),
650
+ "model": request.model,
651
+ "choices": [{
652
+ "index": 0,
653
+ "delta": {
654
+ "content": transformed_delta
655
+ },
656
+ "finish_reason": None
657
+ }]
658
+ }
659
+
660
+ # Yield immediately for real-time streaming
661
+ yield f"data: {json.dumps(openai_chunk)}\n\n"
662
 
663
  except json.JSONDecodeError:
664
  continue # Skip non-JSON lines