Spaces:
Paused
Paused
Upload proxy_handler.py
Browse files- proxy_handler.py +74 -77
proxy_handler.py
CHANGED
|
@@ -43,7 +43,7 @@ class ProxyHandler:
|
|
| 43 |
return content
|
| 44 |
|
| 45 |
logger.debug(f"SHOW_THINK_TAGS setting: {settings.SHOW_THINK_TAGS}")
|
| 46 |
-
logger.debug(f"Original content: {content
|
| 47 |
|
| 48 |
# Optionally remove thinking content based on configuration
|
| 49 |
if not settings.SHOW_THINK_TAGS:
|
|
@@ -91,17 +91,9 @@ class ProxyHandler:
|
|
| 91 |
think_start = content.find("<think>")
|
| 92 |
if think_start != -1:
|
| 93 |
# Look for where the actual answer begins
|
| 94 |
-
# This is typically after the thinking content, marked by:
|
| 95 |
-
# 1. A line that starts with normal text (not continuation of thinking)
|
| 96 |
-
# 2. Often starts with a capital letter
|
| 97 |
-
# 3. Might be after some whitespace/newlines
|
| 98 |
-
|
| 99 |
search_content = content[think_start + 7:] # Skip "<think>"
|
| 100 |
|
| 101 |
# Look for patterns that indicate the start of the answer:
|
| 102 |
-
# - New paragraph with capital letter
|
| 103 |
-
# - Numbered list
|
| 104 |
-
# - Clear sentence structure
|
| 105 |
patterns = [
|
| 106 |
r'\n\n+([A-Z][^<\n]*)', # New paragraph starting with capital
|
| 107 |
r'\n\n+(\d+\.)', # Numbered list
|
|
@@ -114,7 +106,7 @@ class ProxyHandler:
|
|
| 114 |
match = re.search(pattern, search_content)
|
| 115 |
if match:
|
| 116 |
answer_start = think_start + 7 + match.start()
|
| 117 |
-
logger.debug(f"Found answer start at position {answer_start}
|
| 118 |
break
|
| 119 |
|
| 120 |
if answer_start:
|
|
@@ -128,7 +120,7 @@ class ProxyHandler:
|
|
| 128 |
# Clean up multiple newlines and spacing
|
| 129 |
content = re.sub(r'\n\s*\n\s*\n+', '\n\n', content)
|
| 130 |
|
| 131 |
-
logger.debug(f"Final transformed content: {content
|
| 132 |
return content.strip()
|
| 133 |
|
| 134 |
async def proxy_request(self, request: ChatCompletionRequest) -> Dict[str, Any]:
|
|
@@ -376,33 +368,38 @@ class ProxyHandler:
|
|
| 376 |
|
| 377 |
# Process and send content immediately if we should
|
| 378 |
if delta_content and should_send_content:
|
| 379 |
-
# Minimal transformation for real-time streaming
|
| 380 |
transformed_delta = delta_content
|
| 381 |
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
|
| 389 |
# Create and send OpenAI-compatible chunk immediately
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
"
|
| 397 |
-
|
| 398 |
-
"
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
|
|
|
| 406 |
|
| 407 |
except Exception as e:
|
| 408 |
logger.error(f"Error processing streaming chunk: {e}")
|
|
@@ -552,7 +549,6 @@ class ProxyHandler:
|
|
| 552 |
|
| 553 |
completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
|
| 554 |
current_phase = None
|
| 555 |
-
collected_content = "" # For post-processing in non-streaming mode
|
| 556 |
|
| 557 |
try:
|
| 558 |
# Create a new client for this streaming request to avoid conflicts
|
|
@@ -596,21 +592,20 @@ class ProxyHandler:
|
|
| 596 |
|
| 597 |
payload = line[6:].strip()
|
| 598 |
if payload == "[DONE]":
|
| 599 |
-
#
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
"
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
yield "data: [DONE]\n\n"
|
| 614 |
return
|
| 615 |
|
| 616 |
try:
|
|
@@ -624,44 +619,46 @@ class ProxyHandler:
|
|
| 624 |
current_phase = phase
|
| 625 |
logger.debug(f"Phase changed to: {phase}")
|
| 626 |
|
| 627 |
-
# Collect content for potential post-processing
|
| 628 |
-
if delta_content:
|
| 629 |
-
collected_content += delta_content
|
| 630 |
-
|
| 631 |
# Apply filtering based on SHOW_THINK_TAGS and phase
|
| 632 |
should_send_content = True
|
| 633 |
|
| 634 |
if not settings.SHOW_THINK_TAGS and phase == "thinking":
|
| 635 |
should_send_content = False
|
| 636 |
|
| 637 |
-
# Process and send content immediately if we should
|
| 638 |
-
if delta_content and should_send_content
|
| 639 |
-
#
|
| 640 |
transformed_delta = delta_content
|
| 641 |
|
|
|
|
| 642 |
if settings.SHOW_THINK_TAGS:
|
| 643 |
-
#
|
| 644 |
-
transformed_delta
|
| 645 |
-
|
| 646 |
-
transformed_delta
|
|
|
|
|
|
|
|
|
|
|
|
|
| 647 |
|
| 648 |
# Create and send OpenAI-compatible chunk immediately
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
"
|
| 656 |
-
|
| 657 |
-
"
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
|
|
|
| 665 |
|
| 666 |
except json.JSONDecodeError:
|
| 667 |
continue # Skip non-JSON lines
|
|
|
|
| 43 |
return content
|
| 44 |
|
| 45 |
logger.debug(f"SHOW_THINK_TAGS setting: {settings.SHOW_THINK_TAGS}")
|
| 46 |
+
logger.debug(f"Original content length: {len(content)}")
|
| 47 |
|
| 48 |
# Optionally remove thinking content based on configuration
|
| 49 |
if not settings.SHOW_THINK_TAGS:
|
|
|
|
| 91 |
think_start = content.find("<think>")
|
| 92 |
if think_start != -1:
|
| 93 |
# Look for where the actual answer begins
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
search_content = content[think_start + 7:] # Skip "<think>"
|
| 95 |
|
| 96 |
# Look for patterns that indicate the start of the answer:
|
|
|
|
|
|
|
|
|
|
| 97 |
patterns = [
|
| 98 |
r'\n\n+([A-Z][^<\n]*)', # New paragraph starting with capital
|
| 99 |
r'\n\n+(\d+\.)', # Numbered list
|
|
|
|
| 106 |
match = re.search(pattern, search_content)
|
| 107 |
if match:
|
| 108 |
answer_start = think_start + 7 + match.start()
|
| 109 |
+
logger.debug(f"Found answer start at position {answer_start}")
|
| 110 |
break
|
| 111 |
|
| 112 |
if answer_start:
|
|
|
|
| 120 |
# Clean up multiple newlines and spacing
|
| 121 |
content = re.sub(r'\n\s*\n\s*\n+', '\n\n', content)
|
| 122 |
|
| 123 |
+
logger.debug(f"Final transformed content length: {len(content)}")
|
| 124 |
return content.strip()
|
| 125 |
|
| 126 |
async def proxy_request(self, request: ChatCompletionRequest) -> Dict[str, Any]:
|
|
|
|
| 368 |
|
| 369 |
# Process and send content immediately if we should
|
| 370 |
if delta_content and should_send_content:
|
| 371 |
+
# Minimal transformation for real-time streaming - NO TAG PROCESSING!
|
| 372 |
transformed_delta = delta_content
|
| 373 |
|
| 374 |
+
# Only do basic replacements without complex regex for streaming
|
| 375 |
+
if settings.SHOW_THINK_TAGS and ('<details' in transformed_delta or '</details>' in transformed_delta):
|
| 376 |
+
# Simple string replacement only
|
| 377 |
+
if '<details' in transformed_delta:
|
| 378 |
+
transformed_delta = '<think>'
|
| 379 |
+
elif '</details>' in transformed_delta:
|
| 380 |
+
transformed_delta = '</think>'
|
| 381 |
+
# Remove summary content if present
|
| 382 |
+
if '<summary>' in transformed_delta or '</summary>' in transformed_delta:
|
| 383 |
+
transformed_delta = '' # Skip summary content entirely
|
| 384 |
|
| 385 |
# Create and send OpenAI-compatible chunk immediately
|
| 386 |
+
if transformed_delta: # Only send if there's actual content
|
| 387 |
+
openai_chunk = {
|
| 388 |
+
"id": completion_id,
|
| 389 |
+
"object": "chat.completion.chunk",
|
| 390 |
+
"created": int(time.time()),
|
| 391 |
+
"model": model,
|
| 392 |
+
"choices": [{
|
| 393 |
+
"index": 0,
|
| 394 |
+
"delta": {
|
| 395 |
+
"content": transformed_delta
|
| 396 |
+
},
|
| 397 |
+
"finish_reason": None
|
| 398 |
+
}]
|
| 399 |
+
}
|
| 400 |
+
|
| 401 |
+
# Yield immediately for real-time streaming
|
| 402 |
+
yield f"data: {json.dumps(openai_chunk)}\n\n"
|
| 403 |
|
| 404 |
except Exception as e:
|
| 405 |
logger.error(f"Error processing streaming chunk: {e}")
|
|
|
|
| 549 |
|
| 550 |
completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
|
| 551 |
current_phase = None
|
|
|
|
| 552 |
|
| 553 |
try:
|
| 554 |
# Create a new client for this streaming request to avoid conflicts
|
|
|
|
| 592 |
|
| 593 |
payload = line[6:].strip()
|
| 594 |
if payload == "[DONE]":
|
| 595 |
+
# Send final chunk and done
|
| 596 |
+
final_chunk = {
|
| 597 |
+
"id": completion_id,
|
| 598 |
+
"object": "chat.completion.chunk",
|
| 599 |
+
"created": int(time.time()),
|
| 600 |
+
"model": request.model,
|
| 601 |
+
"choices": [{
|
| 602 |
+
"index": 0,
|
| 603 |
+
"delta": {},
|
| 604 |
+
"finish_reason": "stop"
|
| 605 |
+
}]
|
| 606 |
+
}
|
| 607 |
+
yield f"data: {json.dumps(final_chunk)}\n\n"
|
| 608 |
+
yield "data: [DONE]\n\n"
|
|
|
|
| 609 |
return
|
| 610 |
|
| 611 |
try:
|
|
|
|
| 619 |
current_phase = phase
|
| 620 |
logger.debug(f"Phase changed to: {phase}")
|
| 621 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 622 |
# Apply filtering based on SHOW_THINK_TAGS and phase
|
| 623 |
should_send_content = True
|
| 624 |
|
| 625 |
if not settings.SHOW_THINK_TAGS and phase == "thinking":
|
| 626 |
should_send_content = False
|
| 627 |
|
| 628 |
+
# Process and send content immediately if we should
|
| 629 |
+
if delta_content and should_send_content:
|
| 630 |
+
# CRITICAL FIX: For streaming, do MINIMAL processing
|
| 631 |
transformed_delta = delta_content
|
| 632 |
|
| 633 |
+
# Only do safe replacements for streaming
|
| 634 |
if settings.SHOW_THINK_TAGS:
|
| 635 |
+
# Check for complete tag patterns only
|
| 636 |
+
if transformed_delta == '<details>' or '<details ' in transformed_delta:
|
| 637 |
+
transformed_delta = '<think>'
|
| 638 |
+
elif transformed_delta == '</details>':
|
| 639 |
+
transformed_delta = '</think>'
|
| 640 |
+
elif '<summary>' in transformed_delta or '</summary>' in transformed_delta:
|
| 641 |
+
# Skip summary content entirely
|
| 642 |
+
continue
|
| 643 |
|
| 644 |
# Create and send OpenAI-compatible chunk immediately
|
| 645 |
+
if transformed_delta: # Only send if there's content
|
| 646 |
+
openai_chunk = {
|
| 647 |
+
"id": completion_id,
|
| 648 |
+
"object": "chat.completion.chunk",
|
| 649 |
+
"created": int(time.time()),
|
| 650 |
+
"model": request.model,
|
| 651 |
+
"choices": [{
|
| 652 |
+
"index": 0,
|
| 653 |
+
"delta": {
|
| 654 |
+
"content": transformed_delta
|
| 655 |
+
},
|
| 656 |
+
"finish_reason": None
|
| 657 |
+
}]
|
| 658 |
+
}
|
| 659 |
+
|
| 660 |
+
# Yield immediately for real-time streaming
|
| 661 |
+
yield f"data: {json.dumps(openai_chunk)}\n\n"
|
| 662 |
|
| 663 |
except json.JSONDecodeError:
|
| 664 |
continue # Skip non-JSON lines
|