Spaces:
Running
Running
Fix: Stream thinking tokens live instead of showing all at once
Browse files
The regex required a complete <think>...</think> block to match,
so nothing appeared in the Thinking field until the closing tag
arrived. Now parse_thinking_blocks() accepts a streaming flag
that detects unclosed <think> tags and extracts partial content
for real-time display.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
app.py
CHANGED
|
@@ -58,28 +58,45 @@ def load_model():
|
|
| 58 |
raise
|
| 59 |
|
| 60 |
|
| 61 |
-
def parse_thinking_blocks(content: str) -> Tuple[str, str]:
|
| 62 |
"""
|
| 63 |
Parse thinking blocks from model output.
|
| 64 |
Supports both <think> and <thinking> tags.
|
| 65 |
|
| 66 |
Args:
|
| 67 |
content: Full model response
|
|
|
|
| 68 |
|
| 69 |
Returns:
|
| 70 |
Tuple of (thinking_content, summary_content)
|
| 71 |
"""
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
matches = re.findall(pattern, content, re.DOTALL)
|
| 75 |
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
|
| 78 |
|
| 79 |
-
|
| 80 |
-
summary = re.sub(pattern, '', content, flags=re.DOTALL).strip()
|
| 81 |
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
|
| 85 |
def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0.6) -> Generator[Tuple[str, str], None, None]:
|
|
@@ -158,8 +175,8 @@ def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0
|
|
| 158 |
converted = converter.convert(content)
|
| 159 |
full_response += converted
|
| 160 |
|
| 161 |
-
# Parse thinking blocks and summary
|
| 162 |
-
thinking_blocks, summary = parse_thinking_blocks(full_response)
|
| 163 |
|
| 164 |
# Update thinking field (only show thinking blocks, not raw stream)
|
| 165 |
current_thinking = thinking_blocks if thinking_blocks else ""
|
|
|
|
| 58 |
raise
|
| 59 |
|
| 60 |
|
| 61 |
+
def parse_thinking_blocks(content: str, streaming: bool = False) -> Tuple[str, str]:
    """
    Parse thinking blocks from model output.
    Supports both <think> and <thinking> tags.

    Args:
        content: Full model response (possibly still being generated).
        streaming: If True, also treat a trailing unclosed <think>/<thinking>
            tag as a thinking block in progress so its partial text can be
            displayed live.

    Returns:
        Tuple of (thinking_content, summary_content). Thinking blocks are
        stripped and joined with blank lines. The summary is the text outside
        the thinking blocks, stripped. When no thinking tag is recognised the
        content is returned unchanged as the summary, unless it starts with
        '<think', in which case the summary is empty.
    """
    closed_pattern = r'<think(?:ing)?>(.*?)</think(?:ing)?>'
    # Capture everything after the unclosed tag. Excluding '<' here would make
    # the partial block disappear as soon as the model emits "a < b" or the
    # first characters of the closing tag.
    open_pattern = r'<think(?:ing)?>(.*)$'

    # Extract completed thinking blocks, then remove them to get the rest.
    closed_matches = re.findall(closed_pattern, content, re.DOTALL)
    remaining = re.sub(closed_pattern, '', content, flags=re.DOTALL)

    thinking_parts = [m.strip() for m in closed_matches if m.strip()]

    # Look for the unclosed tag only in what is left once completed blocks
    # are gone, so finished blocks can never be mistaken for an open one.
    open_match = re.search(open_pattern, remaining, re.DOTALL) if streaming else None
    if open_match:
        partial = open_match.group(1)
        # Hide a closing tag that has only partially arrived (e.g. "</thi").
        last_lt = partial.rfind('<')
        if last_lt != -1 and '</thinking>'.startswith(partial[last_lt:]):
            partial = partial[:last_lt]
        partial = partial.strip()
        if partial:
            thinking_parts.append(partial)
        # Nothing after the open tag counts as summary yet.
        remaining = remaining[:open_match.start()]

    remaining = remaining.strip()
    thinking = '\n\n'.join(thinking_parts)

    if not closed_matches and not open_match:
        # No thinking tags recognised at all.
        return ("", content if not content.startswith('<think') else "")

    return (thinking, remaining)
|
| 100 |
|
| 101 |
|
| 102 |
def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0.6) -> Generator[Tuple[str, str], None, None]:
|
|
|
|
| 175 |
converted = converter.convert(content)
|
| 176 |
full_response += converted
|
| 177 |
|
| 178 |
+
# Parse thinking blocks and summary (streaming=True for partial tags)
|
| 179 |
+
thinking_blocks, summary = parse_thinking_blocks(full_response, streaming=True)
|
| 180 |
|
| 181 |
# Update thinking field (only show thinking blocks, not raw stream)
|
| 182 |
current_thinking = thinking_blocks if thinking_blocks else ""
|