Luigi Claude Opus 4.5 committed on
Commit
96d104f
·
1 Parent(s): 7b4eca1

Fix: Stream thinking tokens live instead of showing all at once

Browse files

The regex required a complete <think>...</think> block to match,
so nothing appeared in the Thinking field until the closing tag
arrived. Now parse_thinking_blocks() accepts a streaming flag
that detects unclosed <think> tags and extracts partial content
for real-time display.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +28 -11
app.py CHANGED
@@ -58,28 +58,45 @@ def load_model():
58
  raise
59
 
60
 
61
- def parse_thinking_blocks(content: str) -> Tuple[str, str]:
62
  """
63
  Parse thinking blocks from model output.
64
  Supports both <think> and <thinking> tags.
65
 
66
  Args:
67
  content: Full model response
 
68
 
69
  Returns:
70
  Tuple of (thinking_content, summary_content)
71
  """
72
- # Match both <think> and <thinking> tags
73
- pattern = r'<think(?:ing)?>(.*?)</think(?:ing)?>'
74
- matches = re.findall(pattern, content, re.DOTALL)
75
 
76
- if not matches:
77
- return ("", content)
 
 
78
 
79
- thinking = '\n\n'.join(match.strip() for match in matches)
80
- summary = re.sub(pattern, '', content, flags=re.DOTALL).strip()
81
 
82
- return (thinking, summary)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
 
85
  def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0.6) -> Generator[Tuple[str, str], None, None]:
@@ -158,8 +175,8 @@ def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0
158
  converted = converter.convert(content)
159
  full_response += converted
160
 
161
- # Parse thinking blocks and summary from accumulated response
162
- thinking_blocks, summary = parse_thinking_blocks(full_response)
163
 
164
  # Update thinking field (only show thinking blocks, not raw stream)
165
  current_thinking = thinking_blocks if thinking_blocks else ""
 
58
  raise
59
 
60
 
61
+ def parse_thinking_blocks(content: str, streaming: bool = False) -> Tuple[str, str]:
62
  """
63
  Parse thinking blocks from model output.
64
  Supports both <think> and <thinking> tags.
65
 
66
  Args:
67
  content: Full model response
68
+ streaming: If True, handle unclosed <think> tags for live display
69
 
70
  Returns:
71
  Tuple of (thinking_content, summary_content)
72
  """
73
+ closed_pattern = r'<think(?:ing)?>(.*?)</think(?:ing)?>'
74
+ open_pattern = r'<think(?:ing)?>([^<]*)$'
 
75
 
76
+ # Extract completed thinking blocks
77
+ closed_matches = re.findall(closed_pattern, content, re.DOTALL)
78
+ # Remove completed blocks to get summary
79
+ remaining = re.sub(closed_pattern, '', content, flags=re.DOTALL).strip()
80
 
81
+ thinking_parts = [m.strip() for m in closed_matches if m.strip()]
 
82
 
83
+ if streaming:
84
+ # Check for unclosed <think> tag (model still generating thinking tokens)
85
+ open_match = re.search(open_pattern, content, re.DOTALL)
86
+ if open_match:
87
+ partial = open_match.group(1).strip()
88
+ if partial:
89
+ thinking_parts.append(partial)
90
+ # Nothing after the open tag counts as summary yet
91
+ remaining = re.sub(r'<think(?:ing)?>[^<]*$', '', remaining, flags=re.DOTALL).strip()
92
+
93
+ thinking = '\n\n'.join(thinking_parts)
94
+
95
+ if not thinking and not closed_matches:
96
+ # No thinking tags found at all
97
+ return ("", content if not content.startswith('<think') else "")
98
+
99
+ return (thinking, remaining)
100
 
101
 
102
  def summarize_streaming(file_obj, max_tokens: int = 2048, temperature: float = 0.6) -> Generator[Tuple[str, str], None, None]:
 
175
  converted = converter.convert(content)
176
  full_response += converted
177
 
178
+ # Parse thinking blocks and summary (streaming=True for partial tags)
179
+ thinking_blocks, summary = parse_thinking_blocks(full_response, streaming=True)
180
 
181
  # Update thinking field (only show thinking blocks, not raw stream)
182
  current_thinking = thinking_blocks if thinking_blocks else ""