Spaces:
Running
Running
Fix: Support both <think> and <thinking> tag formats in parser
Browse files
- Update regex pattern to match both <think> and <thinking> tags
- Fixes an issue where models using <think> tags had all of their output placed in the Thinking field
- Summary field now correctly displays content outside thinking blocks
- Applied to both app.py (Gradio) and summarize_transcript.py (CLI)
- Updated CLAUDE.md documentation
Resolves: Summary output remained empty when the model used <think> tags
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
- CLAUDE.md +6 -4
- app.py +3 -1
- summarize_transcript.py +5 -3
CLAUDE.md
CHANGED
|
@@ -135,14 +135,16 @@ def summarize_streaming(...) -> Generator[Tuple[str, str], None, None]:
|
|
| 135 |
|
| 136 |
### Thinking Block Parsing
|
| 137 |
|
| 138 |
-
Models may wrap reasoning in special tags that should be separated from final output
|
| 139 |
|
| 140 |
-
|
| 141 |
-
-
|
|
|
|
| 142 |
|
| 143 |
Regex pattern:
|
| 144 |
```python
|
| 145 |
-
|
|
|
|
| 146 |
matches = re.findall(pattern, content, re.DOTALL)
|
| 147 |
thinking = '\n\n'.join(match.strip() for match in matches)
|
| 148 |
summary = re.sub(pattern, '', content, flags=re.DOTALL).strip()
|
|
|
|
| 135 |
|
| 136 |
### Thinking Block Parsing
|
| 137 |
|
| 138 |
+
Models may wrap reasoning in special tags that should be separated from final output.
|
| 139 |
|
| 140 |
+
**Both versions (Gradio `app.py` and CLI `summarize_transcript.py`) support both tag formats:**
|
| 141 |
+
- `<think>reasoning</think>` (common with Qwen models)
|
| 142 |
+
- `<thinking>reasoning</thinking>` (Claude-style)
|
| 143 |
|
| 144 |
Regex pattern:
|
| 145 |
```python
|
| 146 |
+
# Matches both <think> and <thinking> tags
|
| 147 |
+
pattern = r'<think(?:ing)?>(.*?)</think(?:ing)?>'
|
| 148 |
matches = re.findall(pattern, content, re.DOTALL)
|
| 149 |
thinking = '\n\n'.join(match.strip() for match in matches)
|
| 150 |
summary = re.sub(pattern, '', content, flags=re.DOTALL).strip()
|
app.py
CHANGED
|
@@ -61,6 +61,7 @@ def load_model():
|
|
| 61 |
def parse_thinking_blocks(content: str) -> Tuple[str, str]:
|
| 62 |
"""
|
| 63 |
Parse thinking blocks from model output.
|
|
|
|
| 64 |
|
| 65 |
Args:
|
| 66 |
content: Full model response
|
|
@@ -68,7 +69,8 @@ def parse_thinking_blocks(content: str) -> Tuple[str, str]:
|
|
| 68 |
Returns:
|
| 69 |
Tuple of (thinking_content, summary_content)
|
| 70 |
"""
|
| 71 |
-
|
|
|
|
| 72 |
matches = re.findall(pattern, content, re.DOTALL)
|
| 73 |
|
| 74 |
if not matches:
|
|
|
|
| 61 |
def parse_thinking_blocks(content: str) -> Tuple[str, str]:
|
| 62 |
"""
|
| 63 |
Parse thinking blocks from model output.
|
| 64 |
+
Supports both <think> and <thinking> tags.
|
| 65 |
|
| 66 |
Args:
|
| 67 |
content: Full model response
|
|
|
|
| 69 |
Returns:
|
| 70 |
Tuple of (thinking_content, summary_content)
|
| 71 |
"""
|
| 72 |
+
# Match both <think> and <thinking> tags
|
| 73 |
+
pattern = r'<think(?:ing)?>(.*?)</think(?:ing)?>'
|
| 74 |
matches = re.findall(pattern, content, re.DOTALL)
|
| 75 |
|
| 76 |
if not matches:
|
summarize_transcript.py
CHANGED
|
@@ -36,17 +36,19 @@ def read_transcript(file_path):
|
|
| 36 |
|
| 37 |
def parse_thinking_blocks(content: str) -> Tuple[str, str]:
|
| 38 |
"""
|
| 39 |
-
Parse thinking blocks from
|
|
|
|
| 40 |
|
| 41 |
Args:
|
| 42 |
content: Full model response containing thinking blocks and summary
|
| 43 |
|
| 44 |
Returns:
|
| 45 |
Tuple of (thinking_content, summary_content)
|
| 46 |
-
- thinking_content: All text between <think> tags (or empty string)
|
| 47 |
- summary_content: All text outside thinking blocks (or full content if no tags)
|
| 48 |
"""
|
| 49 |
-
|
|
|
|
| 50 |
matches = re.findall(pattern, content, re.DOTALL)
|
| 51 |
|
| 52 |
if not matches:
|
|
|
|
| 36 |
|
| 37 |
def parse_thinking_blocks(content: str) -> Tuple[str, str]:
|
| 38 |
"""
|
| 39 |
+
Parse thinking blocks from model output.
|
| 40 |
+
Supports both <think> and <thinking> tags.
|
| 41 |
|
| 42 |
Args:
|
| 43 |
content: Full model response containing thinking blocks and summary
|
| 44 |
|
| 45 |
Returns:
|
| 46 |
Tuple of (thinking_content, summary_content)
|
| 47 |
+
- thinking_content: All text between <think>/<thinking> tags (or empty string)
|
| 48 |
- summary_content: All text outside thinking blocks (or full content if no tags)
|
| 49 |
"""
|
| 50 |
+
# Match both <think> and <thinking> tags
|
| 51 |
+
pattern = r'<think(?:ing)?>(.*?)</think(?:ing)?>'
|
| 52 |
matches = re.findall(pattern, content, re.DOTALL)
|
| 53 |
|
| 54 |
if not matches:
|