Spaces:
Sleeping
Sleeping
ming committed on
Commit ·
bf21a65
1
Parent(s): fe47248
fix: Use Qwen chat template and harden NDJSON parsing
Browse files
STEP 1: Use Qwen2.5 native chat template
- Replace manual Phi-3 format with tokenizer.apply_chat_template()
- Ensures correct special tokens for Qwen models
- Adds explicit NDJSON-only reminder in user message
- Critical fix: previous Phi-3 format was incompatible with Qwen
STEP 2: Harden NDJSON parsing
- Add heuristic check: skip lines not starting with '{' or missing 'op'
- Prevents parsing C++ code, prose, or random hallucinations
- Reduces log spam from invalid JSON parsing attempts
- Cleaner error messages for debugging
This should fix the model hallucination issue where Qwen2.5-1.5B
was generating C++ code instead of NDJSON patches.
app/services/structured_summarizer.py
CHANGED
|
@@ -207,7 +207,7 @@ Rules:
|
|
| 207 |
return False
|
| 208 |
|
| 209 |
def _build_prompt(self, text: str, style: str) -> str:
|
| 210 |
-
"""Build the complete prompt for
|
| 211 |
system_prompt = self._build_system_prompt()
|
| 212 |
style_instruction = self._build_style_instruction(style)
|
| 213 |
|
|
@@ -217,15 +217,30 @@ Rules:
|
|
| 217 |
text = text[:max_chars]
|
| 218 |
logger.warning(f"Truncated text from {len(text)} to {max_chars} chars")
|
| 219 |
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
)
|
| 226 |
|
| 227 |
-
return full_prompt
|
| 228 |
-
|
| 229 |
async def summarize_structured_stream(
|
| 230 |
self,
|
| 231 |
text: str,
|
|
@@ -416,11 +431,21 @@ Rules:
|
|
| 416 |
if not line:
|
| 417 |
continue
|
| 418 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 419 |
# Try to parse JSON patch
|
| 420 |
try:
|
| 421 |
patch = json.loads(line)
|
| 422 |
except json.JSONDecodeError as e:
|
| 423 |
-
logger.warning(
|
|
|
|
|
|
|
| 424 |
continue
|
| 425 |
|
| 426 |
# Apply patch to state
|
|
|
|
| 207 |
return False
|
| 208 |
|
| 209 |
def _build_prompt(self, text: str, style: str) -> str:
|
| 210 |
+
"""Build the complete prompt for Qwen2.5 using its chat template."""
|
| 211 |
system_prompt = self._build_system_prompt()
|
| 212 |
style_instruction = self._build_style_instruction(style)
|
| 213 |
|
|
|
|
| 217 |
text = text[:max_chars]
|
| 218 |
logger.warning(f"Truncated text from {len(text)} to {max_chars} chars")
|
| 219 |
|
| 220 |
+
messages = [
|
| 221 |
+
{
|
| 222 |
+
"role": "system",
|
| 223 |
+
"content": system_prompt,
|
| 224 |
+
},
|
| 225 |
+
{
|
| 226 |
+
"role": "user",
|
| 227 |
+
"content": (
|
| 228 |
+
f"{style_instruction}\n\n"
|
| 229 |
+
f"Article:\n{text}\n\n"
|
| 230 |
+
"Remember: respond ONLY with newline-delimited JSON patch objects "
|
| 231 |
+
"as described in the system message. "
|
| 232 |
+
"No explanations, no comments, no markdown, no code, no prose."
|
| 233 |
+
),
|
| 234 |
+
},
|
| 235 |
+
]
|
| 236 |
+
|
| 237 |
+
# Let Qwen's tokenizer construct the correct special tokens and format
|
| 238 |
+
return self.tokenizer.apply_chat_template(
|
| 239 |
+
messages,
|
| 240 |
+
tokenize=False,
|
| 241 |
+
add_generation_prompt=True,
|
| 242 |
)
|
| 243 |
|
|
|
|
|
|
|
| 244 |
async def summarize_structured_stream(
|
| 245 |
self,
|
| 246 |
text: str,
|
|
|
|
| 431 |
if not line:
|
| 432 |
continue
|
| 433 |
|
| 434 |
+
# Heuristic: skip anything that clearly isn't a JSON patch object
|
| 435 |
+
# This filters out lines like "#include <bits/stdc++.h>" or random prose.
|
| 436 |
+
if not line.startswith("{") or "op" not in line:
|
| 437 |
+
logger.warning(
|
| 438 |
+
f"Skipping non-JSON-looking line: {line[:80]}..."
|
| 439 |
+
)
|
| 440 |
+
continue
|
| 441 |
+
|
| 442 |
# Try to parse JSON patch
|
| 443 |
try:
|
| 444 |
patch = json.loads(line)
|
| 445 |
except json.JSONDecodeError as e:
|
| 446 |
+
logger.warning(
|
| 447 |
+
f"Failed to parse NDJSON line: {line[:100]}... Error: {e}"
|
| 448 |
+
)
|
| 449 |
continue
|
| 450 |
|
| 451 |
# Apply patch to state
|