ming committed on
Commit
bf21a65
·
1 Parent(s): fe47248

fix: Use Qwen chat template and harden NDJSON parsing

Browse files

STEP 1: Use Qwen2.5 native chat template
- Replace manual Phi-3 format with tokenizer.apply_chat_template()
- Ensures correct special tokens for Qwen models
- Adds explicit NDJSON-only reminder in user message
- Critical fix: previous Phi-3 format was incompatible with Qwen

STEP 2: Harden NDJSON parsing
- Add heuristic pre-check: skip any line that does not start with '{' or does not contain the substring 'op'
- Prevents parsing C++ code, prose, or random hallucinations
- Reduces log spam from invalid JSON parsing attempts
- Cleaner error messages for debugging

This should fix the model hallucination issue where Qwen2.5-1.5B
was generating C++ code instead of NDJSON patches.

app/services/structured_summarizer.py CHANGED
@@ -207,7 +207,7 @@ Rules:
207
  return False
208
 
209
  def _build_prompt(self, text: str, style: str) -> str:
210
- """Build the complete prompt for Phi-3."""
211
  system_prompt = self._build_system_prompt()
212
  style_instruction = self._build_style_instruction(style)
213
 
@@ -217,15 +217,30 @@ Rules:
217
  text = text[:max_chars]
218
  logger.warning(f"Truncated text from {len(text)} to {max_chars} chars")
219
 
220
- # Phi-3 chat template format
221
- full_prompt = (
222
- f"<|system|>\n{system_prompt}\n<|end|>\n"
223
- f"<|user|>\n{style_instruction}\n\nArticle:\n{text}\n<|end|>\n"
224
- f"<|assistant|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  )
226
 
227
- return full_prompt
228
-
229
  async def summarize_structured_stream(
230
  self,
231
  text: str,
@@ -416,11 +431,21 @@ Rules:
416
  if not line:
417
  continue
418
 
 
 
 
 
 
 
 
 
419
  # Try to parse JSON patch
420
  try:
421
  patch = json.loads(line)
422
  except json.JSONDecodeError as e:
423
- logger.warning(f"Failed to parse NDJSON line: {line[:100]}... Error: {e}")
 
 
424
  continue
425
 
426
  # Apply patch to state
 
207
  return False
208
 
209
  def _build_prompt(self, text: str, style: str) -> str:
210
+ """Build the complete prompt for Qwen2.5 using its chat template."""
211
  system_prompt = self._build_system_prompt()
212
  style_instruction = self._build_style_instruction(style)
213
 
 
217
  text = text[:max_chars]
218
  logger.warning(f"Truncated text from {len(text)} to {max_chars} chars")
219
 
220
+ messages = [
221
+ {
222
+ "role": "system",
223
+ "content": system_prompt,
224
+ },
225
+ {
226
+ "role": "user",
227
+ "content": (
228
+ f"{style_instruction}\n\n"
229
+ f"Article:\n{text}\n\n"
230
+ "Remember: respond ONLY with newline-delimited JSON patch objects "
231
+ "as described in the system message. "
232
+ "No explanations, no comments, no markdown, no code, no prose."
233
+ ),
234
+ },
235
+ ]
236
+
237
+ # Let Qwen's tokenizer construct the correct special tokens and format
238
+ return self.tokenizer.apply_chat_template(
239
+ messages,
240
+ tokenize=False,
241
+ add_generation_prompt=True,
242
  )
243
 
 
 
244
  async def summarize_structured_stream(
245
  self,
246
  text: str,
 
431
  if not line:
432
  continue
433
 
434
+ # Heuristic: skip anything that clearly isn't a JSON patch object
435
+ # This filters out lines like "#include <bits/stdc++.h>" or random prose.
436
+ if not line.startswith("{") or "op" not in line:
437
+ logger.warning(
438
+ f"Skipping non-JSON-looking line: {line[:80]}..."
439
+ )
440
+ continue
441
+
442
  # Try to parse JSON patch
443
  try:
444
  patch = json.loads(line)
445
  except json.JSONDecodeError as e:
446
+ logger.warning(
447
+ f"Failed to parse NDJSON line: {line[:100]}... Error: {e}"
448
+ )
449
  continue
450
 
451
  # Apply patch to state