tao-shen Claude Opus 4.6 committed on
Commit
44f1609
·
1 Parent(s): bcd1283

fix: improve bilingual parsing to split English/Chinese by paragraph

Browse files

The LLM often ignores the --- separator and outputs an English paragraph
followed by a Chinese paragraph. The new fallback detects Chinese characters
to find the split point, so the chatlog EN/ZH toggle works correctly.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. scripts/conversation-loop.py +26 -0
scripts/conversation-loop.py CHANGED
@@ -657,6 +657,10 @@ def call_llm(system_prompt, user_prompt):
657
  return ""
658
 
659
 
 
 
 
 
660
  def parse_bilingual(text):
661
  """Parse bilingual response into (en, zh). Handle action tags gracefully."""
662
  # Remove action tags and content blocks for display
@@ -664,6 +668,7 @@ def parse_bilingual(text):
664
  display = re.sub(r'\[CONTENT\].*?\[/CONTENT\]', '', display, flags=re.DOTALL)
665
  display = display.strip()
666
 
 
667
  if '\n---\n' in display:
668
  parts = display.split('\n---\n', 1)
669
  return parts[0].strip(), parts[1].strip()
@@ -672,6 +677,27 @@ def parse_bilingual(text):
672
  en, zh = parts[0].strip(), parts[1].strip()
673
  if en and zh:
674
  return en, zh
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
675
  return display, display
676
 
677
 
 
657
  return ""
658
 
659
 
660
+ def _has_chinese(s):
661
+ """Check if string contains Chinese characters."""
662
+ return bool(re.search(r'[\u4e00-\u9fff]', s))
663
+
664
  def parse_bilingual(text):
665
  """Parse bilingual response into (en, zh). Handle action tags gracefully."""
666
  # Remove action tags and content blocks for display
 
668
  display = re.sub(r'\[CONTENT\].*?\[/CONTENT\]', '', display, flags=re.DOTALL)
669
  display = display.strip()
670
 
671
+ # 1. Explicit --- separator
672
  if '\n---\n' in display:
673
  parts = display.split('\n---\n', 1)
674
  return parts[0].strip(), parts[1].strip()
 
677
  en, zh = parts[0].strip(), parts[1].strip()
678
  if en and zh:
679
  return en, zh
680
+
681
+ # 2. Fallback: split on double-newline between English and Chinese paragraphs
682
+ paragraphs = re.split(r'\n{2,}', display)
683
+ if len(paragraphs) >= 2:
684
+ # Find the split point: first paragraph with Chinese is the start of zh
685
+ en_parts = []
686
+ zh_parts = []
687
+ found_zh = False
688
+ for p in paragraphs:
689
+ p = p.strip()
690
+ if not p:
691
+ continue
692
+ if not found_zh and _has_chinese(p):
693
+ found_zh = True
694
+ if found_zh:
695
+ zh_parts.append(p)
696
+ else:
697
+ en_parts.append(p)
698
+ if en_parts and zh_parts:
699
+ return '\n\n'.join(en_parts), '\n\n'.join(zh_parts)
700
+
701
  return display, display
702
 
703