TomLii committed on
Commit
34518d3
·
1 Parent(s): 1a201e4

Decode literal \n escapes so Markdown tables actually render

Browse files

Follow-up to the "..." fix: Quest-4B's endpoint was returning answer
content with newlines stored as the two-character sequence backslash+n
instead of real newlines, so pipe tables collapsed into a one-line blob
like `\n| Color | Hex |\n|---|---|\n...`. Add decode_escaped_whitespace,
call it from extract_answer, and gate the transform on a dominance
heuristic so legitimate code snippets containing a single \n are left
alone.

Made-with: Cursor

Files changed (1) hide show
  1. app.py +40 -3
app.py CHANGED
@@ -831,6 +831,40 @@ def strip_think_blocks(text: str) -> str:
831
  )
832
 
833
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
834
  def _is_placeholder_answer(text: str) -> bool:
835
  return bool(_PLACEHOLDER_ANSWER_RE.match(text or ""))
836
 
@@ -868,7 +902,10 @@ def extract_answer(text: str) -> Optional[str]:
868
  2. Truncated `<answer>...` with no closing tag (tokens ran out);
869
  in that case we take everything after the opening tag.
870
  """
871
- cleaned = strip_think_blocks(text or "")
 
 
 
872
 
873
  full_match = re.search(
874
  r"<answer>\s*(.*?)\s*</answer>",
@@ -876,7 +913,7 @@ def extract_answer(text: str) -> Optional[str]:
876
  flags=re.DOTALL | re.IGNORECASE,
877
  )
878
  if full_match is not None:
879
- candidate = full_match.group(1).strip()
880
  if candidate and not _is_placeholder_answer(candidate):
881
  return candidate
882
  # Closed block was a placeholder / empty: fail fast. Do NOT fall
@@ -888,7 +925,7 @@ def extract_answer(text: str) -> Optional[str]:
888
  r"<answer>\s*(.*)$", cleaned, flags=re.DOTALL | re.IGNORECASE
889
  )
890
  if open_match is not None:
891
- candidate = open_match.group(1).strip()
892
  if candidate and not _is_placeholder_answer(candidate):
893
  return candidate
894
 
 
831
  )
832
 
833
 
834
def decode_escaped_whitespace(text: str) -> str:
    """Decode literal `\\n`/`\\t`/`\\r` sequences back to real whitespace.

    Some OpenAI-compatible servers return `choices[0].message.content` with
    newlines stored as the two-character backslash+n sequence rather than as
    a real newline. That breaks Markdown rendering because a pipe table on a
    single line is not a table -- it is just a sentence with `|` in it:

        \\n| Color | Hex |\\n|---|---|\\n| Red | #FF0000 |...

    Decoding only happens when the escapes dominate: the text must contain
    at least 3 backslash+n pairs AND strictly more of them than real
    newlines. Below that threshold the text is returned unchanged, which
    keeps legitimate backslash-n pairs in a code sample intact. Note that
    text whose only escapes are `\\t` or `\\r` never reaches the threshold
    and is therefore never decoded.

    When decoding does happen, an escaped backslash (two backslashes)
    becomes a single backslash, so a doubled-backslash+n in the input comes
    out as a literal backslash+n rather than a newline.

    Args:
        text: Raw model output. Falsy values (``""`` or ``None``) are
            returned as-is.

    Returns:
        The text with escape sequences decoded, or the input unchanged when
        the dominance heuristic does not fire.
    """
    if not text:
        return text

    # Heuristic only: this count also sees the backslash+n tail of an
    # escaped backslash followed by `n`.
    escaped_newlines = text.count("\\n")
    real_newlines = text.count("\n")
    if escaped_newlines < max(3, real_newlines + 1):
        return text

    # Decode in a single left-to-right pass. Consuming `\\` + next char as
    # one unit means an escaped backslash can never be mistaken for the
    # start of `\n`, and no in-band sentinel is needed (a sentinel could
    # collide with bytes already present in the text).
    decoded = {"\\": "\\", "n": "\n", "r": "\r", "t": "\t"}
    return re.sub(r"\\([\\nrt])", lambda m: decoded[m.group(1)], text)
866
+
867
+
868
  def _is_placeholder_answer(text: str) -> bool:
869
  return bool(_PLACEHOLDER_ANSWER_RE.match(text or ""))
870
 
 
902
  2. Truncated `<answer>...` with no closing tag (tokens ran out);
903
  in that case we take everything after the opening tag.
904
  """
905
+ # Decode escaped whitespace on the whole output first so the <answer>
906
+ # regex can actually match the opening and closing tags across lines.
907
+ decoded = decode_escaped_whitespace(text or "")
908
+ cleaned = strip_think_blocks(decoded)
909
 
910
  full_match = re.search(
911
  r"<answer>\s*(.*?)\s*</answer>",
 
913
  flags=re.DOTALL | re.IGNORECASE,
914
  )
915
  if full_match is not None:
916
+ candidate = decode_escaped_whitespace(full_match.group(1).strip())
917
  if candidate and not _is_placeholder_answer(candidate):
918
  return candidate
919
  # Closed block was a placeholder / empty: fail fast. Do NOT fall
 
925
  r"<answer>\s*(.*)$", cleaned, flags=re.DOTALL | re.IGNORECASE
926
  )
927
  if open_match is not None:
928
+ candidate = decode_escaped_whitespace(open_match.group(1).strip())
929
  if candidate and not _is_placeholder_answer(candidate):
930
  return candidate
931