Decode literal \n escapes so Markdown tables actually render
Browse files
Follow-up to the "..." fix: Quest-4B's endpoint was returning answer
content with newlines stored as the two-character sequence backslash+n
instead of real newlines, so pipe tables collapsed into a one-line blob
like `\n| Color | Hex |\n|---|---|\n...`. Add decode_escaped_whitespace,
call it from extract_answer, and gate the transform on a dominance
heuristic so legitimate code snippets containing a single \n are left
alone.
Made-with: Cursor
app.py
CHANGED
|
@@ -831,6 +831,40 @@ def strip_think_blocks(text: str) -> str:
|
|
| 831 |
)
|
| 832 |
|
| 833 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 834 |
def _is_placeholder_answer(text: str) -> bool:
    """Report whether `text` matches `_PLACEHOLDER_ANSWER_RE` at its start.

    A falsy `text` (e.g. `None` or an empty string) is matched as "".
    """
    return _PLACEHOLDER_ANSWER_RE.match(text or "") is not None
|
| 836 |
|
|
@@ -868,7 +902,10 @@ def extract_answer(text: str) -> Optional[str]:
|
|
| 868 |
2. Truncated `<answer>...` with no closing tag (tokens ran out);
|
| 869 |
in that case we take everything after the opening tag.
|
| 870 |
"""
|
| 871 |
-
|
|
|
|
|
|
|
|
|
|
| 872 |
|
| 873 |
full_match = re.search(
|
| 874 |
r"<answer>\s*(.*?)\s*</answer>",
|
|
@@ -876,7 +913,7 @@ def extract_answer(text: str) -> Optional[str]:
|
|
| 876 |
flags=re.DOTALL | re.IGNORECASE,
|
| 877 |
)
|
| 878 |
if full_match is not None:
|
| 879 |
-
candidate = full_match.group(1).strip()
|
| 880 |
if candidate and not _is_placeholder_answer(candidate):
|
| 881 |
return candidate
|
| 882 |
# Closed block was a placeholder / empty: fail fast. Do NOT fall
|
|
@@ -888,7 +925,7 @@ def extract_answer(text: str) -> Optional[str]:
|
|
| 888 |
r"<answer>\s*(.*)$", cleaned, flags=re.DOTALL | re.IGNORECASE
|
| 889 |
)
|
| 890 |
if open_match is not None:
|
| 891 |
-
candidate = open_match.group(1).strip()
|
| 892 |
if candidate and not _is_placeholder_answer(candidate):
|
| 893 |
return candidate
|
| 894 |
|
|
|
|
| 831 |
)
|
| 832 |
|
| 833 |
|
| 834 |
+
def decode_escaped_whitespace(text: str) -> str:
    """Decode literal `\\n`/`\\t`/`\\r` sequences back to real whitespace.

    Some OpenAI-compatible servers (and some vLLM builds when a tokenizer's
    chat template escapes control characters) return
    `choices[0].message.content` with newlines stored as the two-character
    backslash+n sequence rather than as a real newline. That breaks Markdown
    rendering because a pipe table on a single line is not a table, which is
    the symptom seen with:

        \\n| Color | Hex |\\n|---|---|\\n| Red | #FF0000 |...

    Decoding only happens when escaped newlines dominate: there must be at
    least 3 backslash+n pairs AND strictly more of them than real newlines.
    Otherwise the text is returned untouched, which keeps an occasional
    backslash-n inside a model-produced code sample from being corrupted.
    Escaped tabs / carriage returns alone never trigger decoding; they are
    only converted once the newline threshold has been met.

    When decoding does happen, an escaped backslash (two backslashes) becomes
    one backslash and shields whatever follows it, so backslash-backslash-n
    turns into a literal backslash followed by `n`, not a newline.

    Args:
        text: Raw model output. Falsy values are returned unchanged.

    Returns:
        `text` with escape sequences decoded, or `text` itself when the
        dominance heuristic is not satisfied.
    """
    if not text:
        return text

    escaped_newlines = text.count("\\n")
    real_newlines = text.count("\n")
    if escaped_newlines < max(3, real_newlines + 1):
        return text

    # One left-to-right pass: each backslash consumes exactly the character
    # after it, so an escaped backslash can never be re-read as the start of
    # another escape. This avoids a placeholder/sentinel string, which would
    # corrupt input that already happened to contain the placeholder.
    replacements = {"\\": "\\", "n": "\n", "r": "\r", "t": "\t"}
    return re.sub(
        r"\\([\\nrt])",
        lambda match: replacements[match.group(1)],
        text,
    )
|
| 866 |
+
|
| 867 |
+
|
| 868 |
def _is_placeholder_answer(text: str) -> bool:
    """Report whether `text` matches `_PLACEHOLDER_ANSWER_RE` at its start.

    A falsy `text` (e.g. `None` or an empty string) is matched as "".
    """
    return _PLACEHOLDER_ANSWER_RE.match(text or "") is not None
|
| 870 |
|
|
|
|
| 902 |
2. Truncated `<answer>...` with no closing tag (tokens ran out);
|
| 903 |
in that case we take everything after the opening tag.
|
| 904 |
"""
|
| 905 |
+
# Decode escaped whitespace on the whole output first so the <answer>
|
| 906 |
+
# regex can actually match the opening and closing tags across lines.
|
| 907 |
+
decoded = decode_escaped_whitespace(text or "")
|
| 908 |
+
cleaned = strip_think_blocks(decoded)
|
| 909 |
|
| 910 |
full_match = re.search(
|
| 911 |
r"<answer>\s*(.*?)\s*</answer>",
|
|
|
|
| 913 |
flags=re.DOTALL | re.IGNORECASE,
|
| 914 |
)
|
| 915 |
if full_match is not None:
|
| 916 |
+
candidate = decode_escaped_whitespace(full_match.group(1).strip())
|
| 917 |
if candidate and not _is_placeholder_answer(candidate):
|
| 918 |
return candidate
|
| 919 |
# Closed block was a placeholder / empty: fail fast. Do NOT fall
|
|
|
|
| 925 |
r"<answer>\s*(.*)$", cleaned, flags=re.DOTALL | re.IGNORECASE
|
| 926 |
)
|
| 927 |
if open_match is not None:
|
| 928 |
+
candidate = decode_escaped_whitespace(open_match.group(1).strip())
|
| 929 |
if candidate and not _is_placeholder_answer(candidate):
|
| 930 |
return candidate
|
| 931 |
|