Spaces:

osunlp
/

QUEST

Running

TomLii committed on 26 days ago

Commit

34518d3

1 Parent(s): 1a201e4

Decode literal \n escapes so Markdown tables actually render

Follow-up to the "..." fix: Quest-4B's endpoint was returning answer
content with newlines stored as the two-character sequence backslash+n
instead of real newlines, so pipe tables collapsed into a one-line blob
like `\n| Color | Hex |\n|---|---|\n...`. Add decode_escaped_whitespace,
call it from extract_answer, and gate the transform on a dominance
heuristic so legitimate code snippets containing a single \n are left
alone.

Made-with: Cursor

Files changed (1) hide show

app.py +40 -3

app.py CHANGED Viewed

@@ -831,6 +831,40 @@ def strip_think_blocks(text: str) -> str:
     )
 def _is_placeholder_answer(text: str) -> bool:
     """Report whether `text` matches `_PLACEHOLDER_ANSWER_RE`.

     A falsy `text` (None or "") is tested as the empty string. The pattern
     itself is defined outside this view; presumably a compiled regex whose
     `.match` anchors at the start of the string -- confirm at its definition.
     """
     return bool(_PLACEHOLDER_ANSWER_RE.match(text or ""))
@@ -868,7 +902,10 @@ def extract_answer(text: str) -> Optional[str]:
     2. Truncated `<answer>...` with no closing tag (tokens ran out);
        in that case we take everything after the opening tag.
     """
-    cleaned = strip_think_blocks(text or "")
     full_match = re.search(
         r"<answer>\s*(.*?)\s*</answer>",
@@ -876,7 +913,7 @@ def extract_answer(text: str) -> Optional[str]:
         flags=re.DOTALL | re.IGNORECASE,
     )
     if full_match is not None:
-        candidate = full_match.group(1).strip()
         if candidate and not _is_placeholder_answer(candidate):
             return candidate
         # Closed block was a placeholder / empty: fail fast. Do NOT fall
@@ -888,7 +925,7 @@ def extract_answer(text: str) -> Optional[str]:
         r"<answer>\s*(.*)$", cleaned, flags=re.DOTALL | re.IGNORECASE
     )
     if open_match is not None:
-        candidate = open_match.group(1).strip()
         if candidate and not _is_placeholder_answer(candidate):
             return candidate

     )
def decode_escaped_whitespace(text: str) -> str:
    """Decode literal `\\n`/`\\t`/`\\r` sequences back to real whitespace.

    Some OpenAI-compatible servers (and some vLLM builds when a tokenizer's
    chat template escapes control characters) return `choices[0].message.content`
    with newlines stored as the two-character backslash+n sequence rather than
    as a real newline. That breaks Markdown rendering because a pipe table on
    a single line is not a table -- it is just a sentence with `|` in it, which
    is exactly the symptom we saw with:

        \\n| Color | Hex |\\n|---|---|\\n| Red | #FF0000 |...

    The transform is gated on a dominance heuristic: we only decode when the
    text contains at least 3 backslash+n pairs AND strictly more of them than
    real newlines. That keeps us from corrupting the odd legitimate
    backslash-n pair that happens to appear in a code sample the model
    produced.

    Args:
        text: Raw model output. Falsy values (``""`` / ``None``) are returned
            unchanged.

    Returns:
        ``text`` untouched when the heuristic does not fire; otherwise a copy
        in which backslash+n, backslash+r and backslash+t become real
        newline, carriage-return and tab characters, and every escaped
        backslash (two backslashes) collapses to a single literal backslash.
    """
    if not text:
        return text

    # Plain substring counts: a backslash+n that is itself preceded by an
    # escaped backslash is counted too, which is fine for a heuristic gate.
    escaped_newlines = text.count("\\n")
    real_newlines = text.count("\n")

    # This single threshold also covers the "nothing to decode" case: text
    # with fewer than 3 escaped newlines is always returned as-is, even if it
    # contains backslash+t or backslash+r.
    if escaped_newlines < max(3, real_newlines + 1):
        return text

    # Split on escaped backslashes so that `\\\\n` (an actual `\n` the model
    # wrote) doesn't get collapsed to a newline, decode each piece, then
    # rejoin with one literal backslash. Splitting (instead of swapping in a
    # placeholder string) means no input content can collide with a sentinel.
    pieces = text.split("\\\\")
    return "\\".join(
        piece.replace("\\n", "\n").replace("\\r", "\r").replace("\\t", "\t")
        for piece in pieces
    )
 def _is_placeholder_answer(text: str) -> bool:
     """Report whether `text` matches `_PLACEHOLDER_ANSWER_RE`.

     A falsy `text` (None or "") is tested as the empty string. The pattern
     itself is defined outside this view; presumably a compiled regex whose
     `.match` anchors at the start of the string -- confirm at its definition.
     """
     return bool(_PLACEHOLDER_ANSWER_RE.match(text or ""))
     2. Truncated `<answer>...` with no closing tag (tokens ran out);
        in that case we take everything after the opening tag.
     """
+    # Decode escaped whitespace on the whole output first so the <answer>
+    # regex can actually match the opening and closing tags across lines.
+    decoded = decode_escaped_whitespace(text or "")
+    cleaned = strip_think_blocks(decoded)
     full_match = re.search(
         r"<answer>\s*(.*?)\s*</answer>",
         flags=re.DOTALL | re.IGNORECASE,
     )
     if full_match is not None:
+        candidate = decode_escaped_whitespace(full_match.group(1).strip())
         if candidate and not _is_placeholder_answer(candidate):
             return candidate
         # Closed block was a placeholder / empty: fail fast. Do NOT fall
         r"<answer>\s*(.*)$", cleaned, flags=re.DOTALL | re.IGNORECASE
     )
     if open_match is not None:
+        candidate = decode_escaped_whitespace(open_match.group(1).strip())
         if candidate and not _is_placeholder_answer(candidate):
             return candidate