KiWA001 committed on
Commit
0dd1f34
·
1 Parent(s): 1734552

fix: stronger unescape logic using unicode_escape codec

Browse files
Files changed (1) hide show
  1. sanitizer.py +10 -8
sanitizer.py CHANGED
@@ -110,14 +110,16 @@ def sanitize_response(text: str) -> str:
110
  cleaned = ARTIFACT_N_PATTERN.sub("\n", cleaned)
111
 
112
  # === Unescape JSON/Raw Literals ===
113
- # User wants "\n" to be actual newline and "\"" to be actual quote.
114
- # We try to unescape, but carefully.
115
- if "\\n" in cleaned or '\\"' in cleaned:
116
- try:
117
- # First try standard replace for safety and speed
118
- cleaned = cleaned.replace("\\n", "\n").replace('\\"', '"')
119
- except Exception:
120
- pass
 
 
121
 
122
  # === Spam Removal ===
123
  for pattern in COMPILED_SPAM:
 
110
  cleaned = ARTIFACT_N_PATTERN.sub("\n", cleaned)
111
 
112
  # === Unescape JSON/Raw Literals ===
113
+ # Robustly decode escape sequences like \n, \", \t using Python's codec
114
+ try:
115
+ # If it looks like a JSON string literal (wrapped in quotes), strip them first
116
+ if cleaned.startswith('"') and cleaned.endswith('"'):
117
+ cleaned = cleaned[1:-1]
118
+
119
+ cleaned = cleaned.encode('utf-8').decode('unicode_escape')
120
+ except Exception:
121
+ # Fallback to manual replacement if codec fails
122
+ cleaned = cleaned.replace("\\n", "\n").replace('\\"', '"')
123
 
124
  # === Spam Removal ===
125
  for pattern in COMPILED_SPAM: