Spaces:
Sleeping
Sleeping
Chirag-Bhardwaj committed on
Commit ·
d23bfc4
1
Parent(s): a67f0ce
Preserve \n through whitespace cleanup so "new line" survives strip
Browse files
The "new line" → \n substitution happens before the final cleanup. When
a VAD segment contains only "new line", the resulting lone \n was wiped
by text.strip() and the surrounding \s+ patterns, so the segment came
back as "" and the client discarded it. Switched the whitespace patterns
to [ \t] / [ \t\r] so \n survives to the client.
server.py
CHANGED
|
@@ -78,10 +78,15 @@ def _replace_spoken_punctuation(text: str) -> str:
|
|
| 78 |
text = re.sub(r"\{([,.:;!?/\-+])\}", r"\1", text)
|
| 79 |
for pattern, replacement in _SPOKEN_PUNCTUATION:
|
| 80 |
text = pattern.sub(replacement, text)
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
| 83 |
text = re.sub(r" +", " ", text)
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
|
| 87 |
# ---------------------------------------------------------------------------
|
|
|
|
| 78 |
text = re.sub(r"\{([,.:;!?/\-+])\}", r"\1", text)
|
| 79 |
for pattern, replacement in _SPOKEN_PUNCTUATION:
|
| 80 |
text = pattern.sub(replacement, text)
|
| 81 |
+
# NB: \s would also match \n that we just inserted via "new line" — use
|
| 82 |
+
# [ \t] so the newline survives to the client.
|
| 83 |
+
text = re.sub(r"[ \t]+([,.:;!?)\]])", r"\1", text)
|
| 84 |
+
text = re.sub(r"([([\[])[ \t]+", r"\1", text)
|
| 85 |
text = re.sub(r" +", " ", text)
|
| 86 |
+
# Trim spaces/tabs/CR only — preserving \n means a segment that's just
|
| 87 |
+
# "new line" (now "\n") doesn't get wiped to empty and discarded by the
|
| 88 |
+
# client as a zero-length segment.
|
| 89 |
+
return re.sub(r"^[ \t\r]+|[ \t\r]+$", "", text)
|
| 90 |
|
| 91 |
|
| 92 |
# ---------------------------------------------------------------------------
|