Spaces:
Sleeping
Sleeping
fix: distribute text by sentence boundaries instead of character proportion
Browse files
Split transcript at punctuation marks before assigning to VAD segments,
so sentences are never cut mid-phrase. Falls back to character-level for
text without punctuation.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- model/voxtral-server/main.py +39 -26
model/voxtral-server/main.py
CHANGED
|
@@ -259,44 +259,57 @@ def _segments_from_vad(audio: np.ndarray, sr: int) -> tuple[list[dict], str]:
|
|
| 259 |
return segs, "vad"
|
| 260 |
|
| 261 |
|
| 262 |
-
def
|
| 263 |
-
"""Split text into
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
if len(tokens) <= 1 and len(text) > 1:
|
| 268 |
-
return list(text)
|
| 269 |
-
return tokens
|
| 270 |
|
| 271 |
|
| 272 |
def _distribute_text(full_text: str, segs: list[dict]) -> list[dict]:
|
| 273 |
-
"""
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
|
|
|
| 277 |
return [{**s, "text": ""} for s in segs]
|
| 278 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
total_dur = sum(s["end"] - s["start"] for s in segs)
|
| 280 |
if total_dur <= 0:
|
| 281 |
-
|
| 282 |
-
return result + [{**s, "text": ""} for s in segs[1:]]
|
| 283 |
|
| 284 |
is_cjk = len(full_text.split()) <= 1 and len(full_text) > 1
|
| 285 |
sep = "" if is_cjk else " "
|
| 286 |
|
| 287 |
-
|
| 288 |
-
|
|
|
|
|
|
|
|
|
|
| 289 |
for i, seg in enumerate(segs):
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
|
| 301 |
|
| 302 |
# ─── Emotion analysis ──────────────────────────────────────────────────────────
|
|
|
|
| 259 |
return segs, "vad"
|
| 260 |
|
| 261 |
|
| 262 |
+
def _split_sentences(text: str) -> list[str]:
|
| 263 |
+
"""Split text into sentences at punctuation boundaries (CJK + Latin)."""
|
| 264 |
+
import re
|
| 265 |
+
parts = re.split(r'(?<=[οΌοΌγ?!])\s*', text)
|
| 266 |
+
return [p for p in parts if p.strip()]
|
|
|
|
|
|
|
|
|
|
| 267 |
|
| 268 |
|
| 269 |
def _distribute_text(full_text: str, segs: list[dict]) -> list[dict]:
|
| 270 |
+
"""Assign complete sentences to segments by time proportion.
|
| 271 |
+
Sentences are never split mid-punctuation; each segment gets whole sentences.
|
| 272 |
+
Falls back to character-level splitting if no sentence boundaries found.
|
| 273 |
+
"""
|
| 274 |
+
if not full_text or not segs:
|
| 275 |
return [{**s, "text": ""} for s in segs]
|
| 276 |
|
| 277 |
+
if len(segs) == 1:
|
| 278 |
+
return [{**segs[0], "text": full_text}]
|
| 279 |
+
|
| 280 |
+
sentences = _split_sentences(full_text)
|
| 281 |
+
# Fallback: split by character if no sentence boundaries
|
| 282 |
+
if len(sentences) <= 1:
|
| 283 |
+
is_cjk = len(full_text.split()) <= 1
|
| 284 |
+
sentences = list(full_text) if is_cjk else full_text.split()
|
| 285 |
+
|
| 286 |
total_dur = sum(s["end"] - s["start"] for s in segs)
|
| 287 |
if total_dur <= 0:
|
| 288 |
+
return [{**segs[0], "text": full_text}] + [{**s, "text": ""} for s in segs[1:]]
|
|
|
|
| 289 |
|
| 290 |
is_cjk = len(full_text.split()) <= 1 and len(full_text) > 1
|
| 291 |
sep = "" if is_cjk else " "
|
| 292 |
|
| 293 |
+
# Assign each sentence to the segment whose cumulative time covers its proportional position
|
| 294 |
+
n = len(sentences)
|
| 295 |
+
result_texts: list[list[str]] = [[] for _ in segs]
|
| 296 |
+
|
| 297 |
+
cumulative = 0.0
|
| 298 |
for i, seg in enumerate(segs):
|
| 299 |
+
cumulative += (seg["end"] - seg["start"]) / total_dur
|
| 300 |
+
# Assign sentences whose proportional position falls within this segment's cumulative range
|
| 301 |
+
threshold = cumulative * n
|
| 302 |
+
while len(result_texts[i]) + sum(len(t) for t in result_texts[:i]) < round(threshold):
|
| 303 |
+
idx = sum(len(t) for t in result_texts)
|
| 304 |
+
if idx >= n:
|
| 305 |
+
break
|
| 306 |
+
result_texts[i].append(sentences[idx])
|
| 307 |
+
|
| 308 |
+
# Ensure any leftover sentences go to the last segment
|
| 309 |
+
assigned = sum(len(t) for t in result_texts)
|
| 310 |
+
result_texts[-1].extend(sentences[assigned:])
|
| 311 |
+
|
| 312 |
+
return [{**seg, "text": sep.join(texts)} for seg, texts in zip(segs, result_texts)]
|
| 313 |
|
| 314 |
|
| 315 |
# ─── Emotion analysis ──────────────────────────────────────────────────────────
|