Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,7 +13,6 @@ import hashlib
|
|
| 13 |
import json
|
| 14 |
from pathlib import Path
|
| 15 |
from tqdm.asyncio import tqdm
|
| 16 |
-
import ssml.builder as ssml
|
| 17 |
|
| 18 |
class TimingManager:
|
| 19 |
def __init__(self):
|
|
@@ -206,17 +205,62 @@ class AudioCache:
|
|
| 206 |
cache_file = self.cache_dir / f"{cache_key}.wav"
|
| 207 |
audio.export(str(cache_file), format="wav")
|
| 208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
class SpeechEnhancer:
|
| 210 |
@staticmethod
|
| 211 |
def add_speech_marks(text: str) -> str:
|
| 212 |
"""Add SSML marks for better speech control"""
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
@staticmethod
|
| 222 |
def enhance_timing(segment: Segment) -> Segment:
|
|
|
|
| 13 |
import json
|
| 14 |
from pathlib import Path
|
| 15 |
from tqdm.asyncio import tqdm
|
|
|
|
| 16 |
|
| 17 |
class TimingManager:
|
| 18 |
def __init__(self):
|
|
|
|
| 205 |
cache_file = self.cache_dir / f"{cache_key}.wav"
|
| 206 |
audio.export(str(cache_file), format="wav")
|
| 207 |
|
| 208 |
+
class SSMLBuilder:
    """Fluent accumulator of SSML fragments rendered as one ``<speak>`` document.

    Each ``add_*`` method appends a single fragment to ``content`` and
    returns the builder itself so calls can be chained. Values are inserted
    verbatim: no XML escaping is applied to text or attribute values.
    """

    def __init__(self):
        # Ordered fragments; ``__str__`` concatenates them with no separator.
        self.content = []

    def _push(self, fragment):
        """Append ``fragment`` to ``content`` and return the builder."""
        self.content.append(fragment)
        return self

    def add_text(self, text: str):
        """Append ``text`` unchanged."""
        return self._push(text)

    def add_break(self, strength: str = "medium"):
        """Append a ``<break>`` element with the given ``strength``."""
        return self._push('<break strength="{}"/>'.format(strength))

    def add_prosody(self, text: str, rate: str = "medium", pitch: str = "medium"):
        """Append ``text`` wrapped in a ``<prosody>`` element."""
        fragment = '<prosody rate="{}" pitch="{}">{}</prosody>'.format(
            rate, pitch, text
        )
        return self._push(fragment)

    def add_sentence(self, text: str):
        """Append ``text`` wrapped in an ``<s>`` element."""
        return self._push('<s>{}</s>'.format(text))

    def __str__(self):
        """Return the XML declaration plus all fragments inside ``<speak>``."""
        pieces = (
            '<?xml version="1.0"?>',
            '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis">',
            "".join(self.content),
            '</speak>',
        )
        return "".join(pieces)
|
| 237 |
+
|
| 238 |
class SpeechEnhancer:
|
| 239 |
@staticmethod
def add_speech_marks(text: str) -> str:
    """Add SSML marks for better speech control.

    The text is split on ``'. '`` into sentences; each non-blank sentence
    is wrapped in ``<s>``, a strong break separates consecutive sentences,
    and a medium break follows every comma inside a sentence.

    Args:
        text: Plain text to convert. XML-special characters (``&``, ``<``,
            ``>``) are escaped so the output stays well-formed.

    Returns:
        The complete SSML document produced by ``str(SSMLBuilder())``.
    """
    # Function-scope import keeps this method self-contained.
    from xml.sax.saxutils import escape

    ssml = SSMLBuilder()

    # Drop blank pieces first so a trailing ". " cannot produce a dangling
    # strong break after the last real sentence.
    # NOTE: splitting on '. ' removes the period from every sentence except
    # the last one; that is unchanged from the previous behaviour.
    stripped = (piece.strip() for piece in text.split('. '))
    sentences = [piece for piece in stripped if piece]

    for i, sentence in enumerate(sentences):
        # Strong pause between sentences, never before the first one.
        if i:
            ssml.add_break("strong")

        # Put the medium pause at each comma, inside the sentence element.
        # SSMLBuilder.add_break only appends at document level, so the
        # break markup is embedded directly in the sentence text.
        clauses = [escape(clause) for clause in sentence.split(',')]
        ssml.add_sentence(',<break strength="medium"/>'.join(clauses))

    return str(ssml)
|
| 264 |
|
| 265 |
@staticmethod
|
| 266 |
def enhance_timing(segment: Segment) -> Segment:
|