Spaces:
Running
Running
antigravity
committed on
Commit
·
e081c7f
1
Parent(s):
35881ba
feat: add fragment_interval API parameter for adjustable sentence gap
Browse files
- app.py +6 -4
- genie_tts/Core/TTSPlayer.py +2 -2
- genie_tts/Internal.py +8 -2
- genie_tts/Utils/Shared.py +1 -0
app.py
CHANGED
|
@@ -101,7 +101,8 @@ async def upload_and_tts(
|
|
| 101 |
text: str = Form(...),
|
| 102 |
language: str = Form("zh"),
|
| 103 |
text_lang: str = Form(None),
|
| 104 |
-
speed: float = Form(1.0),
|
|
|
|
| 105 |
file: UploadFile = File(...)
|
| 106 |
):
|
| 107 |
"""
|
|
@@ -128,7 +129,7 @@ async def upload_and_tts(
|
|
| 128 |
|
| 129 |
out_path = f"/tmp/out_{ts}.wav"
|
| 130 |
# 🟢 执行 TTS
|
| 131 |
-
genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang, speed=speed)
|
| 132 |
|
| 133 |
# 🟢 关键:强制等待文件出现(最多等5秒)
|
| 134 |
wait_time = 0
|
|
@@ -165,7 +166,8 @@ async def dynamic_tts(
|
|
| 165 |
prompt_text: str = Form(None),
|
| 166 |
prompt_lang: str = Form("zh"),
|
| 167 |
text_lang: str = Form(None),
|
| 168 |
-
speed: float = Form(1.0),
|
|
|
|
| 169 |
use_default_ref: bool = Form(True)
|
| 170 |
):
|
| 171 |
"""
|
|
@@ -187,7 +189,7 @@ async def dynamic_tts(
|
|
| 187 |
genie_tts.set_reference_audio(character_name, ref_info["path"], final_text, prompt_lang)
|
| 188 |
|
| 189 |
out_path = f"/tmp/out_dyn_{int(time.time())}.wav"
|
| 190 |
-
genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang, speed=speed)
|
| 191 |
|
| 192 |
# 🟢 等待文件生成(最多等5秒)
|
| 193 |
wait_time = 0
|
|
|
|
| 101 |
text: str = Form(...),
|
| 102 |
language: str = Form("zh"),
|
| 103 |
text_lang: str = Form(None),
|
| 104 |
+
speed: float = Form(1.0),
|
| 105 |
+
fragment_interval: float = Form(0.3), # 句子间隔时长(秒)
|
| 106 |
file: UploadFile = File(...)
|
| 107 |
):
|
| 108 |
"""
|
|
|
|
| 129 |
|
| 130 |
out_path = f"/tmp/out_{ts}.wav"
|
| 131 |
# 🟢 执行 TTS
|
| 132 |
+
genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang, speed=speed, fragment_interval=fragment_interval)
|
| 133 |
|
| 134 |
# 🟢 关键:强制等待文件出现(最多等5秒)
|
| 135 |
wait_time = 0
|
|
|
|
| 166 |
prompt_text: str = Form(None),
|
| 167 |
prompt_lang: str = Form("zh"),
|
| 168 |
text_lang: str = Form(None),
|
| 169 |
+
speed: float = Form(1.0),
|
| 170 |
+
fragment_interval: float = Form(0.3), # 句子间隔时长(秒)
|
| 171 |
use_default_ref: bool = Form(True)
|
| 172 |
):
|
| 173 |
"""
|
|
|
|
| 189 |
genie_tts.set_reference_audio(character_name, ref_info["path"], final_text, prompt_lang)
|
| 190 |
|
| 191 |
out_path = f"/tmp/out_dyn_{int(time.time())}.wav"
|
| 192 |
+
genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang, speed=speed, fragment_interval=fragment_interval)
|
| 193 |
|
| 194 |
# 🟢 等待文件生成(最多等5秒)
|
| 195 |
wait_time = 0
|
genie_tts/Core/TTSPlayer.py
CHANGED
|
@@ -150,8 +150,8 @@ class TTSPlayer:
|
|
| 150 |
|
| 151 |
def _save_session_audio(self):
|
| 152 |
try:
|
| 153 |
-
# 🔥
|
| 154 |
-
fragment_interval =
|
| 155 |
zero_padding = np.zeros(int(self.sample_rate * fragment_interval), dtype=np.float32)
|
| 156 |
|
| 157 |
padded_chunks = []
|
|
|
|
| 150 |
|
| 151 |
def _save_session_audio(self):
|
| 152 |
try:
|
| 153 |
+
# 🔥 句子间静音间隔(从 context 读取,支持 API 动态调节)
|
| 154 |
+
fragment_interval = context.current_fragment_interval
|
| 155 |
zero_padding = np.zeros(int(self.sample_rate * fragment_interval), dtype=np.float32)
|
| 156 |
|
| 157 |
padded_chunks = []
|
genie_tts/Internal.py
CHANGED
|
@@ -194,7 +194,8 @@ async def tts_async(
|
|
| 194 |
split_sentence: bool = False,
|
| 195 |
save_path: Union[str, PathLike, None] = None,
|
| 196 |
text_language: str = None,
|
| 197 |
-
speed: float = 1.0,
|
|
|
|
| 198 |
) -> AsyncIterator[bytes]:
|
| 199 |
"""
|
| 200 |
Asynchronously generates speech from text and yields audio chunks.
|
|
@@ -245,6 +246,8 @@ async def tts_async(
|
|
| 245 |
context.current_text_language = normalize_language(text_language) if text_language else None
|
| 246 |
# 设置语速
|
| 247 |
context.current_speed = speed
|
|
|
|
|
|
|
| 248 |
|
| 249 |
# 3. 使用新的回调接口启动 TTS 会话
|
| 250 |
tts_player.start_session(
|
|
@@ -273,7 +276,8 @@ def tts(
|
|
| 273 |
split_sentence: bool = True,
|
| 274 |
save_path: Union[str, PathLike, None] = None,
|
| 275 |
text_language: str = None,
|
| 276 |
-
speed: float = 1.0,
|
|
|
|
| 277 |
) -> None:
|
| 278 |
"""
|
| 279 |
Synchronously generates speech from text.
|
|
@@ -309,6 +313,8 @@ def tts(
|
|
| 309 |
context.current_text_language = normalize_language(text_language) if text_language else None
|
| 310 |
# 设置语速
|
| 311 |
context.current_speed = speed
|
|
|
|
|
|
|
| 312 |
|
| 313 |
tts_player.start_session(
|
| 314 |
play=play,
|
|
|
|
| 194 |
split_sentence: bool = False,
|
| 195 |
save_path: Union[str, PathLike, None] = None,
|
| 196 |
text_language: str = None,
|
| 197 |
+
speed: float = 1.0,
|
| 198 |
+
fragment_interval: float = 0.3, # 句子间隔时长(秒)
|
| 199 |
) -> AsyncIterator[bytes]:
|
| 200 |
"""
|
| 201 |
Asynchronously generates speech from text and yields audio chunks.
|
|
|
|
| 246 |
context.current_text_language = normalize_language(text_language) if text_language else None
|
| 247 |
# 设置语速
|
| 248 |
context.current_speed = speed
|
| 249 |
+
# 设置句子间隔
|
| 250 |
+
context.current_fragment_interval = fragment_interval
|
| 251 |
|
| 252 |
# 3. 使用新的回调接口启动 TTS 会话
|
| 253 |
tts_player.start_session(
|
|
|
|
| 276 |
split_sentence: bool = True,
|
| 277 |
save_path: Union[str, PathLike, None] = None,
|
| 278 |
text_language: str = None,
|
| 279 |
+
speed: float = 1.0,
|
| 280 |
+
fragment_interval: float = 0.3, # 句子间隔时长(秒)
|
| 281 |
) -> None:
|
| 282 |
"""
|
| 283 |
Synchronously generates speech from text.
|
|
|
|
| 313 |
context.current_text_language = normalize_language(text_language) if text_language else None
|
| 314 |
# 设置语速
|
| 315 |
context.current_speed = speed
|
| 316 |
+
# 设置句子间隔
|
| 317 |
+
context.current_fragment_interval = fragment_interval
|
| 318 |
|
| 319 |
tts_player.start_session(
|
| 320 |
play=play,
|
genie_tts/Utils/Shared.py
CHANGED
|
@@ -10,6 +10,7 @@ class Context:
|
|
| 10 |
self.current_prompt_audio: Optional['ReferenceAudio'] = None
|
| 11 |
self.current_text_language: Optional[str] = None # 目标文本语言(跨语言TTS)
|
| 12 |
self.current_speed: float = 1.0 # 语速调节(0.5-2.0)
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
context: Context = Context()
|
|
|
|
| 10 |
self.current_prompt_audio: Optional['ReferenceAudio'] = None
|
| 11 |
self.current_text_language: Optional[str] = None # 目标文本语言(跨语言TTS)
|
| 12 |
self.current_speed: float = 1.0 # 语速调节(0.5-2.0)
|
| 13 |
+
self.current_fragment_interval: float = 0.3 # 句子间隔时长(秒)
|
| 14 |
|
| 15 |
|
| 16 |
context: Context = Context()
|