Spaces:
Sleeping
Sleeping
antigravity committed on
Commit ·
82f54c3
1
Parent(s): e081c7f
feat: add fade_duration API parameter for smooth fade in/out transitions
Browse files
- app.py +6 -4
- genie_tts/Core/TTSPlayer.py +19 -1
- genie_tts/Internal.py +8 -2
- genie_tts/Utils/Shared.py +1 -0
app.py
CHANGED
|
@@ -102,7 +102,8 @@ async def upload_and_tts(
|
|
| 102 |
language: str = Form("zh"),
|
| 103 |
text_lang: str = Form(None),
|
| 104 |
speed: float = Form(1.0),
|
| 105 |
-
fragment_interval: float = Form(0.3),
|
|
|
|
| 106 |
file: UploadFile = File(...)
|
| 107 |
):
|
| 108 |
"""
|
|
@@ -129,7 +130,7 @@ async def upload_and_tts(
|
|
| 129 |
|
| 130 |
out_path = f"/tmp/out_{ts}.wav"
|
| 131 |
# 🟢 执行 TTS
|
| 132 |
-
genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang, speed=speed, fragment_interval=fragment_interval)
|
| 133 |
|
| 134 |
# 🟢 关键:强制等待文件出现(最多等5秒)
|
| 135 |
wait_time = 0
|
|
@@ -167,7 +168,8 @@ async def dynamic_tts(
|
|
| 167 |
prompt_lang: str = Form("zh"),
|
| 168 |
text_lang: str = Form(None),
|
| 169 |
speed: float = Form(1.0),
|
| 170 |
-
fragment_interval: float = Form(0.3),
|
|
|
|
| 171 |
use_default_ref: bool = Form(True)
|
| 172 |
):
|
| 173 |
"""
|
|
@@ -189,7 +191,7 @@ async def dynamic_tts(
|
|
| 189 |
genie_tts.set_reference_audio(character_name, ref_info["path"], final_text, prompt_lang)
|
| 190 |
|
| 191 |
out_path = f"/tmp/out_dyn_{int(time.time())}.wav"
|
| 192 |
-
genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang, speed=speed, fragment_interval=fragment_interval)
|
| 193 |
|
| 194 |
# 🟢 等待文件生成(最多等5秒)
|
| 195 |
wait_time = 0
|
|
|
|
| 102 |
language: str = Form("zh"),
|
| 103 |
text_lang: str = Form(None),
|
| 104 |
speed: float = Form(1.0),
|
| 105 |
+
fragment_interval: float = Form(0.3),
|
| 106 |
+
fade_duration: float = Form(0.0), # 淡入淡出时长(秒)
|
| 107 |
file: UploadFile = File(...)
|
| 108 |
):
|
| 109 |
"""
|
|
|
|
| 130 |
|
| 131 |
out_path = f"/tmp/out_{ts}.wav"
|
| 132 |
# 🟢 执行 TTS
|
| 133 |
+
genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang, speed=speed, fragment_interval=fragment_interval, fade_duration=fade_duration)
|
| 134 |
|
| 135 |
# 🟢 关键:强制等待文件出现(最多等5秒)
|
| 136 |
wait_time = 0
|
|
|
|
| 168 |
prompt_lang: str = Form("zh"),
|
| 169 |
text_lang: str = Form(None),
|
| 170 |
speed: float = Form(1.0),
|
| 171 |
+
fragment_interval: float = Form(0.3),
|
| 172 |
+
fade_duration: float = Form(0.0), # 淡入淡出时长(秒)
|
| 173 |
use_default_ref: bool = Form(True)
|
| 174 |
):
|
| 175 |
"""
|
|
|
|
| 191 |
genie_tts.set_reference_audio(character_name, ref_info["path"], final_text, prompt_lang)
|
| 192 |
|
| 193 |
out_path = f"/tmp/out_dyn_{int(time.time())}.wav"
|
| 194 |
+
genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang, speed=speed, fragment_interval=fragment_interval, fade_duration=fade_duration)
|
| 195 |
|
| 196 |
# 🟢 等待文件生成(最多等5秒)
|
| 197 |
wait_time = 0
|
genie_tts/Core/TTSPlayer.py
CHANGED
|
@@ -152,11 +152,29 @@ class TTSPlayer:
|
|
| 152 |
try:
|
| 153 |
# 🔥 句子间静音间隔(从 context 读取,支持 API 动态调节)
|
| 154 |
fragment_interval = context.current_fragment_interval
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
zero_padding = np.zeros(int(self.sample_rate * fragment_interval), dtype=np.float32)
|
| 156 |
|
| 157 |
padded_chunks = []
|
| 158 |
for i, chunk in enumerate(self._session_audio_chunks):
|
| 159 |
-
|
|
|
|
|
|
|
| 160 |
# 在除最后一块外的每块后面添加静音
|
| 161 |
if i < len(self._session_audio_chunks) - 1:
|
| 162 |
padded_chunks.append(zero_padding)
|
|
|
|
| 152 |
try:
|
| 153 |
# 🔥 句子间静音间隔(从 context 读取,支持 API 动态调节)
|
| 154 |
fragment_interval = context.current_fragment_interval
|
| 155 |
+
fade_duration = context.current_fade_duration
|
| 156 |
+
|
| 157 |
+
# 淡入淡出处理函数
|
| 158 |
+
def apply_fade(audio: np.ndarray, fade_samples: int) -> np.ndarray:
|
| 159 |
+
if fade_samples <= 0 or len(audio) < fade_samples * 2:
|
| 160 |
+
return audio
|
| 161 |
+
audio = audio.copy().flatten()
|
| 162 |
+
# 淡入(开头)
|
| 163 |
+
fade_in = np.linspace(0, 1, fade_samples)
|
| 164 |
+
audio[:fade_samples] *= fade_in
|
| 165 |
+
# 淡出(结尾)
|
| 166 |
+
fade_out = np.linspace(1, 0, fade_samples)
|
| 167 |
+
audio[-fade_samples:] *= fade_out
|
| 168 |
+
return audio
|
| 169 |
+
|
| 170 |
+
fade_samples = int(self.sample_rate * fade_duration)
|
| 171 |
zero_padding = np.zeros(int(self.sample_rate * fragment_interval), dtype=np.float32)
|
| 172 |
|
| 173 |
padded_chunks = []
|
| 174 |
for i, chunk in enumerate(self._session_audio_chunks):
|
| 175 |
+
# 对每个音频块应用淡入淡出
|
| 176 |
+
processed_chunk = apply_fade(chunk, fade_samples) if fade_duration > 0 else chunk.flatten()
|
| 177 |
+
padded_chunks.append(processed_chunk)
|
| 178 |
# 在除最后一块外的每块后面添加静音
|
| 179 |
if i < len(self._session_audio_chunks) - 1:
|
| 180 |
padded_chunks.append(zero_padding)
|
genie_tts/Internal.py
CHANGED
|
@@ -195,7 +195,8 @@ async def tts_async(
|
|
| 195 |
save_path: Union[str, PathLike, None] = None,
|
| 196 |
text_language: str = None,
|
| 197 |
speed: float = 1.0,
|
| 198 |
-
fragment_interval: float = 0.3,
|
|
|
|
| 199 |
) -> AsyncIterator[bytes]:
|
| 200 |
"""
|
| 201 |
Asynchronously generates speech from text and yields audio chunks.
|
|
@@ -248,6 +249,8 @@ async def tts_async(
|
|
| 248 |
context.current_speed = speed
|
| 249 |
# 设置句子间隔
|
| 250 |
context.current_fragment_interval = fragment_interval
|
|
|
|
|
|
|
| 251 |
|
| 252 |
# 3. 使用新的回调接口启动 TTS 会话
|
| 253 |
tts_player.start_session(
|
|
@@ -277,7 +280,8 @@ def tts(
|
|
| 277 |
save_path: Union[str, PathLike, None] = None,
|
| 278 |
text_language: str = None,
|
| 279 |
speed: float = 1.0,
|
| 280 |
-
fragment_interval: float = 0.3,
|
|
|
|
| 281 |
) -> None:
|
| 282 |
"""
|
| 283 |
Synchronously generates speech from text.
|
|
@@ -315,6 +319,8 @@ def tts(
|
|
| 315 |
context.current_speed = speed
|
| 316 |
# 设置句子间隔
|
| 317 |
context.current_fragment_interval = fragment_interval
|
|
|
|
|
|
|
| 318 |
|
| 319 |
tts_player.start_session(
|
| 320 |
play=play,
|
|
|
|
| 195 |
save_path: Union[str, PathLike, None] = None,
|
| 196 |
text_language: str = None,
|
| 197 |
speed: float = 1.0,
|
| 198 |
+
fragment_interval: float = 0.3,
|
| 199 |
+
fade_duration: float = 0.0, # 淡入淡出时长(秒)
|
| 200 |
) -> AsyncIterator[bytes]:
|
| 201 |
"""
|
| 202 |
Asynchronously generates speech from text and yields audio chunks.
|
|
|
|
| 249 |
context.current_speed = speed
|
| 250 |
# 设置句子间隔
|
| 251 |
context.current_fragment_interval = fragment_interval
|
| 252 |
+
# 设置淡入淡出
|
| 253 |
+
context.current_fade_duration = fade_duration
|
| 254 |
|
| 255 |
# 3. 使用新的回调接口启动 TTS 会话
|
| 256 |
tts_player.start_session(
|
|
|
|
| 280 |
save_path: Union[str, PathLike, None] = None,
|
| 281 |
text_language: str = None,
|
| 282 |
speed: float = 1.0,
|
| 283 |
+
fragment_interval: float = 0.3,
|
| 284 |
+
fade_duration: float = 0.0, # 淡入淡出时长(秒)
|
| 285 |
) -> None:
|
| 286 |
"""
|
| 287 |
Synchronously generates speech from text.
|
|
|
|
| 319 |
context.current_speed = speed
|
| 320 |
# 设置句子间隔
|
| 321 |
context.current_fragment_interval = fragment_interval
|
| 322 |
+
# 设置淡入淡出
|
| 323 |
+
context.current_fade_duration = fade_duration
|
| 324 |
|
| 325 |
tts_player.start_session(
|
| 326 |
play=play,
|
genie_tts/Utils/Shared.py
CHANGED
|
@@ -11,6 +11,7 @@ class Context:
|
|
| 11 |
self.current_text_language: Optional[str] = None # 目标文本语言(跨语言TTS)
|
| 12 |
self.current_speed: float = 1.0 # 语速调节(0.5-2.0)
|
| 13 |
self.current_fragment_interval: float = 0.3 # 句子间隔时长(秒)
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
context: Context = Context()
|
|
|
|
| 11 |
self.current_text_language: Optional[str] = None # 目标文本语言(跨语言TTS)
|
| 12 |
self.current_speed: float = 1.0 # 语速调节(0.5-2.0)
|
| 13 |
self.current_fragment_interval: float = 0.3 # 句子间隔时长(秒)
|
| 14 |
+
self.current_fade_duration: float = 0.0 # 淡入淡出时长(秒),0 表示不使用
|
| 15 |
|
| 16 |
|
| 17 |
context: Context = Context()
|