antigravity committed on
Commit
82f54c3
·
1 Parent(s): e081c7f

feat: add fade_duration API parameter for smooth fade in/out transitions

Browse files
app.py CHANGED
@@ -102,7 +102,8 @@ async def upload_and_tts(
102
  language: str = Form("zh"),
103
  text_lang: str = Form(None),
104
  speed: float = Form(1.0),
105
- fragment_interval: float = Form(0.3), # 句子间隔时长(秒)
 
106
  file: UploadFile = File(...)
107
  ):
108
  """
@@ -129,7 +130,7 @@ async def upload_and_tts(
129
 
130
  out_path = f"/tmp/out_{ts}.wav"
131
  # 🟢 执行 TTS
132
- genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang, speed=speed, fragment_interval=fragment_interval)
133
 
134
  # 🟢 关键:强制等待文件出现(最多等5秒)
135
  wait_time = 0
@@ -167,7 +168,8 @@ async def dynamic_tts(
167
  prompt_lang: str = Form("zh"),
168
  text_lang: str = Form(None),
169
  speed: float = Form(1.0),
170
- fragment_interval: float = Form(0.3), # 句子间隔时长(秒)
 
171
  use_default_ref: bool = Form(True)
172
  ):
173
  """
@@ -189,7 +191,7 @@ async def dynamic_tts(
189
  genie_tts.set_reference_audio(character_name, ref_info["path"], final_text, prompt_lang)
190
 
191
  out_path = f"/tmp/out_dyn_{int(time.time())}.wav"
192
- genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang, speed=speed, fragment_interval=fragment_interval)
193
 
194
  # 🟢 等待文件生成(最多等5秒)
195
  wait_time = 0
 
102
  language: str = Form("zh"),
103
  text_lang: str = Form(None),
104
  speed: float = Form(1.0),
105
+ fragment_interval: float = Form(0.3),
106
+ fade_duration: float = Form(0.0), # 淡入淡出时长(秒)
107
  file: UploadFile = File(...)
108
  ):
109
  """
 
130
 
131
  out_path = f"/tmp/out_{ts}.wav"
132
  # 🟢 执行 TTS
133
+ genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang, speed=speed, fragment_interval=fragment_interval, fade_duration=fade_duration)
134
 
135
  # 🟢 关键:强制等待文件出现(最多等5秒)
136
  wait_time = 0
 
168
  prompt_lang: str = Form("zh"),
169
  text_lang: str = Form(None),
170
  speed: float = Form(1.0),
171
+ fragment_interval: float = Form(0.3),
172
+ fade_duration: float = Form(0.0), # 淡入淡出时长(秒)
173
  use_default_ref: bool = Form(True)
174
  ):
175
  """
 
191
  genie_tts.set_reference_audio(character_name, ref_info["path"], final_text, prompt_lang)
192
 
193
  out_path = f"/tmp/out_dyn_{int(time.time())}.wav"
194
+ genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang, speed=speed, fragment_interval=fragment_interval, fade_duration=fade_duration)
195
 
196
  # 🟢 等待文件生成(最多等5秒)
197
  wait_time = 0
genie_tts/Core/TTSPlayer.py CHANGED
@@ -152,11 +152,29 @@ class TTSPlayer:
152
  try:
153
  # 🔥 句子间静音间隔(从 context 读取,支持 API 动态调节)
154
  fragment_interval = context.current_fragment_interval
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  zero_padding = np.zeros(int(self.sample_rate * fragment_interval), dtype=np.float32)
156
 
157
  padded_chunks = []
158
  for i, chunk in enumerate(self._session_audio_chunks):
159
- padded_chunks.append(chunk)
 
 
160
  # 在除最后一块外的每块后面添加静音
161
  if i < len(self._session_audio_chunks) - 1:
162
  padded_chunks.append(zero_padding)
 
152
  try:
153
  # 🔥 句子间静音间隔(从 context 读取,支持 API 动态调节)
154
  fragment_interval = context.current_fragment_interval
155
+ fade_duration = context.current_fade_duration
156
+
157
+ # 淡入淡出处理函数
158
def apply_fade(audio: np.ndarray, fade_samples: int) -> np.ndarray:
    """Apply a linear fade-in and fade-out to one audio chunk.

    The chunk is flattened to 1-D *before* its length is checked. For a
    multi-dimensional chunk (e.g. shape ``(1, N)``), ``len()`` reports the
    size of the first axis rather than the number of samples, which would
    make the length guard fire and skip the fade entirely.

    Args:
        audio: Audio samples; any shape, flattened to 1-D internally.
        fade_samples: Number of samples over which to ramp at each end.

    Returns:
        A new 1-D array. The first ``fade_samples`` samples are scaled by
        a 0 -> 1 ramp and the last ``fade_samples`` by a 1 -> 0 ramp. When
        ``fade_samples`` is not positive, or the chunk holds fewer than
        ``2 * fade_samples`` samples, the flattened samples are returned
        unscaled. The input array is never modified.
    """
    # ``flatten`` always returns a copy, so the caller's chunk is untouched.
    samples = np.asarray(audio).flatten()
    if fade_samples <= 0 or samples.size < fade_samples * 2:
        # Too short to hold both ramps: still return 1-D so every path
        # produces the same shape.
        return samples
    # Fade in (start of the chunk).
    fade_in = np.linspace(0, 1, fade_samples)
    samples[:fade_samples] *= fade_in
    # Fade out (end of the chunk).
    fade_out = np.linspace(1, 0, fade_samples)
    samples[-fade_samples:] *= fade_out
    return samples
169
+
170
+ fade_samples = int(self.sample_rate * fade_duration)
171
  zero_padding = np.zeros(int(self.sample_rate * fragment_interval), dtype=np.float32)
172
 
173
  padded_chunks = []
174
  for i, chunk in enumerate(self._session_audio_chunks):
175
+ # 对每个音频块应用淡入淡出
176
+ processed_chunk = apply_fade(chunk, fade_samples) if fade_duration > 0 else chunk.flatten()
177
+ padded_chunks.append(processed_chunk)
178
  # 在除最后一块外的每块后面添加静音
179
  if i < len(self._session_audio_chunks) - 1:
180
  padded_chunks.append(zero_padding)
genie_tts/Internal.py CHANGED
@@ -195,7 +195,8 @@ async def tts_async(
195
  save_path: Union[str, PathLike, None] = None,
196
  text_language: str = None,
197
  speed: float = 1.0,
198
- fragment_interval: float = 0.3, # 句子间隔时长(秒)
 
199
  ) -> AsyncIterator[bytes]:
200
  """
201
  Asynchronously generates speech from text and yields audio chunks.
@@ -248,6 +249,8 @@ async def tts_async(
248
  context.current_speed = speed
249
  # 设置句子间隔
250
  context.current_fragment_interval = fragment_interval
 
 
251
 
252
  # 3. 使用新的回调接口启动 TTS 会话
253
  tts_player.start_session(
@@ -277,7 +280,8 @@ def tts(
277
  save_path: Union[str, PathLike, None] = None,
278
  text_language: str = None,
279
  speed: float = 1.0,
280
- fragment_interval: float = 0.3, # 句子间隔时长(秒)
 
281
  ) -> None:
282
  """
283
  Synchronously generates speech from text.
@@ -315,6 +319,8 @@ def tts(
315
  context.current_speed = speed
316
  # 设置句子间隔
317
  context.current_fragment_interval = fragment_interval
 
 
318
 
319
  tts_player.start_session(
320
  play=play,
 
195
  save_path: Union[str, PathLike, None] = None,
196
  text_language: str = None,
197
  speed: float = 1.0,
198
+ fragment_interval: float = 0.3,
199
+ fade_duration: float = 0.0, # 淡入淡出时长(秒)
200
  ) -> AsyncIterator[bytes]:
201
  """
202
  Asynchronously generates speech from text and yields audio chunks.
 
249
  context.current_speed = speed
250
  # 设置句子间隔
251
  context.current_fragment_interval = fragment_interval
252
+ # 设置淡入淡出
253
+ context.current_fade_duration = fade_duration
254
 
255
  # 3. 使用新的回调接口启动 TTS 会话
256
  tts_player.start_session(
 
280
  save_path: Union[str, PathLike, None] = None,
281
  text_language: str = None,
282
  speed: float = 1.0,
283
+ fragment_interval: float = 0.3,
284
+ fade_duration: float = 0.0, # 淡入淡出时长(秒)
285
  ) -> None:
286
  """
287
  Synchronously generates speech from text.
 
319
  context.current_speed = speed
320
  # 设置句子间隔
321
  context.current_fragment_interval = fragment_interval
322
+ # 设置淡入淡出
323
+ context.current_fade_duration = fade_duration
324
 
325
  tts_player.start_session(
326
  play=play,
genie_tts/Utils/Shared.py CHANGED
@@ -11,6 +11,7 @@ class Context:
11
  self.current_text_language: Optional[str] = None # 目标文本语言(跨语言TTS)
12
  self.current_speed: float = 1.0 # 语速调节(0.5-2.0)
13
  self.current_fragment_interval: float = 0.3 # 句子间隔时长(秒)
 
14
 
15
 
16
  context: Context = Context()
 
11
  self.current_text_language: Optional[str] = None # 目标文本语言(跨语言TTS)
12
  self.current_speed: float = 1.0 # 语速调节(0.5-2.0)
13
  self.current_fragment_interval: float = 0.3 # 句子间隔时长(秒)
14
+ self.current_fade_duration: float = 0.0 # 淡入淡出时长(秒),0 表示不使用
15
 
16
 
17
  context: Context = Context()