antigravity committed on
Commit
e081c7f
·
1 Parent(s): 35881ba

feat: add fragment_interval API parameter for adjustable sentence gap

Browse files
app.py CHANGED
@@ -101,7 +101,8 @@ async def upload_and_tts(
101
  text: str = Form(...),
102
  language: str = Form("zh"),
103
  text_lang: str = Form(None),
104
- speed: float = Form(1.0), # 语速调节(0.5-2.0)
 
105
  file: UploadFile = File(...)
106
  ):
107
  """
@@ -128,7 +129,7 @@ async def upload_and_tts(
128
 
129
  out_path = f"/tmp/out_{ts}.wav"
130
  # 🟢 执行 TTS
131
- genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang, speed=speed)
132
 
133
  # 🟢 关键:强制等待文件出现(最多等5秒)
134
  wait_time = 0
@@ -165,7 +166,8 @@ async def dynamic_tts(
165
  prompt_text: str = Form(None),
166
  prompt_lang: str = Form("zh"),
167
  text_lang: str = Form(None),
168
- speed: float = Form(1.0), # 语速调节(0.5-2.0)
 
169
  use_default_ref: bool = Form(True)
170
  ):
171
  """
@@ -187,7 +189,7 @@ async def dynamic_tts(
187
  genie_tts.set_reference_audio(character_name, ref_info["path"], final_text, prompt_lang)
188
 
189
  out_path = f"/tmp/out_dyn_{int(time.time())}.wav"
190
- genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang, speed=speed)
191
 
192
  # 🟢 等待文件生成(最多等5秒)
193
  wait_time = 0
 
101
  text: str = Form(...),
102
  language: str = Form("zh"),
103
  text_lang: str = Form(None),
104
+ speed: float = Form(1.0),
105
+ fragment_interval: float = Form(0.3), # 句子间隔时长(秒)
106
  file: UploadFile = File(...)
107
  ):
108
  """
 
129
 
130
  out_path = f"/tmp/out_{ts}.wav"
131
  # 🟢 执行 TTS
132
+ genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang, speed=speed, fragment_interval=fragment_interval)
133
 
134
  # 🟢 关键:强制等待文件出现(最多等5秒)
135
  wait_time = 0
 
166
  prompt_text: str = Form(None),
167
  prompt_lang: str = Form("zh"),
168
  text_lang: str = Form(None),
169
+ speed: float = Form(1.0),
170
+ fragment_interval: float = Form(0.3), # 句子间隔时长(秒)
171
  use_default_ref: bool = Form(True)
172
  ):
173
  """
 
189
  genie_tts.set_reference_audio(character_name, ref_info["path"], final_text, prompt_lang)
190
 
191
  out_path = f"/tmp/out_dyn_{int(time.time())}.wav"
192
+ genie_tts.tts(character_name, text, save_path=out_path, play=False, text_language=text_lang, speed=speed, fragment_interval=fragment_interval)
193
 
194
  # 🟢 等待文件生成(最多等5秒)
195
  wait_time = 0
genie_tts/Core/TTSPlayer.py CHANGED
@@ -150,8 +150,8 @@ class TTSPlayer:
150
 
151
  def _save_session_audio(self):
152
  try:
153
- # 🔥 修复:在音频块之间添加静音填充,实现自然过渡(参考 GPT-SoVITS fragment_interval)
154
- fragment_interval = 0.3 # 句子间隔时长(秒)
155
  zero_padding = np.zeros(int(self.sample_rate * fragment_interval), dtype=np.float32)
156
 
157
  padded_chunks = []
 
150
 
151
  def _save_session_audio(self):
152
  try:
153
+ # 🔥 句子间静音间隔(从 context 读取,支持 API 动态调节)
154
+ fragment_interval = context.current_fragment_interval
155
  zero_padding = np.zeros(int(self.sample_rate * fragment_interval), dtype=np.float32)
156
 
157
  padded_chunks = []
genie_tts/Internal.py CHANGED
@@ -194,7 +194,8 @@ async def tts_async(
194
  split_sentence: bool = False,
195
  save_path: Union[str, PathLike, None] = None,
196
  text_language: str = None,
197
- speed: float = 1.0, # 语速调节(0.5-2.0)
 
198
  ) -> AsyncIterator[bytes]:
199
  """
200
  Asynchronously generates speech from text and yields audio chunks.
@@ -245,6 +246,8 @@ async def tts_async(
245
  context.current_text_language = normalize_language(text_language) if text_language else None
246
  # 设置语速
247
  context.current_speed = speed
 
 
248
 
249
  # 3. 使用新的回调接口启动 TTS 会话
250
  tts_player.start_session(
@@ -273,7 +276,8 @@ def tts(
273
  split_sentence: bool = True,
274
  save_path: Union[str, PathLike, None] = None,
275
  text_language: str = None,
276
- speed: float = 1.0, # 语速调节(0.5-2.0)
 
277
  ) -> None:
278
  """
279
  Synchronously generates speech from text.
@@ -309,6 +313,8 @@ def tts(
309
  context.current_text_language = normalize_language(text_language) if text_language else None
310
  # 设置语速
311
  context.current_speed = speed
 
 
312
 
313
  tts_player.start_session(
314
  play=play,
 
194
  split_sentence: bool = False,
195
  save_path: Union[str, PathLike, None] = None,
196
  text_language: str = None,
197
+ speed: float = 1.0,
198
+ fragment_interval: float = 0.3, # 句子间隔时长(秒)
199
  ) -> AsyncIterator[bytes]:
200
  """
201
  Asynchronously generates speech from text and yields audio chunks.
 
246
  context.current_text_language = normalize_language(text_language) if text_language else None
247
  # 设置语速
248
  context.current_speed = speed
249
+ # 设置句子间隔
250
+ context.current_fragment_interval = fragment_interval
251
 
252
  # 3. 使用新的回调接口启动 TTS 会话
253
  tts_player.start_session(
 
276
  split_sentence: bool = True,
277
  save_path: Union[str, PathLike, None] = None,
278
  text_language: str = None,
279
+ speed: float = 1.0,
280
+ fragment_interval: float = 0.3, # 句子间隔时长(秒)
281
  ) -> None:
282
  """
283
  Synchronously generates speech from text.
 
313
  context.current_text_language = normalize_language(text_language) if text_language else None
314
  # 设置语速
315
  context.current_speed = speed
316
+ # 设置句子间隔
317
+ context.current_fragment_interval = fragment_interval
318
 
319
  tts_player.start_session(
320
  play=play,
genie_tts/Utils/Shared.py CHANGED
@@ -10,6 +10,7 @@ class Context:
10
  self.current_prompt_audio: Optional['ReferenceAudio'] = None
11
  self.current_text_language: Optional[str] = None # 目标文本语言(跨语言TTS)
12
  self.current_speed: float = 1.0 # 语速调节(0.5-2.0)
 
13
 
14
 
15
  context: Context = Context()
 
10
  self.current_prompt_audio: Optional['ReferenceAudio'] = None
11
  self.current_text_language: Optional[str] = None # 目标文本语言(跨语言TTS)
12
  self.current_speed: float = 1.0 # 语速调节(0.5-2.0)
13
+ self.current_fragment_interval: float = 0.3 # 句子间隔时长(秒)
14
 
15
 
16
  context: Context = Context()