smartwang committed on
Commit
a9ab89b
·
1 Parent(s): a8ab3f2
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -242,8 +242,12 @@ def infer_voice_clone( part, language,audio_tuple,ref_text,use_xvector_only):
242
  return wavs[0], sr
243
 
244
  @spaces.GPU
245
- def infer_voice_clone_from_prompt(part, language, voice_clone_prompt):
246
  """Single segment inference for Voice Clone using pre-extracted prompt."""
 
 
 
 
247
  tts = load_model("Base", "0.6B")
248
  wavs, sr = tts.generate_voice_clone(
249
  text=part,
@@ -368,10 +372,6 @@ def generate_voice_clone_from_prompt_file(prompt_file_path, target_text, languag
368
  logger.info(f"开始 Voice Clone 生成任务(使用特征文件)。语言: {language}, 目标文本长度: {len(target_text)}, 特征文件: {prompt_file_path}")
369
  try:
370
  # 加载预提取的音频特征
371
- logger.info("正在加载音频特征文件...")
372
- voice_clone_prompt = torch.load(prompt_file_path, map_location='cpu')
373
- logger.info("音频特征文件加载成功。")
374
-
375
  text_parts = split_text(target_text.strip())
376
  logger.info(f"目标目标文本已切分为 {len(text_parts)} 段。")
377
  all_wavs = []
@@ -379,7 +379,7 @@ def generate_voice_clone_from_prompt_file(prompt_file_path, target_text, languag
379
 
380
  for i, part in enumerate(progress.tqdm(text_parts, desc="正在生成分段")):
381
  logger.info(f"正在处理第 {i+1}/{len(text_parts)} 段文本...")
382
- wav, current_sr = infer_voice_clone_from_prompt(part, language, voice_clone_prompt)
383
  all_wavs.append(wav)
384
  sr = current_sr
385
 
 
242
  return wavs[0], sr
243
 
244
  @spaces.GPU
245
+ def infer_voice_clone_from_prompt(part, language, prompt_file_path):
246
  """Single segment inference for Voice Clone using pre-extracted prompt."""
247
+ logger.info("正在加载音频特征文件...")
248
+ voice_clone_prompt = torch.load(prompt_file_path, map_location='cuda', weights_only=False)
249
+ logger.info("音频特征文件加载成功。")
250
+
251
  tts = load_model("Base", "0.6B")
252
  wavs, sr = tts.generate_voice_clone(
253
  text=part,
 
372
  logger.info(f"开始 Voice Clone 生成任务(使用特征文件)。语言: {language}, 目标文本长度: {len(target_text)}, 特征文件: {prompt_file_path}")
373
  try:
374
  # 加载预提取的音频特征
 
 
 
 
375
  text_parts = split_text(target_text.strip())
376
  logger.info(f"目标目标文本已切分为 {len(text_parts)} 段。")
377
  all_wavs = []
 
379
 
380
  for i, part in enumerate(progress.tqdm(text_parts, desc="正在生成分段")):
381
  logger.info(f"正在处理第 {i+1}/{len(text_parts)} 段文本...")
382
+ wav, current_sr = infer_voice_clone_from_prompt(part, language, prompt_file_path)
383
  all_wavs.append(wav)
384
  sr = current_sr
385