Spaces:
Running on Zero
app.py
CHANGED
|
@@ -242,8 +242,12 @@ def infer_voice_clone( part, language,audio_tuple,ref_text,use_xvector_only):
|
|
| 242 |
return wavs[0], sr
|
| 243 |
|
| 244 |
@spaces.GPU
|
| 245 |
-
def infer_voice_clone_from_prompt(part, language,
|
| 246 |
"""Single segment inference for Voice Clone using pre-extracted prompt."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
tts = load_model("Base", "0.6B")
|
| 248 |
wavs, sr = tts.generate_voice_clone(
|
| 249 |
text=part,
|
|
@@ -368,10 +372,6 @@ def generate_voice_clone_from_prompt_file(prompt_file_path, target_text, languag
|
|
| 368 |
logger.info(f"开始 Voice Clone 生成任务(使用特征文件)。语言: {language}, 目标文本长度: {len(target_text)}, 特征文件: {prompt_file_path}")
|
| 369 |
try:
|
| 370 |
# 加载预提取的音频特征
|
| 371 |
-
logger.info("正在加载音频特征文件...")
|
| 372 |
-
voice_clone_prompt = torch.load(prompt_file_path, map_location='cpu')
|
| 373 |
-
logger.info("音频特征文件加载成功。")
|
| 374 |
-
|
| 375 |
text_parts = split_text(target_text.strip())
|
| 376 |
logger.info(f"目标目标文本已切分为 {len(text_parts)} 段。")
|
| 377 |
all_wavs = []
|
|
@@ -379,7 +379,7 @@ def generate_voice_clone_from_prompt_file(prompt_file_path, target_text, languag
|
|
| 379 |
|
| 380 |
for i, part in enumerate(progress.tqdm(text_parts, desc="正在生成分段")):
|
| 381 |
logger.info(f"正在处理第 {i+1}/{len(text_parts)} 段文本...")
|
| 382 |
-
wav, current_sr = infer_voice_clone_from_prompt(part, language,
|
| 383 |
all_wavs.append(wav)
|
| 384 |
sr = current_sr
|
| 385 |
|
|
|
|
| 242 |
return wavs[0], sr
|
| 243 |
|
| 244 |
@spaces.GPU
|
| 245 |
+
def infer_voice_clone_from_prompt(part, language, prompt_file_path):
|
| 246 |
"""Single segment inference for Voice Clone using pre-extracted prompt."""
|
| 247 |
+
logger.info("正在加载音频特征文件...")
|
| 248 |
+
voice_clone_prompt = torch.load(prompt_file_path, map_location='cuda', weights_only=False)
|
| 249 |
+
logger.info("音频特征文件加载成功。")
|
| 250 |
+
|
| 251 |
tts = load_model("Base", "0.6B")
|
| 252 |
wavs, sr = tts.generate_voice_clone(
|
| 253 |
text=part,
|
|
|
|
| 372 |
logger.info(f"开始 Voice Clone 生成任务(使用特征文件)。语言: {language}, 目标文本长度: {len(target_text)}, 特征文件: {prompt_file_path}")
|
| 373 |
try:
|
| 374 |
# 加载预提取的音频特征
|
|
|
|
|
|
|
|
|
|
|
|
|
| 375 |
text_parts = split_text(target_text.strip())
|
| 376 |
logger.info(f"目标目标文本已切分为 {len(text_parts)} 段。")
|
| 377 |
all_wavs = []
|
|
|
|
| 379 |
|
| 380 |
for i, part in enumerate(progress.tqdm(text_parts, desc="正在生成分段")):
|
| 381 |
logger.info(f"正在处理第 {i+1}/{len(text_parts)} 段文本...")
|
| 382 |
+
wav, current_sr = infer_voice_clone_from_prompt(part, language, prompt_file_path)
|
| 383 |
all_wavs.append(wav)
|
| 384 |
sr = current_sr
|
| 385 |
|