Spaces:
Running on Zero
Running on Zero
app.py
CHANGED
|
@@ -292,9 +292,22 @@ def extract_voice_clone_prompt(ref_audio,ref_text,use_xvector_only):
|
|
| 292 |
if audio_tuple is None:
|
| 293 |
return None, "错误:需要参考音频。"
|
| 294 |
|
| 295 |
-
if not use_xvector_only and (not ref_text or not ref_text.strip()):
|
| 296 |
-
|
| 297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
|
| 299 |
voice_clone_prompt = tts.create_voice_clone_prompt(
|
| 300 |
ref_audio=audio_tuple,
|
|
|
|
| 292 |
if audio_tuple is None:
|
| 293 |
return None, "错误:需要参考音频。"
|
| 294 |
|
| 295 |
+
# if not use_xvector_only and (not ref_text or not ref_text.strip()):
|
| 296 |
+
# return None, "错误:未启用 '仅使用 x-vector' 时需要参考文本。"
|
| 297 |
+
model_size = "base"
|
| 298 |
+
logger.info(f"开始 Whisper 语音识别任务。模型: {model_size}, 音频路径: {ref_audio}")
|
| 299 |
+
try:
|
| 300 |
+
model = load_whisper_model(model_size)
|
| 301 |
+
# 使用 transcribe 方法进行转录
|
| 302 |
+
# whisper 会自动处理音频加载和重采样
|
| 303 |
+
result = model.transcribe(ref_audio)
|
| 304 |
+
text = result["text"]
|
| 305 |
+
logger.info(f"Whisper 识别完成。文本长度: {len(text)}")
|
| 306 |
+
ref_text = text.strip()
|
| 307 |
+
logger.error(f"Whisper 识别成功:{ref_text}")
|
| 308 |
+
use_xvector_only = False
|
| 309 |
+
except Exception as e:
|
| 310 |
+
logger.error(f"Whisper 识别失败: {str(e)}", exc_info=True)
|
| 311 |
|
| 312 |
voice_clone_prompt = tts.create_voice_clone_prompt(
|
| 313 |
ref_audio=audio_tuple,
|