Spaces:

smartwang
/

magicvoice

Running on Zero

smartwang committed on Feb 14

Commit

265bb42

1 Parent(s): a9d9345

T

Files changed (1) hide show

app.py CHANGED Viewed

@@ -292,9 +292,22 @@ def extract_voice_clone_prompt(ref_audio,ref_text,use_xvector_only):
     if audio_tuple is None:
         return None, "错误：需要参考音频。"
-    if not use_xvector_only and (not ref_text or not ref_text.strip()):
-        return None, "错误：未启用 '仅使用 x-vector' 时需要参考文本。"
     voice_clone_prompt = tts.create_voice_clone_prompt(
         ref_audio=audio_tuple,

     if audio_tuple is None:
         return None, "错误：需要参考音频。"
+    # if not use_xvector_only and (not ref_text or not ref_text.strip()):
+    #     return None, "错误：未启用 '仅使用 x-vector' 时需要参考文本。"
+    model_size = "base"
+    logger.info(f"开始 Whisper 语音识别任务。模型: {model_size}, 音频路径: {ref_audio}")
+    try:
+        model = load_whisper_model(model_size)
+        # 使用 transcribe 方法进行转录
+        # whisper 会自动处理音频加载和重采样
+        result = model.transcribe(ref_audio)
+        text = result["text"]
+        logger.info(f"Whisper 识别完成。文本长度: {len(text)}")
+        ref_text = text.strip()
+        logger.error(f"Whisper 识别成功：{ref_text}")
+        use_xvector_only = False
+    except Exception as e:
+        logger.error(f"Whisper 识别失败: {str(e)}", exc_info=True)
     voice_clone_prompt = tts.create_voice_clone_prompt(
         ref_audio=audio_tuple,