Spaces:

1een
/

whisper

Running

App Files Files Community

1een committed on Jul 22, 2025

Commit

d2ebecd

1 Parent(s): fba960c

ff

Browse files

Files changed (1) hide show

fixed_app.py +106 -0

fixed_app.py CHANGED Viewed

@@ -80,6 +80,56 @@ def load_model(model_name: str):
     logger.error(f"找不到模型 {model_name}，请确保模型文件存在")
     raise HTTPException(status_code=500, detail=f"Model {model_name} not found")
 def decode_audio(audio_base64: str) -> str:
     """解码base64音频数据并保存为临时文件，返回文件路径"""
     try:
@@ -192,6 +242,12 @@ async def transcribe_audio(request: AudioRequest):
         logger.info(f"使用whisper二进制: {whisper_binary}")
         # 构建命令 - 根据二进制文件类型调整参数
         if "whisper-cli" in whisper_binary:
             # 新的whisper-cli命令格式 - 不输出到文件，直接输出到stdout
@@ -280,6 +336,11 @@ async def transcribe_audio(request: AudioRequest):
                 # 清理临时文件
                 if os.path.exists(audio_file):
                     os.unlink(audio_file)
         return StreamingResponse(event_stream(), media_type="text/event-stream")
     except Exception as e:
@@ -355,6 +416,50 @@ async def test_audio_decode(request: AudioRequest):
             "error": str(e)
         }
 @app.get("/")
 async def root():
     """根路径"""
@@ -367,6 +472,7 @@ async def root():
             "transcribe": "/transcribe",
             "test": "/test",
             "test-audio": "/test-audio",
             "test-transcribe": "/test-transcribe"
         }
     }

     logger.error(f"找不到模型 {model_name}，请确保模型文件存在")
     raise HTTPException(status_code=500, detail=f"Model {model_name} not found")
+async def convert_audio_to_wav(input_file: str) -> str:
+    """使用ffmpeg将音频文件转换为WAV格式"""
+    try:
+        # 创建输出文件路径
+        output_file = input_file.rsplit('.', 1)[0] + '_converted.wav'
+        # 构建ffmpeg命令
+        cmd = [
+            'ffmpeg',
+            '-i', input_file,           # 输入文件
+            '-acodec', 'pcm_s16le',     # 音频编码器：16位PCM
+            '-ar', '16000',             # 采样率：16kHz（whisper推荐）
+            '-ac', '1',                 # 声道数：单声道
+            '-y',                       # 覆盖输出文件
+            output_file
+        ]
+        logger.info(f"开始音频转换: {' '.join(cmd)}")
+        # 执行ffmpeg命令
+        proc = await asyncio.create_subprocess_exec(
+            *cmd,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE
+        )
+        stdout, stderr = await proc.communicate()
+        if proc.returncode != 0:
+            error_msg = stderr.decode() if stderr else "Unknown ffmpeg error"
+            logger.error(f"音频转换失败: {error_msg}")
+            raise HTTPException(status_code=500, detail=f"Audio conversion failed: {error_msg}")
+        # 验证输出文件是否存在
+        if not os.path.exists(output_file):
+            raise HTTPException(status_code=500, detail="Converted audio file not found")
+        # 删除原始文件
+        if os.path.exists(input_file):
+            os.unlink(input_file)
+        logger.info(f"音频转换成功: {output_file}, 大小: {os.path.getsize(output_file)} 字节")
+        return output_file
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"音频转换过程中出错: {e}")
+        raise HTTPException(status_code=500, detail=f"Audio conversion error: {str(e)}")
 def decode_audio(audio_base64: str) -> str:
     """解码base64音频数据并保存为临时文件，返回文件路径"""
     try:
         logger.info(f"使用whisper二进制: {whisper_binary}")
+        # 检查音频格式，如果不支持则转换为WAV
+        supported_formats = ('.wav', '.flac', '.mp3', '.ogg')
+        if not audio_file.endswith(supported_formats):
+            logger.info(f"音频格式不直接支持，将转换为WAV: {audio_file}")
+            audio_file = await convert_audio_to_wav(audio_file)
         # 构建命令 - 根据二进制文件类型调整参数
         if "whisper-cli" in whisper_binary:
             # 新的whisper-cli命令格式 - 不输出到文件，直接输出到stdout
                 # 清理临时文件
                 if os.path.exists(audio_file):
                     os.unlink(audio_file)
+                # 如果有转换后的文件，也要清理
+                if audio_file.endswith('_converted.wav'):
+                    original_file = audio_file.replace('_converted.wav', '.m4a')
+                    if os.path.exists(original_file):
+                        os.unlink(original_file)
         return StreamingResponse(event_stream(), media_type="text/event-stream")
     except Exception as e:
             "error": str(e)
         }
+@app.post("/test-convert")
+async def test_audio_conversion(request: AudioRequest):
+    """测试音频格式转换功能"""
+    try:
+        # 解码音频
+        audio_file = decode_audio(request.audio)
+        logger.info(f"原始音频文件: {audio_file}")
+        # 检查是否需要转换
+        if not audio_file.endswith(('.wav', '.flac', '.mp3', '.ogg')):
+            logger.info("需要转换格式")
+            converted_file = await convert_audio_to_wav(audio_file)
+            # 获取转换后的文件信息
+            converted_size = os.path.getsize(converted_file)
+            # 清理文件
+            os.unlink(converted_file)
+            return {
+                "status": "success",
+                "message": "音频转换成功",
+                "original_file": audio_file,
+                "converted_file": converted_file,
+                "converted_size": converted_size,
+                "conversion_needed": True
+            }
+        else:
+            # 清理文件
+            os.unlink(audio_file)
+            return {
+                "status": "success",
+                "message": "音频格式已支持，无需转换",
+                "original_file": audio_file,
+                "conversion_needed": False
+            }
+    except Exception as e:
+        return {
+            "status": "error",
+            "error": str(e)
+        }
 @app.get("/")
 async def root():
     """根路径"""
             "transcribe": "/transcribe",
             "test": "/test",
             "test-audio": "/test-audio",
+            "test-convert": "/test-convert",
             "test-transcribe": "/test-transcribe"
         }
     }