Spaces:

1een
/

whisper

Sleeping

App Files Files Community

1een committed on Jul 22, 2025

Commit

1d36f6f

1 Parent(s): 25e95ed

re

Browse files

Files changed (2) hide show

fixed_app.py +83 -69
startup.sh +1 -1

fixed_app.py CHANGED Viewed

@@ -3,11 +3,12 @@ from pydantic import BaseModel
 import base64
 import tempfile
 import os
 from typing import Optional
 import logging
 import time
-from fastapi.responses import StreamingResponse
 import asyncio
 # 设置缓存目录
 os.environ['XDG_CACHE_HOME'] = '/app/.cache'
@@ -180,7 +181,7 @@ def decode_audio(audio_base64: str) -> str:
 @app.post("/transcribe")
 async def transcribe_audio(request: AudioRequest):
-    """音频转录API，异步调用 whisper.cpp 并流式返回分段结果"""
     try:
         logger.info(f"收到转录请求: 模型={request.model}, 语言={request.language}")
@@ -191,7 +192,7 @@ async def transcribe_audio(request: AudioRequest):
         model_path = load_model(request.model)
         logger.info(f"使用模型: {model_path}")
-        # 检查whisper.cpp二进制路径，优先使用新的whisper-cli
         whisper_binary = "/app/build/bin/whisper-cli"
         logger.info(f"使用whisper二进制: {whisper_binary}")
@@ -201,92 +202,105 @@ async def transcribe_audio(request: AudioRequest):
             logger.info(f"音频格式不直接支持，将转换为WAV: {audio_file}")
             audio_file = await convert_audio_to_wav(audio_file)
-        # 新的whisper-cli命令格式 - 不输出到文件，直接输出到stdout
         cmd = [
             whisper_binary,
             "-m", model_path,
             "-f", audio_file,
             "-l", request.language or "zh",
             "-oj",  # --output-json: 输出JSON格式
-            "-pp",  # --print-progress: 显示进度
             "-t", str(os.cpu_count() or 1),
         ]
-        # 记录完整命令
-        # /app/build/bin/whisper-cli -m /app/models/ggml-tiny.bin -f /tmp/tmpx04yuy50_converted.wav -l zh -oj -pp -t 16
-        logger.info(f"完整命令: {' '.join(cmd)}")
         # 添加可选参数
         if request.beam_size:
             cmd += ["-bs", str(request.beam_size)]  # --beam-size 的简写
         if request.temperature:
             cmd += ["-tp", str(request.temperature)]  # --temperature 的简写
-        async def event_stream():
             proc = await asyncio.create_subprocess_exec(
                 *cmd,
                 stdout=asyncio.subprocess.PIPE,
                 stderr=asyncio.subprocess.STDOUT,
             )
-            try:
-                output_buffer = []
-                async for line in proc.stdout:
-                    decoded_line = line.decode().strip()
-                    if decoded_line:  # 只处理非空行
-                        logger.info(f"whisper输出: {decoded_line}")
-                        output_buffer.append(decoded_line)
-                        # 如果是JSON格式，作为SSE事件发送
-                        if decoded_line.startswith("{") and decoded_line.endswith("}"):
-                            yield f"data: {decoded_line}\n\n"
-                        # 如果包含进度信息，发送进度更新
-                        elif any(keyword in decoded_line.lower() for keyword in ["progress", "%", "processing"]):
-                            yield f"data: {{\"progress\": \"{decoded_line}\"}}\n\n"
-                        # 发送所有输出作为调试信息
-                        else:
-                            yield f"data: {{\"debug\": \"{decoded_line}\"}}\n\n"
-                # 等待进程完成
-                exit_code = await proc.wait()
-                logger.info(f"命令执行完成，退出码: {exit_code}")
-                # 如果没有JSON输出但有其他输出，尝试读取输出文件
-                if output_buffer and not any(line.startswith("{") for line in output_buffer):
-                    # 尝试读取whisper-cli生成的JSON文件
-                    json_output_file = audio_file + ".json"
-                    if os.path.exists(json_output_file):
-                        try:
-                            with open(json_output_file, 'r', encoding='utf-8') as f:
-                                json_content = f.read()
-                                yield f"data: {json_content}\n\n"
-                            # 删除输出文件
-                            os.unlink(json_output_file)
-                        except Exception as e:
-                            logger.error(f"读取JSON输出文件失败: {e}")
-                    # 将所有输出作为文本结果返回
-                    text_result = "\n".join(output_buffer)
-                    yield f"data: {{\"text\": \"{text_result}\", \"type\": \"text_output\"}}\n\n"
-                # 发送结束信号
-                yield f"data: {{\"status\": \"completed\", \"exit_code\": {exit_code}, \"total_lines\": {len(output_buffer)}}}\n\n"
-            except Exception as e:
-                logger.error(f"处理过程中出错: {e}")
-                yield f"data: {{\"error\": \"{str(e)}\", \"status\": \"error\"}}\n\n"
-            finally:
-                # 清理临时文件
-                if os.path.exists(audio_file):
-                    os.unlink(audio_file)
-                # 如果有转换后的文件，也要清理
-                if audio_file.endswith('_converted.wav'):
-                    original_file = audio_file.replace('_converted.wav', '.m4a')
-                    if os.path.exists(original_file):
-                        os.unlink(original_file)
-        return StreamingResponse(event_stream(), media_type="text/event-stream")
     except Exception as e:
         logger.error(f"转录失败: {e}")
         raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")

 import base64
 import tempfile
 import os
+import json
 from typing import Optional
 import logging
 import time
 import asyncio
+import shutil
 # 设置缓存目录
 os.environ['XDG_CACHE_HOME'] = '/app/.cache'
 @app.post("/transcribe")
 async def transcribe_audio(request: AudioRequest):
+    """音频转录API，异步调用 whisper.cpp 并返回转录结果"""
     try:
         logger.info(f"收到转录请求: 模型={request.model}, 语言={request.language}")
         model_path = load_model(request.model)
         logger.info(f"使用模型: {model_path}")
+        # 检查whisper.cpp二进制路径
         whisper_binary = "/app/build/bin/whisper-cli"
         logger.info(f"使用whisper二进制: {whisper_binary}")
             logger.info(f"音频格式不直接支持，将转换为WAV: {audio_file}")
             audio_file = await convert_audio_to_wav(audio_file)
+        # 创建临时目录用于输出
+        temp_dir = tempfile.mkdtemp()
+        output_file = os.path.join(temp_dir, "output")
+        # 构建命令
         cmd = [
             whisper_binary,
             "-m", model_path,
             "-f", audio_file,
             "-l", request.language or "zh",
             "-oj",  # --output-json: 输出JSON格式
+            "-of", output_file,  # 指定输出文件
             "-t", str(os.cpu_count() or 1),
         ]
         # 添加可选参数
         if request.beam_size:
             cmd += ["-bs", str(request.beam_size)]  # --beam-size 的简写
         if request.temperature:
             cmd += ["-tp", str(request.temperature)]  # --temperature 的简写
+        logger.info(f"完整命令: {' '.join(cmd)}")
+        try:
+            # 执行命令
+            start_time = time.time()
+            logger.info(f"开始执行命令: {' '.join(cmd)}")
             proc = await asyncio.create_subprocess_exec(
                 *cmd,
                 stdout=asyncio.subprocess.PIPE,
                 stderr=asyncio.subprocess.STDOUT,
             )
+            # 收集输出
+            stdout, _ = await proc.communicate()
+            output_text = stdout.decode() if stdout else ""
+            # 记录输出日志
+            for line in output_text.splitlines():
+                if line.strip():
+                    logger.info(f"whisper输出: {line.strip()}")
+            # 检查退出码
+            exit_code = proc.returncode
+            processing_time = time.time() - start_time
+            logger.info(f"命令执行完成，退出码: {exit_code}，处理时间: {processing_time:.2f}秒")
+            # 读取JSON输出文件
+            json_output_file = output_file + ".json"
+            result = {}
+            if os.path.exists(json_output_file):
+                try:
+                    with open(json_output_file, 'r', encoding='utf-8') as f:
+                        result = json.loads(f.read())
+                    logger.info(f"成功读取JSON输出文件: {json_output_file}")
+                except Exception as e:
+                    logger.error(f"读取JSON输出文件失败: {e}")
+                    result = {"error": f"Failed to read JSON output: {str(e)}"}
+            else:
+                # 如果没有JSON输出，使用命令行输出
+                logger.warning(f"未找到JSON输出文件: {json_output_file}")
+                result = {
+                    "text": output_text,
+                    "status": "completed" if exit_code == 0 else "failed",
+                    "exit_code": exit_code
+                }
+            # 添加处理信息
+            result["processing_info"] = {
+                "model": request.model,
+                "language": request.language,
+                "exit_code": exit_code,
+                "processing_time_seconds": round(processing_time, 2),
+                "command": " ".join(cmd)
+            }
+            return result
+        except Exception as e:
+            logger.error(f"处理过程中出错: {e}")
+            raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}")
+        finally:
+            # 清理临时文件
+            if os.path.exists(audio_file):
+                os.unlink(audio_file)
+            # 如果有转换后的文件，也要清理
+            if audio_file.endswith('_converted.wav'):
+                original_file = audio_file.replace('_converted.wav', '.m4a')
+                if os.path.exists(original_file):
+                    os.unlink(original_file)
+            # 清理输出文件
+            json_output_file = output_file + ".json"
+            if os.path.exists(json_output_file):
+                os.unlink(json_output_file)
+            # 清理临时目录
+            if os.path.exists(temp_dir):
+                import shutil
+                shutil.rmtree(temp_dir, ignore_errors=True)
     except Exception as e:
         logger.error(f"转录失败: {e}")
         raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")

startup.sh CHANGED Viewed

@@ -26,4 +26,4 @@ fi
 # 直接启动应用，不需要安装额外依赖
 echo "Starting FastAPI application..."
-python3 -m uvicorn fixed_app:app --host 0.0.0.0 --port 7860 --log-level info || python3 -m uvicorn simple_app:app --host 0.0.0.0 --port 7860 --log-level info

 # 直接启动应用，不需要安装额外依赖
 echo "Starting FastAPI application..."
+python3 -m uvicorn fixed_app:app --host 0.0.0.0 --port 7860 --log-level info