Spaces:

1een
/

whisper

Running

App Files Files Community

1een committed on Jul 24, 2025

Commit

705e333

1 Parent(s): 6a589e7

8

Browse files

Files changed (3) hide show

app.py +0 -329
fixed_app.py +28 -22
startup.sh +1 -1

app.py DELETED Viewed

@@ -1,329 +0,0 @@
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-import base64
-import io
-import tempfile
-import os
-import requests
-from typing import Optional, List, Dict, Any
-import logging
-from urllib.parse import urlparse
-import time
-from fastapi.responses import StreamingResponse
-import subprocess
-import asyncio
-# 设置缓存目录
-os.environ['XDG_CACHE_HOME'] = '/app/.cache'
-# os.environ['TORCH_HOME'] = '/app/.cache/torch'
-# 确保缓存目录存在
-os.makedirs('/app/.cache', exist_ok=True)
-# os.makedirs('/app/.cache/torch', exist_ok=True)
-# 配置日志
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-app = FastAPI(title="Whisper API", version="1.0.0")
-# 启动事件：预加载模型
-@app.on_event("startup")
-async def startup_event():
-    """应用启动时的初始化操作"""
-    logger.info("Starting Whisper API...")
-    try:
-        # 不在启动时预加载模型，改为按需加载以避免启动阻塞
-        logger.info("Whisper API ready - models will be loaded on demand")
-    except Exception as e:
-        logger.error(f"Startup warning: {e}")
-        # 继续启动，不因为模型加载失败而阻塞
-    logger.info("Whisper API startup complete")
-# 全局变量存储模型
-models = {}
-# 预加载模型列表
-PRELOAD_MODELS = ["tiny", "base", "small"]
-class AudioRequest(BaseModel):
-    audio: str  # base64 编码的音频数据
-    model: str = "base"  # 改为small模型，准确度更高
-    language: Optional[str] = "zh"  # 默认中文
-    task: Optional[str] = "transcribe"
-    temperature: Optional[float] = 0.0 # 温度越高，生成文本的随机性越大，温度越低，生成文本的随机性越小
-    word_timestamps: Optional[bool] = False  # 默认关闭词级时间戳
-    # output_format: str = "text"    # 支持 json 或 text
-    compression_ratio_threshold: Optional[float] = 2.4 # 压缩比阈值，用于过滤掉低质量的片段
-    logprob_threshold: Optional[float] = -1.0 # 对数概率阈值，用于过滤掉低质量的片段
-    no_speech_threshold: Optional[float] = 0.6 # 无语音阈值，用于过滤掉无语音的片段
-    device: Optional[str] = None
-    fp16: Optional[bool] = False  # CPU 默认关闭 fp16
-    beam_size: Optional[int] = 1  # 默认束搜索为1
-    condition_on_previous_text: Optional[bool] = False  # 默认关闭上下文
-def get_device():
-    return "cpu"
-def load_model(model_name: str):
-    """确保模型文件存在，返回模型路径"""
-    # 检查多个可能的模型路径
-    possible_paths = [
-        f"/app/models/ggml-{model_name}.bin",
-        f"/app/models/{model_name}.bin",
-        f"/app/models/for-tests-ggml-{model_name}.bin",
-        f"/models/ggml-{model_name}.bin",
-        f"/models/{model_name}.bin"
-    ]
-    # 检查是否有任何一个路径存在
-    for path in possible_paths:
-        if os.path.exists(path):
-            logger.info(f"找到模型: {path}")
-            return path
-    # 如果没有找到，使用测试模型
-    test_model = "/app/models/for-tests-ggml-base.bin"
-    if os.path.exists(test_model):
-        logger.info(f"使用测试模型: {test_model}")
-        return test_model
-    # 如果连测试模型都没有，报错
-    logger.error(f"找不到模型 {model_name}，请确保模型文件存在")
-    raise HTTPException(status_code=500, detail=f"Model {model_name} not found")
-def preload_models():
-    """启动时预加载模型"""
-    # device = get_device()
-    # logger.info(f"预加载模型到设备: {device}")
-    total_start_time = time.time()
-    for model_name in PRELOAD_MODELS:
-        try:
-            model_start_time = time.time()
-            logger.info(f"开始预加载模型: {model_name}")
-            load_model(model_name)
-            model_load_time = time.time() - model_start_time
-            logger.info(f"模型 {model_name} 预加载成功，耗时: {model_load_time:.2f}秒")
-        except Exception as e:
-            logger.error(f"模型 {model_name} 预加载失败: {e}")
-            # 继续加载其他模型，不中断程序启动
-    total_time = time.time() - total_start_time
-    logger.info(f"所有模型预加载完成，总耗时: {total_time:.2f}秒")
-class TranscriptionProgressLogger:
-    """转录进度日志记录器"""
-    def __init__(self, request_id: str = None):
-        self.request_id = request_id or str(int(time.time()))
-        self.start_time = time.time()
-        self.segment_count = 0
-        self.last_segment_time = self.start_time
-        self.segments_info = []
-    def log_start(self, audio_duration: float = None):
-        """记录转录开始"""
-        if audio_duration:
-            logger.info(f"[{self.request_id}] 开始转录 - 音频时长: {audio_duration:.2f}秒")
-        else:
-            logger.info(f"[{self.request_id}] 开始转录音频")
-    def log_segment_progress(self, segment_id: int, start_time: float, end_time: float, text: str):
-        """记录片段转录进度"""
-        self.segment_count += 1
-        current_time = time.time()
-        # 计算从上一个片段到现在的时间
-        segment_processing_time = current_time - self.last_segment_time
-        self.last_segment_time = current_time
-        # 计算总耗时
-        total_elapsed = current_time - self.start_time
-        # 存储片段信息
-        self.segments_info.append({
-            "id": segment_id,
-            "start": start_time,
-            "end": end_time,
-            "duration": end_time - start_time,
-            "processing_time": segment_processing_time
-        })
-        # 计算实时速度比（音频时长与处理时间的比值）
-        segment_duration = end_time - start_time
-        speed_ratio = segment_duration / segment_processing_time if segment_processing_time > 0 else 0
-        # 记录日志
-        logger.info(
-            f"[{self.request_id}] 片段 {segment_id}/{self.segment_count} "
-            f"({start_time:.1f}s-{end_time:.1f}s, 时长:{segment_duration:.1f}s): "
-            f"'{text[:30]}{'...' if len(text) > 30 else ''}' "
-            f"(处理耗时: {segment_processing_time:.2f}s, 速度比: {speed_ratio:.1f}x, 总耗时: {total_elapsed:.2f}s)"
-        )
-    def log_completion(self, total_segments: int, total_text_length: int):
-        """记录转录完成"""
-        elapsed = time.time() - self.start_time
-        # 计算总音频时长
-        total_audio_duration = sum(segment["duration"] for segment in self.segments_info) if self.segments_info else 0
-        # 计算平均速度比
-        avg_speed_ratio = total_audio_duration / elapsed if elapsed > 0 else 0
-        # 计算每秒处理的文本量
-        text_per_second = total_text_length / elapsed if elapsed > 0 else 0
-        logger.info(
-            f"[{self.request_id}] 转录完成 - "
-            f"总片段: {total_segments}, "
-            f"文本长度: {total_text_length}字符, "
-            f"音频时长: {total_audio_duration:.2f}秒, "
-            f"处理耗时: {elapsed:.2f}秒, "
-            f"平均速度比: {avg_speed_ratio:.1f}x, "
-            f"处理速度: {text_per_second:.1f}字/秒"
-        )
-def decode_audio(audio_base64: str) -> tuple:
-    """解码base64音频数据并保存为临时文件，返回文件路径和音频大小"""
-    try:
-        # 移除data URL前缀（如果存在）
-        if "," in audio_base64:
-            audio_base64 = audio_base64.split(",")[1]
-        # 解码base64
-        start_time = time.time()
-        audio_data = base64.b64decode(audio_base64)
-        decode_time = time.time() - start_time
-        # 获取音频大小（字节）
-        audio_size = len(audio_data)
-        logger.info(f"音频解码完成: {audio_size/1024:.2f} KB, 耗时: {decode_time:.2f}s")
-        # 创建临时文件
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
-            temp_file.write(audio_data)
-            return temp_file.name
-    except Exception as e:
-        logger.error(f"音频解码失败: {str(e)}")
-        raise HTTPException(status_code=400, detail=f"Invalid audio data: {str(e)}")
-@app.post("/transcribe")
-async def transcribe_audio(request: AudioRequest):
-    """音频转录API，异步调用 whisper.cpp 并流式返回分段结果"""
-    try:
-        # 解码音频并保存为临时文件
-        audio_file = decode_audio(request.audio)
-        model_path = load_model(request.model)  # 确保模型存在
-        # 检查whisper.cpp二进制路径
-        whisper_binary = "/app/build/bin/main"
-        if not os.path.exists(whisper_binary):
-            # 尝试其他可能的路径
-            possible_binaries = [
-                "/app/main",
-                "/usr/local/bin/whisper",
-                "/usr/local/bin/whisper.cpp"
-            ]
-            for binary in possible_binaries:
-                if os.path.exists(binary):
-                    whisper_binary = binary
-                    break
-        logger.info(f"使用whisper二进制: {whisper_binary}")
-        logger.info(f"使用模型: {model_path}")
-        cmd = [
-            whisper_binary,  # whisper.cpp 主程序路径
-            "-m", model_path,
-            "-f", audio_file,
-            "-l", request.language or "zh",
-            "--output-json",
-            "--print-progress",
-            "--split-on-word",
-            "-t", str(os.cpu_count() or 1),
-        ]
-    except Exception as e:
-        logger.error(f"准备转录失败: {e}")
-        raise HTTPException(status_code=500, detail=f"Failed to prepare transcription: {str(e)}")
-    # 添加可选参数
-    if request.beam_size:
-        cmd += ["--beam-size", str(request.beam_size)]
-    if request.temperature:
-        cmd += ["--temperature", str(request.temperature)]
-    # 其他参数可按需添加
-    async def event_stream():
-        proc = await asyncio.create_subprocess_exec(
-            *cmd,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.STDOUT,
-        )
-        try:
-            async for line in proc.stdout:
-                line = line.decode().strip()
-                if line.startswith("{"):
-                    yield f"data: {line}\n\n"
-            await proc.wait()
-        finally:
-            # 清理临时文件
-            if os.path.exists(audio_file):
-                os.unlink(audio_file)
-    return StreamingResponse(event_stream(), media_type="text/event-stream")
-@app.get("/health")
-async def health_check():
-    """健康检查"""
-    try:
-        # 检查whisper.cpp二进制是否存在
-        whisper_binary = "/app/build/bin/main"
-        binary_exists = os.path.exists(whisper_binary)
-        # 检查模型目录
-        model_dirs = ["/app/models", "/models"]
-        model_files = []
-        for dir_path in model_dirs:
-            if os.path.exists(dir_path):
-                try:
-                    model_files.extend([f"{dir_path}/{f}" for f in os.listdir(dir_path) if f.endswith(".bin")])
-                except:
-                    pass
-        return {
-            "status": "healthy",
-            "whisper_binary": whisper_binary,
-            "binary_exists": binary_exists,
-            "model_dirs": {dir_path: os.path.exists(dir_path) for dir_path in model_dirs},
-            "available_models": model_files
-        }
-    except Exception as e:
-        return {
-            "status": "error",
-            "error": str(e)
-        }
-@app.get("/models")
-async def list_models():
-    """列出可用模型"""
-    return {
-        "models": ["tiny", "base", "small", "medium", "large", "turbo"]
-    }
-@app.get("/")
-async def root():
-    """根路径"""
-    return {
-        "message": "Whisper API is running",
-        "version": "1.0.0",
-        "endpoints": {
-            "health": "/health",
-            "models": "/models",
-            "transcribe": "/transcribe"
-        }
-    }
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)

fixed_app.py CHANGED Viewed

@@ -8,7 +8,6 @@ from typing import Optional
 import logging
 import time
 import asyncio
-import shutil
 # 设置缓存目录
 os.environ['XDG_CACHE_HOME'] = '/app/.cache'
@@ -202,6 +201,28 @@ def parse_whisper_output(output_file: str, stdout: bytes, exit_code: int) -> dic
         }
     return result
 @app.post("/transcribe")
 async def transcribe_audio(request: AudioRequest):
     """音频转录API，异步调用 whisper.cpp 并返回转录结果"""
@@ -254,7 +275,7 @@ async def transcribe_audio(request: AudioRequest):
                 whisper_binary,
                 "-m", model_path,
                 "-f", audio_file,
-                "-l", request.language or "zh",
                 "-oj",  # --output-json: 输出JSON格式
                 "-of", output_file,  # 指定输出文件
                 "-t", str(request.threads),  # 使用所有CPU核心
@@ -272,8 +293,6 @@ async def transcribe_audio(request: AudioRequest):
         if request.temperature:
             cmd += ["-tp", str(request.temperature)]  # --temperature 的简写
-        # logger.info(f"完整命令: {' '.join(cmd)}")
         try:
             # 执行命令
             start_time = time.time()
@@ -300,9 +319,9 @@ async def transcribe_audio(request: AudioRequest):
                 logger.warning("输出包含非UTF-8字符，已替换")
             # 记录输出日志
-            for line in output_text.splitlines():
-                if line.strip():
-                    logger.info(f"whisper输出: {line.strip()}")
             # 检查退出码
             exit_code = proc.returncode
@@ -312,6 +331,7 @@ async def transcribe_audio(request: AudioRequest):
             # 读取JSON输出文件
             result = parse_whisper_output(output_file, stdout, exit_code)
             result["processing_time"] = f"{processing_time:.2f}"
             return result
@@ -329,21 +349,7 @@ async def transcribe_audio(request: AudioRequest):
             raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}")
         finally:
             # 清理临时文件
-            if os.path.exists(audio_file):
-                os.unlink(audio_file)
-            # 如果有转换后的文件，也要清理
-            if audio_file.endswith('_converted.wav'):
-                original_file = audio_file.replace('_converted.wav', '.m4a')
-                if os.path.exists(original_file):
-                    os.unlink(original_file)
-            # 清理输出文件
-            json_output_file = output_file + ".json"
-            if os.path.exists(json_output_file):
-                os.unlink(json_output_file)
-            # 清理临时目录
-            if os.path.exists(temp_dir):
-                import shutil
-                shutil.rmtree(temp_dir, ignore_errors=True)
     except Exception as e:
         logger.error(f"转录失败: {e}")
         raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")

 import logging
 import time
 import asyncio
 # 设置缓存目录
 os.environ['XDG_CACHE_HOME'] = '/app/.cache'
         }
     return result
+def cleanup_temp_files(audio_file, output_file, temp_dir):
+    """清理音频、输出文件和临时目录"""
+    try:
+        # 删除音频文件
+        if audio_file and os.path.exists(audio_file):
+            os.unlink(audio_file)
+        # 删除转换后的文件（如 _converted.wav）
+        if audio_file and audio_file.endswith('_converted.wav'):
+            original_file = audio_file.replace('_converted.wav', '.m4a')
+            if os.path.exists(original_file):
+                os.unlink(original_file)
+        # 删除输出JSON文件
+        json_output_file = output_file + ".json"
+        if os.path.exists(json_output_file):
+            os.unlink(json_output_file)
+        # 删除临时目录
+        if temp_dir and os.path.exists(temp_dir):
+            import shutil
+            shutil.rmtree(temp_dir, ignore_errors=True)
+    except Exception as e:
+        logger.warning(f"清理临时文件时出错: {e}")
 @app.post("/transcribe")
 async def transcribe_audio(request: AudioRequest):
     """音频转录API，异步调用 whisper.cpp 并返回转录结果"""
                 whisper_binary,
                 "-m", model_path,
                 "-f", audio_file,
+                "-l", request.language or "auto",
                 "-oj",  # --output-json: 输出JSON格式
                 "-of", output_file,  # 指定输出文件
                 "-t", str(request.threads),  # 使用所有CPU核心
         if request.temperature:
             cmd += ["-tp", str(request.temperature)]  # --temperature 的简写
         try:
             # 执行命令
             start_time = time.time()
                 logger.warning("输出包含非UTF-8字符，已替换")
             # 记录输出日志
+            # for line in output_text.splitlines():
+            #     if line.strip():
+            #         logger.info(f"whisper输出: {line.strip()}")
             # 检查退出码
             exit_code = proc.returncode
             # 读取JSON输出文件
             result = parse_whisper_output(output_file, stdout, exit_code)
             result["processing_time"] = f"{processing_time:.2f}"
+            result["cmd"] = " ".join(cmd)
             return result
             raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}")
         finally:
             # 清理临时文件
+            cleanup_temp_files(audio_file, output_file, temp_dir)
     except Exception as e:
         logger.error(f"转录失败: {e}")
         raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")

startup.sh CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/bin/bash
 # 显示环境信息
-echo "=== Whisper API Startup 0.7==="
 echo "Python version: $(python3 --version)"
 echo "Current directory: $(pwd)"
 # echo "Files in /app:"

 #!/bin/bash
 # 显示环境信息
+echo "=== Whisper API Startup 0.8==="
 echo "Python version: $(python3 --version)"
 echo "Current directory: $(pwd)"
 # echo "Files in /app:"