Spaces:

1een
/

whisper

Running

App Files Community

1een committed on Jul 24, 2025

Commit

d49166f

1 Parent(s): 705e333

9

Browse files

Files changed (3) hide show

README.md +19 -12
fixed_app.py +11 -15
startup.sh +1 -1

README.md CHANGED Viewed

@@ -35,13 +35,19 @@ docker run -p 7860:7860 -v /your/model/dir:/models whisper-api
 ## API 使用
-### 转录接口（流式SSE输出）
 ```bash
 POST /transcribe
 ```
-- 支持参数：audio（base64）、model、language、beam_size、temperature等
-- 返回：Server-Sent Events (SSE)，每个分段为一条JSON
 ### 示例请求
@@ -49,18 +55,19 @@ POST /transcribe
 {
   "audio": "data:audio/wav;base64,...",
   "model": "base",
-  "language": "zh"
 }
 ```
-### 示例响应（流式SSE）
-```
-data: {"id":0,"start":0.0,"end":2.5,"text":"片段文本"}
-data: {"id":1,...}
-...
 ```
 ## 主要特性

 ## API 使用
 ```bash
 POST /transcribe
 ```
+- 支持参数：
+  - audio（base64，必填）
+  - model（如 tiny/base/small/...，可选，默认 tiny）
+  - language（如 zh/en/auto，可选，默认 zh）
+  - temperature（float，可选，默认0.0）
+  - beam_size（int，可选，默认1）
+  - fast_mode（bool，可选，默认true，极致加速）
+  - vad（bool，可选，默认false，启用端点检测）
+  - threads（int，可选，默认4，推理线程数）
 ### 示例请求
 {
   "audio": "data:audio/wav;base64,...",
   "model": "base",
+  "language": "zh",
+  "vad": true,
+  "threads": 8
 }
 ```
+### 返回内容包含：
+```json
+{
+  "full_text": "完整转写文本",
+  "processing_time": "处理耗时（秒）",
+  "cmd": "实际调用的whisper命令行"
+}
 ```
 ## 主要特性

fixed_app.py CHANGED Viewed

@@ -30,6 +30,7 @@ class AudioRequest(BaseModel):
     fast_mode: Optional[bool] = True  # 快速模式
     vad: Optional[bool] = False
     threads: Optional[int] = 4
 def load_model(model_name: str):
     """确保模型文件存在，返回模型路径"""
@@ -61,24 +62,19 @@ def load_model(model_name: str):
     logger.error(f"找不到模型 {model_name}，请确保模型文件存在")
     raise HTTPException(status_code=500, detail=f"Model {model_name} not found")
-async def convert_audio_to_wav(input_file: str) -> str:
-    """使用ffmpeg将音频文件转换为WAV格式"""
     try:
         # 创建输出文件路径
         output_file = input_file.rsplit('.', 1)[0] + '_converted.wav'
-        # 构建ffmpeg命令
-        cmd = [
-            'ffmpeg',
-            '-i', input_file,           # 输入文件
-            '-ar', '16000',             # 采样率：16kHz（whisper推荐）
-            '-ac', '1',                 # 声道数：单声道
-            '-acodec', 'pcm_s16le',     # 音频编码器：16位PCM
-            '-y',                       # 覆盖输出文件
-            output_file
-        ]
-        logger.info(f"开始音频转换: {' '.join(cmd)}")
         # 执行ffmpeg命令
         proc = await asyncio.create_subprocess_exec(
@@ -102,7 +98,7 @@ async def convert_audio_to_wav(input_file: str) -> str:
         if os.path.exists(input_file):
             os.unlink(input_file)
-        logger.info(f"音频转换成功: {output_file}, 大小: {os.path.getsize(output_file)} 字节")
         return output_file
     except HTTPException:
@@ -244,7 +240,7 @@ async def transcribe_audio(request: AudioRequest):
         supported_formats = ('.wav', '.flac', '.mp3', '.ogg')
         if not audio_file.endswith(supported_formats):
             logger.info(f"音频格式不直接支持，将转换为WAV: {audio_file}")
-            audio_file = await convert_audio_to_wav(audio_file)
         # 创建临时目录用于输出
         temp_dir = tempfile.mkdtemp()

     fast_mode: Optional[bool] = True  # 快速模式
     vad: Optional[bool] = False
     threads: Optional[int] = 4
+    atempo: Optional[float] = 1.0
 def load_model(model_name: str):
     """确保模型文件存在，返回模型路径"""
     logger.error(f"找不到模型 {model_name}，请确保模型文件存在")
     raise HTTPException(status_code=500, detail=f"Model {model_name} not found")
+async def convert_audio_to_wav(input_file: str, atempo: float = 1.0) -> str:
+    """使用ffmpeg将音频文件转换为WAV格式，支持atempo变速"""
     try:
         # 创建输出文件路径
         output_file = input_file.rsplit('.', 1)[0] + '_converted.wav'
+        # 构建ffmpeg命令 采样率：16kHz 单声道 音频编码器：16位PCM
+        if atempo != 1.0: # 使用ffmpeg的atempo滤镜进行变速
+            cmd = f"ffmpeg -i {input_file} -ar 16000 -ac 1 -c:a pcm_s16le -filter:a \"atempo={atempo}\" -y {output_file}"
+        else: # 如果atempo为1.0，则直接转换为WAV格式
+            cmd = f"ffmpeg -i {input_file} -ar 16000 -ac 1 -c:a pcm_s16le -y {output_file}"
+        logger.info(f"开始音频转换: {cmd}")
         # 执行ffmpeg命令
         proc = await asyncio.create_subprocess_exec(
         if os.path.exists(input_file):
             os.unlink(input_file)
+        logger.info(f"音频转换成功: {output_file}, 大小: {os.path.getsize(output_file)} 字节, 时长: {os.path.getsize(output_file) / 16000} 秒")
         return output_file
     except HTTPException:
         supported_formats = ('.wav', '.flac', '.mp3', '.ogg')
         if not audio_file.endswith(supported_formats):
             logger.info(f"音频格式不直接支持，将转换为WAV: {audio_file}")
+            audio_file = await convert_audio_to_wav(audio_file, request.atempo)
         # 创建临时目录用于输出
         temp_dir = tempfile.mkdtemp()

startup.sh CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/bin/bash
 # 显示环境信息
-echo "=== Whisper API Startup 0.8==="
 echo "Python version: $(python3 --version)"
 echo "Current directory: $(pwd)"
 # echo "Files in /app:"

 #!/bin/bash
 # 显示环境信息
+echo "=== Whisper API Startup 0.9==="
 echo "Python version: $(python3 --version)"
 echo "Current directory: $(pwd)"
 # echo "Files in /app:"