# Whisper API: a FastAPI service that transcribes base64-encoded audio with whisper.cpp's whisper-cli.
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import base64
import tempfile
import os
import json
from typing import Optional
import logging
import time
import asyncio
# Point XDG_CACHE_HOME at a cache directory inside the application tree.
os.environ['XDG_CACHE_HOME'] = '/app/.cache'
# Make sure that cache directory exists (no error if it is already there).
os.makedirs('/app/.cache', exist_ok=True)
# Configure logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# The FastAPI application object that the route handlers below are registered on.
app = FastAPI(title="Whisper API", version="1.0.0")
class AudioRequest(BaseModel):
    # Request body of the POST /transcribe endpoint.
    audio: str  # base64-encoded audio data
    model: str = "tiny"  # model name; defaults to the tiny model for speed
    language: Optional[str] = "zh"  # language passed to whisper-cli; defaults to Chinese
    temperature: Optional[float] = 0.0  # sampling temperature
    beam_size: Optional[int] = 1  # beam size for beam search
    fast_mode: Optional[bool] = True  # fast mode: trade some accuracy for speed
    vad: Optional[bool] = False  # enable whisper-cli's --vad option
    threads: Optional[int] = 4  # thread count passed to whisper-cli via -t
    atempo: Optional[float] = 1.0  # tempo factor for ffmpeg's atempo filter
def load_model(model_name: str):
    """Resolve a model name to the path of an existing model file.

    The two file names derived from ``model_name`` are probed first. If
    neither exists, a fixed list of test models is probed as a fallback.

    Args:
        model_name: Name inserted into the ``ggml-<name>.bin`` file pattern.

    Returns:
        The first existing path, in probe order.

    Raises:
        HTTPException: 500 when no candidate file exists at all.
    """
    requested = (
        f"/app/models/ggml-{model_name}.bin",
        f"/app/models/for-tests-ggml-{model_name}.bin",
    )
    fallbacks = (
        "/app/models/for-tests-ggml-base.bin",
        "/app/models/ggml-base.en.bin",
        "/app/models/for-tests-ggml-tiny.bin",
    )

    # First choice: a file that matches the requested model name.
    match = next((p for p in requested if os.path.exists(p)), None)
    if match is not None:
        logger.info(f"找到模型: {match}")
        return match

    # Second choice: any of the bundled test models.
    match = next((p for p in fallbacks if os.path.exists(p)), None)
    if match is not None:
        logger.info(f"使用测试模型: {match}")
        return match

    # Nothing usable on disk.
    logger.error(f"找不到模型 {model_name},请确保模型文件存在")
    raise HTTPException(status_code=500, detail=f"Model {model_name} not found")
async def convert_audio_to_wav(input_file: str, atempo: float = 1.0) -> str:
    """Convert an audio file to 16 kHz mono 16-bit PCM WAV using ffmpeg.

    On success the input file is deleted and the path of the converted file
    is returned. On failure any partially written output file is removed and
    the input file is left in place for the caller to clean up.

    Args:
        input_file: Path of the audio file to convert.
        atempo: Tempo factor for ffmpeg's ``atempo`` filter. ``1.0`` or
            ``None`` means no tempo change.

    Returns:
        Path of the converted WAV file (input path stem + ``_converted.wav``).

    Raises:
        HTTPException: 500 if ffmpeg fails, produces no output file, or any
            other error occurs during conversion.
    """
    # splitext only looks at the last path component, so a dot in a directory
    # name cannot truncate the path the way rsplit('.') would.
    output_file = os.path.splitext(input_file)[0] + '_converted.wav'
    converted = False
    try:
        # Sample rate 16 kHz, mono, 16-bit PCM encoder.
        cmd = [
            "ffmpeg",
            "-i", input_file,
            "-ar", "16000",
            "-ac", "1",
            "-c:a", "pcm_s16le",
        ]
        # The request model allows None; treat it as "no tempo change".
        if atempo is not None and atempo != 1.0:
            cmd += ["-filter:a", f"atempo={atempo}"]
        cmd += ["-y", output_file]
        logger.info(f"开始音频转换: {' '.join(cmd)}")

        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        _, stderr = await proc.communicate()
        if proc.returncode != 0:
            # errors='replace' keeps the real ffmpeg message even when it
            # contains bytes that are not valid UTF-8.
            error_msg = stderr.decode(errors='replace') if stderr else "Unknown ffmpeg error"
            logger.error(f"音频转换失败: {error_msg}")
            raise HTTPException(status_code=500, detail=f"Audio conversion failed: {error_msg}")

        if not os.path.exists(output_file):
            raise HTTPException(status_code=500, detail="Converted audio file not found")

        # The original is no longer needed once the conversion succeeded.
        if os.path.exists(input_file):
            os.unlink(input_file)

        # Approximate duration: bytes / (sample rate * bytes per sample * channels).
        file_size = os.path.getsize(output_file)
        duration_sec = file_size / (16000 * 2 * 1)
        logger.info(f"音频转换成功: {output_file}, 大小: {file_size} 字节, 时长: {duration_sec:.2f} 秒")
        converted = True
        return output_file
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"音频转换过程中出错: {e}")
        raise HTTPException(status_code=500, detail=f"Audio conversion error: {str(e)}") from e
    finally:
        if not converted:
            # Best effort: do not leave a partial output file behind.
            try:
                os.unlink(output_file)
            except OSError:
                pass
def decode_audio(audio_base64: str) -> str:
    """Decode base64 audio, sniff its format and save it to a temporary file.

    Args:
        audio_base64: Base64 text. If it contains a comma, everything before
            the first comma is treated as a data-URL prefix and dropped.

    Returns:
        Path of the temporary file holding the decoded bytes. The suffix
        reflects the detected format; unrecognised data keeps ``.wav``.
        The caller is responsible for deleting the file.

    Raises:
        HTTPException: 400 if the payload is not valid base64, decodes to
            zero bytes, or cannot be saved.
    """
    # MPEG audio Layer III frame-sync byte pairs (MPEG-1 / MPEG-2 / MPEG-2.5,
    # with and without CRC protection).
    mp3_sync = (b'\xff\xfb', b'\xff\xfa', b'\xff\xf3', b'\xff\xf2', b'\xff\xe3', b'\xff\xe2')
    temp_path = None
    try:
        # Drop the data-URL prefix if there is one.
        if "," in audio_base64:
            mime_type, audio_base64 = audio_base64.split(",", 1)
            logger.info(f"检测到MIME类型: {mime_type}")

        try:
            audio_data = base64.b64decode(audio_base64)
            logger.info(f"成功解码音频数据,大小: {len(audio_data)} 字节")
        except Exception as e:
            logger.error(f"Base64解码失败: {e}")
            raise HTTPException(status_code=400, detail=f"Invalid base64 data: {str(e)}")

        # An empty payload can never be transcribed; reject it up front
        # instead of writing a 0-byte file.
        if not audio_data:
            raise HTTPException(status_code=400, detail="Invalid audio data: empty audio payload")

        # Detect the container format from the first bytes.
        file_extension = ".wav"  # default when nothing matches
        if len(audio_data) >= 12:
            header = audio_data[:12]
            if header[:4] == b'RIFF' and header[8:12] == b'WAVE':
                file_extension = ".wav"
                logger.info("检测到WAV格式")
            elif header[4:8] == b'ftyp':
                # ISO base media file; the brand follows the 'ftyp' tag.
                if b'M4A' in header:
                    file_extension = ".m4a"
                    logger.info("检测到M4A格式")
                else:
                    file_extension = ".mp4"
                    logger.info("检测到MP4格式")
            elif header[:3] == b'ID3' or header[:2] in mp3_sync:
                file_extension = ".mp3"
                logger.info("检测到MP3格式")
            elif header[:4] == b'OggS':
                file_extension = ".ogg"
                logger.info("检测到OGG格式")
            elif header[:4] == b'fLaC':
                file_extension = ".flac"
                logger.info("检测到FLAC格式")
            elif header[:4] == b'\x1a\x45\xdf\xa3':
                # EBML magic used by WebM / Matroska.
                file_extension = ".webm"
                logger.info("检测到WebM格式")
            else:
                logger.warning(f"未知音频格式,文件头: {header.hex()}")

        # Write the bytes to a temp file that carries the detected suffix.
        with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension, mode="wb") as temp_file:
            temp_path = temp_file.name
            temp_file.write(audio_data)
        # Make sure the file is readable.
        os.chmod(temp_path, 0o644)
        logger.info(f"音频文件已保存到: {temp_path}, 大小: {os.path.getsize(temp_path)} 字节, 格式: {file_extension}")

        # Warn when the format is outside the list of directly supported ones.
        supported_formats = [".wav", ".flac", ".mp3", ".ogg"]
        if file_extension not in supported_formats:
            logger.warning(f"音频格式 {file_extension} 可能不被whisper-cli支持,支持的格式: {supported_formats}")
        return temp_path
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"音频解码失败: {str(e)}")
        # Do not leak a half-written temp file when saving failed.
        if temp_path and os.path.exists(temp_path):
            try:
                os.unlink(temp_path)
            except OSError:
                pass
        raise HTTPException(status_code=400, detail=f"Invalid audio data: {str(e)}") from e
def parse_whisper_output(output_file: str, stdout: bytes, exit_code: int) -> dict:
    """Build the result dict from whisper's JSON output, or from stdout.

    Args:
        output_file: Output path prefix; the JSON file is ``output_file + ".json"``.
        stdout: Raw bytes captured from the whisper process.
        exit_code: Exit code of the whisper process.

    Returns:
        The parsed JSON document with an added ``full_text`` key (all
        ``transcription`` item texts concatenated); an ``error`` dict if the
        JSON file exists but cannot be read or processed; or, when there is
        no JSON file, a dict with ``text``, ``status`` and ``exit_code``.
    """
    json_path = output_file + ".json"

    # No JSON file: fall back to whatever the process printed.
    if not os.path.exists(json_path):
        logger.warning(f"未找到JSON输出文件: {json_path}")
        return {
            "text": stdout.decode(errors='replace'),
            "status": "completed" if exit_code == 0 else "failed",
            "exit_code": exit_code,
        }

    try:
        with open(json_path, 'r', encoding='utf-8', errors='replace') as handle:
            parsed = json.load(handle)
        segments = parsed.get("transcription", [])
        parsed["full_text"] = "".join(segment["text"] for segment in segments)
        logger.info(f"成功读取JSON输出文件: {json_path}")
    except Exception as e:
        logger.error(f"读取JSON输出文件失败: {e}")
        return {"error": f"Failed to read JSON output: {str(e)}"}
    return parsed
def cleanup_temp_files(audio_file, output_file, temp_dir):
    """Best-effort removal of the audio file, the JSON output and the temp dir.

    Every step is independent: a missing argument or a failure to delete one
    file does not prevent the remaining items from being removed. Errors are
    logged as warnings and never raised.

    Args:
        audio_file: Path of the audio file fed to whisper, or ``None``.
        output_file: Output path prefix (the JSON file is ``output_file +
            ".json"``), or ``None``.
        temp_dir: Temporary output directory to remove recursively, or ``None``.
    """
    import shutil

    converted_suffix = '_converted.wav'
    targets = []
    if audio_file:
        targets.append(audio_file)
        if audio_file.endswith(converted_suffix):
            # Also remove a left-over pre-conversion .m4a file. Only the
            # trailing suffix is swapped, so the rest of the path is untouched.
            targets.append(audio_file[:-len(converted_suffix)] + '.m4a')
    if output_file:
        targets.append(output_file + ".json")

    for path in targets:
        try:
            os.unlink(path)
        except FileNotFoundError:
            # Already gone: nothing to clean up.
            pass
        except OSError as e:
            logger.warning(f"清理临时文件时出错: {e}")

    if temp_dir and os.path.exists(temp_dir):
        shutil.rmtree(temp_dir, ignore_errors=True)
def _build_whisper_command(request, whisper_binary, model_path, audio_file, output_file):
    """Assemble the whisper-cli argument list for one transcription request."""
    if request.fast_mode:
        # Fast mode: trade some accuracy for speed.
        beam_size, best_of = 1, "1"
        mode_flags = [
            "-ac", "0",  # audio context = 0
            "-nf",       # --no-fallback: disable temperature fallback
            "-nt",       # do not print timestamps
        ]
    else:
        # Standard mode: whisper's default beam size / best-of.
        beam_size, best_of = 5, "5"
        mode_flags = []

    # A beam size other than 1 in the request overrides the mode default.
    if request.beam_size and request.beam_size != 1:
        beam_size = request.beam_size

    cmd = [
        whisper_binary,
        "-m", model_path,
        "-f", audio_file,
        "-l", request.language or "auto",
        "-oj",                              # --output-json
        "-of", output_file,                 # output file prefix
        "-t", str(request.threads or 4),    # thread count; 4 when unset
        "-bs", str(beam_size),
        "-bo", best_of,
    ]
    cmd += mode_flags
    # Only add the VAD options when VAD is requested; empty-string
    # placeholders would be handed to whisper-cli as real arguments.
    if request.vad:
        cmd += ["--vad", "-vm", "/app/models/ggml-silero-v5.1.2.bin"]
    if request.temperature:
        cmd += ["-tp", str(request.temperature)]  # short form of --temperature
    return cmd


async def _kill_if_running(proc):
    """Kill and reap the subprocess if it was started and has not exited."""
    if proc is None or proc.returncode is not None:
        return
    try:
        proc.kill()
    except ProcessLookupError:
        # The process exited between the check and the kill.
        pass
    await proc.wait()


@app.post("/transcribe")
async def transcribe_audio(request: AudioRequest):
    """Transcribe the audio in the request by running whisper-cli asynchronously.

    The audio is decoded to a temp file, converted to WAV with ffmpeg when
    its format is not directly supported or a tempo change is requested,
    and then passed to whisper-cli. All temporary files are removed before
    returning, on success or failure.

    Returns:
        The dict produced by ``parse_whisper_output`` plus ``processing_time``
        (seconds, formatted with two decimals) and ``cmd`` (the command line).

    Raises:
        HTTPException: Errors raised by the helpers are propagated with
            their own status code; a timeout or any unexpected error
            results in a 500.
    """
    audio_file = None
    output_file = None
    temp_dir = None
    proc = None
    try:
        logger.info(f"收到转录请求: 模型={request.model}, 语言={request.language}")
        # Decode the audio into a temporary file.
        audio_file = decode_audio(request.audio)
        model_path = load_model(request.model)
        logger.info(f"使用模型: {model_path}")
        whisper_binary = "/app/build/bin/whisper-cli"
        logger.info(f"使用whisper二进制: {whisper_binary}")

        # Convert when the format is not directly supported, or when a tempo
        # change was requested (it can only be applied through ffmpeg).
        supported_formats = ('.wav', '.flac', '.mp3', '.ogg')
        atempo = 1.0 if request.atempo is None else request.atempo
        if not audio_file.endswith(supported_formats):
            logger.info(f"音频格式不直接支持,将转换为WAV: {audio_file}")
            audio_file = await convert_audio_to_wav(audio_file, atempo)
        elif atempo != 1.0:
            logger.info(f"应用变速 atempo={atempo},将转换为WAV: {audio_file}")
            audio_file = await convert_audio_to_wav(audio_file, atempo)

        # Temporary directory that receives whisper's output files.
        temp_dir = tempfile.mkdtemp()
        output_file = os.path.join(temp_dir, "output")
        cmd = _build_whisper_command(request, whisper_binary, model_path, audio_file, output_file)

        start_time = time.monotonic()
        logger.info(f"开始执行命令: {' '.join(cmd)}")
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
        )
        logger.info("whisper子进程已创建,开始等待输出")
        # Bound the wait so a stuck process cannot block forever (5 minutes).
        stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=300)

        exit_code = proc.returncode
        processing_time = time.monotonic() - start_time
        logger.info(f"命令执行完成,退出码: {exit_code},处理时间: {processing_time:.2f}秒")

        result = parse_whisper_output(output_file, stdout, exit_code)
        result["processing_time"] = f"{processing_time:.2f}"
        result["cmd"] = " ".join(cmd)
        return result
    except HTTPException:
        # Keep the status code and detail chosen by the helper (e.g. 400 for
        # bad input) instead of re-wrapping it as a 500.
        raise
    except asyncio.TimeoutError:
        logger.error("命令执行超时")
        raise HTTPException(status_code=500, detail="Command execution timed out")
    except Exception as e:
        logger.error(f"转录失败: {e}")
        raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}") from e
    finally:
        # wait_for() cancels the wait but does not stop the process itself.
        await _kill_if_running(proc)
        if temp_dir is not None:
            cleanup_temp_files(audio_file, output_file, temp_dir)
        elif audio_file and os.path.exists(audio_file):
            # Failed before the output directory existed: only the decoded
            # audio file needs removing.
            try:
                os.unlink(audio_file)
            except OSError as e:
                logger.warning(f"清理临时文件时出错: {e}")
@app.get("/health")
async def health_check():
    """Report whether the whisper binary and model files are present.

    Returns:
        A dict with ``status`` "healthy", the binary path and whether it
        exists, which model directories exist, and the ``.bin`` files found
        in them. If the check itself fails, ``status`` is "error" and
        ``error`` holds the message.
    """
    try:
        whisper_binary = "/app/build/bin/whisper-cli"
        binary_exists = os.path.exists(whisper_binary)

        model_dirs = ["/app/models", "/models"]
        model_files = []
        for dir_path in model_dirs:
            if not os.path.exists(dir_path):
                continue
            try:
                entries = sorted(os.listdir(dir_path))
            except OSError as e:
                # An unreadable directory must not fail the whole health
                # check, but it should be visible in the logs.
                logger.warning(f"无法列出模型目录 {dir_path}: {e}")
                continue
            model_files.extend(f"{dir_path}/{f}" for f in entries if f.endswith(".bin"))

        return {
            "status": "healthy",
            "whisper_binary": whisper_binary,
            "binary_exists": binary_exists,
            "model_dirs": {dir_path: os.path.exists(dir_path) for dir_path in model_dirs},
            "available_models": model_files,
        }
    except Exception as e:
        return {
            "status": "error",
            "error": str(e),
        }
@app.get("/")
async def root():
    """Return a short service description: message, version and endpoint paths."""
    endpoints = {"health": "/health", "transcribe": "/transcribe"}
    info = {"message": "Whisper API is running", "version": "1.0.0"}
    info["endpoints"] = endpoints
    return info
if __name__ == "__main__":
    # When run as a script, serve the app with uvicorn on all interfaces, port 7860.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)