re
Browse files
- fixed_app.py +83 -69
- startup.sh +1 -1
fixed_app.py
CHANGED
|
@@ -3,11 +3,12 @@ from pydantic import BaseModel
|
|
| 3 |
import base64
|
| 4 |
import tempfile
|
| 5 |
import os
|
|
|
|
| 6 |
from typing import Optional
|
| 7 |
import logging
|
| 8 |
import time
|
| 9 |
-
from fastapi.responses import StreamingResponse
|
| 10 |
import asyncio
|
|
|
|
| 11 |
|
| 12 |
# 设置缓存目录
|
| 13 |
os.environ['XDG_CACHE_HOME'] = '/app/.cache'
|
|
@@ -180,7 +181,7 @@ def decode_audio(audio_base64: str) -> str:
|
|
| 180 |
|
| 181 |
@app.post("/transcribe")
|
| 182 |
async def transcribe_audio(request: AudioRequest):
|
| 183 |
-
"""音频转录API,异步调用 whisper.cpp 并
|
| 184 |
try:
|
| 185 |
logger.info(f"收到转录请求: 模型={request.model}, 语言={request.language}")
|
| 186 |
|
|
@@ -191,7 +192,7 @@ async def transcribe_audio(request: AudioRequest):
|
|
| 191 |
model_path = load_model(request.model)
|
| 192 |
logger.info(f"使用模型: {model_path}")
|
| 193 |
|
| 194 |
-
# 检查whisper.cpp二进制路径
|
| 195 |
whisper_binary = "/app/build/bin/whisper-cli"
|
| 196 |
logger.info(f"使用whisper二进制: {whisper_binary}")
|
| 197 |
|
|
@@ -201,92 +202,105 @@ async def transcribe_audio(request: AudioRequest):
|
|
| 201 |
logger.info(f"音频格式不直接支持,将转换为WAV: {audio_file}")
|
| 202 |
audio_file = await convert_audio_to_wav(audio_file)
|
| 203 |
|
| 204 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
cmd = [
|
| 206 |
whisper_binary,
|
| 207 |
"-m", model_path,
|
| 208 |
"-f", audio_file,
|
| 209 |
"-l", request.language or "zh",
|
| 210 |
"-oj", # --output-json: 输出JSON格式
|
| 211 |
-
"-pp",
|
| 212 |
"-t", str(os.cpu_count() or 1),
|
| 213 |
]
|
| 214 |
|
| 215 |
-
# 记录完整命令
|
| 216 |
-
# /app/build/bin/whisper-cli -m /app/models/ggml-tiny.bin -f /tmp/tmpx04yuy50_converted.wav -l zh -oj -pp -t 16
|
| 217 |
-
logger.info(f"完整命令: {' '.join(cmd)}")
|
| 218 |
-
|
| 219 |
# 添加可选参数
|
| 220 |
if request.beam_size:
|
| 221 |
cmd += ["-bs", str(request.beam_size)] # --beam-size 的简写
|
| 222 |
if request.temperature:
|
| 223 |
cmd += ["-tp", str(request.temperature)] # --temperature 的简写
|
| 224 |
|
| 225 |
-
|
| 226 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
proc = await asyncio.create_subprocess_exec(
|
| 228 |
*cmd,
|
| 229 |
stdout=asyncio.subprocess.PIPE,
|
| 230 |
stderr=asyncio.subprocess.STDOUT,
|
| 231 |
)
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
except Exception as e:
|
| 291 |
logger.error(f"转录失败: {e}")
|
| 292 |
raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
|
|
|
|
| 3 |
import base64
|
| 4 |
import tempfile
|
| 5 |
import os
|
| 6 |
+
import json
|
| 7 |
from typing import Optional
|
| 8 |
import logging
|
| 9 |
import time
|
|
|
|
| 10 |
import asyncio
|
| 11 |
+
import shutil
|
| 12 |
|
| 13 |
# 设置缓存目录
|
| 14 |
os.environ['XDG_CACHE_HOME'] = '/app/.cache'
|
|
|
|
| 181 |
|
| 182 |
@app.post("/transcribe")
|
| 183 |
async def transcribe_audio(request: AudioRequest):
|
| 184 |
+
"""音频转录API,异步调用 whisper.cpp 并返回转录结果"""
|
| 185 |
try:
|
| 186 |
logger.info(f"收到转录请求: 模型={request.model}, 语言={request.language}")
|
| 187 |
|
|
|
|
| 192 |
model_path = load_model(request.model)
|
| 193 |
logger.info(f"使用模型: {model_path}")
|
| 194 |
|
| 195 |
+
# 检查whisper.cpp二进制路径
|
| 196 |
whisper_binary = "/app/build/bin/whisper-cli"
|
| 197 |
logger.info(f"使用whisper二进制: {whisper_binary}")
|
| 198 |
|
|
|
|
| 202 |
logger.info(f"音频格式不直接支持,将转换为WAV: {audio_file}")
|
| 203 |
audio_file = await convert_audio_to_wav(audio_file)
|
| 204 |
|
| 205 |
+
# 创建临时目录用于输出
|
| 206 |
+
temp_dir = tempfile.mkdtemp()
|
| 207 |
+
output_file = os.path.join(temp_dir, "output")
|
| 208 |
+
|
| 209 |
+
# 构建命令
|
| 210 |
cmd = [
|
| 211 |
whisper_binary,
|
| 212 |
"-m", model_path,
|
| 213 |
"-f", audio_file,
|
| 214 |
"-l", request.language or "zh",
|
| 215 |
"-oj", # --output-json: 输出JSON格式
|
| 216 |
+
"-of", output_file, # 指定输出文件
|
| 217 |
"-t", str(os.cpu_count() or 1),
|
| 218 |
]
|
| 219 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
# 添加可选参数
|
| 221 |
if request.beam_size:
|
| 222 |
cmd += ["-bs", str(request.beam_size)] # --beam-size 的简写
|
| 223 |
if request.temperature:
|
| 224 |
cmd += ["-tp", str(request.temperature)] # --temperature 的简写
|
| 225 |
|
| 226 |
+
logger.info(f"完整命令: {' '.join(cmd)}")
|
| 227 |
+
|
| 228 |
+
try:
|
| 229 |
+
# 执行命令
|
| 230 |
+
start_time = time.time()
|
| 231 |
+
logger.info(f"开始执行命令: {' '.join(cmd)}")
|
| 232 |
proc = await asyncio.create_subprocess_exec(
|
| 233 |
*cmd,
|
| 234 |
stdout=asyncio.subprocess.PIPE,
|
| 235 |
stderr=asyncio.subprocess.STDOUT,
|
| 236 |
)
|
| 237 |
+
|
| 238 |
+
# 收集输出
|
| 239 |
+
stdout, _ = await proc.communicate()
|
| 240 |
+
output_text = stdout.decode() if stdout else ""
|
| 241 |
+
|
| 242 |
+
# 记录输出日志
|
| 243 |
+
for line in output_text.splitlines():
|
| 244 |
+
if line.strip():
|
| 245 |
+
logger.info(f"whisper输出: {line.strip()}")
|
| 246 |
+
|
| 247 |
+
# 检查退出码
|
| 248 |
+
exit_code = proc.returncode
|
| 249 |
+
processing_time = time.time() - start_time
|
| 250 |
+
logger.info(f"命令执行完成,退出码: {exit_code},处理时间: {processing_time:.2f}秒")
|
| 251 |
+
|
| 252 |
+
# 读取JSON输出文件
|
| 253 |
+
json_output_file = output_file + ".json"
|
| 254 |
+
result = {}
|
| 255 |
+
|
| 256 |
+
if os.path.exists(json_output_file):
|
| 257 |
+
try:
|
| 258 |
+
with open(json_output_file, 'r', encoding='utf-8') as f:
|
| 259 |
+
result = json.loads(f.read())
|
| 260 |
+
logger.info(f"成功读取JSON输出文件: {json_output_file}")
|
| 261 |
+
except Exception as e:
|
| 262 |
+
logger.error(f"读取JSON输出文件失败: {e}")
|
| 263 |
+
result = {"error": f"Failed to read JSON output: {str(e)}"}
|
| 264 |
+
else:
|
| 265 |
+
# 如果没有JSON输出,使用命令行输出
|
| 266 |
+
logger.warning(f"未找到JSON输出文件: {json_output_file}")
|
| 267 |
+
result = {
|
| 268 |
+
"text": output_text,
|
| 269 |
+
"status": "completed" if exit_code == 0 else "failed",
|
| 270 |
+
"exit_code": exit_code
|
| 271 |
+
}
|
| 272 |
+
|
| 273 |
+
# 添加处理信息
|
| 274 |
+
result["processing_info"] = {
|
| 275 |
+
"model": request.model,
|
| 276 |
+
"language": request.language,
|
| 277 |
+
"exit_code": exit_code,
|
| 278 |
+
"processing_time_seconds": round(processing_time, 2),
|
| 279 |
+
"command": " ".join(cmd)
|
| 280 |
+
}
|
| 281 |
+
|
| 282 |
+
return result
|
| 283 |
+
|
| 284 |
+
except Exception as e:
|
| 285 |
+
logger.error(f"处理过程中出错: {e}")
|
| 286 |
+
raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}")
|
| 287 |
+
finally:
|
| 288 |
+
# 清理临时文件
|
| 289 |
+
if os.path.exists(audio_file):
|
| 290 |
+
os.unlink(audio_file)
|
| 291 |
+
# 如果有转换后的文件,也要清理
|
| 292 |
+
if audio_file.endswith('_converted.wav'):
|
| 293 |
+
original_file = audio_file.replace('_converted.wav', '.m4a')
|
| 294 |
+
if os.path.exists(original_file):
|
| 295 |
+
os.unlink(original_file)
|
| 296 |
+
# 清理输出文件
|
| 297 |
+
json_output_file = output_file + ".json"
|
| 298 |
+
if os.path.exists(json_output_file):
|
| 299 |
+
os.unlink(json_output_file)
|
| 300 |
+
# 清理临时目录
|
| 301 |
+
if os.path.exists(temp_dir):
|
| 302 |
+
import shutil
|
| 303 |
+
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 304 |
except Exception as e:
|
| 305 |
logger.error(f"转录失败: {e}")
|
| 306 |
raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
|
startup.sh
CHANGED
|
@@ -26,4 +26,4 @@ fi
|
|
| 26 |
|
| 27 |
# 直接启动应用,不需要安装额外依赖
|
| 28 |
echo "Starting FastAPI application..."
|
| 29 |
-
python3 -m uvicorn fixed_app:app --host 0.0.0.0 --port 7860 --log-level info
|
|
|
|
| 26 |
|
| 27 |
# 直接启动应用,不需要安装额外依赖
|
| 28 |
echo "Starting FastAPI application..."
|
| 29 |
+
python3 -m uvicorn fixed_app:app --host 0.0.0.0 --port 7860 --log-level info
|