1een committed on
Commit
1d36f6f
·
1 Parent(s): 25e95ed
Files changed (2) hide show
  1. fixed_app.py +83 -69
  2. startup.sh +1 -1
fixed_app.py CHANGED
@@ -3,11 +3,12 @@ from pydantic import BaseModel
3
  import base64
4
  import tempfile
5
  import os
 
6
  from typing import Optional
7
  import logging
8
  import time
9
- from fastapi.responses import StreamingResponse
10
  import asyncio
 
11
 
12
  # 设置缓存目录
13
  os.environ['XDG_CACHE_HOME'] = '/app/.cache'
@@ -180,7 +181,7 @@ def decode_audio(audio_base64: str) -> str:
180
 
181
  @app.post("/transcribe")
182
  async def transcribe_audio(request: AudioRequest):
183
- """音频转录API,异步调用 whisper.cpp 并流式返回分段结果"""
184
  try:
185
  logger.info(f"收到转录请求: 模型={request.model}, 语言={request.language}")
186
 
@@ -191,7 +192,7 @@ async def transcribe_audio(request: AudioRequest):
191
  model_path = load_model(request.model)
192
  logger.info(f"使用模型: {model_path}")
193
 
194
- # 检查whisper.cpp二进制路径,优先使用新的whisper-cli
195
  whisper_binary = "/app/build/bin/whisper-cli"
196
  logger.info(f"使用whisper二进制: {whisper_binary}")
197
 
@@ -201,92 +202,105 @@ async def transcribe_audio(request: AudioRequest):
201
  logger.info(f"音频格式不直接支持,将转换为WAV: {audio_file}")
202
  audio_file = await convert_audio_to_wav(audio_file)
203
 
204
- # 新的whisper-cli命令格式 - 不输出到文件,直接输出到stdout
 
 
 
 
205
  cmd = [
206
  whisper_binary,
207
  "-m", model_path,
208
  "-f", audio_file,
209
  "-l", request.language or "zh",
210
  "-oj", # --output-json: 输出JSON格式
211
- "-pp", # --print-progress: 显示进度
212
  "-t", str(os.cpu_count() or 1),
213
  ]
214
 
215
- # 记录完整命令
216
- # /app/build/bin/whisper-cli -m /app/models/ggml-tiny.bin -f /tmp/tmpx04yuy50_converted.wav -l zh -oj -pp -t 16
217
- logger.info(f"完整命令: {' '.join(cmd)}")
218
-
219
  # 添加可选参数
220
  if request.beam_size:
221
  cmd += ["-bs", str(request.beam_size)] # --beam-size 的简写
222
  if request.temperature:
223
  cmd += ["-tp", str(request.temperature)] # --temperature 的简写
224
 
225
- async def event_stream():
226
-
 
 
 
 
227
  proc = await asyncio.create_subprocess_exec(
228
  *cmd,
229
  stdout=asyncio.subprocess.PIPE,
230
  stderr=asyncio.subprocess.STDOUT,
231
  )
232
- try:
233
-
234
- output_buffer = []
235
- async for line in proc.stdout:
236
- decoded_line = line.decode().strip()
237
- if decoded_line: # 只处理非空行
238
- logger.info(f"whisper输出: {decoded_line}")
239
- output_buffer.append(decoded_line)
240
-
241
- # 如果是JSON格式,作为SSE事件发送
242
- if decoded_line.startswith("{") and decoded_line.endswith("}"):
243
- yield f"data: {decoded_line}\n\n"
244
- # 如果包含进度信息,发送进度更新
245
- elif any(keyword in decoded_line.lower() for keyword in ["progress", "%", "processing"]):
246
- yield f"data: {{\"progress\": \"{decoded_line}\"}}\n\n"
247
- # 发送所有输出作为调试信息
248
- else:
249
- yield f"data: {{\"debug\": \"{decoded_line}\"}}\n\n"
250
-
251
- # 等待进程完成
252
- exit_code = await proc.wait()
253
- logger.info(f"命令执行完成,退出码: {exit_code}")
254
-
255
- # 如果没有JSON输出但有其他输出,尝试读取输出文件
256
- if output_buffer and not any(line.startswith("{") for line in output_buffer):
257
- # 尝试读取whisper-cli生成的JSON文件
258
- json_output_file = audio_file + ".json"
259
- if os.path.exists(json_output_file):
260
- try:
261
- with open(json_output_file, 'r', encoding='utf-8') as f:
262
- json_content = f.read()
263
- yield f"data: {json_content}\n\n"
264
- # 删除输出文件
265
- os.unlink(json_output_file)
266
- except Exception as e:
267
- logger.error(f"读取JSON输出文件失败: {e}")
268
-
269
- # 将所有输出作为文本结果返回
270
- text_result = "\n".join(output_buffer)
271
- yield f"data: {{\"text\": \"{text_result}\", \"type\": \"text_output\"}}\n\n"
272
-
273
- # 发送结束信号
274
- yield f"data: {{\"status\": \"completed\", \"exit_code\": {exit_code}, \"total_lines\": {len(output_buffer)}}}\n\n"
275
-
276
- except Exception as e:
277
- logger.error(f"处理过程中出错: {e}")
278
- yield f"data: {{\"error\": \"{str(e)}\", \"status\": \"error\"}}\n\n"
279
- finally:
280
- # 清临时文件
281
- if os.path.exists(audio_file):
282
- os.unlink(audio_file)
283
- # 如果有转换后的文件,也要清理
284
- if audio_file.endswith('_converted.wav'):
285
- original_file = audio_file.replace('_converted.wav', '.m4a')
286
- if os.path.exists(original_file):
287
- os.unlink(original_file)
288
-
289
- return StreamingResponse(event_stream(), media_type="text/event-stream")
 
 
 
 
 
 
 
 
 
290
  except Exception as e:
291
  logger.error(f"转录失败: {e}")
292
  raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
 
3
  import base64
4
  import tempfile
5
  import os
6
+ import json
7
  from typing import Optional
8
  import logging
9
  import time
 
10
  import asyncio
11
+ import shutil
12
 
13
  # 设置缓存目录
14
  os.environ['XDG_CACHE_HOME'] = '/app/.cache'
 
181
 
182
  @app.post("/transcribe")
183
  async def transcribe_audio(request: AudioRequest):
184
+ """音频转录API,异步调用 whisper.cpp 并返回转录结果"""
185
  try:
186
  logger.info(f"收到转录请求: 模型={request.model}, 语言={request.language}")
187
 
 
192
  model_path = load_model(request.model)
193
  logger.info(f"使用模型: {model_path}")
194
 
195
+ # 检查whisper.cpp二进制路径
196
  whisper_binary = "/app/build/bin/whisper-cli"
197
  logger.info(f"使用whisper二进制: {whisper_binary}")
198
 
 
202
  logger.info(f"音频格式不直接支持,将转换为WAV: {audio_file}")
203
  audio_file = await convert_audio_to_wav(audio_file)
204
 
205
+ # 创建临时目录用于输出
206
+ temp_dir = tempfile.mkdtemp()
207
+ output_file = os.path.join(temp_dir, "output")
208
+
209
+ # 构建命令
210
  cmd = [
211
  whisper_binary,
212
  "-m", model_path,
213
  "-f", audio_file,
214
  "-l", request.language or "zh",
215
  "-oj", # --output-json: 输出JSON格式
216
+ "-of", output_file, # 指定输出文件
217
  "-t", str(os.cpu_count() or 1),
218
  ]
219
 
 
 
 
 
220
  # 添加可选参数
221
  if request.beam_size:
222
  cmd += ["-bs", str(request.beam_size)] # --beam-size 的简写
223
  if request.temperature:
224
  cmd += ["-tp", str(request.temperature)] # --temperature 的简写
225
 
226
+ logger.info(f"完整命令: {' '.join(cmd)}")
227
+
228
+ try:
229
+ # 执行命令
230
+ start_time = time.time()
231
+ logger.info(f"开始执行命令: {' '.join(cmd)}")
232
  proc = await asyncio.create_subprocess_exec(
233
  *cmd,
234
  stdout=asyncio.subprocess.PIPE,
235
  stderr=asyncio.subprocess.STDOUT,
236
  )
237
+
238
+ # 收集输出
239
+ stdout, _ = await proc.communicate()
240
+ output_text = stdout.decode() if stdout else ""
241
+
242
+ # 记录输出日志
243
+ for line in output_text.splitlines():
244
+ if line.strip():
245
+ logger.info(f"whisper输出: {line.strip()}")
246
+
247
+ # 检查退出码
248
+ exit_code = proc.returncode
249
+ processing_time = time.time() - start_time
250
+ logger.info(f"命令执行完成,退出码: {exit_code},处理时间: {processing_time:.2f}秒")
251
+
252
+ # 读取JSON输出文件
253
+ json_output_file = output_file + ".json"
254
+ result = {}
255
+
256
+ if os.path.exists(json_output_file):
257
+ try:
258
+ with open(json_output_file, 'r', encoding='utf-8') as f:
259
+ result = json.loads(f.read())
260
+ logger.info(f"成功读取JSON输出文件: {json_output_file}")
261
+ except Exception as e:
262
+ logger.error(f"读取JSON输出文件失败: {e}")
263
+ result = {"error": f"Failed to read JSON output: {str(e)}"}
264
+ else:
265
+ # 如果没有JSON输出,使用命令行输出
266
+ logger.warning(f"未找到JSON输出文件: {json_output_file}")
267
+ result = {
268
+ "text": output_text,
269
+ "status": "completed" if exit_code == 0 else "failed",
270
+ "exit_code": exit_code
271
+ }
272
+
273
+ # 添加处理信息
274
+ result["processing_info"] = {
275
+ "model": request.model,
276
+ "language": request.language,
277
+ "exit_code": exit_code,
278
+ "processing_time_seconds": round(processing_time, 2),
279
+ "command": " ".join(cmd)
280
+ }
281
+
282
+ return result
283
+
284
+ except Exception as e:
285
+ logger.error(f"处过程中出错: {e}")
286
+ raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}")
287
+ finally:
288
+ # 清理临时文件
289
+ if os.path.exists(audio_file):
290
+ os.unlink(audio_file)
291
+ # 如果有转换后的文件,也要清理
292
+ if audio_file.endswith('_converted.wav'):
293
+ original_file = audio_file.replace('_converted.wav', '.m4a')
294
+ if os.path.exists(original_file):
295
+ os.unlink(original_file)
296
+ # 清理输出文件
297
+ json_output_file = output_file + ".json"
298
+ if os.path.exists(json_output_file):
299
+ os.unlink(json_output_file)
300
+ # 清理临时目录
301
+ if os.path.exists(temp_dir):
302
+ import shutil
303
+ shutil.rmtree(temp_dir, ignore_errors=True)
304
  except Exception as e:
305
  logger.error(f"转录失败: {e}")
306
  raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
startup.sh CHANGED
@@ -26,4 +26,4 @@ fi
26
 
27
  # 直接启动应用,不需要安装额外依赖
28
  echo "Starting FastAPI application..."
29
- python3 -m uvicorn fixed_app:app --host 0.0.0.0 --port 7860 --log-level info || python3 -m uvicorn simple_app:app --host 0.0.0.0 --port 7860 --log-level info
 
26
 
27
  # 直接启动应用,不需要安装额外依赖
28
  echo "Starting FastAPI application..."
29
+ python3 -m uvicorn fixed_app:app --host 0.0.0.0 --port 7860 --log-level info