ozipoetra committed on
Commit
ebbd8b5
·
verified ·
1 Parent(s): b67212a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -3
app.py CHANGED
@@ -7,6 +7,7 @@ import sys
7
  import logging
8
  import re
9
  import subprocess
 
10
  from contextlib import asynccontextmanager
11
  from datetime import datetime
12
  from fastapi import FastAPI, HTTPException, Depends, Security
@@ -39,7 +40,7 @@ MODEL = stable_whisper.load_faster_whisper(
39
  'kotoba-tech/kotoba-whisper-bilingual-v1.0-faster',
40
  device='cpu',
41
  compute_type='int8',
42
- cpu_threads=2,
43
  num_workers=1,
44
  )
45
  logger.info("Model loaded successfully.")
@@ -48,6 +49,11 @@ logger.info("Model loaded successfully.")
48
  queue: asyncio.Queue = asyncio.Queue()
49
  jobs: dict = {}
50
 
 
 
 
 
 
51
  # --- 5. Lifespan (replaces deprecated @app.on_event) ---
52
  @asynccontextmanager
53
  async def lifespan(app: FastAPI):
@@ -308,6 +314,16 @@ async def process_job(job_id: str, video_url: str):
308
  # We get English output directly — no Google Translate needed for EN.
309
  jobs[job_id].update({"status": "AI Transcribing & Translating JA→EN...", "progress": 40})
310
 
 
 
 
 
 
 
 
 
 
 
311
  def process_ai():
312
  result = MODEL.transcribe(
313
  v_path,
@@ -318,12 +334,13 @@ async def process_job(job_id: str, video_url: str):
318
  # --- stable_whisper faster-whisper wrapper params ---
319
  vad=True, # Silero VAD: skip silence/music beds
320
  vad_threshold=0.35,
 
321
  # --- passed through to faster_whisper.WhisperModel.transcribe ---
322
  beam_size=2, # balanced speed/accuracy on CPU
323
  temperature=0, # deterministic decode
324
  condition_on_previous_text=False, # prevents hallucination snowballing
325
  no_speech_threshold=0.3, # catch quiet disfluencies (moans, sighs)
326
- log_prob_threshold=-2.0, # keep low-confidence disfluency tokens
327
  )
328
 
329
  # --- Anime-optimised regrouping ---
@@ -347,7 +364,8 @@ async def process_job(job_id: str, video_url: str):
347
 
348
  result.to_srt_vtt(en_vtt, segment_level=True, word_level=False)
349
 
350
- await asyncio.to_thread(process_ai)
 
351
 
352
  if not os.path.exists(en_vtt):
353
  raise RuntimeError("Transcription finished but EN VTT was not created")
 
7
  import logging
8
  import re
9
  import subprocess
10
+ from concurrent.futures import ThreadPoolExecutor
11
  from contextlib import asynccontextmanager
12
  from datetime import datetime
13
  from fastapi import FastAPI, HTTPException, Depends, Security
 
40
  'kotoba-tech/kotoba-whisper-bilingual-v1.0-faster',
41
  device='cpu',
42
  compute_type='int8',
43
+ cpu_threads=4,
44
  num_workers=1,
45
  )
46
  logger.info("Model loaded successfully.")
 
49
  queue: asyncio.Queue = asyncio.Queue()
50
  jobs: dict = {}
51
 
52
+ # Dedicated single-thread executor for Whisper.
53
+ # asyncio's default ThreadPoolExecutor can be starved by concurrent requests;
54
+ # a pinned executor guarantees Whisper always has its own OS thread.
55
+ WHISPER_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix='whisper')
56
+
57
  # --- 5. Lifespan (replaces deprecated @app.on_event) ---
58
  @asynccontextmanager
59
  async def lifespan(app: FastAPI):
 
314
  # We get English output directly — no Google Translate needed for EN.
315
  jobs[job_id].update({"status": "AI Transcribing & Translating JA→EN...", "progress": 40})
316
 
317
+ def _whisper_progress(transcribed: float, total: float):
318
+ # Update job progress between 40-58% while Whisper is running
319
+ if total > 0:
320
+ pct = 40 + int((transcribed / total) * 18)
321
+ jobs[job_id]["progress"] = min(pct, 58)
322
+ jobs[job_id]["status"] = (
323
+ f"Transcribing & Translating "
324
+ f"({int(transcribed)}s / {int(total)}s)..."
325
+ )
326
+
327
  def process_ai():
328
  result = MODEL.transcribe(
329
  v_path,
 
334
  # --- stable_whisper faster-whisper wrapper params ---
335
  vad=True, # Silero VAD: skip silence/music beds
336
  vad_threshold=0.35,
337
+ progress_callback=_whisper_progress, # live progress updates
338
  # --- passed through to faster_whisper.WhisperModel.transcribe ---
339
  beam_size=2, # balanced speed/accuracy on CPU
340
  temperature=0, # deterministic decode
341
  condition_on_previous_text=False, # prevents hallucination snowballing
342
  no_speech_threshold=0.3, # catch quiet disfluencies (moans, sighs)
343
+ log_prob_threshold=-2.0, # keep low-confidence disfluency tokens
344
  )
345
 
346
  # --- Anime-optimised regrouping ---
 
364
 
365
  result.to_srt_vtt(en_vtt, segment_level=True, word_level=False)
366
 
367
+ loop = asyncio.get_event_loop()
368
+ await loop.run_in_executor(WHISPER_EXECUTOR, process_ai)
369
 
370
  if not os.path.exists(en_vtt):
371
  raise RuntimeError("Transcription finished but EN VTT was not created")