Spaces:

ozipoetra
/

whispering

Sleeping

App Files Community

ozipoetra committed on Apr 14

Commit

ebbd8b5

verified ·

1 Parent(s): b67212a

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -3

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ import sys
 import logging
 import re
 import subprocess
 from contextlib import asynccontextmanager
 from datetime import datetime
 from fastapi import FastAPI, HTTPException, Depends, Security
@@ -39,7 +40,7 @@ MODEL = stable_whisper.load_faster_whisper(
     'kotoba-tech/kotoba-whisper-bilingual-v1.0-faster',
     device='cpu',
     compute_type='int8',
-    cpu_threads=2,
     num_workers=1,
 )
 logger.info("Model loaded successfully.")
@@ -48,6 +49,11 @@ logger.info("Model loaded successfully.")
 queue: asyncio.Queue = asyncio.Queue()
 jobs: dict = {}
 # --- 5. Lifespan (replaces deprecated @app.on_event) ---
 @asynccontextmanager
 async def lifespan(app: FastAPI):
@@ -308,6 +314,16 @@ async def process_job(job_id: str, video_url: str):
         # We get English output directly — no Google Translate needed for EN.
         jobs[job_id].update({"status": "AI Transcribing & Translating JA→EN...", "progress": 40})
         def process_ai():
             result = MODEL.transcribe(
                 v_path,
@@ -318,12 +334,13 @@ async def process_job(job_id: str, video_url: str):
                 # --- stable_whisper faster-whisper wrapper params ---
                 vad=True,                   # Silero VAD: skip silence/music beds
                 vad_threshold=0.35,
                 # --- passed through to faster_whisper.WhisperModel.transcribe ---
                 beam_size=2,                    # balanced speed/accuracy on CPU
                 temperature=0,                  # deterministic decode
                 condition_on_previous_text=False,  # prevents hallucination snowballing
                 no_speech_threshold=0.3,        # catch quiet disfluencies (moans, sighs)
-                log_prob_threshold=-2.0,         # keep low-confidence disfluency tokens
             )
             # --- Anime-optimised regrouping ---
@@ -347,7 +364,8 @@ async def process_job(job_id: str, video_url: str):
             result.to_srt_vtt(en_vtt, segment_level=True, word_level=False)
-        await asyncio.to_thread(process_ai)
         if not os.path.exists(en_vtt):
             raise RuntimeError("Transcription finished but EN VTT was not created")

 import logging
 import re
 import subprocess
+from concurrent.futures import ThreadPoolExecutor
 from contextlib import asynccontextmanager
 from datetime import datetime
 from fastapi import FastAPI, HTTPException, Depends, Security
     'kotoba-tech/kotoba-whisper-bilingual-v1.0-faster',
     device='cpu',
     compute_type='int8',
+    cpu_threads=4,
     num_workers=1,
 )
 logger.info("Model loaded successfully.")
 queue: asyncio.Queue = asyncio.Queue()
 jobs: dict = {}
+# Dedicated single-thread executor for Whisper.
+# asyncio's default ThreadPoolExecutor can be starved by concurrent requests;
+# a pinned executor guarantees Whisper always has its own OS thread.
+WHISPER_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix='whisper')
 # --- 5. Lifespan (replaces deprecated @app.on_event) ---
 @asynccontextmanager
 async def lifespan(app: FastAPI):
         # We get English output directly — no Google Translate needed for EN.
         jobs[job_id].update({"status": "AI Transcribing & Translating JA→EN...", "progress": 40})
+        def _whisper_progress(transcribed: float, total: float):
+            # Update job progress between 40-58% while Whisper is running
+            if total > 0:
+                pct = 40 + int((transcribed / total) * 18)
+                jobs[job_id]["progress"] = min(pct, 58)
+                jobs[job_id]["status"] = (
+                    f"Transcribing & Translating "
+                    f"({int(transcribed)}s / {int(total)}s)..."
+                )
         def process_ai():
             result = MODEL.transcribe(
                 v_path,
                 # --- stable_whisper faster-whisper wrapper params ---
                 vad=True,                   # Silero VAD: skip silence/music beds
                 vad_threshold=0.35,
+                progress_callback=_whisper_progress,  # live progress updates
                 # --- passed through to faster_whisper.WhisperModel.transcribe ---
                 beam_size=2,                    # balanced speed/accuracy on CPU
                 temperature=0,                  # deterministic decode
                 condition_on_previous_text=False,  # prevents hallucination snowballing
                 no_speech_threshold=0.3,        # catch quiet disfluencies (moans, sighs)
+                log_prob_threshold=-2.0,        # keep low-confidence disfluency tokens
             )
             # --- Anime-optimised regrouping ---
             result.to_srt_vtt(en_vtt, segment_level=True, word_level=False)
+        loop = asyncio.get_event_loop()
+        await loop.run_in_executor(WHISPER_EXECUTOR, process_ai)
         if not os.path.exists(en_vtt):
             raise RuntimeError("Transcription finished but EN VTT was not created")