bichnhan2701 commited on
Commit
c816b75
·
1 Parent(s): 7be097f

Update api transcribe

Browse files
Files changed (1) hide show
  1. app/api/transcribe.py +157 -217
app/api/transcribe.py CHANGED
@@ -1,289 +1,229 @@
1
  import os
2
- import logging
3
  import uuid
 
4
  import asyncio
5
- from fastapi import APIRouter, UploadFile, File, HTTPException, status
6
- from fastapi.responses import JSONResponse
7
  from pathlib import Path
8
- from typing import Optional
9
- import time
10
- from app.core.audio_utils import (
11
- save_upload_file,
12
- get_audio_info,
13
- ensure_wav_16k_mono,
14
- make_temp_path,
15
- download_file_from_url
16
- )
17
- from app.core.asr_engine import (
18
- load_model,
19
- transcribe_file,
20
- transcribe_file_chunks
21
- )
22
- from app.config import settings
23
- from app.services.note_client import NoteServiceClient
24
  from rq import Queue
 
 
25
  from app.infra.redis_client import redis_client
26
- from app.jobs.transcribe_job import transcribe_job
27
  from app.schemas.transcribe import TranscribeResponse
28
- from app.infra.metrics import (
29
- REQUEST_COUNT,
30
- REQUEST_LATENCY,
31
- ASR_DURATION,
 
 
 
 
 
 
 
 
 
 
 
32
  )
33
 
34
  router = APIRouter()
35
  ASR_MODEL = None
 
36
 
 
 
 
 
37
  @router.on_event("startup")
38
- async def _startup():
39
  global ASR_MODEL
40
- # load model in thread to avoid blocking event loop
41
  ASR_MODEL = await asyncio.to_thread(load_model, 30)
42
 
 
 
 
 
43
  def _ensure_file_limits(path: str):
44
  if os.path.getsize(path) > settings.MAX_UPLOAD_BYTES:
45
- raise HTTPException(
46
- status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
47
- detail="File size exceeds limit",
48
- )
49
  info = get_audio_info(path)
50
  if info and info.get("duration", 0) > settings.MAX_DURATION_SECS:
51
- raise HTTPException(
52
- status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
53
- detail="Audio duration exceeds limit",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  )
55
-
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  @router.post("/transcribe", response_model=TranscribeResponse)
58
  async def transcribe(file: UploadFile = File(...)):
59
- tmp_in = make_temp_path(suffix=Path(file.filename).suffix or ".wav")
60
- tmp_wav = None
61
- note_service = NoteServiceClient()
62
  note_id = str(uuid.uuid4())
63
 
64
- start_time = time.perf_counter()
65
- endpoint = "/transcribe"
66
- status_label = "success"
67
 
68
  with REQUEST_LATENCY.labels(endpoint).time():
69
  try:
70
- # write upload to tmp (blocking) -> run in thread
71
  await asyncio.to_thread(save_upload_file, file, tmp_in)
72
-
73
  _ensure_file_limits(tmp_in)
74
 
 
75
  tmp_wav = make_temp_path(suffix=".wav")
76
- # ffmpeg convert is blocking -> run in thread
77
  await asyncio.to_thread(ensure_wav_16k_mono, tmp_in, tmp_wav)
78
 
79
- # Kiểm tra duration để quyết định xử lý sync hay async
80
  info = get_audio_info(tmp_wav) or {}
81
- duration_sec = info.get("duration", 0)
82
- ASYNC_THRESHOLD = 120 # 2 phút, có thể chỉnh
83
- # ---------- ASYNC JOB ----------
84
- if duration_sec > ASYNC_THRESHOLD:
85
- # Enqueue background job bằng RQ
86
- q = Queue("asr", connection=redis_client)
87
- job = q.enqueue(
88
- transcribe_job,
89
- tmp_wav,
90
- note_id,
91
- job_timeout=1800
92
- )
93
- logging.info(f"Enqueued background transcribe job: note_id={note_id} job_id={job.id} duration={duration_sec:.1f}s")
94
  REQUEST_COUNT.labels(endpoint, "queued").inc()
95
- return JSONResponse(status_code=202, content={
96
- "note_id": note_id,
97
- "job_id": job.id,
98
- "status": "queued",
99
- "duration": duration_sec
100
- })
101
- # ---------- SYNC PIPELINE ----------
102
- # Nếu audio ngắn, xử lý sync như cũ
103
- model = ASR_MODEL or await asyncio.to_thread(load_model, 30)
104
- with ASR_DURATION.labels(endpoint).time():
105
- text = await asyncio.to_thread(transcribe_file, model, tmp_wav, 30.0, 5.0)
106
- chunks = await asyncio.to_thread(transcribe_file_chunks, model, tmp_wav, 30.0, 5.0)
107
- # 🔥 DROP invalid chunks
108
- chunks = [
109
- c for c in chunks
110
- if c.get("text", "").strip() and c.get("end", 0) > c.get("start", 0)
111
- ]
112
- note_status = "transcribed" if chunks and any(c.get("text", "").strip() for c in chunks) else "error"
113
-
114
- info2 = get_audio_info(tmp_wav) or {}
115
- # persist to Note Service (async HTTP)
116
- payload = {
117
- "note_id": note_id,
118
- "type": "audio",
119
- "status": note_status,
120
- "raw_text": text,
121
- "metadata": {
122
- "audio": {
123
- "duration": info2.get("duration"),
124
- "sample_rate": info2.get("samplerate"),
125
- "chunks": chunks,
126
- "asr_model": "PhoWhisper-base"
127
- }
128
- },
129
- "generate": ["normalize", "keywords", "summary", "mindmap"]
130
- }
131
- logging.info(
132
- "Create audio note note_id=%s status=%s chunks=%d text_len=%d",
133
- note_id,
134
- note_status,
135
- len(chunks) if chunks else 0,
136
- len(text or ""),
137
- )
138
- await note_service.create_audio_note(payload)
139
-
140
- duration = time.perf_counter() - start_time
141
- logging.info(f"/transcribe success note_id={note_id} duration={duration:.2f}s audio_dur={info2.get('duration')}")
142
- REQUEST_COUNT.labels(endpoint, status_label).inc()
143
- return JSONResponse(
144
- status_code=200,
145
- content={
146
- "note_id": note_id,
147
- "status": note_status,
148
- "duration": info2.get("duration"),
149
- },
150
- )
151
-
152
  finally:
153
- # cleanup
154
- for p in [tmp_in, tmp_wav]:
155
- try:
156
- if p and os.path.exists(p):
157
- os.remove(p)
158
- except Exception:
159
- pass
160
 
 
 
 
 
161
  @router.post("/transcribe-url", response_model=TranscribeResponse)
162
  async def transcribe_url(payload: dict):
 
 
 
163
  audio_url = payload.get("audio_url")
164
  user_id = payload.get("user_id")
165
 
166
  if not audio_url:
167
- raise HTTPException(status_code=400, detail="audio_url required")
168
- if not user_id:
169
- raise HTTPException(status_code=400, detail="user_id required")
170
 
171
- tmp_in = make_temp_path(suffix=Path(audio_url).suffix or ".tmp")
172
- tmp_wav = None
173
  note_id = str(uuid.uuid4())
174
- note_service = NoteServiceClient()
175
 
176
- endpoint = "/transcribe-url"
177
- start_time = time.perf_counter()
178
- status_label = "success"
179
 
180
  with REQUEST_LATENCY.labels(endpoint).time():
181
  try:
182
- # 1. Download from Cloudinary (blocking)
183
  await asyncio.to_thread(download_file_from_url, audio_url, tmp_in)
184
-
185
- # 2. File & duration limits
186
  _ensure_file_limits(tmp_in)
187
 
188
- # 3. Convert to wav 16k mono
189
  tmp_wav = make_temp_path(suffix=".wav")
190
  await asyncio.to_thread(ensure_wav_16k_mono, tmp_in, tmp_wav)
191
 
192
- # 4. Check duration for sync / async
193
  info = get_audio_info(tmp_wav) or {}
194
- duration_sec = info.get("duration", 0)
195
- ASYNC_THRESHOLD = 120 # seconds
196
-
197
- # ---------- ASYNC JOB ----------
198
- if duration_sec > ASYNC_THRESHOLD:
199
- q = Queue("asr", connection=redis_client)
200
- job = q.enqueue(
201
- transcribe_job,
202
- tmp_wav,
203
- note_id,
204
- job_timeout=1800,
205
- )
206
 
207
- logging.info(
208
- f"/transcribe-url queued note_id={note_id} "
209
- f"job_id={job.id} duration={duration_sec:.1f}s"
210
- )
211
- REQUEST_COUNT.labels(endpoint, "queued").inc()
212
 
 
213
  return JSONResponse(
214
  status_code=202,
215
  content={
216
  "note_id": note_id,
217
  "job_id": job.id,
218
  "status": "queued",
219
- "duration": duration_sec,
220
  },
221
  )
222
 
223
- # ---------- SYNC PIPELINE ----------
224
- model = ASR_MODEL or await asyncio.to_thread(load_model, 30)
225
-
226
- with ASR_DURATION.labels(endpoint).time():
227
- text = await asyncio.to_thread(
228
- transcribe_file, model, tmp_wav, 30.0, 5.0
229
- )
230
- chunks = await asyncio.to_thread(
231
- transcribe_file_chunks, model, tmp_wav, 30.0, 5.0
232
- )
233
- # 🔥 DROP invalid chunks
234
- chunks = [
235
- c for c in chunks
236
- if c.get("text", "").strip() and c.get("end", 0) > c.get("start", 0)
237
- ]
238
-
239
- note_status = "transcribed" if chunks and any(c.get("text", "").strip() for c in chunks) else "error"
240
-
241
- # 5. Persist to Note Service
242
- payload = {
243
- "note_id": note_id,
244
- "type": "audio",
245
- "status": note_status,
246
- "raw_text": text,
247
- "metadata": {
248
- "audio": {
249
- "duration": info.get("duration"),
250
- "sample_rate": info.get("samplerate"),
251
- "chunks": chunks,
252
- "asr_model": "PhoWhisper-base"
253
- }
254
- },
255
- "generate": ["normalize", "keywords", "summary", "mindmap"]
256
- }
257
 
258
- logging.info(
259
- "Create audio note note_id=%s status=%s chunks=%d text_len=%d",
260
- note_id,
261
- note_status,
262
- len(chunks) if chunks else 0,
263
- len(text or ""),
264
- )
265
- await note_service.create_audio_note(payload)
266
-
267
- duration = time.perf_counter() - start_time
268
- logging.info(
269
- f"/transcribe-url success note_id={note_id} "
270
- f"duration={duration:.2f}s audio_dur={info.get('duration')}"
271
- )
272
-
273
- REQUEST_COUNT.labels(endpoint, status_label).inc()
274
- return JSONResponse(
275
- status_code=200,
276
- content={
277
- "note_id": note_id,
278
- "status": note_status,
279
- "duration": info.get("duration"),
280
- },
281
- )
282
 
283
  finally:
284
- for p in [tmp_in, tmp_wav]:
285
- try:
286
- if p and os.path.exists(p):
287
- os.remove(p)
288
- except Exception:
289
- pass
 
1
  import os
 
2
  import uuid
3
+ import time
4
  import asyncio
5
+ import logging
 
6
  from pathlib import Path
7
+
8
+ from fastapi import APIRouter, UploadFile, File, HTTPException
9
+ from fastapi.responses import JSONResponse
10
+
 
 
 
 
 
 
 
 
 
 
 
 
11
  from rq import Queue
12
+
13
+ from app.config import settings
14
  from app.infra.redis_client import redis_client
15
+ from app.infra.metrics import REQUEST_COUNT, REQUEST_LATENCY, ASR_DURATION
16
  from app.schemas.transcribe import TranscribeResponse
17
+ from app.services.note_client import NoteServiceClient
18
+ from app.jobs.transcribe_job import transcribe_job
19
+
20
+ from app.core.audio_utils import (
21
+ save_upload_file,
22
+ download_file_from_url,
23
+ ensure_wav_16k_mono,
24
+ make_temp_path,
25
+ get_audio_info,
26
+ upload_temp_audio,
27
+ )
28
+ from app.core.asr_engine import (
29
+ load_model,
30
+ transcribe_file,
31
+ transcribe_file_chunks,
32
  )
33
 
34
router = APIRouter()

# Loaded once by the startup hook; stays None until that hook has run.
ASR_MODEL = None

# Audio longer than this is handed to a background RQ worker instead of
# being transcribed synchronously inside the request handler.
ASYNC_THRESHOLD = 120  # seconds
37
 
38
+
39
+ # ============================================================
40
+ # Startup: load ASR model once
41
+ # ============================================================
42
# NOTE(review): @router.on_event is deprecated in recent FastAPI releases in
# favor of lifespan handlers — confirm the pinned FastAPI version before
# migrating.
@router.on_event("startup")
async def startup():
    """Load the ASR model once when the application starts."""
    global ASR_MODEL
    # load_model does blocking disk I/O, so run it in a worker thread to keep
    # the event loop responsive. The argument 30 is presumably the chunk
    # length in seconds — TODO confirm against load_model's signature.
    ASR_MODEL = await asyncio.to_thread(load_model, 30)
46
 
47
+
48
+ # ============================================================
49
+ # Utils
50
+ # ============================================================
51
def _ensure_file_limits(path: str) -> None:
    """Validate an audio file against the configured upload limits.

    Args:
        path: Filesystem path of the file to check.

    Raises:
        HTTPException: 413 when the file's byte size exceeds
            settings.MAX_UPLOAD_BYTES, or when its reported audio duration
            exceeds settings.MAX_DURATION_SECS.
    """
    size_bytes = os.path.getsize(path)
    if size_bytes > settings.MAX_UPLOAD_BYTES:
        raise HTTPException(413, "File size exceeds limit")

    # Duration is only enforced when probing succeeds; a missing/unreadable
    # duration does not reject the file.
    probe = get_audio_info(path)
    duration = probe.get("duration", 0) if probe else 0
    if duration > settings.MAX_DURATION_SECS:
        raise HTTPException(413, "Audio duration exceeds limit")
58
+
59
+
60
async def _run_sync_pipeline(
    tmp_wav: str,
    note_id: str,
    endpoint: str = "/transcribe",
):
    """
    Transcribe *tmp_wav* synchronously and persist the note to the Note Service.

    Args:
        tmp_wav: Path to a 16 kHz mono WAV file.
        note_id: Identifier of the note being created.
        endpoint: Label for the ASR_DURATION metric. Defaults to "/transcribe"
            for backward compatibility; other routes should pass their own
            path so per-route ASR timings are not conflated.

    Returns:
        dict with "note_id", "status" ("transcribed" or "error") and the
        audio "duration" in seconds (None if probing failed).
    """
    note_service = NoteServiceClient()
    info = get_audio_info(tmp_wav) or {}

    # Fall back to loading on demand if the startup hook has not populated
    # the module-level model yet (e.g. router mounted without startup events).
    model = ASR_MODEL or await asyncio.to_thread(load_model, 30)

    with ASR_DURATION.labels(endpoint).time():
        text = await asyncio.to_thread(
            transcribe_file, model, tmp_wav, 30.0, 5.0
        )
        chunks = await asyncio.to_thread(
            transcribe_file_chunks, model, tmp_wav, 30.0, 5.0
        )

    # Drop chunks with no text or a non-positive time span (end <= start);
    # the previous revision applied the end > start filter and downstream
    # alignment relies on it.
    chunks = [
        c for c in chunks
        if c.get("text", "").strip() and c.get("end", 0) > c.get("start", 0)
    ]
    status = "transcribed" if chunks else "error"

    payload = {
        "note_id": note_id,
        "type": "audio",
        "status": status,
        "raw_text": text,
        "metadata": {
            "audio": {
                "duration": info.get("duration"),
                "sample_rate": info.get("samplerate"),
                "chunks": chunks,
                "asr_model": "PhoWhisper-base",
            }
        },
        "generate": ["normalize", "keywords", "summary", "mindmap"],
    }

    await note_service.create_audio_note(payload)

    return {
        "note_id": note_id,
        "status": status,
        "duration": info.get("duration"),
    }
105
+
106
+
107
def _enqueue_async_job(audio_url: str, note_id: str, user_id: str | None = None):
    """Queue a background transcription job on the 'asr' RQ queue.

    Args:
        audio_url: URL the worker downloads the audio from.
        note_id: Note identifier the transcript will be attached to.
        user_id: Optional owner of the note.

    Returns:
        The enqueued RQ job.
    """
    queue = Queue("asr", connection=redis_client)
    return queue.enqueue(
        transcribe_job,
        audio_url,
        note_id,
        user_id,
        job_timeout=1800,
    )
117
+
118
+
119
+ # ============================================================
120
+ # POST /transcribe (UPLOAD FILE)
121
+ # ============================================================
122
@router.post("/transcribe", response_model=TranscribeResponse)
async def transcribe(file: UploadFile = File(...)):
    """
    Transcribe an uploaded audio file.

    Audio at or below ASYNC_THRESHOLD seconds is transcribed synchronously
    and the note is persisted before returning (200). Longer audio is
    uploaded to temporary storage and handed to a background RQ worker; a
    202 response with the job id is returned immediately.

    Raises:
        HTTPException: 413 when the upload exceeds the configured
            size/duration limits.
    """
    endpoint = "/transcribe"
    start = time.perf_counter()
    note_id = str(uuid.uuid4())

    # file.filename may be None (multipart clients are not required to send
    # one) — guard before handing it to Path.
    suffix = Path(file.filename).suffix if file.filename else ""
    tmp_in = make_temp_path(suffix=suffix or ".tmp")
    tmp_wav = None

    with REQUEST_LATENCY.labels(endpoint).time():
        try:
            # 1. Persist the upload (blocking I/O -> worker thread).
            await asyncio.to_thread(save_upload_file, file, tmp_in)
            _ensure_file_limits(tmp_in)

            # 2. Normalize to 16 kHz mono WAV (ffmpeg, blocking).
            tmp_wav = make_temp_path(suffix=".wav")
            await asyncio.to_thread(ensure_wav_16k_mono, tmp_in, tmp_wav)

            info = get_audio_info(tmp_wav) or {}
            duration = info.get("duration", 0)

            # ---------- ASYNC: long audio goes to a background worker ------
            if duration > ASYNC_THRESHOLD:
                # The local temp file is deleted in `finally`, so hand the
                # worker a durable URL instead of a local path.
                audio_url = await asyncio.to_thread(upload_temp_audio, tmp_wav)
                job = _enqueue_async_job(audio_url, note_id)

                logging.info(
                    "/transcribe queued note_id=%s job_id=%s duration=%.1fs",
                    note_id, job.id, duration,
                )
                REQUEST_COUNT.labels(endpoint, "queued").inc()
                return JSONResponse(
                    status_code=202,
                    content={
                        "note_id": note_id,
                        "job_id": job.id,
                        "status": "queued",
                        "duration": duration,
                    },
                )

            # ---------- SYNC: transcribe inline ----------
            result = await _run_sync_pipeline(tmp_wav, note_id)

            logging.info(
                "/transcribe success note_id=%s elapsed=%.2fs",
                note_id, time.perf_counter() - start,
            )
            REQUEST_COUNT.labels(endpoint, "success").inc()
            return result

        except Exception:
            # Count failures too, so queued/success/error metrics add up.
            REQUEST_COUNT.labels(endpoint, "error").inc()
            raise

        finally:
            # Best-effort cleanup; never let a removal race mask the response.
            for p in (tmp_in, tmp_wav):
                try:
                    if p and os.path.exists(p):
                        os.remove(p)
                except OSError:
                    pass
 
 
 
 
170
 
171
+
172
+ # ============================================================
173
+ # POST /transcribe-url (FULL LOGIC, same as /transcribe)
174
+ # ============================================================
175
@router.post("/transcribe-url", response_model=TranscribeResponse)
async def transcribe_url(payload: dict):
    """
    Transcribe audio fetched from a URL.

    Payload: {"audio_url": str (required), "user_id": str (optional)}.
    Same sync/async split as /transcribe; for long audio the ORIGINAL URL is
    handed to the background job so the worker re-downloads it itself.

    Raises:
        HTTPException: 400 when audio_url is missing; 413 when the file
            exceeds the configured size/duration limits.
    """
    endpoint = "/transcribe-url"
    start = time.perf_counter()

    audio_url = payload.get("audio_url")
    user_id = payload.get("user_id")

    if not audio_url:
        raise HTTPException(400, "audio_url required")
    # NOTE(review): user_id is optional here but was required in an earlier
    # revision — confirm downstream consumers tolerate a missing user_id.

    note_id = str(uuid.uuid4())

    # Derive the temp-file suffix from the URL *path* so query strings
    # (e.g. ".mp3?token=...") don't leak into the filename.
    from urllib.parse import urlparse
    url_path = urlparse(audio_url).path
    tmp_in = make_temp_path(suffix=Path(url_path).suffix or ".tmp")
    tmp_wav = None

    with REQUEST_LATENCY.labels(endpoint).time():
        try:
            # 1. Download the source audio (blocking -> worker thread).
            await asyncio.to_thread(download_file_from_url, audio_url, tmp_in)
            _ensure_file_limits(tmp_in)

            # 2. Normalize to 16 kHz mono WAV (ffmpeg, blocking).
            tmp_wav = make_temp_path(suffix=".wav")
            await asyncio.to_thread(ensure_wav_16k_mono, tmp_in, tmp_wav)

            info = get_audio_info(tmp_wav) or {}
            duration = info.get("duration", 0)

            # ---------- ASYNC: long audio goes to a background worker ------
            if duration > ASYNC_THRESHOLD:
                # Use the original URL; the local temp file is removed below.
                job = _enqueue_async_job(audio_url, note_id, user_id)

                logging.info(
                    "/transcribe-url queued note_id=%s job_id=%s duration=%.1fs",
                    note_id, job.id, duration,
                )
                REQUEST_COUNT.labels(endpoint, "queued").inc()
                return JSONResponse(
                    status_code=202,
                    content={
                        "note_id": note_id,
                        "job_id": job.id,
                        "status": "queued",
                        "duration": duration,
                    },
                )

            # ---------- SYNC: transcribe inline ----------
            result = await _run_sync_pipeline(tmp_wav, note_id)

            logging.info(
                "/transcribe-url success note_id=%s elapsed=%.2fs",
                note_id, time.perf_counter() - start,
            )
            REQUEST_COUNT.labels(endpoint, "success").inc()
            return result

        except Exception:
            # Count failures too, so queued/success/error metrics add up.
            REQUEST_COUNT.labels(endpoint, "error").inc()
            raise

        finally:
            # Best-effort cleanup; never let a removal race mask the response.
            for p in (tmp_in, tmp_wav):
                try:
                    if p and os.path.exists(p):
                        os.remove(p)
                except OSError:
                    pass