Spaces:

bichnhan2701
/

PhoWhisperBaseAPI

Running

App Files Files Community

bichnhan2701 committed 22 days ago

Commit

ca4e471

1 Parent(s): be6b69c

Update transcribe

Browse files

Files changed (2) hide show

app/api/transcribe.py +73 -44
app/jobs/transcribe_job.py +7 -2

app/api/transcribe.py CHANGED Viewed

@@ -7,7 +7,6 @@ from pathlib import Path
 from fastapi import APIRouter, UploadFile, File, HTTPException
 from fastapi.responses import JSONResponse
 from rq import Queue
 from app.config import settings
@@ -25,6 +24,7 @@ from app.core.audio_utils import (
     get_audio_info,
     upload_temp_audio,
 )
 from app.core.asr_engine import (
     load_model,
     transcribe_file,
@@ -35,6 +35,7 @@ router = APIRouter()
 ASR_MODEL = None
 ASYNC_THRESHOLD = 120  # seconds
 # ============================================================
 # Startup: load ASR model once
@@ -57,45 +58,60 @@ def _ensure_file_limits(path: str):
         raise HTTPException(413, "Audio duration exceeds limit")
-async def _run_sync_pipeline(
-    tmp_wav: str,
-    note_id: str,
-):
     """
-    Run sync ASR + persist to Note Service
     """
     note_service = NoteServiceClient()
     info = get_audio_info(tmp_wav) or {}
-    model = ASR_MODEL
     with ASR_DURATION.labels("/transcribe").time():
         text = await asyncio.to_thread(
-            transcribe_file, model, tmp_wav, 30.0, 5.0
         )
         chunks = await asyncio.to_thread(
-            transcribe_file_chunks, model, tmp_wav, 30.0, 5.0
         )
-    chunks = [c for c in chunks if c.get("text", "").strip()]
     status = "transcribed" if chunks else "error"
-    payload = {
-        "note_id": note_id,
-        "type": "audio",
-        "status": status,
-        "raw_text": text,
-        "metadata": {
-            "audio": {
-                "duration": info.get("duration"),
-                "sample_rate": info.get("samplerate"),
-                "chunks": chunks,
-                "asr_model": "PhoWhisper-base",
-            }
         },
-        "generate": ["normalize", "keywords", "summary", "mindmap"],
-    }
-    await note_service.create_audio_note(payload)
     return {
         "note_id": note_id,
@@ -104,16 +120,28 @@ async def _run_sync_pipeline(
     }
-def _enqueue_async_job(audio_url: str, note_id: str, user_id: str | None = None):
-    q = Queue("asr", connection=redis_client)
-    job = q.enqueue(
-        transcribe_job,
-        audio_url,
-        note_id,
-        user_id,
-        job_timeout=1800,
     )
-    return job
 # ============================================================
@@ -122,7 +150,6 @@ def _enqueue_async_job(audio_url: str, note_id: str, user_id: str | None = None)
 @router.post("/transcribe", response_model=TranscribeResponse)
 async def transcribe(file: UploadFile = File(...)):
     endpoint = "/transcribe"
-    start = time.perf_counter()
     note_id = str(uuid.uuid4())
     tmp_in = make_temp_path(suffix=Path(file.filename).suffix or ".tmp")
@@ -130,11 +157,11 @@ async def transcribe(file: UploadFile = File(...)):
     with REQUEST_LATENCY.labels(endpoint).time():
         try:
-            # 1. Save upload
             await asyncio.to_thread(save_upload_file, file, tmp_in)
             _ensure_file_limits(tmp_in)
-            # 2. Convert
             tmp_wav = make_temp_path(suffix=".wav")
             await asyncio.to_thread(ensure_wav_16k_mono, tmp_in, tmp_wav)
@@ -144,6 +171,8 @@ async def transcribe(file: UploadFile = File(...)):
             # ---------- ASYNC ----------
             if duration > ASYNC_THRESHOLD:
                 audio_url = await asyncio.to_thread(upload_temp_audio, tmp_wav)
                 job = _enqueue_async_job(audio_url, note_id)
                 REQUEST_COUNT.labels(endpoint, "queued").inc()
@@ -158,6 +187,7 @@ async def transcribe(file: UploadFile = File(...)):
                 )
             # ---------- SYNC ----------
             result = await _run_sync_pipeline(tmp_wav, note_id)
             REQUEST_COUNT.labels(endpoint, "success").inc()
@@ -170,12 +200,11 @@ async def transcribe(file: UploadFile = File(...)):
 # ============================================================
-# POST /transcribe-url (FULL LOGIC, same as /transcribe)
 # ============================================================
 @router.post("/transcribe-url", response_model=TranscribeResponse)
 async def transcribe_url(payload: dict):
     endpoint = "/transcribe-url"
-    start = time.perf_counter()
     audio_url = payload.get("audio_url")
     user_id = payload.get("user_id")
@@ -184,17 +213,16 @@ async def transcribe_url(payload: dict):
         raise HTTPException(400, "audio_url required")
     note_id = str(uuid.uuid4())
     tmp_in = make_temp_path(suffix=Path(audio_url).suffix or ".tmp")
     tmp_wav = None
     with REQUEST_LATENCY.labels(endpoint).time():
         try:
-            # 1. Download audio
             await asyncio.to_thread(download_file_from_url, audio_url, tmp_in)
             _ensure_file_limits(tmp_in)
-            # 2. Convert
             tmp_wav = make_temp_path(suffix=".wav")
             await asyncio.to_thread(ensure_wav_16k_mono, tmp_in, tmp_wav)
@@ -203,7 +231,7 @@ async def transcribe_url(payload: dict):
             # ---------- ASYNC ----------
             if duration > ASYNC_THRESHOLD:
-                # use ORIGINAL url for async job
                 job = _enqueue_async_job(audio_url, note_id, user_id)
                 REQUEST_COUNT.labels(endpoint, "queued").inc()
@@ -218,6 +246,7 @@ async def transcribe_url(payload: dict):
                 )
             # ---------- SYNC ----------
             result = await _run_sync_pipeline(tmp_wav, note_id)
             REQUEST_COUNT.labels(endpoint, "success").inc()

 from fastapi import APIRouter, UploadFile, File, HTTPException
 from fastapi.responses import JSONResponse
 from rq import Queue
 from app.config import settings
     get_audio_info,
     upload_temp_audio,
 )
 from app.core.asr_engine import (
     load_model,
     transcribe_file,
 ASR_MODEL = None
 ASYNC_THRESHOLD = 120  # seconds
+logger = logging.getLogger(__name__)
 # ============================================================
 # Startup: load ASR model once
         raise HTTPException(413, "Audio duration exceeds limit")
+def _enqueue_async_job(audio_url: str, note_id: str, user_id: str | None = None):
+    q = Queue("asr", connection=redis_client)
+    return q.enqueue(
+        transcribe_job,
+        audio_url,
+        note_id,
+        user_id,
+        job_timeout=1800,
+    )
+async def _run_sync_pipeline(tmp_wav: str, note_id: str):
     """
+    Sync ASR → update existing note
     """
     note_service = NoteServiceClient()
     info = get_audio_info(tmp_wav) or {}
     with ASR_DURATION.labels("/transcribe").time():
         text = await asyncio.to_thread(
+            transcribe_file, ASR_MODEL, tmp_wav, 30.0, 5.0
         )
         chunks = await asyncio.to_thread(
+            transcribe_file_chunks, ASR_MODEL, tmp_wav, 30.0, 5.0
         )
+    chunks = [
+        {
+            "text": c["text"],
+            "start": c.get("start"),
+            "end": c.get("end"),
+        }
+        for c in chunks
+        if c.get("text", "").strip()
+    ]
     status = "transcribed" if chunks else "error"
+    # 🔥 UPDATE — KHÔNG CREATE
+    await note_service.update_note(
+        note_id,
+        {
+            "status": status,
+            "raw_text": text,
+            "metadata": {
+                "audio": {
+                    "duration": info.get("duration"),
+                    "sample_rate": info.get("samplerate"),
+                    "chunks": chunks,
+                    "asr_model": "PhoWhisper-base",
+                }
+            },
         },
+    )
     return {
         "note_id": note_id,
     }
+async def _create_placeholder_note(note_id: str, duration: float):
+    """
+    Tạo note NGAY LẬP TỨC để:
+    - SSE không trả not_found
+    - enrich có object để update
+    """
+    await NoteServiceClient().create_audio_note(
+        {
+            "note_id": note_id,
+            "type": "audio",
+            "status": "processing",
+            "raw_text": "",
+            "metadata": {
+                "audio": {
+                    "duration": duration,
+                    "chunks": [],
+                    "asr_model": "PhoWhisper-base",
+                }
+            },
+            "generate": ["normalize", "keywords", "summary", "mindmap"],
+        }
     )
 # ============================================================
 @router.post("/transcribe", response_model=TranscribeResponse)
 async def transcribe(file: UploadFile = File(...)):
     endpoint = "/transcribe"
     note_id = str(uuid.uuid4())
     tmp_in = make_temp_path(suffix=Path(file.filename).suffix or ".tmp")
     with REQUEST_LATENCY.labels(endpoint).time():
         try:
+            # 1️⃣ Save upload
             await asyncio.to_thread(save_upload_file, file, tmp_in)
             _ensure_file_limits(tmp_in)
+            # 2️⃣ Convert
             tmp_wav = make_temp_path(suffix=".wav")
             await asyncio.to_thread(ensure_wav_16k_mono, tmp_in, tmp_wav)
             # ---------- ASYNC ----------
             if duration > ASYNC_THRESHOLD:
                 audio_url = await asyncio.to_thread(upload_temp_audio, tmp_wav)
+                await _create_placeholder_note(note_id, duration)
                 job = _enqueue_async_job(audio_url, note_id)
                 REQUEST_COUNT.labels(endpoint, "queued").inc()
                 )
             # ---------- SYNC ----------
+            await _create_placeholder_note(note_id, duration)
             result = await _run_sync_pipeline(tmp_wav, note_id)
             REQUEST_COUNT.labels(endpoint, "success").inc()
 # ============================================================
+# POST /transcribe-url (FULL LOGIC)
 # ============================================================
 @router.post("/transcribe-url", response_model=TranscribeResponse)
 async def transcribe_url(payload: dict):
     endpoint = "/transcribe-url"
     audio_url = payload.get("audio_url")
     user_id = payload.get("user_id")
         raise HTTPException(400, "audio_url required")
     note_id = str(uuid.uuid4())
     tmp_in = make_temp_path(suffix=Path(audio_url).suffix or ".tmp")
     tmp_wav = None
     with REQUEST_LATENCY.labels(endpoint).time():
         try:
+            # 1️⃣ Download
             await asyncio.to_thread(download_file_from_url, audio_url, tmp_in)
             _ensure_file_limits(tmp_in)
+            # 2️⃣ Convert
             tmp_wav = make_temp_path(suffix=".wav")
             await asyncio.to_thread(ensure_wav_16k_mono, tmp_in, tmp_wav)
             # ---------- ASYNC ----------
             if duration > ASYNC_THRESHOLD:
+                await _create_placeholder_note(note_id, duration)
                 job = _enqueue_async_job(audio_url, note_id, user_id)
                 REQUEST_COUNT.labels(endpoint, "queued").inc()
                 )
             # ---------- SYNC ----------
+            await _create_placeholder_note(note_id, duration)
             result = await _run_sync_pipeline(tmp_wav, note_id)
             REQUEST_COUNT.labels(endpoint, "success").inc()

app/jobs/transcribe_job.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import asyncio
 import tempfile
 import os
 import requests
 from app.core.asr_engine import load_model, transcribe_file, transcribe_file_chunks
@@ -62,8 +63,12 @@ def transcribe_job(audio_url: str, note_id: str, user_id: str | None = None):
         }
         client = NoteServiceClient()
-        asyncio.run(client.create_audio_note(payload))
     finally:
         # 3️⃣ Cleanup
         if wav_path and os.path.exists(wav_path):

 import asyncio
 import tempfile
 import os
+from xmlrpc import client
 import requests
 from app.core.asr_engine import load_model, transcribe_file, transcribe_file_chunks
         }
         client = NoteServiceClient()
+        asyncio.run(client.update_note(note_id, {
+            "status": note_status,
+            "raw_text": text,
+            "metadata": payload["metadata"],
+        }))
     finally:
         # 3️⃣ Cleanup
         if wav_path and os.path.exists(wav_path):