Spaces:

bichnhan2701
/

PhoWhisperBaseAPI

Sleeping

bichnhan2701 committed on 28 days ago

Commit

d6ff044

1 Parent(s): 580c470

update

Files changed (1) hide show

app/core/asr_engine.py CHANGED Viewed

@@ -185,6 +185,18 @@ def transcribe_file_chunks(
     """
     if not wav_path:
         return []
     out = model(
         wav_path,
         chunk_length_s=chunk_length_s,
@@ -194,7 +206,7 @@ def transcribe_file_chunks(
     # Pipeline output can vary across transformers versions/models:
     # - some return `chunks` (with `timestamp` list),
-    # - others return `segments` (with `start`/`end`),
     # so be permissive and handle both shapes.
     raw_segments = out.get("chunks") or out.get("segments") or []
@@ -224,17 +236,4 @@ def transcribe_file_chunks(
             # be robust against unexpected types
             continue
-    # If no timestamped chunks found and file is long, try chunked inference
-    if not chunks:
-        info = get_audio_info(wav_path) or {}
-        duration = info.get("duration", 0)
-        if duration and duration > chunk_length_s:
-            try:
-                _, combined = transcribe_long_audio(
-                    model, wav_path, chunk_length_s=chunk_length_s, overlap_s=stride_s
-                )
-                return combined
-            except Exception:
-                logger.exception("transcribe_long_audio fallback failed for %s", wav_path)
     return chunks

     """
     if not wav_path:
         return []
+    # For long audio prefer explicit chunked inference (split + per-chunk inference)
+    info = get_audio_info(wav_path) or {}
+    duration = info.get("duration", 0)
+    if duration and duration > chunk_length_s:
+        try:
+            _, combined = transcribe_long_audio(
+                model, wav_path, chunk_length_s=chunk_length_s, overlap_s=stride_s
+            )
+            return combined
+        except Exception:
+            logger.exception("transcribe_long_audio failed in transcribe_file_chunks, falling back to pipeline")
     out = model(
         wav_path,
         chunk_length_s=chunk_length_s,
     # Pipeline output can vary across transformers versions/models:
     # - some return `chunks` (with `timestamp` list),
+    # - others return `segments` (with `start`/`end`),
     # so be permissive and handle both shapes.
     raw_segments = out.get("chunks") or out.get("segments") or []
             # be robust against unexpected types
             continue
     return chunks