Spaces:
Sleeping
Sleeping
Commit
·
d6ff044
1
Parent(s):
580c470
update
Browse files
- app/core/asr_engine.py +13 -14
app/core/asr_engine.py
CHANGED
|
@@ -185,6 +185,18 @@ def transcribe_file_chunks(
|
|
| 185 |
"""
|
| 186 |
if not wav_path:
|
| 187 |
return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
out = model(
|
| 189 |
wav_path,
|
| 190 |
chunk_length_s=chunk_length_s,
|
|
@@ -194,7 +206,7 @@ def transcribe_file_chunks(
|
|
| 194 |
|
| 195 |
# Pipeline output can vary across transformers versions/models:
|
| 196 |
# - some return `chunks` (with `timestamp` list),
|
| 197 |
-
# - others return `segments` (with `start
|
| 198 |
# so be permissive and handle both shapes.
|
| 199 |
raw_segments = out.get("chunks") or out.get("segments") or []
|
| 200 |
|
|
@@ -224,17 +236,4 @@ def transcribe_file_chunks(
|
|
| 224 |
# be robust against unexpected types
|
| 225 |
continue
|
| 226 |
|
| 227 |
-
# If no timestamped chunks found and file is long, try chunked inference
|
| 228 |
-
if not chunks:
|
| 229 |
-
info = get_audio_info(wav_path) or {}
|
| 230 |
-
duration = info.get("duration", 0)
|
| 231 |
-
if duration and duration > chunk_length_s:
|
| 232 |
-
try:
|
| 233 |
-
_, combined = transcribe_long_audio(
|
| 234 |
-
model, wav_path, chunk_length_s=chunk_length_s, overlap_s=stride_s
|
| 235 |
-
)
|
| 236 |
-
return combined
|
| 237 |
-
except Exception:
|
| 238 |
-
logger.exception("transcribe_long_audio fallback failed for %s", wav_path)
|
| 239 |
-
|
| 240 |
return chunks
|
|
|
|
| 185 |
"""
|
| 186 |
if not wav_path:
|
| 187 |
return []
|
| 188 |
+
# For long audio prefer explicit chunked inference (split + per-chunk inference)
|
| 189 |
+
info = get_audio_info(wav_path) or {}
|
| 190 |
+
duration = info.get("duration", 0)
|
| 191 |
+
if duration and duration > chunk_length_s:
|
| 192 |
+
try:
|
| 193 |
+
_, combined = transcribe_long_audio(
|
| 194 |
+
model, wav_path, chunk_length_s=chunk_length_s, overlap_s=stride_s
|
| 195 |
+
)
|
| 196 |
+
return combined
|
| 197 |
+
except Exception:
|
| 198 |
+
logger.exception("transcribe_long_audio failed in transcribe_file_chunks, falling back to pipeline")
|
| 199 |
+
|
| 200 |
out = model(
|
| 201 |
wav_path,
|
| 202 |
chunk_length_s=chunk_length_s,
|
|
|
|
| 206 |
|
| 207 |
# Pipeline output can vary across transformers versions/models:
|
| 208 |
# - some return `chunks` (with `timestamp` list),
|
| 209 |
+
# - others return `segments` (with `start`/end),
|
| 210 |
# so be permissive and handle both shapes.
|
| 211 |
raw_segments = out.get("chunks") or out.get("segments") or []
|
| 212 |
|
|
|
|
| 236 |
# be robust against unexpected types
|
| 237 |
continue
|
| 238 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
return chunks
|