bichnhan2701 committed on
Commit
98ab978
·
1 Parent(s): e0ebe32
Files changed (2) hide show
  1. app/api/transcribe.py +7 -1
  2. app/core/asr_engine.py +14 -2
app/api/transcribe.py CHANGED
@@ -96,7 +96,13 @@ async def _run_sync_pipeline(tmp_wav: str, note_id: str):
96
  if c.get("text", "").strip()
97
  ]
98
 
99
- status = "transcribed" if chunks else "error"
 
 
 
 
 
 
100
 
101
  # 🔥 UPDATE — KHÔNG CREATE
102
  await note_service.update_note(
 
96
  if c.get("text", "").strip()
97
  ]
98
 
99
+ # If model returned text but pipeline didn't provide timestamped chunks,
100
+ # treat as transcribed and create a single fallback chunk covering full duration.
101
+ text = text or ""
102
+ if not chunks and text.strip():
103
+ chunks = [{"text": text.strip(), "start": 0.0, "end": info.get("duration") or 0.0}]
104
+
105
+ status = "transcribed" if text.strip() else "error"
106
 
107
  # 🔥 UPDATE — KHÔNG CREATE
108
  await note_service.update_note(
app/core/asr_engine.py CHANGED
@@ -60,10 +60,22 @@ def transcribe_file(
60
  wav_path,
61
  chunk_length_s=chunk_length_s,
62
  stride_length_s=stride_s,
 
63
  )
64
 
65
- text = out.get("text", "")
66
- return text.strip() if text else ""
 
 
 
 
 
 
 
 
 
 
 
67
 
68
 
69
  # ===============================
 
60
  wav_path,
61
  chunk_length_s=chunk_length_s,
62
  stride_length_s=stride_s,
63
+ # NOTE: return_timestamps is not passed in the arguments shown here; the code below reads 'text' first and falls back to 'chunks'/'segments' if it is empty
64
  )
65
 
66
+ # Primary: pipeline may return 'text'
67
+ text = (out.get("text") or "").strip()
68
+ if text:
69
+ return text
70
+
71
+ # Fallback: some pipeline versions return detailed segments/chunks
72
+ segs = out.get("chunks") or out.get("segments") or []
73
+ if segs:
74
+ parts = [ (s.get("text") or "").strip() for s in segs ]
75
+ joined = " ".join([p for p in parts if p])
76
+ return joined.strip()
77
+
78
+ return ""
79
 
80
 
81
  # ===============================