bichnhan2701 committed on
Commit
643318e
·
1 Parent(s): 4cbfd1f
Files changed (1) hide show
  1. app/jobs/transcribe_job.py +30 -7
app/jobs/transcribe_job.py CHANGED
@@ -9,6 +9,7 @@ import time
9
  from app.core.asr_engine import load_model, transcribe_file, transcribe_file_chunks
10
  from app.services.note_client import NoteServiceClient
11
  from app.core.audio_utils import get_audio_info
 
12
 
13
  def run_async(coro):
14
  try:
@@ -41,21 +42,43 @@ def transcribe_job(audio_url: str, note_id: str, user_id: str | None = None):
41
  # 1️⃣ Download audio
42
  wav_path = download_audio(audio_url)
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  # 2️⃣ ASR
45
  text = transcribe_file(model, wav_path, 30.0, 5.0)
46
  chunks = transcribe_file_chunks(model, wav_path, 30.0, 5.0)
47
 
 
48
  chunks = [
49
- {
50
- "text": c["text"],
51
- "start": c.get("start"),
52
- "end": c.get("end"),
53
- }
54
- for c in chunks
55
  if c.get("text", "").strip()
56
  ]
57
 
58
- note_status = "transcribed" if chunks else "error"
 
 
 
 
 
 
 
 
 
 
59
  info = get_audio_info(wav_path) or {}
60
 
61
  payload = {
 
9
  from app.core.asr_engine import load_model, transcribe_file, transcribe_file_chunks
10
  from app.services.note_client import NoteServiceClient
11
  from app.core.audio_utils import get_audio_info
12
+ from app.core.audio_utils import ensure_wav_16k_mono, make_temp_path
13
 
14
  def run_async(coro):
15
  try:
 
42
  # 1️⃣ Download audio
43
  wav_path = download_audio(audio_url)
44
 
45
+ # Ensure WAV is 16k mono for consistent chunking and ASR behavior
46
+ try:
47
+ info = get_audio_info(wav_path) or {}
48
+ if info.get("samplerate") != 16000 or info.get("channels") != 1:
49
+ tmp_wav = make_temp_path(suffix=".wav")
50
+ ensure_wav_16k_mono(wav_path, tmp_wav)
51
+ # replace wav_path with converted file and remove original
52
+ try:
53
+ os.remove(wav_path)
54
+ except Exception:
55
+ pass
56
+ wav_path = tmp_wav
57
+ except Exception:
58
+ logging.exception("Failed to ensure wav format for %s", wav_path)
59
+
60
  # 2️⃣ ASR
61
  text = transcribe_file(model, wav_path, 30.0, 5.0)
62
  chunks = transcribe_file_chunks(model, wav_path, 30.0, 5.0)
63
 
64
+ # normalize chunks list
65
  chunks = [
66
+ {"text": c["text"], "start": c.get("start"), "end": c.get("end")}
67
+ for c in (chunks or [])
 
 
 
 
68
  if c.get("text", "").strip()
69
  ]
70
 
71
+ # If no timestamped chunks but raw text exists, create per-file fallback chunk
72
+ if not chunks and text and text.strip():
73
+ try:
74
+ info = get_audio_info(wav_path) or {}
75
+ duration = info.get("duration") or 0.0
76
+ chunks = [{"text": text.strip(), "start": 0.0, "end": float(duration)}]
77
+ except Exception:
78
+ logging.exception("failed to create fallback chunk for note %s", note_id)
79
+
80
+ # Consider transcribed if we have either timestamped chunks or non-empty text
81
+ note_status = "transcribed" if (chunks or (text and text.strip())) else "error"
82
  info = get_audio_info(wav_path) or {}
83
 
84
  payload = {