bichnhan2701 committed on
Commit
529a32e
·
1 Parent(s): 1fa646f

Fix bug early enrich again

Browse files
Files changed (2) hide show
  1. app/api/transcribe.py +21 -18
  2. app/jobs/transcribe_job.py +37 -10
app/api/transcribe.py CHANGED
@@ -123,25 +123,28 @@ async def _run_sync_pipeline(tmp_wav: str, note_id: str):
123
 
124
 
125
  async def _create_placeholder_note(note_id: str, duration: float):
126
- """
127
- Tạo note NGAY LẬP TỨC để:
128
- - SSE không trả not_found
129
- - enrich có object để update
130
- """
131
- await NoteServiceClient().create_audio_note({
132
- "note_id": note_id,
133
- "type": "audio",
134
- "status": "processing",
135
- "raw_text": "",
136
- "metadata": {
137
- "audio": {
138
- "duration": duration,
139
- "chunks": [],
140
- "asr_model": "PhoWhisper-base",
141
  }
142
- },
143
- # 🔥 KHÔNG generate ở đây
144
- })
 
 
 
145
 
146
 
147
  # ============================================================
 
123
 
124
 
125
  async def _create_placeholder_note(note_id: str, duration: float):
126
+ client = NoteServiceClient()
127
+ res = await client.create_audio_note(
128
+ {
129
+ "note_id": note_id,
130
+ "type": "audio",
131
+ "status": "processing",
132
+ "raw_text": "",
133
+ "metadata": {
134
+ "audio": {
135
+ "duration": duration,
136
+ "chunks": [],
137
+ "asr_model": "PhoWhisper-base",
138
+ }
139
+ },
140
+ # ❌ KHÔNG generate ở đây
141
  }
142
+ )
143
+
144
+ if res is None:
145
+ # 🔥 FAIL FAST
146
+ raise RuntimeError("Failed to create placeholder note")
147
+
148
 
149
 
150
  # ============================================================
app/jobs/transcribe_job.py CHANGED
@@ -2,11 +2,24 @@ import asyncio
2
  import tempfile
3
  import os
4
  import requests
 
 
5
 
6
  from app.core.asr_engine import load_model, transcribe_file, transcribe_file_chunks
7
  from app.services.note_client import NoteServiceClient
8
  from app.core.audio_utils import get_audio_info
9
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  def download_audio(audio_url: str) -> str:
12
  r = requests.get(audio_url, timeout=30)
@@ -62,18 +75,32 @@ def transcribe_job(audio_url: str, note_id: str, user_id: str | None = None):
62
  "generate": ["normalize", "keywords", "summary", "mindmap"],
63
  }
64
 
65
- client = NoteServiceClient()
66
- asyncio.run(
67
- client.update_note(
68
- note_id,
69
- {
70
- "status": note_status,
71
- "raw_text": text,
72
- "metadata": payload["metadata"],
73
- },
74
- )
75
  )
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
  finally:
79
  if wav_path and os.path.exists(wav_path):
 
2
  import tempfile
3
  import os
4
  import requests
5
+ import httpx
6
+ import time
7
 
8
  from app.core.asr_engine import load_model, transcribe_file, transcribe_file_chunks
9
  from app.services.note_client import NoteServiceClient
10
  from app.core.audio_utils import get_audio_info
11
 
12
def run_async(coro):
    """Run *coro* to completion from synchronous code and return its result.

    The previous implementation returned ``asyncio.create_task(coro)`` when a
    loop was already running. That handed the caller a pending Task instead of
    a result, and exceptions raised by the coroutine never reached the
    caller's ``try``/``except``. This version always waits for completion.

    Args:
        coro: The coroutine object to execute.

    Returns:
        Whatever the coroutine returns.

    Raises:
        Any exception raised by the coroutine is re-raised to the caller.

    Note:
        When called from a thread whose event loop is running, the coroutine
        is executed on a short-lived helper thread with its own event loop,
        and the calling thread blocks until it finishes. Coroutines that
        depend on objects bound to the caller's loop are not supported on
        that path.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread, so asyncio.run can own one.
        return asyncio.run(coro)

    # A loop is already running here; asyncio.run would raise, and a bare
    # create_task would be fire-and-forget. Run on a separate thread instead.
    import concurrent.futures

    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()
22
+
23
 
24
  def download_audio(audio_url: str) -> str:
25
  r = requests.get(audio_url, timeout=30)
 
75
  "generate": ["normalize", "keywords", "summary", "mindmap"],
76
  }
77
 
78
+ generate_tasks = (
79
+ ["normalize", "keywords", "summary", "mindmap"]
80
+ if note_status == "transcribed"
81
+ else []
 
 
 
 
 
 
82
  )
83
 
84
+ time.sleep(1.0)
85
+
86
+ client = NoteServiceClient()
87
+ try:
88
+ run_async(
89
+ client.update_note(
90
+ note_id,
91
+ {
92
+ "status": note_status,
93
+ "raw_text": text,
94
+ "metadata": payload["metadata"],
95
+ **({"generate": generate_tasks} if generate_tasks else {}),
96
+ },
97
+ )
98
+ )
99
+ except httpx.HTTPStatusError as e:
100
+ if e.response.status_code == 404:
101
+ run_async(client.create_audio_note(payload))
102
+ else:
103
+ raise
104
 
105
  finally:
106
  if wav_path and os.path.exists(wav_path):