MinaNasser commited on
Commit
f2a264e
·
1 Parent(s): f85bf15
Files changed (1) hide show
  1. routes/transcripe.py +52 -139
routes/transcripe.py CHANGED
@@ -13,6 +13,7 @@ from models.database import get_db
13
  from sqlalchemy.exc import IntegrityError
14
  from websockets.exceptions import ConnectionClosedOK, ConnectionClosedError
15
  import aiohttp, asyncio
 
16
 
17
  logging.basicConfig(level=logging.INFO)
18
  logger = logging.getLogger(__name__)
@@ -22,38 +23,6 @@ transcripe_router = APIRouter(
22
  tags=["transcripe"])
23
 
24
 
25
- # @transcripe_router.post("/transcribe")
26
- # async def transcribe_endpoint(file: UploadFile = File(...),session_id: str = None,chunk_number: int = 0, db: AsyncSession = Depends(get_db)):
27
- # if not session_id or session_id.strip() == "":
28
- # session_id = f"ses_{uuid.uuid4().hex[:8]}"
29
-
30
- # session = await get_session(db, session_id)
31
- # if not session:
32
- # session = await create_session(db, session_id)
33
-
34
- # with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1]) as temp_file:
35
- # content = await file.read()
36
- # temp_file.write(content)
37
- # temp_path = temp_file.name
38
-
39
- # try:
40
- # controller = TranscriptionController(models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER)
41
- # text, language = await controller.transcribe_audio(temp_path)
42
-
43
- # if text:
44
- # transcription = await create_transcription(db, session_id, chunk_number, text, language)
45
-
46
- # return {
47
- # "id": transcription.id,
48
- # "session_id": transcription.session_id,
49
- # "chunk_number": transcription.chunk_number,
50
- # "text": transcription.text,
51
- # "language": transcription.language,
52
- # "created_at": transcription.created_at
53
- # }
54
- # finally:
55
- # if os.path.exists(temp_path):
56
- # os.remove(temp_path)
57
 
58
 
59
  @transcripe_router.post("/transcribe")
@@ -72,22 +41,26 @@ async def transcribe_endpoint(
72
  session = await create_session(db, session_id)
73
  except IntegrityError:
74
  await db.rollback()
75
- session = await get_session(db, session_id)
76
-
77
- with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1]) as temp_file:
78
- content = await file.read()
79
- temp_file.write(content)
80
- temp_path = temp_file.name
81
 
 
82
  try:
83
- controller = TranscriptionController(models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER)
84
- text, language = await controller.transcribe_audio(temp_path)
 
 
 
 
 
85
 
86
- if not text or text.strip() == "":
 
 
87
  return {"status": "no_speech", "session_id": session_id, "chunk_number": chunk_number}
 
 
88
 
89
  transcription = await create_transcription(db, session_id, chunk_number, text, language)
90
-
91
  return {
92
  "id": transcription.id,
93
  "session_id": transcription.session_id,
@@ -97,7 +70,7 @@ async def transcribe_endpoint(
97
  "created_at": transcription.created_at,
98
  }
99
  finally:
100
- if os.path.exists(temp_path):
101
  os.remove(temp_path)
102
 
103
 
@@ -110,19 +83,24 @@ async def websocket_endpoint(
110
  ):
111
  await websocket.accept()
112
 
113
- result = await get_session(db, session_id)
114
- if not result:
115
  try:
116
- result = await create_session(db, session_id)
117
  except IntegrityError:
118
  await db.rollback()
119
- result = await get_session(db, session_id)
120
 
121
- if not result:
122
  await websocket.send_json({"error": "Session not found nor created"})
123
  await websocket.close()
124
  return
125
 
 
 
 
 
 
126
  controller = TranscriptionController(
127
  models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER
128
  )
@@ -134,64 +112,51 @@ async def websocket_endpoint(
134
  while True:
135
  message = await websocket.receive()
136
  if message["type"] == "websocket.disconnect":
137
- logger.info(f"Client sent disconnect frame for session {session_id}")
138
  break
139
 
140
  data = message.get("bytes")
141
  if not data:
142
- logger.warning(f"Received non-bytes message on chunk {chunk_number}, skipping")
143
  continue
144
 
145
- logger.info(f"Received audio chunk {chunk_number} ({len(data)} bytes)")
146
 
147
  temp_path = None
148
  try:
149
- with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_file:
150
- temp_file.write(data)
151
- temp_path = temp_file.name
152
-
153
- MAX_RETRIES = 3
154
- text, language = None, None
155
- last_exc = None
156
- for attempt in range(MAX_RETRIES):
157
- try:
158
- text, language = await controller.transcribe_audio(temp_path)
159
- break
160
- except (aiohttp.ServerDisconnectedError, asyncio.TimeoutError) as e:
161
- last_exc = e
162
- logger.warning(
163
- f"Remote inference attempt {attempt + 1} failed for "
164
- f"chunk {chunk_number}: {e}"
165
- )
166
- if attempt < MAX_RETRIES - 1:
167
- await asyncio.sleep(2 ** attempt)
168
-
169
- if text is None and last_exc is not None:
170
- await websocket.send_json({
171
- "chunk_number": chunk_number,
172
- "status": "inference_unavailable",
173
- "error": str(last_exc)
174
- })
175
- chunk_number += 1
176
- continue
177
 
178
- if text and text.strip():
 
 
179
  await create_transcription(db, session_id, chunk_number, text, language)
180
  await websocket.send_json({
 
181
  "chunk_number": chunk_number,
182
  "text": text,
183
  "language": language,
184
  "session_id": session_id,
185
- "status": "ok"
186
  })
187
- logger.info(f"Transcribed chunk {chunk_number} ({language}): {text[:50]}...")
188
- else:
 
 
189
  await websocket.send_json({
190
- "chunk_number": chunk_number,
191
  "status": "no_speech",
192
- "language": language
 
 
 
 
 
 
 
 
 
193
  })
194
- logger.info(f"Chunk {chunk_number} ignored (no speech, detected lang: {language})")
195
 
196
  chunk_number += 1
197
 
@@ -206,7 +171,7 @@ async def websocket_endpoint(
206
  logger.info(f"WebSocket closed cleanly for session {session_id}: {e}")
207
 
208
  except Exception as e:
209
- logger.error(f"Unhandled error in WebSocket for session {session_id}: {e}", exc_info=True)
210
  try:
211
  await websocket.send_json({"error": str(e)})
212
  except Exception:
@@ -217,58 +182,6 @@ async def websocket_endpoint(
217
  pass
218
 
219
 
220
- # @transcripe_router.websocket("/ws/{session_id}")
221
- # async def websocket_endpoint(websocket: WebSocket, session_id: str, db: AsyncSession = Depends(get_db)):
222
- # await websocket.accept()
223
-
224
- # result = await get_session(db, session_id)
225
- # if not result:
226
- # result = await create_session(db, session_id)
227
-
228
- # if not result:
229
- # await websocket.send_json({"error": "Session not found nor created"})
230
- # await websocket.close()
231
- # return
232
-
233
- # chunk_number = 0
234
- # logger.info(f"WebSocket connection established for session {session_id}")
235
-
236
- # try:
237
- # while True:
238
- # data = await websocket.receive_bytes()
239
- # logger.info(f"Received audio chunk {chunk_number} ({len(data)} bytes)")
240
-
241
- # with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_file:
242
- # temp_file.write(data)
243
- # temp_path = temp_file.name
244
-
245
- # try:
246
- # controller = TranscriptionController(models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER)
247
- # text, language = await controller.transcribe_audio(temp_path)
248
-
249
- # if text:
250
- # transcription = await create_transcription(db, session_id, chunk_number, text, language)
251
- # await websocket.send_json({
252
- # "chunk_number": chunk_number,
253
- # "text": text,
254
- # "language": language,
255
- # "session_id": session_id
256
- # })
257
- # logger.info(f"Transcribed chunk {chunk_number} ({language}): {text[:50]}...")
258
- # else:
259
- # logger.info(f"Chunk {chunk_number} ignored (Detected: {language})")
260
-
261
- # chunk_number += 1
262
- # finally:
263
- # if os.path.exists(temp_path):
264
- # os.remove(temp_path)
265
-
266
- # except WebSocketDisconnect:
267
- # logger.info(f"WebSocket disconnected for session {session_id}")
268
- # except Exception as e:
269
- # logger.error(f"Error in WebSocket: {str(e)}")
270
- # await websocket.send_json({"error": str(e)})
271
- # await websocket.close()
272
 
273
  models={}
274
 
 
13
  from sqlalchemy.exc import IntegrityError
14
  from websockets.exceptions import ConnectionClosedOK, ConnectionClosedError
15
  import aiohttp, asyncio
16
+ from stores.sttremotes import NoSpeechDetected, ProviderUnavailable
17
 
18
  logging.basicConfig(level=logging.INFO)
19
  logger = logging.getLogger(__name__)
 
23
  tags=["transcripe"])
24
 
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
 
28
  @transcripe_router.post("/transcribe")
 
41
  session = await create_session(db, session_id)
42
  except IntegrityError:
43
  await db.rollback()
44
+ session = await get_session(db, session_id)
 
 
 
 
 
45
 
46
+ temp_path = None
47
  try:
48
+ with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1]) as f:
49
+ f.write(await file.read())
50
+ temp_path = f.name
51
+
52
+ controller = TranscriptionController(
53
+ models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER
54
+ )
55
 
56
+ try:
57
+ text, language = await controller.transcribe_audio(temp_path)
58
+ except NoSpeechDetected:
59
  return {"status": "no_speech", "session_id": session_id, "chunk_number": chunk_number}
60
+ except ProviderUnavailable as e:
61
+ raise HTTPException(status_code=503, detail=str(e))
62
 
63
  transcription = await create_transcription(db, session_id, chunk_number, text, language)
 
64
  return {
65
  "id": transcription.id,
66
  "session_id": transcription.session_id,
 
70
  "created_at": transcription.created_at,
71
  }
72
  finally:
73
+ if temp_path and os.path.exists(temp_path):
74
  os.remove(temp_path)
75
 
76
 
 
83
  ):
84
  await websocket.accept()
85
 
86
+ session = await get_session(db, session_id)
87
+ if not session:
88
  try:
89
+ session = await create_session(db, session_id)
90
  except IntegrityError:
91
  await db.rollback()
92
+ session = await get_session(db, session_id)
93
 
94
+ if not session:
95
  await websocket.send_json({"error": "Session not found nor created"})
96
  await websocket.close()
97
  return
98
 
99
+ if not models:
100
+ await websocket.send_json({"error": "Models not yet loaded, please reconnect."})
101
+ await websocket.close()
102
+ return
103
+
104
  controller = TranscriptionController(
105
  models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER
106
  )
 
112
  while True:
113
  message = await websocket.receive()
114
  if message["type"] == "websocket.disconnect":
115
+ logger.info(f"Client disconnected for session {session_id}")
116
  break
117
 
118
  data = message.get("bytes")
119
  if not data:
120
+ logger.warning(f"Non-bytes message on chunk {chunk_number}, skipping")
121
  continue
122
 
123
+ logger.info(f"Received chunk {chunk_number} ({len(data)} bytes)")
124
 
125
  temp_path = None
126
  try:
127
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as f:
128
+ f.write(data)
129
+ temp_path = f.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
+ try:
132
+ text, language = await controller.transcribe_audio(temp_path)
133
+ # Success — save and send back
134
  await create_transcription(db, session_id, chunk_number, text, language)
135
  await websocket.send_json({
136
+ "status": "ok",
137
  "chunk_number": chunk_number,
138
  "text": text,
139
  "language": language,
140
  "session_id": session_id,
 
141
  })
142
+ logger.info(f"Chunk {chunk_number} ({language}): {text[:50]}...")
143
+
144
+ except NoSpeechDetected:
145
+ # Silent chunk — keep connection alive, tell the client
146
  await websocket.send_json({
 
147
  "status": "no_speech",
148
+ "chunk_number": chunk_number,
149
+ })
150
+ logger.info(f"Chunk {chunk_number}: no speech detected")
151
+
152
+ except ProviderUnavailable as e:
153
+ # Provider is down — keep connection alive, client can retry
154
+ await websocket.send_json({
155
+ "status": "provider_unavailable",
156
+ "chunk_number": chunk_number,
157
+ "error": str(e),
158
  })
159
+ logger.error(f"Chunk {chunk_number}: provider unavailable {e}")
160
 
161
  chunk_number += 1
162
 
 
171
  logger.info(f"WebSocket closed cleanly for session {session_id}: {e}")
172
 
173
  except Exception as e:
174
+ logger.error(f"Unhandled error for session {session_id}: {e}", exc_info=True)
175
  try:
176
  await websocket.send_json({"error": str(e)})
177
  except Exception:
 
182
  pass
183
 
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
  models={}
187