MinaNasser committed on
Commit
53e8087
·
1 Parent(s): b3ecc6c

4th errors

Browse files
Files changed (1) hide show
  1. routes/transcripe.py +197 -33
routes/transcripe.py CHANGED
@@ -10,6 +10,8 @@ import uuid
10
  from models.sessions import create_session, get_session
11
  from models.transcriptions import create_transcription
12
  from models.database import get_db
 
 
13
 
14
  logging.basicConfig(level=logging.INFO)
15
  logger = logging.getLogger(__name__)
@@ -19,91 +21,253 @@ transcripe_router = APIRouter(
19
  tags=["transcripe"])
20
 
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  @transcripe_router.post("/transcribe")
23
- async def transcribe_endpoint(file: UploadFile = File(...),session_id: str = None,chunk_number: int = 0, db: AsyncSession = Depends(get_db)):
 
 
 
 
 
24
  if not session_id or session_id.strip() == "":
25
  session_id = f"ses_{uuid.uuid4().hex[:8]}"
26
-
27
  session = await get_session(db, session_id)
28
  if not session:
29
- session = await create_session(db, session_id)
 
 
 
 
30
 
31
  with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1]) as temp_file:
32
  content = await file.read()
33
  temp_file.write(content)
34
  temp_path = temp_file.name
35
-
36
  try:
37
  controller = TranscriptionController(models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER)
38
  text, language = await controller.transcribe_audio(temp_path)
39
 
40
- if text:
41
- transcription = await create_transcription(db, session_id, chunk_number, text, language)
42
-
 
 
43
  return {
44
  "id": transcription.id,
45
  "session_id": transcription.session_id,
46
  "chunk_number": transcription.chunk_number,
47
  "text": transcription.text,
48
  "language": transcription.language,
49
- "created_at": transcription.created_at
50
  }
51
  finally:
52
  if os.path.exists(temp_path):
53
  os.remove(temp_path)
54
 
 
 
55
  @transcripe_router.websocket("/ws/{session_id}")
56
- async def websocket_endpoint(websocket: WebSocket, session_id: str, db: AsyncSession = Depends(get_db)):
 
 
 
 
57
  await websocket.accept()
58
 
59
  result = await get_session(db, session_id)
60
  if not result:
61
- result = await create_session(db, session_id)
62
-
 
 
 
 
63
  if not result:
64
  await websocket.send_json({"error": "Session not found nor created"})
65
  await websocket.close()
66
  return
67
-
 
 
 
 
68
  chunk_number = 0
69
  logger.info(f"WebSocket connection established for session {session_id}")
70
-
71
  try:
72
  while True:
73
- data = await websocket.receive_bytes()
 
 
 
 
 
 
 
 
 
74
  logger.info(f"Received audio chunk {chunk_number} ({len(data)} bytes)")
75
 
76
- with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_file:
77
- temp_file.write(data)
78
- temp_path = temp_file.name
79
-
80
  try:
81
- controller = TranscriptionController(models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER)
82
- text, language = await controller.transcribe_audio(temp_path)
83
-
84
- if text:
85
- transcription = await create_transcription(db, session_id, chunk_number, text, language)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  await websocket.send_json({
87
  "chunk_number": chunk_number,
88
  "text": text,
89
  "language": language,
90
- "session_id": session_id
 
91
  })
92
  logger.info(f"Transcribed chunk {chunk_number} ({language}): {text[:50]}...")
93
  else:
94
- logger.info(f"Chunk {chunk_number} ignored (Detected: {language})")
95
-
 
 
 
 
 
96
  chunk_number += 1
 
97
  finally:
98
- if os.path.exists(temp_path):
99
  os.remove(temp_path)
100
-
101
  except WebSocketDisconnect:
102
  logger.info(f"WebSocket disconnected for session {session_id}")
 
 
 
 
103
  except Exception as e:
104
- logger.error(f"Error in WebSocket: {str(e)}")
105
- await websocket.send_json({"error": str(e)})
106
- await websocket.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  models={}
109
 
@@ -111,10 +275,10 @@ async def load_models():
111
  print("Loading Whisper models...")
112
  if get_settings().INFERENCE_TYPE == "local":
113
  if get_settings().LOCAL_INFERENCE_MODEL_SIZE == "small":
114
- models["small_arabic"] = WhisperModel("MinaNasser/Whisper-Small-MN-int8", device="cpu", compute_type="int8")
115
  models["small_english"] = WhisperModel("small", device="cpu", compute_type="int8")
116
  elif get_settings().LOCAL_INFERENCE_MODEL_SIZE == "base":
117
- models["base_arabic"] = WhisperModel("MinaNasser/Whisper-Base-MN-EG-int8", device="cpu", compute_type="int8")
118
  models["base_english"] = WhisperModel("base", device="cpu", compute_type="int8")
119
 
120
  print("Models loaded successfully ")
 
10
  from models.sessions import create_session, get_session
11
  from models.transcriptions import create_transcription
12
  from models.database import get_db
13
+ from sqlalchemy.exc import IntegrityError
14
+ from websockets.exceptions import ConnectionClosedOK, ConnectionClosedError
15
 
16
  logging.basicConfig(level=logging.INFO)
17
  logger = logging.getLogger(__name__)
 
21
  tags=["transcripe"])
22
 
23
 
24
+ # @transcripe_router.post("/transcribe")
25
+ # async def transcribe_endpoint(file: UploadFile = File(...),session_id: str = None,chunk_number: int = 0, db: AsyncSession = Depends(get_db)):
26
+ # if not session_id or session_id.strip() == "":
27
+ # session_id = f"ses_{uuid.uuid4().hex[:8]}"
28
+
29
+ # session = await get_session(db, session_id)
30
+ # if not session:
31
+ # session = await create_session(db, session_id)
32
+
33
+ # with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1]) as temp_file:
34
+ # content = await file.read()
35
+ # temp_file.write(content)
36
+ # temp_path = temp_file.name
37
+
38
+ # try:
39
+ # controller = TranscriptionController(models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER)
40
+ # text, language = await controller.transcribe_audio(temp_path)
41
+
42
+ # if text:
43
+ # transcription = await create_transcription(db, session_id, chunk_number, text, language)
44
+
45
+ # return {
46
+ # "id": transcription.id,
47
+ # "session_id": transcription.session_id,
48
+ # "chunk_number": transcription.chunk_number,
49
+ # "text": transcription.text,
50
+ # "language": transcription.language,
51
+ # "created_at": transcription.created_at
52
+ # }
53
+ # finally:
54
+ # if os.path.exists(temp_path):
55
+ # os.remove(temp_path)
56
+
57
+
@transcripe_router.post("/transcribe")
async def transcribe_endpoint(
    file: UploadFile = File(...),
    session_id: str = None,
    chunk_number: int = 0,
    db: AsyncSession = Depends(get_db),
):
    """Transcribe one uploaded audio file and persist the result.

    Args:
        file: The uploaded audio file.
        session_id: Session to attach the transcription to. When missing or
            blank, a new id of the form ``ses_<8 hex chars>`` is generated.
        chunk_number: Index of this chunk, passed to ``create_transcription``.
        db: Async database session injected by FastAPI.

    Returns:
        ``{"status": "no_speech", "session_id", "chunk_number"}`` when the
        controller returns empty or whitespace-only text; otherwise a dict
        with the stored transcription's ``id``, ``session_id``,
        ``chunk_number``, ``text``, ``language`` and ``created_at``.
    """
    if not session_id or session_id.strip() == "":
        session_id = f"ses_{uuid.uuid4().hex[:8]}"

    session = await get_session(db, session_id)
    if not session:
        try:
            session = await create_session(db, session_id)
        except IntegrityError:
            # presumably another request inserted the same session id first;
            # roll back the failed insert and re-read the existing row.
            await db.rollback()
            session = await get_session(db, session_id)
    # NOTE(review): `session` may still be falsy here; the request proceeds
    # anyway, as before. Confirm whether that case should be rejected.

    # Track the path from the moment the file exists so the `finally` below
    # removes it even if reading the upload or writing to disk fails.
    temp_path = None
    try:
        # `filename` can be absent on an upload; fall back to no suffix.
        suffix = os.path.splitext(file.filename or "")[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_path = temp_file.name
            content = await file.read()
            temp_file.write(content)

        controller = TranscriptionController(
            models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER
        )
        text, language = await controller.transcribe_audio(temp_path)

        if not text or text.strip() == "":
            return {"status": "no_speech", "session_id": session_id, "chunk_number": chunk_number}

        transcription = await create_transcription(db, session_id, chunk_number, text, language)

        return {
            "id": transcription.id,
            "session_id": transcription.session_id,
            "chunk_number": transcription.chunk_number,
            "text": transcription.text,
            "language": transcription.language,
            "created_at": transcription.created_at,
        }
    finally:
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
101
 
102
+
103
+
@transcripe_router.websocket("/ws/{session_id}")
async def websocket_endpoint(
    websocket: WebSocket,
    session_id: str,
    db: AsyncSession = Depends(get_db),
):
    """Stream audio chunks over a WebSocket and reply with transcriptions.

    After accepting the connection the session is looked up (or created).
    Each binary message is written to a temporary ``.webm`` file and passed
    to the transcription controller, retrying on connection/timeout errors.
    One JSON message is sent back per processed chunk with ``status`` set to
    ``"ok"``, ``"no_speech"`` or ``"inference_unavailable"``.

    Args:
        websocket: The client connection.
        session_id: Session id taken from the URL path.
        db: Async database session injected by FastAPI.
    """
    await websocket.accept()

    result = await get_session(db, session_id)
    if not result:
        try:
            result = await create_session(db, session_id)
        except IntegrityError:
            # presumably a concurrent connection created the session first;
            # roll back the failed insert and re-read the existing row.
            await db.rollback()
            result = await get_session(db, session_id)

    if not result:
        await websocket.send_json({"error": "Session not found nor created"})
        await websocket.close()
        return

    controller = TranscriptionController(
        models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER
    )

    # Loop-invariant: number of transcription attempts per chunk.
    max_retries = 3
    # NOTE(review): the counter restarts at 0 for every connection, including
    # reconnects to an existing session — confirm this cannot collide with
    # chunks already stored for that session.
    chunk_number = 0
    logger.info(f"WebSocket connection established for session {session_id}")

    try:
        while True:
            message = await websocket.receive()
            if message["type"] == "websocket.disconnect":
                logger.info(f"Client sent disconnect frame for session {session_id}")
                break

            data = message.get("bytes")
            if not data:
                logger.warning(f"Received non-bytes message on chunk {chunk_number}, skipping")
                continue

            logger.info(f"Received audio chunk {chunk_number} ({len(data)} bytes)")

            temp_path = None
            try:
                with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_file:
                    # Record the path before writing so a failed write still
                    # gets cleaned up by the `finally` below.
                    temp_path = temp_file.name
                    temp_file.write(data)

                text, language = None, None
                last_exc = None
                for attempt in range(max_retries):
                    try:
                        text, language = await controller.transcribe_audio(temp_path)
                    # NOTE(review): `aiohttp` and `asyncio` must be imported at
                    # module level; those imports are not visible in this view.
                    except (aiohttp.ServerDisconnectedError, asyncio.TimeoutError) as e:
                        last_exc = e
                        logger.warning(
                            f"Remote inference attempt {attempt + 1} failed for "
                            f"chunk {chunk_number}: {e}"
                        )
                        if attempt < max_retries - 1:
                            # Exponential backoff: 1s, 2s, ... between attempts.
                            await asyncio.sleep(2 ** attempt)
                    else:
                        # Success: clear any error from an earlier attempt so a
                        # legitimate empty result is not reported as a failure.
                        last_exc = None
                        break

                if last_exc is not None:
                    # Every attempt failed; tell the client and move on.
                    await websocket.send_json({
                        "chunk_number": chunk_number,
                        "status": "inference_unavailable",
                        "error": str(last_exc)
                    })
                    chunk_number += 1
                    continue

                if text and text.strip():
                    await create_transcription(db, session_id, chunk_number, text, language)
                    await websocket.send_json({
                        "chunk_number": chunk_number,
                        "text": text,
                        "language": language,
                        "session_id": session_id,
                        "status": "ok"
                    })
                    logger.info(f"Transcribed chunk {chunk_number} ({language}): {text[:50]}...")
                else:
                    await websocket.send_json({
                        "chunk_number": chunk_number,
                        "status": "no_speech",
                        "language": language
                    })
                    logger.info(f"Chunk {chunk_number} ignored (no speech, detected lang: {language})")

                chunk_number += 1

            finally:
                # Runs on normal completion, `continue`, and exceptions alike.
                if temp_path and os.path.exists(temp_path):
                    os.remove(temp_path)

    except WebSocketDisconnect:
        logger.info(f"WebSocket disconnected for session {session_id}")

    except (ConnectionClosedOK, ConnectionClosedError) as e:
        logger.info(f"WebSocket closed cleanly for session {session_id}: {e}")

    except Exception as e:
        logger.error(f"Unhandled error in WebSocket for session {session_id}: {e}", exc_info=True)
        # Best effort: the socket may already be unusable, so failures while
        # reporting the error or closing are deliberately ignored.
        try:
            await websocket.send_json({"error": str(e)})
        except Exception:
            pass
        try:
            await websocket.close()
        except Exception:
            pass
217
+
218
+
219
+ # @transcripe_router.websocket("/ws/{session_id}")
220
+ # async def websocket_endpoint(websocket: WebSocket, session_id: str, db: AsyncSession = Depends(get_db)):
221
+ # await websocket.accept()
222
+
223
+ # result = await get_session(db, session_id)
224
+ # if not result:
225
+ # result = await create_session(db, session_id)
226
+
227
+ # if not result:
228
+ # await websocket.send_json({"error": "Session not found nor created"})
229
+ # await websocket.close()
230
+ # return
231
+
232
+ # chunk_number = 0
233
+ # logger.info(f"WebSocket connection established for session {session_id}")
234
+
235
+ # try:
236
+ # while True:
237
+ # data = await websocket.receive_bytes()
238
+ # logger.info(f"Received audio chunk {chunk_number} ({len(data)} bytes)")
239
+
240
+ # with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_file:
241
+ # temp_file.write(data)
242
+ # temp_path = temp_file.name
243
+
244
+ # try:
245
+ # controller = TranscriptionController(models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER)
246
+ # text, language = await controller.transcribe_audio(temp_path)
247
+
248
+ # if text:
249
+ # transcription = await create_transcription(db, session_id, chunk_number, text, language)
250
+ # await websocket.send_json({
251
+ # "chunk_number": chunk_number,
252
+ # "text": text,
253
+ # "language": language,
254
+ # "session_id": session_id
255
+ # })
256
+ # logger.info(f"Transcribed chunk {chunk_number} ({language}): {text[:50]}...")
257
+ # else:
258
+ # logger.info(f"Chunk {chunk_number} ignored (Detected: {language})")
259
+
260
+ # chunk_number += 1
261
+ # finally:
262
+ # if os.path.exists(temp_path):
263
+ # os.remove(temp_path)
264
+
265
+ # except WebSocketDisconnect:
266
+ # logger.info(f"WebSocket disconnected for session {session_id}")
267
+ # except Exception as e:
268
+ # logger.error(f"Error in WebSocket: {str(e)}")
269
+ # await websocket.send_json({"error": str(e)})
270
+ # await websocket.close()
271
 
272
  models={}
273
 
 
275
  print("Loading Whisper models...")
276
  if get_settings().INFERENCE_TYPE == "local":
277
  if get_settings().LOCAL_INFERENCE_MODEL_SIZE == "small":
278
+ models["small_arabic"] = WhisperModel("Whisper-Small-MN-int8", device="cpu", compute_type="int8")
279
  models["small_english"] = WhisperModel("small", device="cpu", compute_type="int8")
280
  elif get_settings().LOCAL_INFERENCE_MODEL_SIZE == "base":
281
+ models["base_arabic"] = WhisperModel("Whisper-Base-MN-EG-int8", device="cpu", compute_type="int8")
282
  models["base_english"] = WhisperModel("base", device="cpu", compute_type="int8")
283
 
284
  print("Models loaded successfully ")