Spaces:

MinaNasser
/

Transcriper_API

Runtime error

App Files Community

MinaNasser committed on Apr 19

Commit

53e8087

1 Parent(s): b3ecc6c

4th errors

Browse files

Files changed (1) hide show

routes/transcripe.py +197 -33

routes/transcripe.py CHANGED Viewed

@@ -10,6 +10,8 @@ import uuid
 from models.sessions import create_session, get_session
 from models.transcriptions import create_transcription
 from models.database import get_db
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -19,91 +21,253 @@ transcripe_router = APIRouter(
         tags=["transcripe"])
 @transcripe_router.post("/transcribe")
-async def transcribe_endpoint(file: UploadFile = File(...),session_id: str = None,chunk_number: int = 0, db: AsyncSession = Depends(get_db)):
     if not session_id or session_id.strip() == "":
         session_id = f"ses_{uuid.uuid4().hex[:8]}"
     session = await get_session(db, session_id)
     if not session:
-        session = await create_session(db, session_id)
     with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1]) as temp_file:
         content = await file.read()
         temp_file.write(content)
         temp_path = temp_file.name
     try:
         controller = TranscriptionController(models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER)
         text, language = await controller.transcribe_audio(temp_path)
-        if text:
-            transcription = await create_transcription(db, session_id, chunk_number, text, language)
         return {
             "id": transcription.id,
             "session_id": transcription.session_id,
             "chunk_number": transcription.chunk_number,
             "text": transcription.text,
             "language": transcription.language,
-            "created_at": transcription.created_at
         }
     finally:
         if os.path.exists(temp_path):
             os.remove(temp_path)
 @transcripe_router.websocket("/ws/{session_id}")
-async def websocket_endpoint(websocket: WebSocket, session_id: str, db: AsyncSession = Depends(get_db)):
     await websocket.accept()
     result = await get_session(db, session_id)
     if not result:
-        result = await create_session(db, session_id)
     if not result:
         await websocket.send_json({"error": "Session not found nor created"})
         await websocket.close()
         return
     chunk_number = 0
     logger.info(f"WebSocket connection established for session {session_id}")
     try:
         while True:
-            data = await websocket.receive_bytes()
             logger.info(f"Received audio chunk {chunk_number} ({len(data)} bytes)")
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_file:
-                temp_file.write(data)
-                temp_path = temp_file.name
             try:
-                controller = TranscriptionController(models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER)
-                text, language = await controller.transcribe_audio(temp_path)
-                if text:
-                    transcription = await create_transcription(db, session_id, chunk_number, text, language)
                     await websocket.send_json({
                         "chunk_number": chunk_number,
                         "text": text,
                         "language": language,
-                        "session_id": session_id
                     })
                     logger.info(f"Transcribed chunk {chunk_number} ({language}): {text[:50]}...")
                 else:
-                    logger.info(f"Chunk {chunk_number} ignored (Detected: {language})")
                 chunk_number += 1
             finally:
-                if os.path.exists(temp_path):
                     os.remove(temp_path)
     except WebSocketDisconnect:
         logger.info(f"WebSocket disconnected for session {session_id}")
     except Exception as e:
-        logger.error(f"Error in WebSocket: {str(e)}")
-        await websocket.send_json({"error": str(e)})
-        await websocket.close()
 models={}
@@ -111,10 +275,10 @@ async def load_models():
     print("Loading Whisper models...")
     if get_settings().INFERENCE_TYPE == "local":
         if get_settings().LOCAL_INFERENCE_MODEL_SIZE == "small":
-            models["small_arabic"] = WhisperModel("MinaNasser/Whisper-Small-MN-int8", device="cpu", compute_type="int8")
             models["small_english"] = WhisperModel("small", device="cpu", compute_type="int8")
         elif get_settings().LOCAL_INFERENCE_MODEL_SIZE == "base":
-            models["base_arabic"] = WhisperModel("MinaNasser/Whisper-Base-MN-EG-int8", device="cpu", compute_type="int8")
             models["base_english"] = WhisperModel("base", device="cpu", compute_type="int8")
     print("Models loaded successfully ")

 from models.sessions import create_session, get_session
 from models.transcriptions import create_transcription
 from models.database import get_db
+from sqlalchemy.exc import IntegrityError
+from websockets.exceptions import ConnectionClosedOK, ConnectionClosedError
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
         tags=["transcripe"])
+# @transcripe_router.post("/transcribe")
+# async def transcribe_endpoint(file: UploadFile = File(...),session_id: str = None,chunk_number: int = 0, db: AsyncSession = Depends(get_db)):
+#     if not session_id or session_id.strip() == "":
+#         session_id = f"ses_{uuid.uuid4().hex[:8]}"
+#     session = await get_session(db, session_id)
+#     if not session:
+#         session = await create_session(db, session_id)
+#     with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1]) as temp_file:
+#         content = await file.read()
+#         temp_file.write(content)
+#         temp_path = temp_file.name
+#     try:
+#         controller = TranscriptionController(models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER)
+#         text, language = await controller.transcribe_audio(temp_path)
+#         if text:
+#             transcription = await create_transcription(db, session_id, chunk_number, text, language)
+#         return {
+#             "id": transcription.id,
+#             "session_id": transcription.session_id,
+#             "chunk_number": transcription.chunk_number,
+#             "text": transcription.text,
+#             "language": transcription.language,
+#             "created_at": transcription.created_at
+#         }
+#     finally:
+#         if os.path.exists(temp_path):
+#             os.remove(temp_path)
 @transcripe_router.post("/transcribe")
+async def transcribe_endpoint(
+    file: UploadFile = File(...),
+    session_id: str = None,
+    chunk_number: int = 0,
+    db: AsyncSession = Depends(get_db)
+):
     if not session_id or session_id.strip() == "":
         session_id = f"ses_{uuid.uuid4().hex[:8]}"
     session = await get_session(db, session_id)
     if not session:
+        try:
+            session = await create_session(db, session_id)
+        except IntegrityError:
+            await db.rollback()
+            session = await get_session(db, session_id)
     with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1]) as temp_file:
         content = await file.read()
         temp_file.write(content)
         temp_path = temp_file.name
     try:
         controller = TranscriptionController(models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER)
         text, language = await controller.transcribe_audio(temp_path)
+        if not text or text.strip() == "":
+            return {"status": "no_speech", "session_id": session_id, "chunk_number": chunk_number}
+        transcription = await create_transcription(db, session_id, chunk_number, text, language)
         return {
             "id": transcription.id,
             "session_id": transcription.session_id,
             "chunk_number": transcription.chunk_number,
             "text": transcription.text,
             "language": transcription.language,
+            "created_at": transcription.created_at,
         }
     finally:
         if os.path.exists(temp_path):
             os.remove(temp_path)
 @transcripe_router.websocket("/ws/{session_id}")
+async def websocket_endpoint(
+    websocket: WebSocket,
+    session_id: str,
+    db: AsyncSession = Depends(get_db)
+):
     await websocket.accept()
     result = await get_session(db, session_id)
     if not result:
+        try:
+            result = await create_session(db, session_id)
+        except IntegrityError:
+            await db.rollback()
+            result = await get_session(db, session_id)
     if not result:
         await websocket.send_json({"error": "Session not found nor created"})
         await websocket.close()
         return
+    controller = TranscriptionController(
+        models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER
+    )
     chunk_number = 0
     logger.info(f"WebSocket connection established for session {session_id}")
     try:
         while True:
+            message = await websocket.receive()
+            if message["type"] == "websocket.disconnect":
+                logger.info(f"Client sent disconnect frame for session {session_id}")
+                break
+            data = message.get("bytes")
+            if not data:
+                logger.warning(f"Received non-bytes message on chunk {chunk_number}, skipping")
+                continue
             logger.info(f"Received audio chunk {chunk_number} ({len(data)} bytes)")
+            temp_path = None
             try:
+                with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_file:
+                    temp_file.write(data)
+                    temp_path = temp_file.name
+                MAX_RETRIES = 3
+                text, language = None, None
+                last_exc = None
+                for attempt in range(MAX_RETRIES):
+                    try:
+                        text, language = await controller.transcribe_audio(temp_path)
+                        break
+                    except (aiohttp.ServerDisconnectedError, asyncio.TimeoutError) as e:
+                        last_exc = e
+                        logger.warning(
+                            f"Remote inference attempt {attempt + 1} failed for "
+                            f"chunk {chunk_number}: {e}"
+                        )
+                        if attempt < MAX_RETRIES - 1:
+                            await asyncio.sleep(2 ** attempt)
+                if text is None and last_exc is not None:
+                    await websocket.send_json({
+                        "chunk_number": chunk_number,
+                        "status": "inference_unavailable",
+                        "error": str(last_exc)
+                    })
+                    chunk_number += 1
+                    continue
+                if text and text.strip():
+                    await create_transcription(db, session_id, chunk_number, text, language)
                     await websocket.send_json({
                         "chunk_number": chunk_number,
                         "text": text,
                         "language": language,
+                        "session_id": session_id,
+                        "status": "ok"
                     })
                     logger.info(f"Transcribed chunk {chunk_number} ({language}): {text[:50]}...")
                 else:
+                    await websocket.send_json({
+                        "chunk_number": chunk_number,
+                        "status": "no_speech",
+                        "language": language
+                    })
+                    logger.info(f"Chunk {chunk_number} ignored (no speech, detected lang: {language})")
                 chunk_number += 1
             finally:
+                if temp_path and os.path.exists(temp_path):
                     os.remove(temp_path)
     except WebSocketDisconnect:
         logger.info(f"WebSocket disconnected for session {session_id}")
+    except (ConnectionClosedOK, ConnectionClosedError) as e:
+        logger.info(f"WebSocket closed cleanly for session {session_id}: {e}")
     except Exception as e:
+        logger.error(f"Unhandled error in WebSocket for session {session_id}: {e}", exc_info=True)
+        try:
+            await websocket.send_json({"error": str(e)})
+        except Exception:
+            pass
+        try:
+            await websocket.close()
+        except Exception:
+            pass
+# @transcripe_router.websocket("/ws/{session_id}")
+# async def websocket_endpoint(websocket: WebSocket, session_id: str, db: AsyncSession = Depends(get_db)):
+#     await websocket.accept()
+#     result = await get_session(db, session_id)
+#     if not result:
+#         result = await create_session(db, session_id)
+#     if not result:
+#         await websocket.send_json({"error": "Session not found nor created"})
+#         await websocket.close()
+#         return
+#     chunk_number = 0
+#     logger.info(f"WebSocket connection established for session {session_id}")
+#     try:
+#         while True:
+#             data = await websocket.receive_bytes()
+#             logger.info(f"Received audio chunk {chunk_number} ({len(data)} bytes)")
+#             with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_file:
+#                 temp_file.write(data)
+#                 temp_path = temp_file.name
+#             try:
+#                 controller = TranscriptionController(models, logger, remotename=get_settings().REMOTE_INFERENCE_PROVIDER)
+#                 text, language = await controller.transcribe_audio(temp_path)
+#                 if text:
+#                     transcription = await create_transcription(db, session_id, chunk_number, text, language)
+#                     await websocket.send_json({
+#                         "chunk_number": chunk_number,
+#                         "text": text,
+#                         "language": language,
+#                         "session_id": session_id
+#                     })
+#                     logger.info(f"Transcribed chunk {chunk_number} ({language}): {text[:50]}...")
+#                 else:
+#                     logger.info(f"Chunk {chunk_number} ignored (Detected: {language})")
+#                 chunk_number += 1
+#             finally:
+#                 if os.path.exists(temp_path):
+#                     os.remove(temp_path)
+#     except WebSocketDisconnect:
+#         logger.info(f"WebSocket disconnected for session {session_id}")
+#     except Exception as e:
+#         logger.error(f"Error in WebSocket: {str(e)}")
+#         await websocket.send_json({"error": str(e)})
+#         await websocket.close()
 models={}
     print("Loading Whisper models...")
     if get_settings().INFERENCE_TYPE == "local":
         if get_settings().LOCAL_INFERENCE_MODEL_SIZE == "small":
+            models["small_arabic"] = WhisperModel("Whisper-Small-MN-int8", device="cpu", compute_type="int8")
             models["small_english"] = WhisperModel("small", device="cpu", compute_type="int8")
         elif get_settings().LOCAL_INFERENCE_MODEL_SIZE == "base":
+            models["base_arabic"] = WhisperModel("Whisper-Base-MN-EG-int8", device="cpu", compute_type="int8")
             models["base_english"] = WhisperModel("base", device="cpu", compute_type="int8")
     print("Models loaded successfully ")