Spaces:

nuernie
/

ai-server

Paused

App Files Community

nuernie committed on May 17, 2025

Commit

bb2aa1c

1 Parent(s): 2259eec

adjust

Browse files

Files changed (1) hide show

app.py +52 -30

app.py CHANGED Viewed

@@ -3,25 +3,28 @@ from contextlib import asynccontextmanager
 import uvicorn
 from whisper_live.server import TranscriptionServer
 import logging
-import json
 import numpy as np
-# Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# Initialize the transcription server once
 transcription_server = TranscriptionServer()
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    # Setup if needed
     yield
-    # Cleanup if needed
 app = FastAPI(
     title="Whisper Live Server",
-    description="A real-time speech-to-text server using OpenAI's Whisper model",
     version="1.0.0",
     lifespan=lifespan
 )
@@ -30,13 +33,12 @@ app = FastAPI(
 async def root():
     return {
         "message": "Welcome to Whisper Live Server",
-        "status": "running",
         "websocket_endpoint": "/ws",
         "health_endpoint": "/health"
     }
 @app.get("/health")
-def health_check():
     return {"status": "healthy"}
@@ -45,43 +47,62 @@ async def websocket_endpoint(websocket: WebSocket):
     await websocket.accept()
     client_uid = None
     config = {}
     try:
-        # 1. Receive config from client
         config = await websocket.receive_json()
         client_uid = config.get("uid")
         if not client_uid:
             await websocket.close(code=4000, reason="No client UID provided")
             return
         logger.info(f"Client connected: {client_uid} | Config: {config}")
-        # 2. Confirm server readiness
         await websocket.send_json({
             "uid": client_uid,
             "message": "SERVER_READY",
             "backend": "faster_whisper"
         })
-        # 3. Per-client session configuration
-        session = transcription_server.create_session(
-            model=config.get("model", "tiny"),
-            language=config.get("language", "de"),
-            task=config.get("task", "transcribe"),
-            use_vad=config.get("use_vad", True)
-        )
-        # 4. Start processing audio stream
         while True:
-            data = await websocket.receive()
-            if data["type"] == "websocket.disconnect":
                 break
-            if "bytes" in data:
-                audio_data = np.frombuffer(data["bytes"], dtype=np.float32)
-                segments = session.process_audio(audio_data)
                 if segments:
                     await websocket.send_json({
@@ -89,10 +110,10 @@ async def websocket_endpoint(websocket: WebSocket):
                         "segments": segments
                     })
-            elif "text" in data:
-                if data["text"] == "END_OF_AUDIO":
-                    logger.info(f"Client {client_uid} ended stream.")
-                    break
     except WebSocketDisconnect:
         logger.warning(f"WebSocket disconnected: {client_uid}")
@@ -109,5 +130,6 @@ async def websocket_endpoint(websocket: WebSocket):
     finally:
         await websocket.close()
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=7860)

 import uvicorn
 from whisper_live.server import TranscriptionServer
 import logging
 import numpy as np
+# —————————————————————————————
+# Logging
+# —————————————————————————————
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# —————————————————————————————
+# Instantiate the shared TranscriptionServer
+# (e.g. this loads your model once)
+# —————————————————————————————
 transcription_server = TranscriptionServer()
 @asynccontextmanager
 async def lifespan(app: FastAPI):
+    # any startup logic here (e.g. preload models)
     yield
+    # any cleanup logic here
 app = FastAPI(
     title="Whisper Live Server",
     version="1.0.0",
     lifespan=lifespan
 )
 async def root():
     return {
         "message": "Welcome to Whisper Live Server",
         "websocket_endpoint": "/ws",
         "health_endpoint": "/health"
     }
 @app.get("/health")
+async def health_check():
     return {"status": "healthy"}
     await websocket.accept()
     client_uid = None
     config = {}
     try:
+        # —————————————————————————————
+        # 1) Read the per‑client config JSON
+        #—————————————————————————————
         config = await websocket.receive_json()
         client_uid = config.get("uid")
         if not client_uid:
             await websocket.close(code=4000, reason="No client UID provided")
             return
         logger.info(f"Client connected: {client_uid} | Config: {config}")
+        # —————————————————————————————
+        # 2) Send back a SERVER_READY message
+        #—————————————————————————————
         await websocket.send_json({
             "uid": client_uid,
             "message": "SERVER_READY",
             "backend": "faster_whisper"
         })
+        # —————————————————————————————
+        # 3) Configure the shared server for this client
+        #    (you can also patch transcription_server attributes here)
+        #—————————————————————————————
+        language = config.get("language", "de")
+        task     = config.get("task",     "transcribe")
+        model    = config.get("model",    "tiny")
+        use_vad  = config.get("use_vad",  True)
+        # If your TranscriptionServer.process_audio takes kwargs, you can pass them directly.
+        # Otherwise, you may need to set transcription_server.language = language, etc.
+        # —————————————————————————————
+        # 4) Enter the receive‑loop
+        #—————————————————————————————
         while True:
+            msg = await websocket.receive()
+            # client closed connection
+            if msg["type"] == "websocket.disconnect":
                 break
+            # binary audio frames
+            if "bytes" in msg and msg["bytes"] is not None:
+                audio_data = np.frombuffer(msg["bytes"], dtype=np.float32)
+                # pass per‑client params into process_audio
+                segments = transcription_server.process_audio(
+                    audio_data,
+                    language=language,
+                    task=task,
+                    model=model,
+                    use_vad=use_vad
+                )
                 if segments:
                     await websocket.send_json({
                         "segments": segments
                     })
+            # text control messages
+            elif "text" in msg and msg["text"] == "END_OF_AUDIO":
+                logger.info(f"Client {client_uid} ended stream.")
+                break
     except WebSocketDisconnect:
         logger.warning(f"WebSocket disconnected: {client_uid}")
     finally:
         await websocket.close()
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=7860)