nuernie committed on
Commit
bb2aa1c
·
1 Parent(s): 2259eec
Files changed (1) hide show
  1. app.py +52 -30
app.py CHANGED
@@ -3,25 +3,28 @@ from contextlib import asynccontextmanager
3
  import uvicorn
4
  from whisper_live.server import TranscriptionServer
5
  import logging
6
- import json
7
  import numpy as np
8
 
9
- # Configure logging
 
 
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
- # Initialize the transcription server once
 
 
 
14
  transcription_server = TranscriptionServer()
15
 
16
  @asynccontextmanager
17
  async def lifespan(app: FastAPI):
18
- # Setup if needed
19
  yield
20
- # Cleanup if needed
21
 
22
  app = FastAPI(
23
  title="Whisper Live Server",
24
- description="A real-time speech-to-text server using OpenAI's Whisper model",
25
  version="1.0.0",
26
  lifespan=lifespan
27
  )
@@ -30,13 +33,12 @@ app = FastAPI(
30
  async def root():
31
  return {
32
  "message": "Welcome to Whisper Live Server",
33
- "status": "running",
34
  "websocket_endpoint": "/ws",
35
  "health_endpoint": "/health"
36
  }
37
 
38
  @app.get("/health")
39
- def health_check():
40
  return {"status": "healthy"}
41
 
42
 
@@ -45,43 +47,62 @@ async def websocket_endpoint(websocket: WebSocket):
45
  await websocket.accept()
46
  client_uid = None
47
  config = {}
48
-
49
  try:
50
- # 1. Receive config from client
 
 
51
  config = await websocket.receive_json()
52
  client_uid = config.get("uid")
53
-
54
  if not client_uid:
55
  await websocket.close(code=4000, reason="No client UID provided")
56
  return
57
 
58
  logger.info(f"Client connected: {client_uid} | Config: {config}")
59
 
60
- # 2. Confirm server readiness
 
 
61
  await websocket.send_json({
62
  "uid": client_uid,
63
  "message": "SERVER_READY",
64
  "backend": "faster_whisper"
65
  })
66
 
67
- # 3. Per-client session configuration
68
- session = transcription_server.create_session(
69
- model=config.get("model", "tiny"),
70
- language=config.get("language", "de"),
71
- task=config.get("task", "transcribe"),
72
- use_vad=config.get("use_vad", True)
73
- )
74
-
75
- # 4. Start processing audio stream
 
 
 
 
 
 
76
  while True:
77
- data = await websocket.receive()
78
 
79
- if data["type"] == "websocket.disconnect":
 
80
  break
81
 
82
- if "bytes" in data:
83
- audio_data = np.frombuffer(data["bytes"], dtype=np.float32)
84
- segments = session.process_audio(audio_data)
 
 
 
 
 
 
 
 
 
85
 
86
  if segments:
87
  await websocket.send_json({
@@ -89,10 +110,10 @@ async def websocket_endpoint(websocket: WebSocket):
89
  "segments": segments
90
  })
91
 
92
- elif "text" in data:
93
- if data["text"] == "END_OF_AUDIO":
94
- logger.info(f"Client {client_uid} ended stream.")
95
- break
96
 
97
  except WebSocketDisconnect:
98
  logger.warning(f"WebSocket disconnected: {client_uid}")
@@ -109,5 +130,6 @@ async def websocket_endpoint(websocket: WebSocket):
109
  finally:
110
  await websocket.close()
111
 
 
112
  if __name__ == "__main__":
113
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
3
  import uvicorn
4
  from whisper_live.server import TranscriptionServer
5
  import logging
 
6
  import numpy as np
7
 
8
+ # —————————————————————————————
9
+ # Logging
10
+ # —————————————————————————————
11
  logging.basicConfig(level=logging.INFO)
12
  logger = logging.getLogger(__name__)
13
 
14
+ # —————————————————————————————
15
+ # Instantiate the shared TranscriptionServer
16
+ # (e.g. this loads your model once)
17
+ # —————————————————————————————
18
  transcription_server = TranscriptionServer()
19
 
20
  @asynccontextmanager
21
  async def lifespan(app: FastAPI):
22
+ # any startup logic here (e.g. preload models)
23
  yield
24
+ # any cleanup logic here
25
 
26
  app = FastAPI(
27
  title="Whisper Live Server",
 
28
  version="1.0.0",
29
  lifespan=lifespan
30
  )
 
33
  async def root():
34
  return {
35
  "message": "Welcome to Whisper Live Server",
 
36
  "websocket_endpoint": "/ws",
37
  "health_endpoint": "/health"
38
  }
39
 
40
  @app.get("/health")
41
+ async def health_check():
42
  return {"status": "healthy"}
43
 
44
 
 
47
  await websocket.accept()
48
  client_uid = None
49
  config = {}
50
+
51
  try:
52
+ # —————————————————————————————
53
+ # 1) Read the per‑client config JSON
54
+ #—————————————————————————————
55
  config = await websocket.receive_json()
56
  client_uid = config.get("uid")
 
57
  if not client_uid:
58
  await websocket.close(code=4000, reason="No client UID provided")
59
  return
60
 
61
  logger.info(f"Client connected: {client_uid} | Config: {config}")
62
 
63
+ # —————————————————————————————
64
+ # 2) Send back a SERVER_READY message
65
+ #—————————————————————————————
66
  await websocket.send_json({
67
  "uid": client_uid,
68
  "message": "SERVER_READY",
69
  "backend": "faster_whisper"
70
  })
71
 
72
+ # —————————————————————————————
73
+ # 3) Configure the shared server for this client
74
+ # (you can also patch transcription_server attributes here)
75
+ #—————————————————————————————
76
+ language = config.get("language", "de")
77
+ task = config.get("task", "transcribe")
78
+ model = config.get("model", "tiny")
79
+ use_vad = config.get("use_vad", True)
80
+
81
+ # If your TranscriptionServer.process_audio takes kwargs, you can pass them directly.
82
+ # Otherwise, you may need to set transcription_server.language = language, etc.
83
+
84
+ # —————————————————————————————
85
+ # 4) Enter the receive‑loop
86
+ #—————————————————————————————
87
  while True:
88
+ msg = await websocket.receive()
89
 
90
+ # client closed connection
91
+ if msg["type"] == "websocket.disconnect":
92
  break
93
 
94
+ # binary audio frames
95
+ if "bytes" in msg and msg["bytes"] is not None:
96
+ audio_data = np.frombuffer(msg["bytes"], dtype=np.float32)
97
+
98
+ # pass per‑client params into process_audio
99
+ segments = transcription_server.process_audio(
100
+ audio_data,
101
+ language=language,
102
+ task=task,
103
+ model=model,
104
+ use_vad=use_vad
105
+ )
106
 
107
  if segments:
108
  await websocket.send_json({
 
110
  "segments": segments
111
  })
112
 
113
+ # text control messages
114
+ elif "text" in msg and msg["text"] == "END_OF_AUDIO":
115
+ logger.info(f"Client {client_uid} ended stream.")
116
+ break
117
 
118
  except WebSocketDisconnect:
119
  logger.warning(f"WebSocket disconnected: {client_uid}")
 
130
  finally:
131
  await websocket.close()
132
 
133
+
134
  if __name__ == "__main__":
135
  uvicorn.run(app, host="0.0.0.0", port=7860)