Saadpie1 committed on
Commit
0296e2a
·
verified ·
1 Parent(s): be72adb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -13
app.py CHANGED
@@ -1,7 +1,5 @@
1
  import os
2
  import asyncio
3
- import json
4
- import base64
5
  from fastapi import FastAPI, WebSocket, WebSocketDisconnect
6
  from google import genai
7
  from google.genai import types
@@ -33,24 +31,31 @@ async def websocket_endpoint(websocket: WebSocket):
33
  try:
34
  async with client.aio.live.connect(model=MODEL, config=CONFIG) as session:
35
 
36
- # 1. Receiver from Gemini -> Send to Band
37
  async def receive_from_gemini():
38
  async for response in session.receive():
39
  if response.server_content and response.server_content.model_turn:
40
  parts = response.server_content.model_turn.parts
41
  for part in parts:
42
  if hasattr(part, 'inline_data') and part.inline_data:
43
- # Send raw audio bytes to ESP32
44
  await websocket.send_bytes(part.inline_data.data)
45
 
46
- # 2. Receiver from Band -> Send to Gemini
47
  async def receive_from_band():
48
  while True:
49
- # ESP32 sends raw 16kHz PCM chunks
50
- data = await websocket.receive_bytes()
51
- await session.send_realtime_input(
52
- audio=types.Blob(data=data, mime_type="audio/pcm;rate=16000")
53
- )
 
 
 
 
 
 
 
 
54
 
55
  await asyncio.gather(receive_from_gemini(), receive_from_band())
56
 
@@ -65,7 +70,4 @@ def read_root():
65
 
66
  if __name__ == "__main__":
67
  import uvicorn
68
- # Port 7860 is mandatory for Hugging Face Spaces
69
  uvicorn.run(app, host="0.0.0.0", port=7860)
70
-
71
-
 
1
  import os
2
  import asyncio
 
 
3
  from fastapi import FastAPI, WebSocket, WebSocketDisconnect
4
  from google import genai
5
  from google.genai import types
 
31
  try:
32
  async with client.aio.live.connect(model=MODEL, config=CONFIG) as session:
33
 
34
+ # 1. Gemini -> Band (Binary Audio)
35
  async def receive_from_gemini():
36
  async for response in session.receive():
37
  if response.server_content and response.server_content.model_turn:
38
  parts = response.server_content.model_turn.parts
39
  for part in parts:
40
  if hasattr(part, 'inline_data') and part.inline_data:
 
41
  await websocket.send_bytes(part.inline_data.data)
42
 
43
+ # 2. Band/Termux -> Gemini (Hybrid: Handles Text & Audio)
44
  async def receive_from_band():
45
  while True:
46
+ # Generic receive() avoids the 'KeyError' crash
47
+ message = await websocket.receive()
48
+
49
+ if "bytes" in message:
50
+ # Audio from ESP32 or Termux Mic
51
+ await session.send_realtime_input(
52
+ audio=types.Blob(data=message["bytes"], mime_type="audio/pcm;rate=16000")
53
+ )
54
+ elif "text" in message:
55
+ # Typing from Termux
56
+ await session.send_client_content(
57
+ turns=[types.Content(parts=[types.Part(text=message["text"])])]
58
+ )
59
 
60
  await asyncio.gather(receive_from_gemini(), receive_from_band())
61
 
 
70
 
71
  if __name__ == "__main__":
72
  import uvicorn
 
73
  uvicorn.run(app, host="0.0.0.0", port=7860)