File size: 2,810 Bytes
d658050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0296e2a
d658050
 
 
 
 
 
 
 
0296e2a
d658050
 
0296e2a
 
 
 
 
 
 
 
 
 
 
 
 
d658050
 
 
 
 
 
 
 
 
 
 
090a598
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os
import asyncio
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from google import genai
from google.genai import types

# FastAPI application; the route handlers in this module attach to it.
app = FastAPI()

# --- CONFIGURATION ---
# Gemini Live model that every bridged session connects to.
MODEL = "models/gemini-2.5-flash-native-audio-preview-12-2025"

# Gemini client on the v1beta API surface; the key is read from the
# GEMINI_API_KEY environment variable (None when it is unset).
client = genai.Client(
    api_key=os.getenv("GEMINI_API_KEY"),
    http_options={"api_version": "v1beta"},
)

# Session settings: audio-only responses spoken with the prebuilt "Orus" voice.
CONFIG = types.LiveConnectConfig(
    system_instruction=(
        "You are SteveAI by Saadpie. "
        "Be a helpful, concise AI assistant. "
        "Use natural phrases like 'Based on my search' when applicable. "
        "Do not use markdown."
    ),
    response_modalities=["AUDIO"],
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Orus"),
        ),
    ),
)

@app.websocket("/audio")
async def websocket_endpoint(websocket: WebSocket):
    """Relay audio and text between one WebSocket client and a Gemini Live session.

    Binary frames from the client are forwarded to Gemini as 16 kHz PCM
    realtime audio; text frames are forwarded as a user turn. Inline audio
    data found in Gemini's model turns is sent back to the client as binary
    frames.

    Both directions run as concurrent tasks. As soon as either one stops
    (client disconnect, Gemini error, ...) the other is cancelled, so nothing
    keeps using a socket or session that is already closed.

    Args:
        websocket: The incoming client connection; accepted here.
    """
    await websocket.accept()
    print("Band Connected to SteveAI Cloud")

    try:
        async with client.aio.live.connect(model=MODEL, config=CONFIG) as session:

            # 1. Gemini -> Band (Binary Audio)
            async def receive_from_gemini():
                # session.receive() stops iterating once a model turn is
                # complete, so it must be re-entered for every new turn;
                # otherwise only the first reply would ever reach the client.
                while True:
                    async for response in session.receive():
                        server_content = response.server_content
                        if not (server_content and server_content.model_turn):
                            continue
                        # parts is optional on a model turn; treat None as empty.
                        for part in server_content.model_turn.parts or ():
                            inline_data = getattr(part, "inline_data", None)
                            if inline_data and inline_data.data:
                                await websocket.send_bytes(inline_data.data)
                    # Yield to the event loop between turns so this loop can
                    # never spin without giving other tasks a chance to run.
                    await asyncio.sleep(0)

            # 2. Band/Termux -> Gemini (Hybrid: Handles Text & Audio)
            async def receive_from_band():
                while True:
                    # Generic receive() accepts both text and binary frames
                    # (receive_bytes()/receive_text() fail on the other kind).
                    message = await websocket.receive()

                    # receive() reports a disconnect as a message instead of
                    # raising; calling it again afterwards is a RuntimeError.
                    if message.get("type") == "websocket.disconnect":
                        raise WebSocketDisconnect(message.get("code") or 1000)

                    # Servers may include both keys with one set to None, so
                    # test the values rather than key membership.
                    audio = message.get("bytes")
                    text = message.get("text")
                    if audio:
                        # Audio from ESP32 or Termux Mic
                        await session.send_realtime_input(
                            audio=types.Blob(data=audio, mime_type="audio/pcm;rate=16000")
                        )
                    elif text:
                        # Typing from Termux
                        await session.send_client_content(
                            turns=[types.Content(role="user", parts=[types.Part(text=text)])]
                        )

            tasks = [
                asyncio.create_task(receive_from_gemini()),
                asyncio.create_task(receive_from_band()),
            ]
            try:
                finished, _ = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
                for task in finished:
                    # Re-raise whatever ended the task so the handlers below see it.
                    task.result()
            finally:
                # Stop the surviving direction before the session is closed,
                # and collect any leftover exceptions so none go unretrieved.
                for task in tasks:
                    task.cancel()
                await asyncio.gather(*tasks, return_exceptions=True)

    except WebSocketDisconnect:
        print("Band Disconnected")
    except Exception as e:
        print(f"Bridge Error: {e}")
        # Do not leave the client hanging on a dead bridge (1011 = internal error).
        try:
            await websocket.close(code=1011)
        except (RuntimeError, WebSocketDisconnect):
            # The socket was already closed or the client is gone.
            pass

@app.get("/")
def read_root():
    """Return a small status payload naming the configured Gemini model."""
    payload = dict(status="SteveAI Bridge Active")
    payload["target"] = MODEL
    return payload

if __name__ == "__main__":
    # Script entry point: uvicorn is imported only when the module is run
    # directly, then serves `app` on all interfaces at port 7860.
    import uvicorn

    bind = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **bind)