Spaces:

saadpie
/

ASH-BAND

Sleeping

App Files Files Community

saadpie committed on Apr 18

Commit

06aae43

verified ·

1 Parent(s): 07104ff

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -41

app.py CHANGED Viewed

@@ -2,15 +2,13 @@ import os
 import asyncio
 from quart import Quart, websocket
 from google import genai
-from google.genai import types
 app = Quart(__name__)
 # Ensure your HF Space has GEMINI_API_KEY set in its secrets/environment variables
 client = genai.Client()
-# Note: The official live model name is currently gemini-2.0-flash-exp.
-# Update this if you have specific access to a 3.1 live preview endpoint.
 MODEL = "gemini-2.0-flash-exp"
 VOICE_MODES = {
@@ -34,36 +32,31 @@ async def index():
 async def ws_stream():
     """
     WebSocket endpoint for the Termux client.
-    Connect via: ws://<hf-space-url>/stream?voice=Zephyr
     """
-    # Grab the requested voice from the URL parameter, default to Zephyr
     requested_voice = websocket.args.get("voice", "Zephyr")
     voice_name = VOICE_MODES.get(requested_voice, "Zephyr")
-    # Mirroring your TS configuration
-    config = types.LiveConnectConfig(
-        response_modalities=[types.LiveModality.AUDIO],
-        speech_config=types.SpeechConfig(
-            voice_config=types.VoiceConfig(
-                prebuilt_voice_config=types.PrebuiltVoiceConfig(
-                    voice_name=voice_name
-                )
-            )
-        ),
-        tools=[{"google_search": {}}],
-        system_instruction=types.Content(
-            parts=[types.Part.from_text(
-                "You are ASH-BAND, a high-fidelity AI wearable companion. "
-                "Speak in a professional, concise, and helpful tone. "
-                "You have access to Google Search. Keep responses brief to minimize latency. "
-                "Your responses are spoken aloud."
-            )]
         )
-    )
     print(f"Connecting to Gemini Live API with voice: {voice_name}...")
     try:
         async with client.aio.live.connect(model=MODEL, config=config) as session:
             print("Live session established.")
@@ -71,10 +64,9 @@ async def ws_stream():
             async def client_to_gemini():
                 try:
                     while True:
-                        # Receive audio chunks from the client
                         data = await websocket.receive()
                         if isinstance(data, bytes):
-                            # The TS file was downsampling to 16000Hz PCM
                             await session.send(
                                 input={"data": data, "mime_type": "audio/pcm;rate=16000"}
                             )
@@ -88,18 +80,15 @@ async def ws_stream():
                 try:
                     async for message in session.receive():
                         server_content = message.server_content
-                        if server_content is not None:
-                            # Handle Interruption
                             if server_content.interrupted:
-                                print("AI Interrupted by user.")
-                                # In a more complex setup, send a control message to client to clear audio queue
                             model_turn = server_content.model_turn
-                            if model_turn is not None:
                                 for part in model_turn.parts:
-                                    # Output raw audio back to the client
                                     if part.inline_data and part.inline_data.data:
-                                        # Gemini returns 24kHz PCM audio
                                         await websocket.send(part.inline_data.data)
                 except asyncio.CancelledError:
                     pass
@@ -110,20 +99,17 @@ async def ws_stream():
             task1 = asyncio.create_task(client_to_gemini())
             task2 = asyncio.create_task(gemini_to_client())
-            # Wait until one of the connections drops
-            done, pending = await asyncio.wait(
                 [task1, task2],
                 return_when=asyncio.FIRST_COMPLETED,
             )
-            # Clean up the remaining task
-            for p in pending:
-                p.cancel()
     except Exception as e:
         print(f"Connection failed: {e}")
-# Hugging Face Spaces standard port is 7860
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860)

 import asyncio
 from quart import Quart, websocket
 from google import genai
 app = Quart(__name__)
 # Ensure your HF Space has GEMINI_API_KEY set in its secrets/environment variables
 client = genai.Client()
+# Note: Using gemini-2.0-flash-exp as it is the most stable for the Live SDK currently
 MODEL = "gemini-2.0-flash-exp"
 VOICE_MODES = {
 async def ws_stream():
     """
     WebSocket endpoint for the Termux client.
+    Connect via: wss://<hf-space-url>/stream?voice=Zephyr
     """
     requested_voice = websocket.args.get("voice", "Zephyr")
     voice_name = VOICE_MODES.get(requested_voice, "Zephyr")
+    # Using a dictionary for config prevents AttributeError on specific SDK versions
+    config = {
+        "response_modalities": ["AUDIO"],
+        "speech_config": {
+            "voice_config": {
+                "prebuilt_voice_config": {"voice_name": voice_name}
+            }
+        },
+        "tools": [{"google_search": {}}],
+        "system_instruction": (
+            "You are ASH-BAND, a high-fidelity AI wearable companion. "
+            "Speak in a professional, concise, and helpful tone. "
+            "Keep responses brief to minimize latency. Your responses are spoken aloud."
         )
+    }
     print(f"Connecting to Gemini Live API with voice: {voice_name}...")
     try:
+        # Pass the dictionary directly to the config parameter
         async with client.aio.live.connect(model=MODEL, config=config) as session:
             print("Live session established.")
             async def client_to_gemini():
                 try:
                     while True:
                         data = await websocket.receive()
                         if isinstance(data, bytes):
+                            # Sending 16kHz PCM data from client to Gemini
                             await session.send(
                                 input={"data": data, "mime_type": "audio/pcm;rate=16000"}
                             )
                 try:
                     async for message in session.receive():
                         server_content = message.server_content
+                        if server_content:
                             if server_content.interrupted:
+                                print("AI Interrupted.")
                             model_turn = server_content.model_turn
+                            if model_turn:
                                 for part in model_turn.parts:
                                     if part.inline_data and part.inline_data.data:
+                                        # Sending 24kHz PCM data back to client
                                         await websocket.send(part.inline_data.data)
                 except asyncio.CancelledError:
                     pass
             task1 = asyncio.create_task(client_to_gemini())
             task2 = asyncio.create_task(gemini_to_client())
+            await asyncio.wait(
                 [task1, task2],
                 return_when=asyncio.FIRST_COMPLETED,
             )
+            task1.cancel()
+            task2.cancel()
     except Exception as e:
         print(f"Connection failed: {e}")
 if __name__ == "__main__":
+    # HF Spaces standard port is 7860
+    app.run(host="0.0.0.0", port=7860)