Spaces:

saadpie
/

ASH-BAND

Sleeping

App Files Files Community

ASH-BAND / app.py

saadpie

Create app.py

87461f5 verified about 1 month ago

raw

history blame

4.8 kB

	import os
	import asyncio
	from quart import Quart, websocket
	from google import genai
	from google.genai import types

	# Identifier of the model used for the Gemini Live connection. The official
	# live model name is currently gemini-2.0-flash-exp; change it here if you
	# have specific access to another live endpoint (e.g. a 3.1 live preview).
	MODEL = "gemini-2.0-flash-exp"

	# Quart application object; the WebSocket route is registered on it.
	app = Quart(__name__)

	# Gemini client created with no explicit arguments. The HF Space must expose
	# GEMINI_API_KEY through its secrets/environment variables.
	client = genai.Client()

	# Supported voice presets. Each preset maps to itself, so the mapping acts as
	# an allow-list: a lookup with a fallback yields either a known voice name or
	# the fallback.
	VOICE_MODES = {
	    preset: preset
	    for preset in (
	        'Zephyr',  # Default / Balanced
	        'Puck',    # Energetic / Bright
	        'Charon',  # Deep / Calm
	        'Kore',    # Soft / Warm
	        'Fenrir',  # Formal / Sharp
	    )
	}

	def _build_live_config(voice_name):
	    """Return the Live API connection config for the given prebuilt voice.

	    Args:
	        voice_name: Name of the prebuilt voice the model should speak with.

	    Returns:
	        A ``types.LiveConnectConfig`` requesting audio responses, the Google
	        Search tool, and the ASH-BAND system instruction.
	    """
	    return types.LiveConnectConfig(
	        # Modalities are given in the documented string form.
	        response_modalities=["AUDIO"],
	        speech_config=types.SpeechConfig(
	            voice_config=types.VoiceConfig(
	                prebuilt_voice_config=types.PrebuiltVoiceConfig(
	                    voice_name=voice_name
	                )
	            )
	        ),
	        tools=[{"google_search": {}}],
	        system_instruction=types.Content(
	            parts=[
	                # Pass the text by keyword: current SDK releases declare
	                # ``text`` as keyword-only on Part.from_text.
	                types.Part.from_text(
	                    text=(
	                        "You are ASH-BAND, a high-fidelity AI wearable companion. "
	                        "Speak in a professional, concise, and helpful tone. "
	                        "You have access to Google Search. Keep responses brief to minimize latency. "
	                        "Your responses are spoken aloud."
	                    )
	                )
	            ]
	        ),
	    )


	async def _relay_client_audio(session):
	    """Forward binary WebSocket frames from the client to the Gemini session.

	    Runs until the client socket fails or the task is cancelled. Cancellation
	    is deliberately not caught so the caller can stop this task cleanly.
	    """
	    try:
	        while True:
	            data = await websocket.receive()
	            # Only binary frames carry audio; text frames are ignored.
	            if isinstance(data, bytes):
	                # Audio is labelled as 16 kHz PCM, which is what the client
	                # is expected to send.
	                # NOTE(review): newer google-genai releases deprecate
	                # session.send in favour of send_realtime_input - confirm
	                # against the SDK version pinned for this Space.
	                await session.send(
	                    input={"data": data, "mime_type": "audio/pcm;rate=16000"}
	                )
	    except Exception as e:
	        print(f"Error reading from client: {e}")


	async def _relay_model_audio(session):
	    """Forward audio produced by Gemini back to the WebSocket client.

	    Runs until the Gemini session fails or the task is cancelled. Cancellation
	    is deliberately not caught so the caller can stop this task cleanly.
	    """
	    try:
	        # session.receive() stops iterating at the end of each model turn, so
	        # it is re-entered in a loop to keep serving the following turns.
	        while True:
	            async for message in session.receive():
	                server_content = message.server_content
	                if server_content is None:
	                    continue

	                # Handle interruption. In a more complex setup, send a
	                # control message so the client can clear its audio queue.
	                if server_content.interrupted:
	                    print("AI Interrupted by user.")

	                model_turn = server_content.model_turn
	                if model_turn is None:
	                    continue

	                # ``parts`` may be absent on a turn; treat that as empty.
	                for part in model_turn.parts or []:
	                    # Output raw audio back to the client
	                    # (Gemini returns 24kHz PCM audio).
	                    if part.inline_data and part.inline_data.data:
	                        await websocket.send(part.inline_data.data)
	    except Exception as e:
	        print(f"Error receiving from Gemini: {e}")


	@app.websocket('/stream')
	async def ws_stream():
	    """
	    WebSocket endpoint for the Termux client.
	    Connect via: ws://<hf-space-url>/stream?voice=Zephyr

	    The ``voice`` query parameter selects one of VOICE_MODES; a missing or
	    unknown value falls back to "Zephyr". Binary frames received from the
	    client are relayed to a Gemini Live session, and audio returned by the
	    model is relayed back, until either direction stops.
	    """
	    # Grab the requested voice from the URL parameter, default to Zephyr.
	    requested_voice = websocket.args.get("voice", "Zephyr")
	    voice_name = VOICE_MODES.get(requested_voice, "Zephyr")

	    config = _build_live_config(voice_name)

	    print(f"Connecting to Gemini Live API with voice: {voice_name}...")

	    try:
	        async with client.aio.live.connect(model=MODEL, config=config) as session:
	            print("Live session established.")

	            # Run both streaming directions concurrently.
	            relays = [
	                asyncio.create_task(_relay_client_audio(session)),
	                asyncio.create_task(_relay_model_audio(session)),
	            ]
	            try:
	                # Wait until one of the connections drops.
	                await asyncio.wait(relays, return_when=asyncio.FIRST_COMPLETED)
	            finally:
	                # Always stop both relays, including when this handler is
	                # itself cancelled, and await them so no task outlives the
	                # session it is using.
	                for relay in relays:
	                    relay.cancel()
	                await asyncio.gather(*relays, return_exceptions=True)
	    except Exception as e:
	        print(f"Connection failed: {e}")

	# Direct-execution entry point: listen on every interface on port 7860, the
	# standard Hugging Face Spaces port.
	if __name__ == "__main__":
	    bind_host, bind_port = "0.0.0.0", 7860
	    app.run(host=bind_host, port=bind_port)