"""Voice agent: WebRTC audio in -> Whisper transcription -> agent -> Groq TTS out.

Running this module as a script builds the WebRTC stream component and
launches its UI on port 7861 with a public share link.
"""

import functools
import os
import tempfile
import wave

import numpy as np
import whisper
from fastrtc import WebRTC, ReplyOnPause, AlgoOptions, audio_to_bytes
from groq import Groq
from loguru import logger

from process_groq_tts import process_groq_tts
from simple_math_agent import agent, agent_config

# SECURITY: the API key used to be hard-coded here. It must be supplied through
# the environment instead, and the previously committed key should be revoked.
_GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not _GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it before starting the application."
    )
groq_client = Groq(api_key=_GROQ_API_KEY)

# Replace loguru's default sink with a compact, colorized console format.
logger.remove()
logger.add(
    lambda msg: print(msg),
    colorize=True,
    format="{time:HH:mm:ss} | {level} | {message}",
)


@functools.lru_cache(maxsize=1)
def _get_whisper_model():
    """Load the Whisper "base" model once and reuse it for every utterance."""
    return whisper.load_model("base")


def _write_mono_wav(path: str, sample_rate: int, samples: np.ndarray) -> None:
    """Write *samples* to *path* as a mono, 16-bit PCM WAV file.

    Floating-point input is clipped to [-1, 1] and scaled to int16.
    # NOTE(review): assumes float audio is normalized to [-1, 1] and that the
    # stream is mono (the original also declared one channel) -- confirm.
    """
    pcm = np.asarray(samples).reshape(-1)
    if np.issubdtype(pcm.dtype, np.floating):
        pcm = np.clip(pcm, -1.0, 1.0) * 32767.0
    with wave.open(path, "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(sample_rate)
        # "<i2" forces little-endian 16-bit samples, as the WAV format requires.
        wf.writeframes(pcm.astype("<i2").tobytes())


def _transcribe(sample_rate: int, samples: np.ndarray) -> str:
    """Return the Whisper transcript (Arabic) of the given audio samples."""
    # A private temporary directory avoids the race in tempfile.mktemp and
    # guarantees the WAV file is removed, even if transcription raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, "input.wav")
        _write_mono_wav(wav_path, sample_rate, samples)
        result = _get_whisper_model().transcribe(wav_path, language="ar")
    return result["text"]


def response(audio: tuple[int, np.ndarray]):
    """Turn one user utterance into spoken agent output.

    Args:
        audio: ``(sample_rate, samples)`` pair for the captured utterance.

    Yields:
        Whatever ``process_groq_tts`` yields for the synthesized reply.
        Nothing is yielded when the transcript is empty.
    """
    logger.info("🎙️ Received audio input")
    sr, data = audio

    # NOTE(review): the original wrote audio_to_bytes() output into the WAV via
    # writeframes(); that helper appears to return an encoded audio file rather
    # than raw PCM frames (confirm against the fastrtc docs), so the PCM is now
    # taken straight from the numpy samples.
    transcript = _transcribe(sr, data)
    logger.info(f'👂 Transcribed: "{transcript}"')

    if not transcript.strip():
        # Nothing intelligible was said; skip the agent and TTS round-trips.
        return

    agent_response = agent.invoke(
        {"messages": [{"role": "user", "content": transcript}]},
        config=agent_config,
    )
    response_text = agent_response["messages"][-1].content
    logger.info(f'💬 Response: "{response_text}"')

    tts_response = groq_client.audio.speech.create(
        model="playai-tts-arabic",
        voice="Ahmad-PlayAI",
        response_format="wav",
        input=response_text,
    )
    yield from process_groq_tts(tts_response)


def create_stream():
    """Build the send-receive audio WebRTC component driven by ``response``."""
    return WebRTC(
        modality="audio",
        mode="send-receive",
        # ICE servers used for connection establishment.
        rtc_configuration={
            "iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]
        },
        handler=ReplyOnPause(
            response,
            algo_options=AlgoOptions(speech_threshold=0.5),
        ),
    )


if __name__ == "__main__":
    stream = create_stream()
    logger.info("🎧 Stream handler configured")
    os.environ["GRADIO_SERVER_PORT"] = "7861"  # avoid port conflict
    stream.ui.launch(server_port=None, share=True)