# testing2121cxz / app.py
# 1MR's picture
# Update app.py
# 1095d80 verified
import functools
import os
import tempfile
import wave

import numpy as np
import whisper
from fastrtc import WebRTC, ReplyOnPause, AlgoOptions, audio_to_bytes
from groq import Groq
from loguru import logger

from process_groq_tts import process_groq_tts
from simple_math_agent import agent, agent_config
# SECURITY: a live Groq API key is hard-coded and committed to source.
# Rotate this key immediately and load it from the environment or a secrets
# manager instead of embedding it here. TODO(review): remove before release.
os.environ["GROQ_API_KEY"] = "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"
# Groq client used below for text-to-speech synthesis.
groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
# Replace loguru's default sink with a simple colorized print-based sink.
logger.remove()
logger.add(lambda msg: print(msg),
colorize=True,
format="<green>{time:HH:mm:ss}</green> | <level>{level}</level> | <level>{message}</level>")
@functools.lru_cache(maxsize=1)
def _load_whisper_model():
    """Load the Whisper 'base' model once and reuse it across utterances.

    The original code re-loaded the model from disk on every audio turn,
    which dominates per-request latency.
    """
    return whisper.load_model("base")


def response(audio: tuple[int, np.ndarray]):
    """Voice pipeline: transcribe Arabic speech, run the agent, stream TTS.

    Args:
        audio: ``(sample_rate, samples)`` tuple delivered by the WebRTC stream.

    Yields:
        Audio chunks produced by ``process_groq_tts`` from the Groq TTS reply.
    """
    logger.info("πŸŽ™οΈ Received audio input")
    sr, data = audio
    audio_bytes = audio_to_bytes((sr, data))

    # NamedTemporaryFile replaces the insecure, deprecated tempfile.mktemp()
    # (mktemp is race-prone: the name can be claimed before we open it).
    # delete=False so whisper can reopen the path by name.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        tmp = tmp_file.name
    try:
        with wave.open(tmp, "wb") as wf:
            wf.setnchannels(1)   # mono
            wf.setsampwidth(2)   # 16-bit PCM (matches audio_to_bytes output)
            wf.setframerate(sr)
            wf.writeframes(audio_bytes)

        result = _load_whisper_model().transcribe(tmp, language="ar")
    finally:
        # The original leaked one temp wav file per request; always clean up.
        os.remove(tmp)

    transcript = result["text"]
    logger.info(f'πŸ‘‚ Transcribed: "{transcript}"')

    agent_response = agent.invoke(
        {"messages": [{"role": "user", "content": transcript}]},
        config=agent_config,
    )
    response_text = agent_response["messages"][-1].content
    logger.info(f'πŸ’¬ Response: "{response_text}"')

    tts_response = groq_client.audio.speech.create(
        model="playai-tts-arabic",
        voice="Ahmad-PlayAI",
        response_format="wav",
        input=response_text,
    )
    yield from process_groq_tts(tts_response)
def create_stream():
    """Build the audio WebRTC component wired to the voice-response handler."""
    # STUN server so peers behind NAT can negotiate a direct connection.
    ice_config = {
        "iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}],
    }
    # Trigger `response` whenever the caller pauses speaking.
    pause_handler = ReplyOnPause(
        response,
        algo_options=AlgoOptions(speech_threshold=0.5),
    )
    return WebRTC(
        modality="audio",
        mode="send-receive",
        rtc_configuration=ice_config,
        handler=pause_handler,
    )
if __name__ == "__main__":
    # NOTE: the original re-imported `os` here; it is already imported at
    # module level, so the redundant local import was removed.
    stream = create_stream()
    logger.info("🎧 Stream handler configured")
    # Steer Gradio to a fixed port via the env var to avoid a port conflict;
    # server_port=None lets Gradio pick it up from GRADIO_SERVER_PORT.
    os.environ["GRADIO_SERVER_PORT"] = "7861"
    stream.ui.launch(server_port=None, share=True)