File size: 2,208 Bytes
1095d80
52296e0
1095d80
52296e0
1095d80
 
0b2ebe2
 
52296e0
0b2ebe2
52296e0
 
 
 
0b2ebe2
1095d80
0b2ebe2
 
1095d80
 
 
 
 
 
 
 
 
0b2ebe2
 
1095d80
52296e0
0b2ebe2
 
 
52296e0
1095d80
0b2ebe2
 
 
 
 
 
 
 
 
 
1095d80
0b2ebe2
 
1095d80
 
 
 
 
 
 
 
 
 
 
 
0b2ebe2
 
 
1095d80
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from fastrtc import WebRTC, ReplyOnPause, AlgoOptions, audio_to_bytes
from groq import Groq
from loguru import logger
from process_groq_tts import process_groq_tts
from simple_math_agent import agent, agent_config
import whisper, tempfile, wave, os, numpy as np

# The Groq API key is a secret and must be supplied through the environment
# (shell export, .env loader, or the hosting platform's secret store); it must
# never be committed to source control.
# NOTE(review): a literal key used to be hard-coded on this line. Treat that
# key as leaked and revoke it in the Groq console.
if not os.environ.get("GROQ_API_KEY"):
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it before starting the application."
    )
groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Replace loguru's default handler with a colourised, time-stamped console sink.
logger.remove()
logger.add(
    # loguru terminates every formatted record with a newline when ``format``
    # is a string, so print() must not append a second one.
    lambda msg: print(msg, end=""),
    colorize=True,
    format="<green>{time:HH:mm:ss}</green> | <level>{level}</level> | <level>{message}</level>",
)

# Whisper model shared by every call to ``response``. Loading it is expensive,
# so it is created lazily on first use instead of once per utterance.
_whisper_model = None


def _get_whisper_model():
    """Return the shared Whisper "base" model, loading it on first use."""
    global _whisper_model
    if _whisper_model is None:
        _whisper_model = whisper.load_model("base")
    return _whisper_model


def _transcribe(audio: tuple[int, np.ndarray]) -> str:
    """Transcribe one utterance with Whisper (language forced to "ar").

    Args:
        audio: ``(sample_rate, samples)`` pair as received by ``response``.

    Returns:
        The ``"text"`` field of Whisper's transcription result.
    """
    sr, data = audio
    # NOTE(review): per the FastRTC docs, audio_to_bytes returns an *encoded*
    # audio file (MP3), not raw PCM frames. Wrapping those bytes in a WAV
    # header (as the previous code did) makes the decoder read compressed data
    # as PCM samples. Write the bytes out unchanged and let Whisper's ffmpeg
    # loader decode them.
    encoded = audio_to_bytes((sr, data))

    # NamedTemporaryFile replaces the deprecated, race-prone tempfile.mktemp.
    # delete=False plus an explicit close lets the decoder reopen the file by
    # name on every platform; the finally-block guarantees cleanup.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tmp.write(encoded)
        tmp_path = tmp.name
    try:
        result = _get_whisper_model().transcribe(tmp_path, language="ar")
    finally:
        os.remove(tmp_path)
    return result["text"]


def response(audio: tuple[int, np.ndarray]):
    """Turn one spoken utterance into a spoken reply.

    Pipeline: transcribe the audio with Whisper, send the transcript to the
    agent as a single user message, synthesise the agent's last message with
    Groq text-to-speech, and yield whatever ``process_groq_tts`` produces.

    Args:
        audio: ``(sample_rate, samples)`` pair for the captured utterance.

    Yields:
        The items produced by ``process_groq_tts`` for the TTS response.
        Nothing is yielded when the transcript or the agent reply is blank.
    """
    logger.info("πŸŽ™οΈ Received audio input")

    transcript = _transcribe(audio)
    logger.info(f'πŸ‘‚ Transcribed: "{transcript}"')
    if not transcript.strip():
        # Nothing intelligible was said; skip the agent and TTS round-trips.
        logger.warning("Empty transcript; skipping reply")
        return

    agent_response = agent.invoke(
        {"messages": [{"role": "user", "content": transcript}]},
        config=agent_config,
    )
    response_text = agent_response["messages"][-1].content
    logger.info(f'πŸ’¬ Response: "{response_text}"')
    if not response_text:
        # Do not send an empty input to the TTS endpoint.
        logger.warning("Agent returned no text; skipping speech synthesis")
        return

    tts_response = groq_client.audio.speech.create(
        model="playai-tts-arabic",
        voice="Ahmad-PlayAI",
        response_format="wav",
        input=response_text,
    )
    yield from process_groq_tts(tts_response)


def create_stream():
    """Build the audio send/receive stream that answers with ``response``.

    The handler is ``ReplyOnPause(response)`` configured with
    ``AlgoOptions(speech_threshold=0.5)``, and a public Google STUN server is
    supplied as the ICE configuration.

    Returns:
        A ``fastrtc.Stream`` whose ``.ui`` attribute is launched by the
        script's entry point.
    """
    # NOTE(review): per the FastRTC docs, ``handler=`` and the ``.ui``
    # attribute used by the entry point belong to ``Stream``; the ``WebRTC``
    # Gradio component provides neither. The import is local so this fix does
    # not depend on the file's top-level import line.
    from fastrtc import Stream

    reply_handler = ReplyOnPause(
        response,
        algo_options=AlgoOptions(speech_threshold=0.5),
    )
    return Stream(
        handler=reply_handler,
        modality="audio",
        mode="send-receive",
        # ICE servers for the peer connection.
        rtc_configuration={
            "iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]
        },
    )


if __name__ == "__main__":
    # Script entry point: build the stream, then serve its UI with a public
    # share link. ``os`` is already imported at module level.
    app_stream = create_stream()
    logger.info("🎧 Stream handler configured")

    # Request port 7861 through Gradio's environment variable rather than an
    # explicit ``server_port`` argument, to avoid a port conflict.
    # NOTE(review): confirm Gradio still reads this variable at launch time
    # and not only when it is first imported.
    os.environ.update(GRADIO_SERVER_PORT="7861")
    app_stream.ui.launch(share=True, server_port=None)