1MR committed on
Commit
0b2ebe2
·
verified ·
1 Parent(s): 5984019

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +129 -0
app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from typing import Generator, Tuple
3
+
4
+ import numpy as np
5
+ from fastrtc import (
6
+ AlgoOptions,
7
+ ReplyOnPause,
8
+ Stream,
9
+ audio_to_bytes,
10
+ )
11
+ from groq import Groq
12
+ from loguru import logger
13
+ from process_groq_tts import process_groq_tts
14
+ from simple_math_agent import agent, agent_config
15
+ import os
16
+
17
# SECURITY: the Groq API key must be supplied through the GROQ_API_KEY
# environment variable and never written into source. The key that used to be
# hard-coded at this spot has been committed to version control and should be
# treated as compromised (revoke it and issue a new one).
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it in the environment before starting the app."
    )

# Drop the sinks loguru currently has registered and install a single
# colourised sink that prints each formatted record.
logger.remove()
logger.add(
    lambda msg: print(msg),
    colorize=True,
    format="<green>{time:HH:mm:ss}</green> | <level>{level}</level> | <level>{message}</level>",
)

# Shared Groq client; `response` uses it for text-to-speech requests.
groq_client = Groq(api_key=_groq_api_key)
26
+
27
+
28
# Whisper models already loaded in this process, keyed by model name, so the
# model is loaded once instead of once per utterance.
_WHISPER_MODELS = {}


def _get_whisper_model(name="base"):
    """Return the Whisper model called *name*, loading it on first use only."""
    import whisper

    if name not in _WHISPER_MODELS:
        _WHISPER_MODELS[name] = whisper.load_model(name)
    return _WHISPER_MODELS[name]


def _to_pcm16(samples):
    """Return *samples* as a flat little-endian int16 array.

    Floating-point input is clipped to [-1.0, 1.0] and scaled to the int16
    range; integer input is cast as-is.
    """
    # NOTE(review): flattening assumes a single (mono) channel, e.g. shape
    # (n,) or (1, n), and float samples normalised to [-1, 1] — confirm
    # against what the stream handler actually delivers.
    flat = np.asarray(samples).reshape(-1)
    if np.issubdtype(flat.dtype, np.floating):
        flat = np.clip(flat, -1.0, 1.0) * 32767.0
    return flat.astype("<i2")


def response(
    audio: tuple[int, np.ndarray],
) -> Generator[Tuple[int, np.ndarray], None, None]:
    """
    Transcribe the incoming audio, run the agent on it, and stream back TTS audio.

    The audio is written to a temporary 16-bit mono WAV file, transcribed with
    Whisper (language "ar"), passed to the imported `agent`, and the agent's
    last message is synthesised with the Groq TTS endpoint.

    Args:
        audio: Tuple containing sample rate and audio data

    Yields:
        Whatever `process_groq_tts` yields for the synthesised reply
        (annotated as (sample_rate, audio_array) tuples). Nothing is yielded
        when the transcript is blank.
    """
    import os
    import tempfile
    import wave

    logger.info("🎙️ Received audio input")
    logger.debug("🔄 Transcribing audio...")

    sample_rate, samples = audio
    model = _get_whisper_model("base")

    # Create the file and close it immediately so that it can be reopened by
    # name below (an open NamedTemporaryFile cannot be reopened on Windows).
    temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    temp_file.close()

    try:
        # Write the raw samples as PCM frames. The previous code wrote the
        # output of fastrtc's `audio_to_bytes` here; NOTE(review): that helper
        # appears to return an already-encoded (MP3) stream rather than PCM,
        # which is not valid WAV frame data — confirm against the installed
        # fastrtc version.
        with wave.open(temp_file.name, "wb") as wav_file:
            wav_file.setnchannels(1)  # mono audio
            wav_file.setsampwidth(2)  # 16-bit audio
            wav_file.setframerate(sample_rate)
            wav_file.writeframes(_to_pcm16(samples).tobytes())

        result = model.transcribe(temp_file.name, language="ar")
        transcript = result["text"]
    finally:
        # Always remove the temporary file, even if transcription failed.
        if os.path.exists(temp_file.name):
            os.remove(temp_file.name)

    logger.info(f'👂 Transcribed: "{transcript}"')

    # Nothing intelligible was said: skip the agent and TTS round-trips.
    if not transcript.strip():
        logger.debug("Empty transcript; skipping agent and TTS")
        return

    logger.debug("🧠 Running agent...")
    agent_response = agent.invoke(
        {"messages": [{"role": "user", "content": transcript}]}, config=agent_config
    )
    response_text = agent_response["messages"][-1].content
    logger.info(f'💬 Response: "{response_text}"')

    logger.debug("🔊 Generating speech...")
    tts_response = groq_client.audio.speech.create(
        model="playai-tts-arabic",
        voice="Ahmad-PlayAI",
        response_format="wav",
        input=response_text,
    )
    yield from process_groq_tts(tts_response)
91
+
92
+
93
def create_stream() -> Stream:
    """
    Build the audio Stream used by the application.

    The stream is a send-receive audio stream whose handler is a ReplyOnPause
    wrapper around `response`, configured with a speech threshold of 0.5.

    Returns:
        Stream: the configured FastRTC Stream instance
    """
    pause_options = AlgoOptions(speech_threshold=0.5)
    reply_handler = ReplyOnPause(response, algo_options=pause_options)
    return Stream(modality="audio", mode="send-receive", handler=reply_handler)
110
+
111
+
112
if __name__ == "__main__":
    # Command line: a single optional flag that selects the phone interface.
    cli = argparse.ArgumentParser(description="FastRTC Groq Voice Agent")
    cli.add_argument(
        "--phone",
        help="Launch with FastRTC phone interface (get a temp phone number)",
        action="store_true",
    )
    options = cli.parse_args()

    stream = create_stream()
    logger.info("🎧 Stream handler configured")

    # Default to the Gradio UI; --phone switches to the FastRTC phone interface.
    if not options.phone:
        logger.info("Launching with Gradio UI...")
        stream.ui.launch(share=True)
    else:
        logger.info("Launching with FastRTC phone interface...")
        stream.fastphone(share=True)