STARBORN committed on
Commit
2087bf7
·
verified ·
1 Parent(s): 7cf18aa

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -14
app.py CHANGED
@@ -6,7 +6,13 @@ import asyncio
6
  import edge_tts
7
  import soundfile as sf
8
  from groq import Groq
9
- from fastrtc import WebRTC, ReplyOnPause, get_hf_turn_credentials
 
 
 
 
 
 
10
 
11
  # Initialize Groq
12
  client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
@@ -45,19 +51,88 @@ def process_audio(audio: tuple[int, np.ndarray]):
45
  reply_text = response.choices[0].message.content
46
  return asyncio.run(text_to_speech_logic(reply_text))
47
 
48
- with gr.Blocks() as demo:
49
- gr.Markdown("# 🎙️ Voice Agent Live (CPU)")
50
- webrtc_comp = WebRTC(
51
- label="Voice Chat",
52
- mode="send-receive",
53
- modality="audio",
54
- rtc_configuration=get_hf_turn_credentials()
55
- )
56
- webrtc_comp.stream(
57
- fn=ReplyOnPause(process_audio),
58
- inputs=[webrtc_comp],
59
- outputs=[webrtc_comp]
 
 
 
 
 
 
 
 
 
 
 
60
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  if __name__ == "__main__":
63
- demo.launch()
 
6
  import edge_tts
7
  import soundfile as sf
8
  from groq import Groq
9
+
10
+ try:
11
+ from fastrtc import WebRTC, ReplyOnPause, get_hf_turn_credentials
12
+ FASTRTC_AVAILABLE = True
13
+ except ImportError:
14
+ FASTRTC_AVAILABLE = False
15
+ print("FastRTC not available, using fallback UI")
16
 
17
  # Initialize Groq
18
  client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 
51
  reply_text = response.choices[0].message.content
52
  return asyncio.run(text_to_speech_logic(reply_text))
53
 
54
+ # Fallback function for regular audio interface
55
def process_audio_file(audio_file):
    """Answer a recorded or uploaded audio clip with a spoken reply.

    The clip is loaded at 16 kHz, transcribed through the Groq transcription
    endpoint, answered by the chat model, and the answer is synthesized by
    ``text_to_speech_logic``.

    Args:
        audio_file: Path of the audio file handed over by the ``gr.Audio``
            input (``type="filepath"``), or ``None`` when nothing was
            recorded or uploaded.

    Returns:
        A ``(audio_path, markdown_text)`` tuple matching the
        ``[audio_output, text_output]`` outputs. ``audio_path`` is ``None``
        when there is nothing to play.
    """
    # Function-scope import so this block does not depend on the file header.
    import io

    if audio_file is None:
        return None, "Please record or upload audio"

    # Encode the WAV in memory instead of writing a shared "input.wav":
    # a fixed path in the working directory can be overwritten by another
    # request before this one has finished reading it.
    y, sr = librosa.load(audio_file, sr=16000)
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, y, sr, format="WAV")

    # Transcribe
    transcription = client.audio.transcriptions.create(
        file=("input.wav", wav_buffer.getvalue()),
        model="whisper-large-v3-turbo",
    )
    user_text = transcription.text

    # Nothing intelligible was said: skip the LLM/TTS round trip rather than
    # sending an empty prompt.
    if not user_text or not user_text.strip():
        return None, "No speech detected. Please try again."

    # Get response
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {"role": "system", "content": "You are a concise voice assistant. Give 1-sentence answers."},
            {"role": "user", "content": user_text},
        ],
    )
    reply_text = response.choices[0].message.content

    # Generate speech. The returned value is not needed here; the UI plays
    # the file path below.
    # NOTE(review): presumably text_to_speech_logic writes "temp_op.mp3" as a
    # side effect -- confirm against its definition (outside this view). That
    # shared output path has the same concurrency hazard as the old input.wav.
    asyncio.run(text_to_speech_logic(reply_text))

    return "temp_op.mp3", f"**You said:** {user_text}\n\n**Assistant:** {reply_text}"
85
+
86
+ # Create the interface
87
# Build the page: a real-time WebRTC chat when FastRTC imported and its setup
# succeeds, otherwise a record/upload form driven by process_audio_file.
with gr.Blocks(title="Voice Agent Live") as demo:
    gr.Markdown("# 🎙️ Voice Agent Live")
    gr.Markdown("Speak to the AI assistant and get voice responses!")

    if FASTRTC_AVAILABLE:
        gr.Markdown("### Real-time Voice Chat (WebRTC)")
        try:
            webrtc_options = {
                "label": "Voice Chat",
                "mode": "send-receive",
                "modality": "audio",
                "rtc_configuration": get_hf_turn_credentials(),
            }
            webrtc_comp = WebRTC(**webrtc_options)
            webrtc_comp.stream(
                fn=ReplyOnPause(process_audio),
                inputs=[webrtc_comp],
                outputs=[webrtc_comp],
            )
        except Exception as e:
            # Any WebRTC setup failure is reported on the page and the flag is
            # cleared so the record/upload form below is rendered instead.
            gr.Markdown(f"⚠️ WebRTC Error: {str(e)}")
            gr.Markdown("### Using fallback mode below")
            FASTRTC_AVAILABLE = False

    if not FASTRTC_AVAILABLE:
        gr.Markdown("### Voice Chat (Record/Upload)")
        with gr.Row():
            # Left column: audio capture and the submit button.
            with gr.Column():
                audio_input = gr.Audio(
                    sources=["microphone", "upload"],
                    type="filepath",
                    label="Record or Upload Audio",
                )
                submit_btn = gr.Button("🎤 Process Audio", variant="primary", size="lg")

            # Right column: spoken reply and the text transcript.
            with gr.Column():
                audio_output = gr.Audio(label="Assistant Response", type="filepath")
                text_output = gr.Markdown()

        submit_btn.click(
            fn=process_audio_file,
            inputs=[audio_input],
            outputs=[audio_output, text_output],
        )

        gr.Examples(
            examples=[],
            inputs=audio_input,
            label="Try recording your voice!",
        )

if __name__ == "__main__":
    demo.launch(share=False)