STARBORN committed on
Commit
7cf18aa
·
verified ·
1 Parent(s): 9719795

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import asyncio
import contextlib
import os
import tempfile

import edge_tts
import gradio as gr
import librosa
import numpy as np
import soundfile as sf
from fastrtc import WebRTC, ReplyOnPause, get_hf_turn_credentials
from groq import Groq
+
11
# Initialize Groq
# NOTE(review): os.environ.get returns None when GROQ_API_KEY is unset; the
# client then fails only on the first API call — consider failing fast here.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
13
+
14
async def text_to_speech_logic(text):
    """Synthesize *text* with edge-tts and return ``(sample_rate, audio)``.

    Args:
        text: The reply text to speak.

    Returns:
        tuple[int, np.ndarray]: 16 kHz sample rate and a ``(1, samples)``
        float array — channel-first, the layout FastRTC expects for playback.
    """
    communicate = edge_tts.Communicate(text, "en-US-AndrewNeural")
    # Unique temp file instead of a fixed "temp_op.mp3": a fixed name is a
    # race between concurrent sessions, and the old code never deleted it.
    fd, mp3_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    try:
        await communicate.save(mp3_path)
        audio, sr = librosa.load(mp3_path, sr=16000)
    finally:
        # Best-effort cleanup; never let a cleanup error mask the real result.
        with contextlib.suppress(OSError):
            os.remove(mp3_path)
    # Ensure audio is channel-first (1, samples) for FastRTC.
    if audio.ndim == 1:
        audio = audio.reshape(1, -1)
    return sr, audio
22
+
23
def process_audio(audio: tuple[int, np.ndarray]):
    """One voice-agent turn: transcribe, answer with an LLM, speak the reply.

    Args:
        audio: ``(sample_rate, samples)`` pair from FastRTC. The array may be
            1-D mono or 2-D (channels-first or samples-first — TODO confirm
            which layout FastRTC delivers here).

    Returns:
        ``(sample_rate, np.ndarray)`` reply audio for FastRTC playback.
    """
    sr, y = audio
    # Downmix to mono. BUG FIX: the old code always did y.mean(axis=1); for
    # channel-first (1, N) input — the layout this file itself produces in
    # text_to_speech_logic — that collapses the SAMPLES axis, reducing the
    # whole utterance to a single value. The channel axis is the smaller of
    # the two (1 or 2 channels vs. thousands of samples), so average over it.
    if y.ndim > 1:
        channel_axis = int(np.argmin(y.shape))
        y = y.mean(axis=channel_axis)

    # Unique temp file instead of a fixed "input.wav" so concurrent sessions
    # don't clobber each other's recordings; always cleaned up afterwards.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        sf.write(wav_path, y, sr)
        with open(wav_path, "rb") as file:
            transcription = client.audio.transcriptions.create(
                # The name in this tuple is just a label for the API.
                file=("input.wav", file.read()),
                model="whisper-large-v3-turbo",
            )
    finally:
        with contextlib.suppress(OSError):
            os.remove(wav_path)

    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {"role": "system", "content": "You are a concise voice assistant. Give 1-sentence answers."},
            {"role": "user", "content": transcription.text}
        ]
    )

    reply_text = response.choices[0].message.content
    # This handler runs in a worker thread with no running event loop,
    # so asyncio.run is safe here.
    return asyncio.run(text_to_speech_logic(reply_text))
47
+
48
# Build the Gradio UI: a single WebRTC component used for both the
# microphone input and the synthesized-speech output.
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ Voice Agent Live (CPU)")
    voice_chat = WebRTC(
        label="Voice Chat",
        mode="send-receive",
        modality="audio",
        # TURN credentials from Hugging Face so WebRTC works behind NAT.
        rtc_configuration=get_hf_turn_credentials(),
    )
    # ReplyOnPause waits for the speaker to pause, then hands the buffered
    # audio to process_audio; the reply streams back to the same component.
    voice_chat.stream(
        fn=ReplyOnPause(process_audio),
        inputs=[voice_chat],
        outputs=[voice_chat],
    )
61
+
62
# Script entry point: start the Gradio server when run directly.
if __name__ == "__main__":
    demo.launch()