AmirAziz1221 committed on
Commit
9f451f4
·
verified ·
1 Parent(s): 8966a72

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import soundfile as sf
4
+ from transformers import pipeline
5
+ from groq import Groq
6
+ from TTS.api import TTS
7
+ import os
8
+
9
+ # ----------------------------
10
+ # Load models
11
+ # ----------------------------
12
+
13
# Speech → Text: Whisper, loaded through the transformers ASR pipeline.
stt = pipeline(task="automatic-speech-recognition", model="openai/whisper-small")

# Groq chat client; the API key is read from the environment, so a missing
# GROQ_API_KEY fails at import time with a KeyError.
_groq_api_key = os.environ["GROQ_API_KEY"]
client = Groq(api_key=_groq_api_key)

# Text → Speech: Coqui TTS model, with the progress bar disabled.
tts = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
)
24
+
25
+ # ----------------------------
26
+ # Core function
27
+ # ----------------------------
28
+
29
def voice_to_voice(audio):
    """Transcribe spoken audio, ask the LLM for a reply, and speak it back.

    Args:
        audio: Path to the recorded audio file (the UI's audio input is
            declared with ``type="filepath"``), or ``None``/empty when the
            user submitted without recording anything.

    Returns:
        A ``(reply, audio_path)`` tuple: the text to display and the path
        of the WAV file containing the synthesized reply. ``audio_path``
        is ``None`` when there is nothing to speak (no input audio, no
        recognizable speech, or an empty model response); ``reply`` then
        holds a short explanatory message instead.
    """
    import tempfile

    # Guard against an empty submission instead of crashing inside the
    # speech-to-text pipeline.
    if not audio:
        return "No audio received. Please record a message and try again.", None

    # Speech → Text
    text = stt(audio)["text"]
    if not text or not text.strip():
        return "Sorry, no speech could be recognized. Please try again.", None

    # LLM response
    completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[{"role": "user", "content": text}],
    )
    reply = completion.choices[0].message.content
    if not reply or not reply.strip():
        # Nothing to synthesize; skip TTS rather than feeding it empty text.
        return "The model returned an empty response.", None

    # Text → Speech. Each call writes to its own temporary file so that
    # concurrent requests cannot overwrite one another's audio (the previous
    # fixed "response.wav" path was shared by every request).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    tts.tts_to_file(text=reply, file_path=output_path)

    return reply, output_path
45
+
46
+ # ----------------------------
47
+ # UI
48
+ # ----------------------------
49
+
50
# Gradio front end: one audio input handed to voice_to_voice as a file path,
# and two outputs (the reply text and the synthesized reply audio).
# The labels and description below were mis-decoded (mojibake); they are
# restored to the intended emoji and arrow characters.
ui = gr.Interface(
    fn=voice_to_voice,
    inputs=gr.Audio(type="filepath", label="🎤 Speak"),
    outputs=[
        gr.Textbox(label="🧠 AI Response"),
        gr.Audio(label="🔊 Voice Reply"),
    ],
    title="Voice to Voice AI (Groq + Hugging Face)",
    description="Speak → AI thinks → AI speaks back",
)

# Start the web server as soon as the module runs.
ui.launch()