import gradio as gr
from faster_whisper import WhisperModel
from groq import Groq
from gtts import gTTS
import os
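
# Dependencies (assumed to live in the Space's requirements.txt): gradio,
# faster-whisper, groq, gTTS. Exact pins are a guess; any recent release
# of each should support the calls used below.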
print("Loading Whisper...")
whisper_model = WhisperModel("base", compute_type="int8")
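# "base" with int8 quantization keeps the model light enough for CPU-only
# hardware (e.g., a free Space); a larger size such as "small" should be a
# drop-in swap if transcription accuracy matters more than latency.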
# Read the Groq key from an environment variable (on HF Spaces, set the
# GROQ_API_KEY secret in the Space settings instead of hard-coding it).
client = Groq(api_key=os.environ["GROQ_API_KEY"])
print("Ready")
def voice_ai(audio_path):
    try:
        if audio_path is None:
            return None, "", ""

        # ---------------- STT: transcribe the recorded audio with Whisper
        segments, _ = whisper_model.transcribe(audio_path)
        user_text = "".join(seg.text for seg in segments).strip()
        print("User:", user_text)

        # ---------------- LLM: send the transcript to Groq for a reply
        completion = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{"role": "user", "content": user_text}],
        )
        reply = completion.choices[0].message.content
        print("AI:", reply)

        # ---------------- TTS: synthesize the reply to an MP3 file
        output_file = "response.mp3"
        tts = gTTS(reply)
        tts.save(output_file)

        return output_file, user_text, reply
    except Exception as e:
        print("ERROR:", e)
        return None, "error", str(e)
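
# Note: response.mp3 is a fixed path, so two simultaneous requests would
# overwrite each other's output; writing to a tempfile.NamedTemporaryFile
# instead would avoid that if the Space ever handles concurrent users.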
with gr.Blocks() as demo:
    gr.Markdown("# 🎙 Voice AI")
    mic = gr.Audio(type="filepath")  # filepath so transcribe() gets a file on disk
    btn = gr.Button("Generate")
    out_audio = gr.Audio()
    user_box = gr.Textbox(label="You said")
    ai_box = gr.Textbox(label="AI reply")

    btn.click(
        voice_ai,
        inputs=mic,
        outputs=[out_audio, user_box, ai_box],
    )

demo.launch()
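
# To try it locally (assuming the same packages are installed):
#   GROQ_API_KEY=... python app.py
# then open the printed localhost URL. On HF Spaces, app.py runs on
# startup and launch() serves the app automatically.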