import os

import gradio as gr
from faster_whisper import WhisperModel
from groq import Groq
from gtts import gTTS

print("Loading Whisper...")
# int8 quantization keeps the model small enough for CPU-only hosts.
whisper_model = WhisperModel("base", compute_type="int8")

# Read the API key from an HF Space secret instead of a Colab cell.
client = Groq(api_key=os.environ["GROQ_API_KEY"])
print("Ready")


def voice_ai(audio_path):
    try:
        if audio_path is None:
            return None, "", ""

        # ---------------- STT: transcribe the recording with Whisper
        segments, _ = whisper_model.transcribe(audio_path)
        user_text = "".join(seg.text for seg in segments).strip()
        print("User:", user_text)

        # ---------------- LLM: send the transcript to Groq
        completion = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{"role": "user", "content": user_text}],
        )
        reply = completion.choices[0].message.content
        print("AI:", reply)

        # ---------------- TTS: render the reply as speech with gTTS
        output_file = "response.mp3"
        tts = gTTS(reply)
        tts.save(output_file)

        return output_file, user_text, reply
    except Exception as e:
        print("ERROR:", e)
        return None, "error", str(e)


with gr.Blocks() as demo:
    gr.Markdown("# 🎙 Voice AI")
    # filepath mode hands the recorded/uploaded audio to voice_ai as a path
    mic = gr.Audio(type="filepath")
    btn = gr.Button("Generate")
    out_audio = gr.Audio()
    user_box = gr.Textbox(label="You said")
    ai_box = gr.Textbox(label="AI reply")

    btn.click(
        voice_ai,
        inputs=mic,
        outputs=[out_audio, user_box, ai_box],
    )

demo.launch()