import os
import tempfile

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

# Load the Groq API key from the environment (e.g. a Hugging Face Space secret).
# SECURITY: never hardcode API keys in source — the previous revision leaked one;
# that key must be considered compromised and revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set. "
        "Add it as a secret (e.g. in your Hugging Face Space settings)."
    )
client = Groq(api_key=GROQ_API_KEY)

# Load the Whisper speech-to-text model once at startup (module level),
# so each request reuses it instead of reloading per call.
whisper_model = whisper.load_model("base")


def transcribe_and_chat(audio_file):
    """Run the full voice-chat pipeline for one recording.

    Pipeline: Whisper transcription -> Groq LLM reply -> gTTS speech synthesis.

    Args:
        audio_file: Filesystem path to the recorded audio clip (Gradio passes
            a filepath because the Audio input uses type="filepath"), or None
            if nothing was recorded.

    Returns:
        A 3-tuple (user_text, bot_text, reply_audio_path). On failure the
        first two elements carry short error strings and the audio path is
        None, so the UI always has something to display.
    """
    if audio_file is None:
        return "No audio received", "Error: Please record again.", None

    try:
        # 1. Transcribe the user's speech to text.
        result = whisper_model.transcribe(audio_file)
        user_text = result.get("text", "").strip()
        if not user_text:
            return "Could not transcribe", "Error: Empty transcription", None

        # 2. Ask the Groq-hosted LLM for a reply.
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": user_text}],
            model="llama-3.1-8b-instant",  # stable, low-latency model
        )
        bot_text = chat_completion.choices[0].message.content

        # 3. Synthesize the reply to speech. delete=False is required because
        # Gradio reads the file after this function returns; the OS temp dir
        # is responsible for eventual cleanup.
        tts = gTTS(bot_text)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            tts.save(tmp.name)
            reply_audio = tmp.name

        return user_text, bot_text, reply_audio

    except Exception as e:
        # Top-level UI boundary: surface the error text to the user rather
        # than crashing the Gradio worker.
        return "Transcription error", str(e), None


# Gradio UI: microphone in, transcription + LLM reply + synthesized voice out.
with gr.Blocks() as demo:
    gr.Markdown("## 🎙️ Real-time Voice-to-Voice Chatbot (Whisper + Groq + gTTS)")

    audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak here")
    user_text_out = gr.Textbox(label="📝 Transcription (You said)")
    bot_text_out = gr.Textbox(label="🤖 LLM Reply")
    audio_out = gr.Audio(label="🔊 Bot's Voice", type="filepath")

    btn = gr.Button("▶️ Process")
    btn.click(
        fn=transcribe_and_chat,
        inputs=[audio_in],
        outputs=[user_text_out, bot_text_out, audio_out],
    )

if __name__ == "__main__":
    demo.launch()