import os
import tempfile

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

# Initialize the Groq client; fail fast with a clear message instead of
# letting a None key surface as an opaque error inside the client later.
api = os.environ.get("GroqApi")
if not api:
    raise RuntimeError("Environment variable 'GroqApi' is not set.")
client = Groq(api_key=api)

# Load the Whisper model locally.
# Options: "tiny", "base", "small", "medium", "large"
whisper_model = whisper.load_model("base")


def voice_to_voice(audio_file):
    """Transcribe *audio_file*, query the LLM, and synthesize the reply.

    Args:
        audio_file: Path to the recorded audio clip (Gradio ``type="filepath"``).

    Returns:
        tuple: ``(mp3_path, response_text)`` on success, or
        ``(None, "Error: ...")`` if any step fails.
    """
    try:
        # 1. Transcribe the audio using the local Whisper model.
        result = whisper_model.transcribe(audio_file)
        user_input = result["text"]

        # 2. Interact with the LLM via the Groq API.
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": user_input,
                }
            ],
            model="llama3-8b-8192",
            stream=False,
        )
        response_text = chat_completion.choices[0].message.content

        # 3. Convert the text response to speech using gTTS.
        # Close the temp-file handle before gTTS writes to its path: leaving
        # it open leaks a descriptor, and on Windows an open
        # NamedTemporaryFile cannot be reopened by another writer.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        tmp.close()
        tts = gTTS(response_text)
        tts.save(tmp.name)

        return tmp.name, response_text
    except Exception as e:
        # Surface the failure in the UI rather than crashing the app.
        return None, f"Error: {str(e)}"


# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("## Real-Time Voice-to-Voice Chatbot")
    audio_input = gr.Audio(type="filepath", label="Speak Something")
    audio_output = gr.Audio(label="Bot Response")
    text_output = gr.Textbox(label="Transcription & Response")
    btn = gr.Button("Process")
    btn.click(voice_to_voice, inputs=audio_input, outputs=[audio_output, text_output])

# Launch the interface.
demo.launch()