"""Real-time voice-to-voice chatbot: Whisper STT -> Groq LLM -> gTTS TTS."""

import os
import tempfile

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

# Load the Whisper speech-to-text model once at import time so it is not
# reloaded on every request. "base" is the small general-purpose checkpoint.
model = whisper.load_model("base")

# Groq API client; expects the GROQ_API_KEY environment variable to be set
# (os.getenv returns None otherwise, and the client will fail on first call).
client = Groq(api_key=os.getenv("GROQ_API_KEY"))


def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* to text using Whisper.

    Args:
        audio_path: Path to an audio file readable by Whisper/ffmpeg.

    Returns:
        The transcribed text as a string.
    """
    result = model.transcribe(audio_path)
    return result["text"]


def get_llm_response(user_input):
    """Send *user_input* to the LLM via Groq's API and return its reply.

    Args:
        user_input: The user's message text.

    Returns:
        The assistant's reply text from the first completion choice.
    """
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": user_input}],
        model="llama3-8b-8192",
        stream=False,
    )
    return chat_completion.choices[0].message.content


def text_to_speech(text):
    """Convert *text* to speech using gTTS and return the MP3 file path.

    The temporary file is not deleted automatically; the caller (here,
    Gradio) is responsible for cleanup.

    Args:
        text: Text to synthesize.

    Returns:
        Path to a temporary ``.mp3`` file containing the synthesized speech.
    """
    tts = gTTS(text)
    # Reserve a unique path, then close the handle before gTTS writes to it.
    # The original code leaked the open NamedTemporaryFile object; on Windows
    # the still-open handle would also block tts.save() from reopening the
    # path for writing.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        temp_audio_path = tmp.name
    tts.save(temp_audio_path)
    return temp_audio_path


def chatbot_pipeline(audio):
    """Complete pipeline: audio transcription, LLM interaction, and text-to-speech.

    Args:
        audio: Filepath of the recorded user audio (from ``gr.Audio``).

    Returns:
        A ``(transcription, llm_response, response_audio_path)`` tuple
        matching the three Gradio output components.
    """
    # Step 1: Transcribe audio
    transcription = transcribe_audio(audio)
    # Step 2: Get LLM response
    llm_response = get_llm_response(transcription)
    # Step 3: Convert response to speech
    response_audio = text_to_speech(llm_response)
    # Return transcription, LLM response, and audio
    return transcription, llm_response, response_audio


# Gradio UI: one microphone/file input, three outputs (text, text, audio).
interface = gr.Interface(
    fn=chatbot_pipeline,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="LLM Response"),
        gr.Audio(label="Response Audio"),
    ],
    live=True,
    title="Real-Time Voice-to-Voice Chatbot",
    description="Transcribe audio, interact with an LLM, and respond with audio in real-time.",
)

# Launch interface
if __name__ == "__main__":
    interface.launch()