"""Real-time voice-to-voice chatbot.

Pipeline: Whisper (speech -> text) -> Groq Llama (text -> reply)
-> gTTS (reply -> speech), served through a Gradio interface.
"""

import os
import tempfile
import warnings

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment
from pydub.playback import play

# Silence FutureWarning noise (e.g. from whisper/torch model loading).
warnings.filterwarnings("ignore", category=FutureWarning)

# Load the Whisper model once at startup; "base" trades accuracy for speed.
model = whisper.load_model("base")

# SECURITY: never hard-code API keys in source control. The previous key was
# committed in plain text and must be rotated. Read it from the environment.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
if not GROQ_API_KEY:
    raise RuntimeError("Set the GROQ_API_KEY environment variable before running.")

# Initialize the Groq API client.
client = Groq(api_key=GROQ_API_KEY)


def chatbot(audio_file):
    """Run one voice-to-voice turn: transcribe, query the LLM, synthesize speech.

    Parameters
    ----------
    audio_file : str | None
        Filesystem path to the recorded audio. Gradio passes ``None`` when
        the user submits without recording anything.

    Returns
    -------
    tuple[str, str | None]
        The LLM's text response and the path to the synthesized MP3 reply
        (``None`` when no audio was provided).
    """
    # Guard: Gradio hands us None if the mic input is empty.
    if audio_file is None:
        return "No audio received - please record something first.", None

    # Step 1: Transcribe audio to text.
    transcription = model.transcribe(audio_file)
    user_input = transcription["text"]
    print(f"User Input: {user_input}")

    # Step 2: Get response from Llama model (Groq API).
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": user_input,
            }
        ],
        model="llama3-8b-8192",
        stream=False,
    )
    llama_response = chat_completion.choices[0].message.content
    print(f"Llama Response: {llama_response}")

    # Step 3: Convert the response to speech. mkstemp + close avoids the
    # leaked file descriptor from NamedTemporaryFile(...).name.
    fd, output_audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    tts = gTTS(llama_response)
    tts.save(output_audio_path)

    # Step 4: Optional local playback - best-effort only, so a missing audio
    # device (e.g. headless server) does not kill the request.
    try:
        audio_segment = AudioSegment.from_file(output_audio_path, format="mp3")
        play(audio_segment)
    except Exception as exc:
        print(f"Local playback skipped: {exc}")

    return llama_response, output_audio_path


def interface(audio_file):
    """Gradio adapter: forward the recording to chatbot() unchanged."""
    text_response, audio_path = chatbot(audio_file)
    return text_response, audio_path


# Launch the Gradio app only when run as a script, not on import.
if __name__ == "__main__":
    gr.Interface(
        fn=interface,
        inputs=gr.Audio(type="filepath", label="Speak to Chatbot"),
        outputs=[
            gr.Textbox(label="Chatbot Response"),
            gr.Audio(label="Voice Response"),
        ],
        title="Real-Time Voice-to-Voice Chatbot",
    ).launch()