"""Voice-to-voice chatbot: Whisper (speech->text) -> Groq LLM -> Coqui TTS (text->speech), served via Gradio."""

import os

import whisper
from TTS.api import TTS
import gradio as gr
from pydub import AudioSegment  # NOTE(review): unused here, but may be needed by audio backends — confirm before removing
from groq import Groq

# Initialize models once at import time (model loading is expensive).
whisper_model = whisper.load_model("small")  # smaller Whisper model for faster processing
tts_model = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
    gpu=False,
)

# SECURITY FIX: the Groq API key was previously hard-coded in source (a leaked
# secret). Read it from the environment instead and fail fast with a clear
# message so misconfiguration is caught at startup, not mid-request.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError("Set the GROQ_API_KEY environment variable before running this app.")
client = Groq(api_key=groq_api_key)


def voice_to_text(audio_path):
    """Transcribe the audio file at *audio_path* to text using Whisper.

    Returns the transcript string from Whisper's result dict.
    """
    result = whisper_model.transcribe(audio_path)
    return result["text"]


def process_text_with_groq(input_text):
    """Send *input_text* to the Groq LLM and return the assistant's reply.

    On any API failure, returns an "Error: ..." string rather than raising,
    so the UI always has something to display.
    """
    messages = [{"role": "user", "content": input_text}]
    try:
        chat_completion = client.chat.completions.create(
            messages=messages,
            model="llama3-8b-8192",
            stream=False,
        )
        return chat_completion.choices[0].message.content
    except Exception as e:  # boundary: surface any API error as display text
        return f"Error: {str(e)}"


def text_to_voice(output_text):
    """Synthesize *output_text* to speech with Coqui TTS; return the WAV file path.

    NOTE(review): writes to a fixed "response.wav" in the working directory,
    so concurrent requests would overwrite each other — consider tempfile.
    """
    audio_path = "response.wav"
    tts_model.tts_to_file(text=output_text, file_path=audio_path)
    return audio_path


def chatbot(audio_file):
    """Full pipeline: uploaded audio -> transcript -> LLM reply -> (text, audio path)."""
    # Convert audio to text
    user_input = voice_to_text(audio_file)
    # Get Groq LLM response
    bot_response = process_text_with_groq(user_input)
    # Convert text response to audio
    audio_response_path = text_to_voice(bot_response)
    return bot_response, audio_response_path


# Gradio UI: one audio input, text + audio outputs.
ui = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    outputs=[
        gr.Textbox(label="Chatbot Response"),
        gr.Audio(label="Chatbot Voice Response"),
    ],
    title="Zeeshan Voice-to-Voice Chatbot",
    description="Upload an audio file to interact with Zeeshan. Zeeshan will listen, process your query using Groq's LLM, and respond with both text and voice.",
)

# Launch Gradio app
if __name__ == "__main__":
    ui.launch()