# Voice-to-voice chatbot — Hugging Face Space app.
# Import necessary libraries (stdlib first, then third-party).
import os
import tempfile

import gradio as gr
import whisper
from dotenv import load_dotenv
from groq import Groq
from gtts import gTTS

# Load environment variables from .env file (if available).
load_dotenv()

# SECURITY FIX: the original hard-coded a live Groq API key in source as a
# fallback — a credential leak. The key must come from the environment (or
# the .env file loaded above); fail fast with an actionable message instead.
if "GROQ_API_KEY" not in os.environ:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your environment or a .env file."
    )

# Initialize Groq client.
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Load the Whisper speech-to-text model.
# "base" is lightweight; use "large" for higher accuracy at higher cost.
whisper_model = whisper.load_model("base")
# Define chatbot function
def voice_to_voice_chatbot(input_audio):
    """Answer a spoken question with a spoken reply.

    Pipeline:
      1. Transcribe ``input_audio`` to text with Whisper.
      2. Send the transcript to Groq's LLM API (llama3-8b-8192).
      3. Convert the LLM reply to speech with gTTS.

    Args:
        input_audio: Path to the recorded audio file (Gradio ``filepath``).

    Returns:
        Path to an MP3 file containing the spoken response.

    Raises:
        gr.Error: If any pipeline stage fails. (BUG FIX: the original
            returned the error *string*, which the ``gr.Audio`` output
            component would then try to load as an audio file path.)
    """
    # Step 1: Transcribe audio to text using Whisper.
    try:
        transcription_result = whisper_model.transcribe(input_audio)
        user_query = transcription_result["text"]
        print("User Query:", user_query)
    except Exception as e:
        raise gr.Error(f"Error in transcription: {str(e)}") from e

    # Step 2: Query the Groq API with the transcribed text.
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": user_query}],
            model="llama3-8b-8192",
            stream=False,
        )
        bot_response = chat_completion.choices[0].message.content
        print("Bot Response:", bot_response)
    except Exception as e:
        raise gr.Error(f"Error in LLM response: {str(e)}") from e

    # Step 3: Convert the LLM response to speech using gTTS.
    try:
        tts = gTTS(text=bot_response, lang="en")
        # BUG FIX: NamedTemporaryFile(...).name returned a path whose file is
        # closed and deleted as soon as the unreferenced object is garbage-
        # collected (and the open file is locked on Windows), racing with
        # tts.save(). mkstemp creates a persistent file safely instead.
        fd, output_audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)  # gTTS reopens the path itself
        tts.save(output_audio_path)
    except Exception as e:
        raise gr.Error(f"Error in text-to-speech conversion: {str(e)}") from e

    # Return the audio file path for the gr.Audio output component.
    return output_audio_path
# Build the Gradio UI: record audio in, play generated audio out.
interface = gr.Interface(
    fn=voice_to_voice_chatbot,
    inputs=gr.Audio(type="filepath"),   # input: path of the recorded audio
    outputs=gr.Audio(type="filepath"),  # output: path of the generated audio
    live=True,  # re-run automatically as new audio arrives
    description=(
        "This Voice to Voice Chatbot is created by Rizwan Sajad using "
        "OpenAI Whisper, Groq API, and gTTS."
    ),
)

# Start the web app.
interface.launch()