Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| from transformers import pipeline | |
| from gtts import gTTS | |
| import tempfile | |
| import os | |
| from groq import Groq | |
# Choose the inference device (CUDA GPU when torch reports one, otherwise
# the CPU) and load the Whisper speech-recognition pipeline onto it.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

whisper_model = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    device=device,
)
# Initialize the Groq client.
# The API key is read from the GROQ_API_KEY environment variable (on Hugging
# Face Spaces, configure it as a repository secret) so that no credential is
# committed to source control.
# NOTE: the key that used to be hard-coded here has been published and must
# be treated as compromised -- revoke it and issue a new one.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "it is required to initialize the Groq client."
    )
client = Groq(api_key=_groq_api_key)
# Function to handle the voice-to-voice conversation
def voice_to_voice_conversation(audio):
    """Transcribe a recording, get a chat reply for it, and voice the reply.

    Pipeline: Whisper transcribes ``audio`` -> the transcription is sent as a
    single user message to the Groq chat-completions API -> the reply text is
    converted to speech with gTTS and written to a temporary ``.mp3`` file.

    Args:
        audio: Path of the recorded audio file (the Gradio input is declared
            with ``type="filepath"``). May be ``None`` or empty when the
            form is submitted without a recording.

    Returns:
        A ``(transcription, audio_path)`` tuple. ``audio_path`` is the path
        of the generated ``.mp3`` file, or ``None`` when there was nothing
        to transcribe or nothing to speak.
    """
    # Nothing was recorded: skip the model and API calls instead of crashing.
    if not audio:
        return "", None

    # Transcribe the recording using Whisper.
    transcription = whisper_model(audio)["text"]
    if not transcription.strip():
        # Silence / unintelligible audio: there is no prompt to send.
        return transcription, None

    # Get a response from the Groq API using Llama 3 8B.
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": transcription}],
        model="llama3-8b-8192",
    )
    response_text = chat_completion.choices[0].message.content
    if not response_text or not response_text.strip():
        # gTTS rejects empty text, so return the transcription without audio.
        return transcription, None

    # Reserve a temporary .mp3 path. delete=False keeps the file on disk after
    # the handle is closed, because the caller reads it after this function
    # returns. The handle is closed *before* gTTS writes to the path, since
    # some platforms do not allow a second open of a still-open temp file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_file_path = tmp_file.name

    # Convert the reply to speech and save it to the reserved path.
    gTTS(response_text).save(tmp_file_path)

    # Return the transcription text and the path of the generated audio file.
    return transcription, tmp_file_path
# Gradio user interface: one audio input (delivered to the handler as a file
# path) and two outputs -- the transcription text and the spoken reply.
audio_input = gr.Audio(type="filepath")
transcription_output = gr.Textbox(label="Transcription")
response_audio_output = gr.Audio(label="Response Audio")

interface = gr.Interface(
    fn=voice_to_voice_conversation,
    inputs=audio_input,
    outputs=[transcription_output, response_audio_output],
    title="Voice-to-Voice Chatbot",
    description="Speak into the microphone, and the chatbot will respond with a generated voice message.",
    live=False,
)

# Start serving the app.
interface.launch()