from pydub import AudioSegment
from pydub.playback import play
import tempfile
import whisper
from gtts import gTTS
from groq import Groq
import gradio as gr
import os
import warnings
# ignore warnings
warnings.filterwarnings("ignore", category=FutureWarning)
# Load Whisper Model
model = whisper.load_model("base") # Choose appropriate model size
# Read the Groq API key from the environment instead of hardcoding it
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Initialize Groq API client
client = Groq(api_key=GROQ_API_KEY)
# Function for the real-time voice-to-voice chatbot
def chatbot(audio_file):
    # Step 1: Transcribe audio to text with Whisper
    transcription = model.transcribe(audio_file)
    user_input = transcription["text"]
    print(f"User Input: {user_input}")

    # Step 2: Get a response from the Llama model (Groq API)
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": user_input,
            }
        ],
        model="llama3-8b-8192",
        stream=False,
    )
    llama_response = chat_completion.choices[0].message.content
    print(f"Llama Response: {llama_response}")

    # Step 3: Convert the Llama response to speech
    tts = gTTS(llama_response)
    output_audio_path = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
    tts.save(output_audio_path)
    # Step 4: Play the audio locally (optional; skipped if no audio device is available)
    try:
        audio_segment = AudioSegment.from_file(output_audio_path, format="mp3")
        play(audio_segment)
    except Exception:
        pass

    return llama_response, output_audio_path
# Gradio interface wrapper
def interface(audio_file):
    text_response, audio_path = chatbot(audio_file)
    return text_response, audio_path
# Launch the Gradio app
gr.Interface(
    fn=interface,
    inputs=gr.Audio(type="filepath", label="Speak to Chatbot"),
    outputs=[
        gr.Textbox(label="Chatbot Response"),
        gr.Audio(label="Voice Response"),
    ],
    title="Real-Time Voice-to-Voice Chatbot",
).launch()
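
# A minimal usage sketch (an assumption, not part of the original file): both Whisper and
# pydub require ffmpeg to be installed on the host.
#   export GROQ_API_KEY="your-key-here"
#   python app.py
# Gradio then serves the interface at http://127.0.0.1:7860 by default.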