# voice-chatbot / app.py
# Hugging Face Space header: QuaidKhalid — "Create app.py" (commit 8345bbf, verified)
import os
import gradio as gr
import whisper
from gtts import gTTS
import io
from groq import Groq

# SECURITY: never hard-code API keys in source — the previously committed key
# is exposed and must be revoked. Provide GROQ_API_KEY via the environment
# (e.g. a Hugging Face Space secret).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set. "
        "Add it as a secret/environment variable before launching the app."
    )

# Load the Whisper "base" speech-to-text model once at startup
# (model loading is expensive; reuse it across requests).
model = whisper.load_model("base")

# Groq client used to generate chat responses.
client = Groq(api_key=GROQ_API_KEY)
def process_audio(file_path):
    """Transcribe spoken audio, generate an LLM reply, and voice the reply.

    Pipeline: Whisper (speech -> text) -> Groq LLaMA (text -> reply)
    -> gTTS (reply -> MP3).

    Parameters
    ----------
    file_path : str | None
        Path to the uploaded/recorded audio file from the Gradio component.

    Returns
    -------
    tuple[str, str | None]
        (reply text, path to the generated MP3) on success, or
        (error message, None) on any failure — errors are returned rather
        than raised so they surface in the UI instead of crashing the app.
    """
    try:
        if file_path is None:
            raise ValueError("No audio file received. Please upload or record your voice.")

        # Speech-to-text with Whisper.
        audio = whisper.load_audio(file_path)
        result = model.transcribe(audio)
        text = result["text"].strip()
        if not text:
            # Avoid sending an empty prompt to the LLM (silent/garbled audio).
            raise ValueError("No speech detected in the audio. Please try again.")

        # Generate a response with the Groq-hosted LLaMA model.
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",
        )
        response_message = chat_completion.choices[0].message.content.strip()

        # Text-to-speech: gTTS writes the MP3 straight to disk, so the
        # former BytesIO round-trip (buffer -> copy -> file) is unnecessary.
        audio_file_path = "response.mp3"
        gTTS(response_message).save(audio_file_path)

        return response_message, audio_file_path
    except Exception as e:
        # Broad catch is intentional at this UI boundary: report the error
        # in the textbox instead of taking down the Gradio app.
        return f"An error occurred: {e}", None
# --- Gradio UI -------------------------------------------------------------
# Microphone/file audio in; the chatbot's reply comes back as both text
# and synthesized speech.
reply_text = gr.Textbox(label="Response Text")
reply_audio = gr.Audio(label="Response Audio")

iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=[reply_text, reply_audio],
    live=True,  # process audio as soon as it is available
    title="Voice-to-Voice Chatbot",
    description=(
        "A real-time voice-to-voice chatbot using OpenAI Whisper, "
        "Groq LLaMA model, and GTTS."
    ),
)

# Start the web server.
iface.launch()