# Voice-ChatBot / app.py
# (Hugging Face Space file header: uploaded by Zeeshan24, "Update app.py",
#  commit d06e8d0 verified — kept as a comment so the module parses.)
import os

import gradio as gr
import whisper
from groq import Groq
from pydub import AudioSegment
from TTS.api import TTS
# Initialize models.
# The "small" Whisper checkpoint trades some accuracy for faster transcription.
whisper_model = whisper.load_model("small")
tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)

# Initialize Groq client.
# SECURITY: the API key must never be hard-coded in source control — it was
# previously committed here in plain text (that key should be revoked).
# Provide it via the GROQ_API_KEY environment variable instead.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError("GROQ_API_KEY environment variable is not set.")
client = Groq(api_key=groq_api_key)
# Functions for the Chatbot
def voice_to_text(audio_path):
    """Transcribe the audio file at *audio_path* to text using Whisper."""
    transcription = whisper_model.transcribe(audio_path)
    return transcription["text"]
def process_text_with_groq(input_text, model="llama3-8b-8192"):
    """Send *input_text* to the Groq chat API and return the reply text.

    Args:
        input_text: The user's message, passed as a single "user" turn.
        model: Groq model identifier; defaults to the previously hard-coded
            "llama3-8b-8192" so existing callers behave unchanged.

    Returns:
        The assistant's reply text, or an "Error: ..." string if the API
        call fails (best-effort: the UI shows the error instead of crashing).
    """
    messages = [{"role": "user", "content": input_text}]
    try:
        chat_completion = client.chat.completions.create(
            messages=messages,
            model=model,
            stream=False,
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        # Surface the failure as the chatbot's response rather than raising.
        return f"Error: {str(e)}"
def text_to_voice(output_text, audio_path="response.wav"):
    """Synthesize *output_text* to a WAV file using Coqui TTS.

    Args:
        output_text: Text to render as speech.
        audio_path: Destination file path; defaults to the previously
            hard-coded "response.wav" so existing callers behave unchanged.

    Returns:
        The path of the written audio file.
    """
    tts_model.tts_to_file(text=output_text, file_path=audio_path)
    return audio_path
# Gradio Interface
def chatbot(audio_file):
# Convert audio to text
user_input = voice_to_text(audio_file)
# Get Groq LLM response
bot_response = process_text_with_groq(user_input)
# Convert text response to audio
audio_response_path = text_to_voice(bot_response)
return bot_response, audio_response_path
# Gradio UI: one audio upload in, chatbot text plus synthesized speech out.
text_output = gr.Textbox(label="Chatbot Response")
voice_output = gr.Audio(label="Chatbot Voice Response")
ui = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    outputs=[text_output, voice_output],
    title="Zeeshan Voice-to-Voice Chatbot",
    description=(
        "Upload an audio file to interact with Zeeshan. Zeeshan will listen, "
        "process your query using Groq's LLM, and respond with both text and voice."
    ),
)

# Launch the Gradio app only when run as a script.
if __name__ == "__main__":
    ui.launch()