# app.py
# Uploaded by muhammadshaheryar in commit a7d34ca ("Create app.py", verified).
# Step 2: Import necessary libraries
import os
import gradio as gr
from groq import Groq
from gtts import gTTS
import whisper
from io import BytesIO
import soundfile as sf
# Step 3: Set up Groq client
# The API key is read from the GROQ_API_KEY environment variable (for example
# a deployment secret) instead of being embedded in the source file.
# NOTE(review): a real key was previously committed on this line; it should be
# treated as leaked and revoked/rotated.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    # Fail early with an actionable message rather than on the first request.
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "configure it before starting the app."
    )
client = Groq(api_key=_groq_api_key)
# Step 4: Load Whisper model
# Loaded once at import time so every request reuses the same "base" model.
whisper_model = whisper.load_model(name="base")
# Step 5: Function for transcribing audio (adjusted for filepath)
def transcribe_audio(audio_filepath):
    """Transcribe the audio file at ``audio_filepath`` with the Whisper model.

    The path is handed to Whisper directly instead of decoding the file with
    ``soundfile`` first. ``sf.read`` yields float64 samples at the file's own
    sample rate and channel layout, whereas Whisper's loader decodes and
    resamples the file into the mono 16 kHz float32 form the model works on.
    NOTE(review): Whisper's file loader relies on ffmpeg being installed --
    confirm it is available in the deployment image.

    Args:
        audio_filepath: Path to the recorded or uploaded audio file.

    Returns:
        The transcribed text (the ``"text"`` entry of Whisper's result).
    """
    # str() so that pathlib.Path inputs are accepted as well as plain strings.
    result = whisper_model.transcribe(str(audio_filepath))
    return result["text"]
# Step 6: Function to get a response from Groq LLM
def get_groq_response(user_input):
    """Ask the Groq chat model to reply to ``user_input``.

    The text is sent as a single user message to the ``llama3-8b-8192`` model
    through the module-level ``client``.

    Args:
        user_input: The prompt text to send.

    Returns:
        The message content of the first completion choice.
    """
    conversation = [{"role": "user", "content": user_input}]
    completion = client.chat.completions.create(
        messages=conversation,
        model="llama3-8b-8192",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# Step 7: Function to convert text to speech using gTTS
def text_to_speech(text):
    """Synthesize ``text`` with gTTS into an in-memory buffer.

    Args:
        text: The text to speak.

    Returns:
        A ``BytesIO`` holding the audio written by gTTS, rewound to position 0
        so the caller can read it from the beginning.
    """
    buffer = BytesIO()
    gTTS(text).write_to_fp(buffer)
    buffer.seek(0)  # rewind: write_to_fp leaves the cursor at the end
    return buffer
# Step 8: Gradio interface function
def chatbot_pipeline(audio):
    """Run one voice-to-voice turn: transcribe, query the LLM, speak the reply.

    Args:
        audio: Path to the user's audio file as supplied by the Gradio audio
            input, or ``None`` when nothing was recorded or uploaded.

    Returns:
        A ``(response_text, response_audio_path)`` tuple. ``response_audio_path``
        is the path of a temporary ``.mp3`` file holding the spoken reply, or
        ``None`` when no input audio was provided.
    """
    import tempfile  # local import keeps this function self-contained

    # Gradio passes None when the user submits without any audio.
    if audio is None:
        return "No audio received. Please record or upload a clip and try again.", None

    # Step 8a: Transcribe the input audio
    transcribed_text = transcribe_audio(audio)
    # Step 8b: Get response from Groq
    response_text = get_groq_response(transcribed_text)
    # Step 8c: Convert response to speech
    response_audio = text_to_speech(response_text)

    # A raw BytesIO is not a value the Gradio audio output can render; it
    # accepts a file path, so persist the synthesized audio and return its
    # path. delete=False because Gradio reads the file after this function
    # returns.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
        audio_file.write(response_audio.read())
    return response_text, audio_file.name
# Step 9: Define Gradio interface
# One audio input (delivered to the handler as a file path) and two outputs:
# a textbox for the reply text and an audio component for the spoken reply.
interface = gr.Interface(
    title="Real-Time Voice-to-Voice Chatbot",
    description=(
        "Talk to a real-time chatbot that transcribes your voice, "
        "generates responses using Groq API, and reads them back to you!"
    ),
    fn=chatbot_pipeline,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Textbox(), gr.Audio(type="numpy")],
)

# Step 10: Launch Gradio interface
interface.launch(debug=True)