# Source: pec / app.py
# Last commit: "Update app.py" (3ecb667, verified), author EngrGullu
import os
import tempfile

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS
# The Groq API key is a credential and must never be committed to source
# control. Provide it through the GROQ_API_KEY environment variable (for
# example as a deployment secret). A key was previously hard-coded at this
# spot; it has been exposed and should be revoked and rotated.
#
# Checked before the Whisper model is loaded so a misconfigured deployment
# fails immediately with a clear message instead of after the model load.
if not os.environ.get("GROQ_API_KEY"):
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it in the environment (or add it as "
        "a deployment secret) before starting the app."
    )

# Initialize Whisper model ("base" checkpoint); loaded once at import time and
# shared by every transcription request.
model = whisper.load_model("base")

# Initialize Groq API client with the key taken from the environment.
client = Groq(api_key=os.environ["GROQ_API_KEY"])
# Step 1: Transcribe Audio (Speech-to-Text using Whisper)
def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* with the module-level Whisper model.

    Args:
        audio_path: Path of the audio file handed to ``model.transcribe``.

    Returns:
        The value stored under the ``'text'`` key of the transcription result.
    """
    return model.transcribe(audio_path)["text"]
# Step 2: Interact with LLM (Groq API)
def interact_with_llm(user_input):
    """Send *user_input* to the Groq chat-completions API as a single user message.

    Args:
        user_input: Text used as the content of the one ``user`` message.

    Returns:
        The message content of the first choice in the (non-streamed)
        completion.
    """
    # One-turn conversation: no system prompt and no earlier history is sent.
    conversation = [{"role": "user", "content": user_input}]
    completion = client.chat.completions.create(
        messages=conversation,
        model="llama3-8b-8192",
        stream=False,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# Step 3: Convert Text to Speech using gTTS
def text_to_speech(text):
    """Synthesize *text* as English speech and return the path of the MP3 file.

    Each call writes to its own temporary ``.mp3`` file. A single fixed
    filename would be shared by all requests, so overlapping or consecutive
    requests could overwrite one another's audio.

    Args:
        text: The text to speak.

    Returns:
        Path of the generated MP3 file, or ``None`` when *text* is empty or
        contains only whitespace (there is nothing to synthesize).
    """
    # Nothing to say: skip synthesis rather than letting the TTS call fail.
    if not text or not text.strip():
        return None

    tts = gTTS(text, lang="en")

    # Reserve a unique path, then close the handle before gTTS writes to it.
    # delete=False keeps the file on disk after this function returns so the
    # caller can still read it; the file is left in the temp directory.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_file = tmp.name
    tts.save(audio_file)
    return audio_file
# Combined workflow: Transcribe -> Interact with LLM -> Convert to Speech
def chatbot(audio):
    """Run one voice round-trip: speech -> text -> LLM reply -> speech.

    Args:
        audio: Path of the recorded audio file, or ``None`` / empty when no
            recording is available (for example after the input is cleared).

    Returns:
        A ``(transcription, llm_response, audio_output)`` tuple. When there is
        no recording, ``("", "", None)`` is returned. When the recording
        transcribes to blank text, the LLM and text-to-speech steps are
        skipped and ``(transcription, "", None)`` is returned.
    """
    # No recording to process: return empty outputs instead of failing inside
    # the transcription step.
    if not audio:
        return "", "", None

    # Step 1: Transcribe Audio to Text
    transcription = transcribe_audio(audio)

    # Silence or unintelligible audio: nothing to ask the LLM and nothing to
    # speak back.
    if not transcription or not transcription.strip():
        return transcription, "", None

    # Step 2: Get LLM response based on transcription
    llm_response = interact_with_llm(transcription)

    # Step 3: Convert LLM response to audio (text-to-speech)
    audio_output = text_to_speech(llm_response)

    return transcription, llm_response, audio_output
# Gradio Interface setup
# Wire the chatbot pipeline into a Gradio UI: one audio input, delivered to
# `chatbot` as a file path, and three outputs (two text fields and one audio
# player) matching the tuple that `chatbot` returns.
interface = gr.Interface(
    title="Real-Time Voice-to-Voice Chatbot",
    description="Talk to an AI in real-time! Speak into the microphone, get a response, and hear it back.",
    fn=chatbot,
    inputs=gr.Audio(type="filepath", label="Speak into the microphone"),
    outputs=[
        "text",  # what the user said, as transcribed
        "text",  # the reply produced by the LLM
        gr.Audio(type="filepath", label="Response Audio"),  # the spoken reply
    ],
    live=True,
)

# Start the web app only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    interface.launch()