# AUDIOTOAUDIO / app.py
# Author: MalikShehram
# Commit message: Update app.py
# Commit: 579c50c (verified)
import whisper
from groq import Groq
from gtts import gTTS
import os
import gradio as gr
import tempfile
import logging
# Setup logging
logging.basicConfig(level=logging.DEBUG)

# Initialize the Whisper model once at import time; process_audio reuses it
whisper_model = whisper.load_model("base")

# Initialize Groq client.
# SECURITY: an API key must never be committed to source control. It is read
# from the GROQ_API_KEY environment variable (configure it as a deployment
# secret). The key that used to be hard-coded on this line has been exposed
# and should be revoked.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
def process_audio(input_audio):
    """Answer a spoken prompt with text and synthesized speech.

    Pipeline: Whisper transcribes the audio file, the transcription is sent
    as a single user message to the Groq chat-completions API, and the reply
    is converted to an MP3 file with gTTS.

    Args:
        input_audio: Path of the audio file to transcribe. ``None`` or an
            empty path (nothing recorded or uploaded) is reported as an error.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the LLM reply
        and ``audio_path`` is the path of the generated MP3 file. On any
        failure ``text`` is ``"An error occurred: <reason>"`` and
        ``audio_path`` is ``None``; exceptions are not propagated.
    """
    try:
        # Fail early with a readable message instead of an obscure error
        # raised from inside the transcription call.
        if not input_audio:
            raise ValueError("No audio input was provided.")
        logging.debug("Received audio file: %s", input_audio)

        # Transcribe audio with Whisper
        transcription = whisper_model.transcribe(input_audio)
        user_text = transcription.get("text")
        if not user_text or not user_text.strip():
            raise ValueError("Whisper failed to transcribe the audio.")
        logging.debug("Transcription: %s", user_text)

        # Interact with LLM via Groq API
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": user_text}],
            model="llama3-8b-8192",
        )
        if not response.choices:
            raise ValueError("Groq API returned an empty response.")
        llm_response = response.choices[0].message.content
        logging.debug("LLM Response: %s", llm_response)

        # The content may be None as well as blank; check both before
        # calling .strip() so the error message stays meaningful.
        if not llm_response or not llm_response.strip():
            raise ValueError("LLM response is empty or invalid.")

        # Convert LLM response to speech with gTTS
        tts = gTTS(llm_response)
        # Only a unique path is needed here: close the handle right away so
        # no file descriptor leaks and the path can be reopened for writing.
        # delete=False keeps the file on disk for the caller to serve.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_output:
            output_path = temp_output.name
        tts.save(output_path)
        logging.debug("Generated audio file: %s", output_path)

        # Return response text and audio file path
        return llm_response, output_path
    except Exception as e:
        # UI boundary: log the full traceback, then surface a short message
        # in the text output instead of crashing the request.
        logging.exception("Error in process_audio: %s", e)
        return f"An error occurred: {e}", None
# Create Gradio interface
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),  # Using 'filepath' to get the file path
    outputs=[
        gr.Textbox(label="LLM Response"),
        gr.Audio(label="Response Audio"),
    ],
    title="Real-Time Voice-to-Voice Chatbox",
    description="Transcribes input audio, interacts with an LLM via Groq API, and generates audio responses.",
)

# Launch Gradio app only when this file is executed as a script, so that
# importing the module (e.g. from a test or another entry point) does not
# start a server as a side effect.
if __name__ == "__main__":
    interface.launch(share=True)