# app.py — real-time voice-to-voice chatbot (Whisper STT -> Groq LLaMA -> gTTS TTS)
import gradio as gr
import os
from groq import Groq
import whisper
import torch
from gtts import gTTS
import IPython.display as ipd
# Speech-to-text model size: "small" balances accuracy and memory;
# "base" is lighter, "medium" is more accurate but heavier.
model_name = "small"
# Pick the compute device first and load the model directly onto it,
# instead of loading on the default device and moving afterwards
# (avoids a redundant host-memory copy of the weights).
device = "cuda" if torch.cuda.is_available() else "cpu"
whisper_model = whisper.load_model(model_name, device=device)
# Groq API client. SECURITY: the key must come from the environment —
# the previously hard-coded key was a committed credential leak and
# should be revoked/rotated. Set GROQ_API_KEY before launching.
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)
def transcribe_audio(audio_path):
    """Run Whisper speech-to-text on the file at *audio_path*.

    Returns the transcribed text, or a fixed error string if
    transcription fails for any reason.
    """
    try:
        print(f"Audio file path received for transcription: {audio_path}")
        # Whisper loads and decodes the audio file itself; no manual
        # preprocessing is needed here.
        transcription_result = whisper_model.transcribe(audio_path)
        print(f"Transcription result: {transcription_result}")
        return transcription_result['text']
    except Exception as exc:
        print(f"Error during transcription: {exc}")
        return "Error during transcription"
def get_llama_response(transcription):
    """Send *transcription* as a single user message to LLaMA via the
    Groq API and return the model's reply text.

    Returns a fixed error string if the API call fails.
    """
    conversation = [{"role": "user", "content": transcription}]
    try:
        completion = client.chat.completions.create(
            messages=conversation,
            model="llama3-8b-8192",
        )
        # Take the first (and only) candidate reply.
        return completion.choices[0].message.content
    except Exception as exc:
        print(f"Error during LLaMA response generation: {exc}")
        return "Error during response generation"
def text_to_speech(text):
    """Synthesize *text* to speech with gTTS, save it as response.mp3
    in the working directory, and return that file path.

    Returns a fixed error string if synthesis fails.
    """
    output_path = "response.mp3"
    try:
        gTTS(text).save(output_path)
        return output_path
    except Exception as exc:
        print(f"Error during text-to-speech conversion: {exc}")
        return "Error during text-to-speech conversion"
def chatbot(audio_path):
    """Full pipeline for the Gradio UI: recorded audio file ->
    transcription -> LLM reply -> synthesized speech.

    Returns (transcription, reply text, path to the reply audio file).
    """
    transcription = transcribe_audio(audio_path)
    reply = get_llama_response(transcription)
    speech_path = text_to_speech(reply)
    return transcription, reply, speech_path
# Build the Gradio UI: a single audio input (delivered to the handler as
# a file path) and three outputs — transcription, LLM reply, spoken reply.
interface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=["text", "text", "audio"],
    live=True,  # re-run the pipeline as soon as a recording arrives
    description="Real-time Voice-to-Voice Chatbot",
)

# Start the web server (blocks until interrupted).
interface.launch()