# voicechatbot / app.py — Hugging Face Space entry point (uploaded by Fawad97, commit 303c9bb).
import asyncio
import os
import tempfile
from io import BytesIO

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment
# Read the Groq API key from the environment instead of hard-coding it:
# committing a live key to source control leaks it to anyone who can see the repo.
groq_api_key = os.environ.get("GROQ_API_KEY", "")

# Groq client used for chat completions.
client = Groq(api_key=groq_api_key)

# Whisper speech-to-text model; "base" trades accuracy for load time and speed
# (alternatives: "small", "medium", "large").
model = whisper.load_model("base")
async def transcribe_audio_async(audio_file):
    """Transcribe an in-memory audio buffer using the module-level Whisper model.

    Parameters:
        audio_file: BytesIO holding the recorded audio bytes.

    Returns:
        The transcribed text, or an "Error in transcribing audio: ..." string
        on failure (callers check for the "Error" prefix).
    """
    try:
        if not isinstance(audio_file, BytesIO):
            raise ValueError("The provided audio file is not in the expected format.")
        audio_file.seek(0)  # rewind in case the buffer was already read
        # Whisper's transcribe() takes a path, so spill the buffer to a unique
        # temp file — the original fixed name "temp.wav" would collide when two
        # requests run at once.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp.write(audio_file.read())
            temp_file_path = tmp.name
        try:
            result = model.transcribe(temp_file_path)
        finally:
            # Clean up even if transcription raises (the original leaked the
            # temp file on error).
            os.remove(temp_file_path)
        return result["text"]
    except Exception as e:
        return f"Error in transcribing audio: {str(e)}"
def generate_response(text):
    """Send *text* to the Groq LLaMA-3 8B chat model and return its reply.

    Returns an "Error in generating response: ..." string on any failure,
    including empty input.
    """
    try:
        if not text:
            raise ValueError("No text provided for response generation.")
        # Single-turn exchange: the whole transcript goes in as one user message.
        completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",
        )
        return completion.choices[0].message.content
    except Exception as exc:
        return f"Error in generating response: {str(exc)}"
def text_to_speech(text):
    """Synthesize *text* to WAV audio bytes using gTTS (English voice).

    Returns the WAV data as bytes on success, or an "Error in converting
    text to speech: ..." string on failure.
    """
    try:
        if not text:
            raise ValueError("No text provided for text-to-speech.")
        # gTTS only emits MP3; render it into an in-memory buffer first.
        mp3_buf = BytesIO()
        gTTS(text, lang='en').write_to_fp(mp3_buf)
        mp3_buf.seek(0)
        # Re-encode MP3 -> WAV so Gradio's audio widget can play it.
        wav_buf = BytesIO()
        AudioSegment.from_mp3(mp3_buf).export(wav_buf, format="wav")
        wav_buf.seek(0)
        return wav_buf.read()
    except Exception as err:
        return f"Error in converting text to speech: {str(err)}"
async def chatbot(audio):
    """Full voice pipeline: audio -> transcript -> LLM reply -> spoken reply.

    Parameters:
        audio: file path (str) or BytesIO from the Gradio audio widget,
            or None when nothing was recorded.

    Returns:
        (reply_text, wav_bytes) on success; (error_message, None) otherwise.
    """
    try:
        if audio is None:
            return "No audio file provided.", None
        # Gradio hands us a file path; load it into memory for the transcriber.
        if isinstance(audio, str):
            with open(audio, "rb") as fh:
                audio = BytesIO(fh.read())
        transcript = await transcribe_audio_async(audio)
        if "Error" in transcript:
            return transcript, None
        reply = generate_response(transcript)
        if "Error" in reply:
            return reply, None
        speech = text_to_speech(reply)
        # text_to_speech returns an error string (not bytes) on failure;
        # in that case surface the text reply without audio.
        return (reply, speech) if isinstance(speech, bytes) else (reply, None)
    except Exception as exc:
        return f"Error in chatbot processing: {str(exc)}", None
def _run_chatbot(audio):
    """Sync wrapper for Gradio: run the async pipeline, then persist the reply
    audio to disk, because gr.Audio(type="filepath") expects a path, not the
    raw WAV bytes that chatbot() returns."""
    text, audio_bytes = asyncio.run(chatbot(audio))
    if isinstance(audio_bytes, (bytes, bytearray)):
        # Hand Gradio a playable file; delete=False keeps it alive for serving.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp.write(audio_bytes)
        return text, tmp.name
    return text, audio_bytes

# Voice chat UI: record/upload audio in, get text + spoken reply out.
iface = gr.Interface(
    fn=_run_chatbot,
    inputs=gr.Audio(type="filepath"),  # Allow file upload
    outputs=[gr.Textbox(), gr.Audio(type="filepath")],
)

# Launch the Gradio app
iface.launch()