Spaces:

Asrar990
/

audio_chatbot

Sleeping

App Files Files Community

audio_chatbot / app.py

Asrar990

Update app.py

1d28351 verified about 1 year ago

raw

history blame contribute delete

1.95 kB

	import whisper
	from groq import Groq
	import os
	from gtts import gTTS
	import tempfile
	import gradio as gr

	# Speech-to-text engine: the "base" Whisper checkpoint, loaded at module level.
	model = whisper.load_model("base")

	# Groq API client; the key is taken from the GROQ_API_KEY environment variable.
	client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

	def transcribe_audio(audio_path):
	    """Run the module-level Whisper model on ``audio_path`` and return its text."""
	    return model.transcribe(audio_path)["text"]

	def get_llm_response(user_input, model_name="llama-3.1-8b-instant"):
	    """Send ``user_input`` to a Groq-hosted chat model and return the reply text.

	    Args:
	        user_input: Text sent as the single ``user`` message of the request.
	        model_name: Groq model identifier to query. The default replaces the
	            previously hard-coded ``llama3-8b-8192``, which Groq lists as
	            deprecated with ``llama-3.1-8b-instant`` as its replacement.
	            Pass another identifier to target a different model.

	    Returns:
	        The ``content`` of the first choice of the (non-streamed) completion.
	    """
	    completion = client.chat.completions.create(
	        messages=[{"role": "user", "content": user_input}],
	        model=model_name,
	        stream=False,
	    )
	    return completion.choices[0].message.content

	def text_to_speech(text):
	    """Synthesise ``text`` with gTTS and return the path of the saved MP3 file.

	    The file is created with ``delete=False``, so it is not removed
	    automatically; whoever consumes the returned path owns the cleanup.

	    Args:
	        text: The text to be spoken.

	    Returns:
	        Filesystem path (``str``) of the newly written ``.mp3`` file.
	    """
	    # Reserve a unique .mp3 path, then close the handle before gTTS writes to
	    # it so the descriptor is released deterministically instead of whenever
	    # the temporary-file object happens to be garbage collected.
	    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
	        mp3_path = tmp.name

	    gTTS(text).save(mp3_path)
	    return mp3_path

	def chatbot_pipeline(audio):
	    """Run one voice turn: transcribe the audio, query the LLM, speak the reply.

	    Args:
	        audio: Path of the recorded audio file, or a falsy value (``None`` or
	            ``""``) when there is no recording to process.

	    Returns:
	        A ``(transcription, llm_response, response_audio)`` tuple, where
	        ``response_audio`` is the path of the synthesised reply. When there is
	        no audio the result is ``("", "", None)``; when the transcription is
	        blank the LLM and text-to-speech steps are skipped and the last two
	        items are ``""`` and ``None``.
	    """
	    # The UI callback may be invoked without a recording (the audio input
	    # yields None in that case); handing that to Whisper would raise.
	    if not audio:
	        return "", "", None

	    # Step 1: speech -> text.
	    transcription = transcribe_audio(audio)
	    if not transcription.strip():
	        # Nothing intelligible was recognised, so there is no prompt to send
	        # and no reply to synthesise.
	        return transcription, "", None

	    # Step 2: text -> LLM reply.
	    llm_response = get_llm_response(transcription)

	    # Step 3: reply -> speech.
	    response_audio = text_to_speech(llm_response)

	    return transcription, llm_response, response_audio

	# Gradio UI: one audio input (delivered to the callback as a file path)
	# wired to the pipeline, with two text outputs and one audio output.
	_audio_input = gr.Audio(type="filepath")
	_result_views = [
	    gr.Textbox(label="Transcription"),
	    gr.Textbox(label="LLM Response"),
	    gr.Audio(label="Response Audio"),
	]
	interface = gr.Interface(
	    fn=chatbot_pipeline,
	    inputs=_audio_input,
	    outputs=_result_views,
	    title="Real-Time Voice-to-Voice Chatbot",
	    description="Transcribe audio, interact with an LLM, and respond with audio in real-time.",
	    live=True,
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    interface.launch()