# app.py — voice-to-voice chatbot (Hugging Face Space by Engineer786, commit 1512b06)
import gradio as gr
from gtts import gTTS
from groq import Groq
import whisper
import tempfile
import os
# Initialize Groq client
# NOTE(review): os.environ.get returns None when the 'GroqApi' variable is
# unset; Groq(api_key=None) will then fail at request time — confirm the
# deployment always sets this secret.
api = os.environ.get('GroqApi')
client = Groq(api_key=api)
# Load the Whisper model locally
# Loading happens once at import time; "base" trades accuracy for speed.
whisper_model = whisper.load_model("base") # Options: "tiny", "base", "small", "medium", "large"
# Function to handle transcription, LLM response, and audio synthesis
def voice_to_voice(audio_file):
    """Transcribe speech, get an LLM reply, and synthesize it as audio.

    Parameters
    ----------
    audio_file : str
        Filesystem path to the recorded audio clip (Gradio supplies this
        because the Audio input uses type="filepath").

    Returns
    -------
    tuple
        (path to generated MP3 reply, response text) on success, or
        (None, "Error: ...") on failure.
    """
    try:
        # 1. Transcribe the audio using the local Whisper model.
        result = whisper_model.transcribe(audio_file)
        user_input = result["text"]

        # 2. Interact with the LLM via the Groq API.
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": user_input,
                }
            ],
            model="llama3-8b-8192",
            stream=False,
        )
        response_text = chat_completion.choices[0].message.content

        # 3. Convert the text response to speech using gTTS.
        # Use mkstemp + close instead of NamedTemporaryFile(delete=False):
        # the latter leaves the handle open (a leak), and on Windows gTTS
        # cannot write to a file that is still held open.
        tts = gTTS(response_text)
        fd, temp_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        tts.save(temp_path)
        return temp_path, response_text
    except Exception as e:
        # Boundary handler for the Gradio callback: report the error in
        # the UI instead of crashing the worker process.
        return None, f"Error: {str(e)}"
# Build Gradio interface (indentation reconstructed: the with-block body
# was flattened in the published source and would not parse as-is).
with gr.Blocks() as demo:
    gr.Markdown("## Real-Time Voice-to-Voice Chatbot")
    audio_input = gr.Audio(type="filepath", label="Speak Something")
    audio_output = gr.Audio(label="Bot Response")
    text_output = gr.Textbox(label="Transcription & Response")
    btn = gr.Button("Process")
    # One click runs the full pipeline: transcribe -> LLM -> TTS.
    btn.click(voice_to_voice, inputs=audio_input, outputs=[audio_output, text_output])

# Launch the interface
demo.launch()