|
|
import gradio as gr |
|
|
import numpy as np |
|
|
from utils.speech_to_text import SpeechRecognizer |
|
|
from utils.text_to_speech import TextToSpeech |
|
|
|
|
|
class VoiceChatApp:
    """Gradio voice-chat demo: transcribes microphone input and replies with TTS audio."""

    # Sample rate (Hz) of the silent placeholder clip returned when there is
    # nothing to say (no recording / empty transcription).
    _SILENCE_SAMPLE_RATE = 24000

    def __init__(self):
        # Project-local speech-to-text / text-to-speech engines.
        self.speech_recognizer = SpeechRecognizer()
        self.tts_engine = TextToSpeech()
        # Scripted greeting spoken when the page loads.
        self.welcome_message = (
            "Hello, this is GenCent AI calling. This is a follow-up call. "
            "Am I speaking to Alex?"
        )
        # NOTE(review): history lives on the app instance, so it is shared by
        # every browser session connected to this process — confirm single-user
        # use, or move it into gr.State for per-session isolation.
        self.chat_history = []

    @staticmethod
    def _silence():
        """Return a one-second silent clip as (sample_rate, int16 samples)."""
        rate = VoiceChatApp._SILENCE_SAMPLE_RATE
        return rate, np.zeros(rate, dtype=np.int16)

    async def welcome_audio(self):
        """Synthesize the welcome message and seed the chat history.

        Returns:
            (chat_history, (sample_rate, int16 samples)) for the Chatbot and
            Audio output components.
        """
        sample_rate, audio_data = await self.tts_engine.synthesize(self.welcome_message)
        audio_response = (sample_rate, audio_data.astype(np.int16))
        # Rebuild (rather than append to) the history: interface.load fires on
        # every page load, and appending stacked duplicate welcome turns on
        # reload. `None` in the user slot marks a bot-only turn.
        self.chat_history = [(None, self.welcome_message)]
        return self.chat_history, audio_response

    async def process_audio(self, audio, history):
        """Transcribe user audio, produce a reply, and return updated outputs.

        Args:
            audio: (sample_rate, samples) tuple from the gr.Audio microphone
                input, or None when the component was cleared.
            history: current Chatbot history as (user, bot) pairs.

        Returns:
            (history, (sample_rate, int16 samples), None) — the trailing None
            clears the microphone component for the next turn.
        """
        # Nothing recorded (e.g. the input was just cleared): keep the history
        # unchanged and play silence instead of raising.
        if audio is None:
            return history, self._silence(), None

        text_input = await self.speech_recognizer.transcribe(audio)
        if not text_input:
            # Transcription produced no text; treat the same as empty input.
            return history, self._silence(), None

        # TODO: replace this canned reply with a real response generator.
        response = "This is a test response. Please confirm if you can hear this clearly."

        sample_rate, audio_data = await self.tts_engine.synthesize(response)
        audio_response = (sample_rate, audio_data.astype(np.int16))

        history.append((text_input, response))
        return history, audio_response, None

    def launch(self):
        """Build the Gradio Blocks UI and start the server (blocking call)."""
        with gr.Blocks(title="Voice-Enabled Chatbot") as interface:
            with gr.Row():
                with gr.Column(scale=2):
                    chatbot = gr.Chatbot(label="Chat History", height=400)
                    audio_input = gr.Audio(sources=["microphone"], type="numpy",
                                           label="Speak Here", interactive=True)
                    audio_output = gr.Audio(label="Assistant Response", autoplay=True,
                                            elem_classes="compact-audio")

            # Greet the caller as soon as the page loads.
            interface.load(
                fn=self.welcome_audio,
                outputs=[chatbot, audio_output]
            )

            # A new recording triggers transcription + reply; the chained
            # .then(...) step clears the microphone for the next turn.
            audio_input.change(
                fn=self.process_audio,
                inputs=[audio_input, chatbot],
                outputs=[chatbot, audio_output, audio_input],
                api_name="process_audio"
            ).then(
                lambda: None,
                None,
                audio_input,
                queue=False
            )

        # NOTE(review): share=True publishes a public gradio.live tunnel even
        # though the server binds to 127.0.0.1 — confirm this exposure is
        # intended before deploying.
        interface.launch(
            server_name="127.0.0.1",
            server_port=7860,
            share=True,
            debug=True
        )
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the app and serve it (blocks until exit).
    VoiceChatApp().launch()