import gradio as gr
import numpy as np

from utils.speech_to_text import SpeechRecognizer
from utils.text_to_speech import TextToSpeech


class VoiceChatApp:
    """Gradio voice chat demo: microphone in, canned spoken reply out.

    The app transcribes recorded audio with ``SpeechRecognizer``, answers
    with a fixed test sentence, and speaks that sentence with
    ``TextToSpeech``.
    """

    # Placeholder clip returned when there is nothing to say:
    # 24000 zero samples at 24000 Hz, i.e. one second of silence.
    _SILENCE_SAMPLE_RATE = 24000
    _SILENCE_NUM_SAMPLES = 24000

    def __init__(self):
        self.speech_recognizer = SpeechRecognizer()
        self.tts_engine = TextToSpeech()
        self.welcome_message = (
            "Hello, this is GenCent AI calling. "
            "This is a follow-up call. Am I speaking to Alex?"
        )
        # Initial chat history shown on page load; rebuilt by welcome_audio().
        self.chat_history = []

    def _silence(self):
        """Return a fresh ``(sample_rate, samples)`` clip of int16 silence."""
        return (
            self._SILENCE_SAMPLE_RATE,
            np.zeros(self._SILENCE_NUM_SAMPLES, dtype=np.int16),
        )

    async def _speak(self, text):
        """Synthesize *text* and return ``(sample_rate, int16 samples)``."""
        sample_rate, audio_data = await self.tts_engine.synthesize(text)
        # NOTE(review): astype() truncates rather than rescales. If the TTS
        # engine emits floats in [-1, 1] this would flatten the signal to
        # zeros -- confirm the engine's output range.
        return sample_rate, audio_data.astype(np.int16)

    async def welcome_audio(self):
        """Generate the welcome message audio and the initial chat history.

        Returns:
            A ``(history, audio)`` pair: the chat history containing only
            the welcome message, and the synthesized welcome audio as
            ``(sample_rate, int16 samples)``.
        """
        audio_response = await self._speak(self.welcome_message)
        # Rebuild instead of appending: this runs on every page load, and
        # appending to the shared list duplicated the greeting each time.
        self.chat_history = [(None, self.welcome_message)]
        # Hand out a copy so callers cannot mutate the stored history.
        return list(self.chat_history), audio_response

    async def process_audio(self, audio, history):
        """Process user audio input and generate a spoken response.

        Args:
            audio: Recorded audio as delivered by the ``gr.Audio`` input
                (configured with ``type="numpy"``), or ``None``.
            history: Current chat history as a list of
                ``(user_text, bot_text)`` pairs; may be ``None``.

        Returns:
            A ``(history, audio_response, None)`` triple. The trailing
            ``None`` clears the audio input component. When there is no
            audio or the transcript is empty, ``history`` is returned
            untouched together with one second of silence.
        """
        if audio is None:
            return history, self._silence(), None

        # Speech to text
        text_input = await self.speech_recognizer.transcribe(audio)
        if not text_input:
            return history, self._silence(), None

        # Generate response
        response = "This is a test response. Please confirm if you can hear this clearly."

        # Text to speech
        audio_response = await self._speak(response)

        # Update chat history (an empty Chatbot may hand us None)
        if history is None:
            history = []
        history.append((text_input, response))

        return history, audio_response, None

    async def _on_audio_change(self, audio, history):
        """Event wrapper around ``process_audio`` for ``audio_input.change``.

        ``change`` also fires when a function output updates the component,
        so clearing the input after a reply re-triggers the handler with
        ``audio=None``. Answering that with silence would overwrite the
        assistant's reply in the autoplaying output, so an empty input
        leaves every output as it is.
        """
        if audio is None:
            return gr.update(), gr.update(), gr.update()
        return await self.process_audio(audio, history)

    def launch(self):
        """Build and launch the Gradio interface."""
        with gr.Blocks(title="Voice-Enabled Chatbot") as interface:
            with gr.Row():
                with gr.Column(scale=2):
                    chatbot = gr.Chatbot(label="Chat History", height=400)
                    audio_input = gr.Audio(
                        sources=["microphone"],
                        type="numpy",
                        label="Speak Here",
                        interactive=True,
                    )
                    audio_output = gr.Audio(
                        label="Assistant Response",
                        autoplay=True,
                        elem_classes="compact-audio",
                    )

            # Initial welcome message
            interface.load(
                fn=self.welcome_audio,
                outputs=[chatbot, audio_output],
            )

            # Audio processing chain. The handler's third output already
            # resets the microphone, so no follow-up clearing step is needed.
            audio_input.change(
                fn=self._on_audio_change,
                inputs=[audio_input, chatbot],
                outputs=[chatbot, audio_output, audio_input],
                api_name="process_audio",
            )

        # NOTE(review): share=True requests a public share link even though
        # the server binds to 127.0.0.1 -- confirm this is intended.
        interface.launch(
            server_name="127.0.0.1",
            server_port=7860,
            share=True,
            debug=True,
        )


if __name__ == "__main__":
    app = VoiceChatApp()
    app.launch()