"""Voice support agent for a car dealership.

Pipeline: microphone audio -> Whisper transcription -> keyword lookup in a
hardcoded knowledge base -> gTTS spoken reply, wired together with Gradio.
"""

import os
import traceback

import gradio as gr
import numpy as np
from gtts import gTTS
from transformers import pipeline

# Speech-to-text model. "tiny" trades accuracy for load/inference speed.
whisper = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")

# Hardcoded knowledge base for Q&A. Keys are matched as substrings of the
# lowercased transcript.
# NOTE(review): Tesla has a price entry but is absent from the "available"
# answer — confirm whether the inventory list or the price list is stale.
knowledge_base = {
    "what cars are available": "We have Toyota Camry, Honda Civic, and Ford Mustang.",
    "price of camry": "The Toyota Camry starts at $25,000.",
    "price of tesla": "The Tesla starts at $60,000.",
}


def transcribe(audio):
    """Transcribe recorded audio to text with Whisper.

    Args:
        audio: Either a filepath string, or Gradio's numpy format — a
            ``(sample_rate, data)`` tuple.

    Returns:
        The transcribed text.

    Raises:
        Exception: re-raises anything the pipeline throws, after logging.
    """
    print(f"Transcribing audio: {type(audio)}")
    try:
        if isinstance(audio, tuple):
            # BUGFIX: Gradio's tuple is (sample_rate, data). The previous
            # code unpacked it the other way around and handed the integer
            # sample rate to Whisper as the waveform.
            sample_rate, data = audio
            # The HF ASR pipeline wants float32 mono; Gradio records int16
            # and may deliver stereo (shape (n, channels)).
            data = data.astype(np.float32)
            if data.ndim > 1:
                data = data.mean(axis=1)
            peak = np.max(np.abs(data))
            if peak > 0:
                data = data / peak  # normalize to [-1, 1]
            inputs = {"sampling_rate": sample_rate, "raw": data}
        else:
            inputs = audio  # presumably a filepath — Whisper accepts it directly
        result = whisper(inputs)["text"]
        print(f"Transcription result: {result}")
        return result
    except Exception as e:
        print(f"Error in transcribe: {str(e)}")
        traceback.print_exc()
        raise


def text_to_speech(text):
    """Render ``text`` to an MP3 file with gTTS and return its path."""
    print(f"Generating speech for text: {text}")
    try:
        tts = gTTS(text, lang="en")
        output_path = "/tmp/response.mp3"
        tts.save(output_path)
        print(f"Speech saved to {output_path}")
        return output_path
    except Exception as e:
        print(f"Error in text_to_speech: {str(e)}")
        traceback.print_exc()
        raise


def answer_question(text):
    """Look up a canned answer whose key appears in the (lowercased) text."""
    print(f"Answering question: {text}")
    try:
        lowered = text.lower()
        for key, answer in knowledge_base.items():
            if key in lowered:
                print(f"Found match for key: {key}")
                return answer
        print("No match found in knowledge base")
        return "Sorry, I can help with car availability and prices. Try again!"
    except Exception as e:
        print(f"Error in answer_question: {str(e)}")
        traceback.print_exc()
        raise


def process_audio(audio):
    """End-to-end handler: audio in -> (text reply, spoken reply path) out."""
    print(f"Processing audio: {type(audio)}")
    try:
        text = transcribe(audio)
        response = answer_question(text)
        audio_response = text_to_speech(response)
        # BUGFIX: the original f-string contained a raw line break, which is
        # a syntax error in a single-quoted string.
        print(f"Process complete. Response: {response}, Audio: {audio_response}")
        return response, audio_response
    except Exception as e:
        print(f"Error in process_audio: {str(e)}")
        traceback.print_exc()
        raise


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# AI Support Agent: Car Dealership")
    # type="numpy" pins the input to the (sample_rate, data) tuple that
    # transcribe() handles explicitly.
    audio_input = gr.Audio(label="Speak to the Agent", type="numpy")
    text_output = gr.Textbox(label="Agent Response")
    audio_output = gr.Audio(label="Listen to Response")
    btn = gr.Button("Submit")
    btn.click(fn=process_audio, inputs=audio_input, outputs=[text_output, audio_output])