# Hugging Face Space: voice Q&A agent (status: Running)
| import gradio as gr | |
| from transformers import pipeline | |
| from gtts import gTTS | |
| import os | |
| import numpy as np | |
# Initialize Whisper for speech-to-text.
# NOTE(review): "whisper-tiny" is the smallest checkpoint — fastest to load,
# least accurate; swap for a larger model if transcription quality matters.
whisper = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")

# Hardcoded knowledge base for Q&A.
# Keys are lowercase phrases matched as substrings of the lowered question
# (see answer_question); values are canned answers returned verbatim.
knowledge_base = {
    "what cars are available": "We have Toyota Camry, Honda Civic, and Ford Mustang.",
    "price of camry": "The Toyota Camry starts at $25,000.",
    "price of tesla": "The Tesla starts at $60,000."
}
def transcribe(audio):
    """Transcribe a Gradio audio input to text with the Whisper pipeline.

    Parameters:
        audio: either a filepath string, or the ``(sample_rate, numpy_data)``
            tuple that ``gr.Audio`` produces for microphone/numpy input.

    Returns:
        The transcribed text string.

    Raises:
        Re-raises any pipeline error after printing a traceback.
    """
    print(f"Transcribing audio: {type(audio)}")
    try:
        if isinstance(audio, tuple):
            # BUG FIX: gr.Audio yields (sample_rate, data). The original code
            # unpacked `audio_data, _ = audio`, which kept the SAMPLE RATE and
            # threw away the waveform. Unpack in the correct order and pass the
            # sampling rate through, which Whisper needs to resample correctly.
            sample_rate, data = audio
            # Whisper expects float32 mono; gr.Audio delivers int16 PCM.
            if data.dtype != np.float32:
                data = data.astype(np.float32) / 32768.0
            if data.ndim > 1:
                data = data.mean(axis=1)  # downmix stereo to mono
            payload = {"sampling_rate": sample_rate, "raw": data}
        else:
            # A filepath (or pre-shaped input) passes straight through.
            payload = audio
        result = whisper(payload)["text"]
        print(f"Transcription result: {result}")
        return result
    except Exception as e:
        print(f"Error in transcribe: {str(e)}")
        import traceback
        traceback.print_exc()
        raise
def text_to_speech(text):
    """Render *text* as spoken English via gTTS.

    Saves an MP3 to a fixed temp path and returns that path; re-raises any
    gTTS/network error after printing a traceback.
    """
    print(f"Generating speech for text: {text}")
    try:
        output_path = "/tmp/response.mp3"
        gTTS(text, lang="en").save(output_path)
        print(f"Speech saved to {output_path}")
        return output_path
    except Exception as e:
        print(f"Error in text_to_speech: {str(e)}")
        import traceback
        traceback.print_exc()
        raise
def answer_question(text, kb=None):
    """Look up a canned answer for *text* in the knowledge base.

    Matching is case-insensitive substring search: the first key (in dict
    insertion order) found inside the lowered question wins.

    Parameters:
        text: the user's question.
        kb: optional mapping of lowercase key-phrase -> answer. Defaults to
            the module-level ``knowledge_base`` (added as a backward-compatible
            generalization so the lookup table is no longer hard-wired).

    Returns:
        The matched answer, or a fallback prompt when nothing matches.
    """
    if kb is None:
        kb = knowledge_base
    print(f"Answering question: {text}")
    lowered = text.lower()  # hoisted: the original lowered `text` once per key
    for key, answer in kb.items():
        if key in lowered:
            print(f"Found match for key: {key}")
            return answer
    print("No match found in knowledge base")
    return "Sorry, I can help with car availability and prices. Try again!"
def process_audio(audio):
    """Run the full voice pipeline: speech -> text -> answer -> speech.

    Returns a ``(answer_text, mp3_path)`` pair for the two Gradio outputs;
    re-raises any stage's error after printing a traceback.
    """
    print(f"Processing audio: {type(audio)}")
    try:
        question = transcribe(audio)
        answer = answer_question(question)
        spoken = text_to_speech(answer)
        print(f"Process complete. Response: {answer}, Audio: {spoken}")
        return answer, spoken
    except Exception as e:
        print(f"Error in process_audio: {str(e)}")
        import traceback
        traceback.print_exc()
        raise
# Gradio interface: one mic/file input, one text + one audio output.
with gr.Blocks() as demo:
    gr.Markdown("# AI Support Agent: Car Dealership")
    audio_input = gr.Audio(label="Speak to the Agent")  # fed to process_audio
    text_output = gr.Textbox(label="Agent Response")
    audio_output = gr.Audio(label="Listen to Response")
    btn = gr.Button("Submit")
    # On click, process_audio's (text, mp3 path) pair fills the two outputs in order.
    btn.click(fn=process_audio, inputs=audio_input, outputs=[text_output, audio_output])
# NOTE(review): no demo.launch() is visible in this chunk — HF Spaces auto-launches
# a top-level `demo`, but local runs need `if __name__ == "__main__": demo.launch()`.
# Confirm whether the file continues past this point before adding one.