# app.py — Hugging Face Space by develops20 (commit 9a4cedc, 2.84 kB)
import gradio as gr
from transformers import pipeline
from gtts import gTTS
import os
import numpy as np
# Initialize Whisper for speech-to-text
# (the "tiny" checkpoint keeps download size and CPU inference cost low)
whisper = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
# Hardcoded knowledge base for Q&A
# Keys are lowercase phrases matched as substrings of the transcribed
# question (see answer_question); values are the canned replies.
knowledge_base = {
"what cars are available": "We have Toyota Camry, Honda Civic, and Ford Mustang.",
"price of camry": "The Toyota Camry starts at $25,000.",
"price of tesla": "The Tesla starts at $60,000."
}
def transcribe(audio):
    """Convert recorded speech to text with the Whisper pipeline.

    Args:
        audio: Either a file path (str) or a Gradio ``(sample_rate, data)``
            tuple produced by a ``gr.Audio`` input component.

    Returns:
        The transcribed text.

    Raises:
        Re-raises any pipeline error after logging a traceback.
    """
    print(f"Transcribing audio: {type(audio)}")
    try:
        if isinstance(audio, tuple):
            # BUG FIX: Gradio emits (sample_rate, data); the original code
            # unpacked this backwards and fed the *sample rate* to Whisper.
            sample_rate, audio_data = audio
            audio_data = np.asarray(audio_data, dtype=np.float32)
            # Whisper expects a mono 1-D waveform; average stereo channels.
            if audio_data.ndim > 1:
                audio_data = audio_data.mean(axis=1)
            # Gradio delivers int16 PCM; scale to [-1.0, 1.0] floats.
            if audio_data.size and np.abs(audio_data).max() > 1.0:
                audio_data = audio_data / 32768.0
            # Supplying the sampling rate lets the pipeline resample to the
            # 16 kHz rate Whisper was trained on.
            result = whisper({"sampling_rate": sample_rate, "raw": audio_data})["text"]
        else:
            # Anything else (e.g. a file path) the pipeline can load itself.
            result = whisper(audio)["text"]
        print(f"Transcription result: {result}")
        return result
    except Exception as e:
        print(f"Error in transcribe: {str(e)}")
        import traceback
        traceback.print_exc()
        raise
def text_to_speech(text):
    """Render *text* as spoken English via Google TTS.

    Args:
        text: The sentence to speak.

    Returns:
        Path to the generated MP3 file (re-raises on any gTTS failure).
    """
    print(f"Generating speech for text: {text}")
    try:
        output_path = "/tmp/response.mp3"
        # Build and save the MP3 in one chained call.
        gTTS(text, lang="en").save(output_path)
        print(f"Speech saved to {output_path}")
        return output_path
    except Exception as e:
        print(f"Error in text_to_speech: {str(e)}")
        import traceback
        traceback.print_exc()
        raise
def answer_question(text):
    """Return the canned reply whose key appears in *text* (case-insensitive).

    Falls back to a polite "try again" message when nothing matches.
    """
    print(f"Answering question: {text}")
    try:
        normalized = text.lower()
        # First key that occurs as a substring of the question wins.
        matched = next((q for q in knowledge_base if q in normalized), None)
        if matched is not None:
            print(f"Found match for key: {matched}")
            return knowledge_base[matched]
        print("No match found in knowledge base")
        return "Sorry, I can help with car availability and prices. Try again!"
    except Exception as e:
        print(f"Error in answer_question: {str(e)}")
        import traceback
        traceback.print_exc()
        raise
def process_audio(audio):
    """Run the full pipeline: speech -> text -> answer -> speech.

    Args:
        audio: Raw Gradio audio input (tuple or path), passed to transcribe().

    Returns:
        A ``(text_answer, mp3_path)`` pair for the two Gradio outputs.
    """
    print(f"Processing audio: {type(audio)}")
    try:
        question = transcribe(audio)
        answer = answer_question(question)
        spoken_path = text_to_speech(answer)
        print(f"Process complete. Response: {answer}, Audio: {spoken_path}")
        return answer, spoken_path
    except Exception as e:
        print(f"Error in process_audio: {str(e)}")
        import traceback
        traceback.print_exc()
        raise
# Gradio interface: mic/file input on the left, text + spoken reply as outputs.
with gr.Blocks() as demo:
    gr.Markdown("# AI Support Agent: Car Dealership")
    audio_input = gr.Audio(label="Speak to the Agent")
    text_output = gr.Textbox(label="Agent Response")
    audio_output = gr.Audio(label="Listen to Response")
    btn = gr.Button("Submit")
    btn.click(fn=process_audio, inputs=audio_input, outputs=[text_output, audio_output])

# BUG FIX: the demo was built but never launched, so running this script
# started no server. Guarded so importing the module stays side-effect free.
if __name__ == "__main__":
    demo.launch()