File size: 2,844 Bytes
266eee8
 
 
 
6d5604d
266eee8
 
 
 
 
 
 
c1732f2
9a4cedc
266eee8
 
 
6d5604d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266eee8
 
6d5604d
 
 
9a4cedc
 
 
 
6d5604d
 
 
 
 
266eee8
 
6d5604d
 
 
 
 
 
 
 
 
 
 
 
 
266eee8
 
6d5604d
 
 
 
 
 
 
 
 
 
 
 
266eee8
 
 
 
6d5604d
266eee8
 
b4eb3f6
6d5604d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import os
import tempfile

import gradio as gr
import numpy as np
from gtts import gTTS
from transformers import pipeline

# Initialize Whisper for speech-to-text.
# NOTE: the model is downloaded/loaded at import time, so the first start
# of this script can be slow; "whisper-tiny" is the smallest/fastest variant.
whisper = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")

# Hardcoded knowledge base for Q&A.
# Keys are lowercase phrases matched as substrings of the lowercased
# transcribed question (see answer_question); values are canned answers.
knowledge_base = {
    "what cars are available": "We have Toyota Camry, Honda Civic, and Ford Mustang.",
    "price of camry": "The Toyota Camry starts at $25,000.",
    "price of tesla": "The Tesla starts at $60,000."
}

def transcribe(audio):
    """Transcribe recorded audio to text with the Whisper pipeline.

    Accepts either a file path (str) or Gradio's numpy audio format,
    which is a ``(sample_rate, data)`` tuple — note the order: the
    sample rate comes FIRST. The original code unpacked this tuple
    backwards and fed the sample-rate integer to Whisper.

    Returns the transcribed text; re-raises any pipeline error after
    logging it.
    """
    print(f"Transcribing audio: {type(audio)}")
    try:
        if isinstance(audio, tuple):
            # Gradio emits (sample_rate, numpy_array) for numpy-typed audio.
            sample_rate, audio_data = audio
            audio_data = np.asarray(audio_data)
            if np.issubdtype(audio_data.dtype, np.integer):
                # Gradio records int16 PCM; Whisper expects float in [-1, 1].
                audio_data = audio_data.astype(np.float32) / np.iinfo(audio_data.dtype).max
            if audio_data.ndim > 1:
                # Downmix stereo (samples, channels) to mono; Whisper wants 1-D.
                audio_data = audio_data.mean(axis=1)
            # Pass the sampling rate so the pipeline can resample to 16 kHz.
            whisper_input = {"sampling_rate": sample_rate,
                             "raw": audio_data.astype(np.float32)}
        else:
            # presumably a filepath string from gr.Audio(type="filepath") —
            # the pipeline handles decoding in that case; TODO confirm caller.
            whisper_input = audio
        result = whisper(whisper_input)["text"]
        print(f"Transcription result: {result}")
        return result
    except Exception as e:
        print(f"Error in transcribe: {str(e)}")
        import traceback
        traceback.print_exc()
        raise

def text_to_speech(text):
    """Synthesize `text` to an MP3 with gTTS and return the file path.

    Uses a unique temp file instead of the original fixed "/tmp/response.mp3"
    so concurrent requests don't overwrite each other's audio and the code
    works on platforms without a /tmp directory.

    Re-raises any gTTS/network error after logging it.
    """
    print(f"Generating speech for text: {text}")
    try:
        tts = gTTS(text, lang="en")
        # delete=False: the file must outlive this function so Gradio can
        # stream it back to the client.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            output_path = tmp.name
        tts.save(output_path)
        print(f"Speech saved to {output_path}")
        return output_path
    except Exception as e:
        print(f"Error in text_to_speech: {str(e)}")
        import traceback
        traceback.print_exc()
        raise

def answer_question(text):
    """Answer `text` from the hardcoded knowledge base.

    Matching is a case-insensitive substring test: the first knowledge-base
    key found inside the question wins (dict insertion order). Returns a
    fallback prompt when nothing matches.
    """
    print(f"Answering question: {text}")
    try:
        normalized = text.lower()  # hoisted: was recomputed once per key
        for key, answer in knowledge_base.items():
            if key in normalized:
                print(f"Found match for key: {key}")
                return answer
        print("No match found in knowledge base")
        return "Sorry, I can help with car availability and prices. Try again!"
    except Exception as e:
        print(f"Error in answer_question: {str(e)}")
        import traceback
        traceback.print_exc()
        raise

def process_audio(audio):
    """Run the full voice pipeline: speech -> text -> answer -> speech.

    Returns a (text answer, path to spoken-answer MP3) pair for the two
    Gradio output components. Re-raises any stage's error after logging.
    """
    print(f"Processing audio: {type(audio)}")
    try:
        question = transcribe(audio)
        answer = answer_question(question)
        spoken_answer = text_to_speech(answer)
        print(f"Process complete. Response: {answer}, Audio: {spoken_answer}")
        return answer, spoken_answer
    except Exception as e:
        print(f"Error in process_audio: {str(e)}")
        import traceback
        traceback.print_exc()
        raise

# Gradio interface: one audio input feeding the full pipeline, with the
# answer shown as text and played back as audio.
with gr.Blocks() as demo:
    gr.Markdown("# AI Support Agent: Car Dealership")
    audio_input = gr.Audio(label="Speak to the Agent")
    text_output = gr.Textbox(label="Agent Response")
    audio_output = gr.Audio(label="Listen to Response")
    btn = gr.Button("Submit")
    # process_audio returns (text, mp3 path), matching the two outputs below.
    btn.click(fn=process_audio, inputs=audio_input, outputs=[text_output, audio_output])
# NOTE(review): no demo.launch() is visible in this view — confirm the app
# is launched elsewhere (e.g. by the hosting platform) or add one.