File size: 2,844 Bytes
266eee8
 
 
 
6d5604d
266eee8
 
 
 
 
 
 
c1732f2
9a4cedc
266eee8
 
 
6d5604d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266eee8
 
6d5604d
 
 
9a4cedc
 
 
 
6d5604d
 
 
 
 
266eee8
 
6d5604d
 
 
 
 
 
 
 
 
 
 
 
 
266eee8
 
6d5604d
 
 
 
 
 
 
 
 
 
 
 
266eee8
 
 
 
6d5604d
266eee8
 
b4eb3f6
6d5604d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import os
import tempfile

import gradio as gr
import numpy as np
from gtts import gTTS
from transformers import pipeline

# Initialize Whisper for speech-to-text.
# NOTE: the model is downloaded/loaded at import time, so the first start
# of this script can be slow; "whisper-tiny" is the smallest/fastest variant.
whisper = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")

# Hardcoded knowledge base for Q&A.
# Keys are lowercase phrases matched as substrings of the lowercased
# transcribed question (see answer_question); values are canned answers.
knowledge_base = {
    "what cars are available": "We have Toyota Camry, Honda Civic, and Ford Mustang.",
    "price of camry": "The Toyota Camry starts at $25,000.",
    "price of tesla": "The Tesla starts at $60,000."
}

def transcribe(audio):
    """Transcribe recorded audio to text with the Whisper pipeline.

    Accepts either a file path (str) or Gradio's numpy audio format,
    which is a ``(sample_rate, data)`` tuple — note the order: the
    sample rate comes FIRST. The original code unpacked this tuple
    backwards and fed the sample-rate integer to Whisper.

    Returns the transcribed text; re-raises any pipeline error after
    logging it.
    """
    print(f"Transcribing audio: {type(audio)}")
    try:
        if isinstance(audio, tuple):
            # Gradio emits (sample_rate, numpy_array) for numpy-typed audio.
            sample_rate, audio_data = audio
            audio_data = np.asarray(audio_data)
            if np.issubdtype(audio_data.dtype, np.integer):
                # Gradio records int16 PCM; Whisper expects float in [-1, 1].
                audio_data = audio_data.astype(np.float32) / np.iinfo(audio_data.dtype).max
            if audio_data.ndim > 1:
                # Downmix stereo (samples, channels) to mono; Whisper wants 1-D.
                audio_data = audio_data.mean(axis=1)
            # Pass the sampling rate so the pipeline can resample to 16 kHz.
            whisper_input = {"sampling_rate": sample_rate,
                             "raw": audio_data.astype(np.float32)}
        else:
            # presumably a filepath string from gr.Audio(type="filepath") —
            # the pipeline handles decoding in that case; TODO confirm caller.
            whisper_input = audio
        result = whisper(whisper_input)["text"]
        print(f"Transcription result: {result}")
        return result
    except Exception as e:
        print(f"Error in transcribe: {str(e)}")
        import traceback
        traceback.print_exc()
        raise

def text_to_speech(text):
    """Synthesize `text` to an MP3 with gTTS and return the file path.

    Uses a unique temp file instead of the original fixed "/tmp/response.mp3"
    so concurrent requests don't overwrite each other's audio and the code
    works on platforms without a /tmp directory.

    Re-raises any gTTS/network error after logging it.
    """
    print(f"Generating speech for text: {text}")
    try:
        tts = gTTS(text, lang="en")
        # delete=False: the file must outlive this function so Gradio can
        # stream it back to the client.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            output_path = tmp.name
        tts.save(output_path)
        print(f"Speech saved to {output_path}")
        return output_path
    except Exception as e:
        print(f"Error in text_to_speech: {str(e)}")
        import traceback
        traceback.print_exc()
        raise

def answer_question(text):
    """Answer `text` from the hardcoded knowledge base.

    Matching is a case-insensitive substring test: the first knowledge-base
    key found inside the question wins (dict insertion order). Returns a
    fallback prompt when nothing matches.
    """
    print(f"Answering question: {text}")
    try:
        normalized = text.lower()  # hoisted: was recomputed once per key
        for key, answer in knowledge_base.items():
            if key in normalized:
                print(f"Found match for key: {key}")
                return answer
        print("No match found in knowledge base")
        return "Sorry, I can help with car availability and prices. Try again!"
    except Exception as e:
        print(f"Error in answer_question: {str(e)}")
        import traceback
        traceback.print_exc()
        raise

def process_audio(audio):
    """Run the full voice pipeline: speech -> text -> answer -> speech.

    Returns a (text answer, path to spoken-answer MP3) pair for the two
    Gradio output components. Re-raises any stage's error after logging.
    """
    print(f"Processing audio: {type(audio)}")
    try:
        question = transcribe(audio)
        answer = answer_question(question)
        spoken_answer = text_to_speech(answer)
        print(f"Process complete. Response: {answer}, Audio: {spoken_answer}")
        return answer, spoken_answer
    except Exception as e:
        print(f"Error in process_audio: {str(e)}")
        import traceback
        traceback.print_exc()
        raise

# Gradio interface: one audio input feeding the full pipeline, with the
# answer shown as text and played back as audio.
with gr.Blocks() as demo:
    gr.Markdown("# AI Support Agent: Car Dealership")
    audio_input = gr.Audio(label="Speak to the Agent")
    text_output = gr.Textbox(label="Agent Response")
    audio_output = gr.Audio(label="Listen to Response")
    btn = gr.Button("Submit")
    # process_audio returns (text, mp3 path), matching the two outputs below.
    btn.click(fn=process_audio, inputs=audio_input, outputs=[text_output, audio_output])
# NOTE(review): no demo.launch() is visible in this view — confirm the app
# is launched elsewhere (e.g. by the hosting platform) or add one.