"""Voice-driven car-dealership support agent built with Gradio.

Pipeline: microphone audio -> Whisper transcription -> keyword lookup in a
small hardcoded knowledge base -> text answer + gTTS speech response.
"""

import os
import tempfile

import gradio as gr
from gtts import gTTS
from transformers import pipeline

# Speech-to-text model; "tiny" keeps download size and inference cost low.
whisper = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")

# Hardcoded knowledge base for Q&A: keyword phrase -> canned answer.
knowledge_base = {
    "what cars are available": "We have Toyota Camry, Honda Civic, and Ford Mustang.",
    "price of camry": "The Toyota Camry starts at $25,000.",
}


def transcribe(audio):
    """Return the Whisper transcription of *audio* (a filepath)."""
    return whisper(audio)["text"]


def text_to_speech(text):
    """Synthesize *text* to speech and return the path of the saved MP3.

    Writes to a unique temporary file instead of a fixed "response.mp3" so
    concurrent requests cannot overwrite each other's audio.
    """
    tts = gTTS(text, lang="en")
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)  # gTTS reopens the path itself; release our handle first
    tts.save(path)
    return path


def answer_question(text):
    """Return the canned answer whose keyword phrase occurs in *text*.

    Falls back to a help message when no knowledge-base key matches.
    """
    lowered = text.lower()  # lowercase once, not per key
    for key, answer in knowledge_base.items():
        if key in lowered:
            return answer
    return "Sorry, I can help with car availability and prices. Try again!"


def process_audio(audio):
    """Full pipeline: recorded audio -> (text response, response-audio path).

    *audio* is the filepath Gradio hands us, or None when the user submits
    without recording anything.
    """
    if audio is None:  # Submit pressed with no recording — don't crash
        msg = "I didn't catch any audio. Please record your question and try again."
        return msg, text_to_speech(msg)
    text = transcribe(audio)
    response = answer_question(text)
    audio_response = text_to_speech(response)
    return response, audio_response


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# AI Support Agent: Car Dealership")
    # type="filepath" makes Gradio deliver the recording as a file path,
    # which is what the Whisper pipeline call in transcribe() expects;
    # the default delivers a (sample_rate, ndarray) tuple instead.
    audio_input = gr.Audio(label="Speak to the Agent", type="filepath")
    text_output = gr.Textbox(label="Agent Response")
    audio_output = gr.Audio(label="Listen to Response")
    btn = gr.Button("Submit")
    btn.click(fn=process_audio, inputs=audio_input, outputs=[text_output, audio_output])

# Guarded so importing this module (e.g. from tests) does not start a server.
if __name__ == "__main__":
    demo.launch()