"""Gradio chat UI for the Navyabhat/Llava-Phi2 multimodal model.

Collects text, an image, and/or audio (microphone or file upload), echoes
the inputs into a chatbot widget, then runs the model and appends its reply.
"""
import os

import gradio as gr
from PIL import Image

from inference.main import MultiModalPhi2

# Kept for backward compatibility with any external importer; unused here.
messages = []

# Single shared model instance; CPU inference.
multimodal_phi2 = MultiModalPhi2(
    modelname_or_path="Navyabhat/Llava-Phi2",
    temperature=0.2,
    max_new_tokens=1024,
    device="cpu",
)


def add_content(chatbot, text, image, audio_upload, audio_mic) -> gr.Chatbot:
    """Append the user's text/image/audio inputs to the chat history.

    Args:
        chatbot: current chat history (list of (user, bot) tuples).
        text: prompt string, or ""/None when absent.
        image: image filepath, or None.
        audio_upload: uploaded audio filepath, or None.
        audio_mic: microphone recording filepath, or None.

    Returns:
        The updated chat history.

    Raises:
        gr.Error: when no input of any kind was supplied.
    """
    textflag, imageflag, audioflag = False, False, False
    if text not in ["", None]:
        chatbot.append((text, None))
        textflag = True
    if image is not None:
        chatbot.append(((image,), None))
        imageflag = True
    # Prefer the microphone recording; fall back to an uploaded file.
    if audio_mic is not None:
        chatbot.append(((audio_mic,), None))
        audioflag = True
    elif audio_upload is not None:
        chatbot.append(((audio_upload,), None))
        audioflag = True
    if not any([textflag, imageflag, audioflag]):
        # Raise an error if neither text nor a file is provided.
        raise gr.Error("Enter a valid text, image or audio")
    return chatbot


def clear_data():
    """Reset every input widget and the chat history.

    Return order matches clear.click outputs:
    (prompt, image, audio_upload, audio_mic, chatbot).
    """
    return None, None, None, None, []


def run(history, text, image, audio_upload, audio_mic):
    """Run the model on the collected inputs and append its reply.

    Returns the updated history followed by None for each input widget,
    clearing them after a successful run.
    """
    if text in [None, ""]:
        text = None
    # Uploaded audio takes priority over the microphone recording here;
    # add_content prefers the microphone — NOTE(review): confirm intent.
    if audio_upload is not None:
        audio = audio_upload
    elif audio_mic is not None:
        audio = audio_mic
    else:
        audio = None
    print("text", text)
    print("image", image)
    print("audio", audio)
    if image is not None:
        image = Image.open(image)
    outputs = multimodal_phi2(text, audio, image)
    history.append((None, outputs.title()))
    return history, None, None, None, None


with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        bubble_full_width=False,
        avatar_images=(None, os.path.join(os.path.dirname(__file__), "avatar.png")),
    )
    with gr.Row():
        prompt = gr.Textbox(
            scale=4,
            show_label=False,
            placeholder="Enter text and press enter",
            container=False,
        )
        # type="filepath" so run() can pass the path to PIL.Image.open.
        image = gr.Image(type="filepath", label="Image")
        audio_upload = gr.Audio(type="filepath", label="Audio")
        # NOTE(review): gradio 4.x uses sources=[...]; pre-4.x expects
        # source="microphone" — adjust to the pinned gradio version.
        audio_mic = gr.Audio(sources=["microphone"], type="filepath", label="Mic")
    with gr.Row():
        submit = gr.Button()
        clear = gr.Button(value="Clear")

    # Stage the user's inputs into the chat, then run the model on success.
    submit.click(
        add_content,
        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
        outputs=[chatbot],
    ).success(
        run,
        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
        outputs=[chatbot, prompt, image, audio_upload, audio_mic],
    )
    clear.click(
        clear_data,
        outputs=[prompt, image, audio_upload, audio_mic, chatbot],
    )

demo.launch()