File size: 4,698 Bytes
62557b1
ee45f56
62557b1
ee45f56
6ef634d
ee45f56
 
 
 
 
 
 
 
82be472
ea1807a
 
 
 
82be472
b25273d
ee45f56
 
 
 
 
 
 
62557b1
 
ee45f56
 
 
 
 
 
 
 
82be472
ee45f56
 
 
 
 
 
 
 
 
 
 
 
 
62557b1
 
 
 
 
 
 
 
 
 
 
 
 
 
ee45f56
 
 
 
 
 
 
 
 
 
 
 
 
 
82be472
ee45f56
 
62557b1
 
ee45f56
 
 
62557b1
ee45f56
 
 
62557b1
ee45f56
 
 
 
 
 
 
 
 
 
 
 
62557b1
ee45f56
 
62557b1
ee45f56
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import gradio as gr
import logging
import random
import os
#
from datasets import load_dataset
from huggingface_hub import login

# Best-effort Hugging Face authentication: a failed login must not stop the
# app from starting, so ordinary errors are logged and ignored. Only
# ``Exception`` is caught, so KeyboardInterrupt/SystemExit still propagate
# instead of being silently swallowed.
try:
    login()
except Exception as exc:
    logging.warning("Hugging Face login skipped: %s", exc)

# Load every ImageInWords configuration used by the UI up front; any failure
# is re-raised as a ValueError so the script stops here.
try:
    iiw_400 = load_dataset('google/imageinwords', token="", trust_remote_code=True, name="IIW-400")
    docci_test = load_dataset('google/imageinwords', token="", trust_remote_code=True, name="DOCCI_Test")
    locnar_eval = load_dataset('google/imageinwords', token="", trust_remote_code=True, name="LocNar_Eval")
    cm_3600 = load_dataset('google/imageinwords', token="", trust_remote_code=True, name="CM_3600")
except Exception as e:
    # Exceptions do not apply %-style arguments the way logging calls do, so
    # the message is formatted eagerly; ``from e`` keeps the original traceback.
    raise ValueError(f"could not fetch the datasets with error: {e}") from e

# Lookup table from the dataset label used as selector value to the
# corresponding loaded dataset object.
_SELECTOR_TO_DATASET = dict(
    zip(
        ("IIW-400", "DOCCI_Test", "LocNar_Eval", "CM_3600"),
        (iiw_400, docci_test, locnar_eval, cm_3600),
    )
)


# Emoji shown next to each rating metric. Written as escape sequences so the
# characters cannot be garbled by a wrong file encoding.
_RATING_EMOJI = {
    "Comprehensiveness": "\U0001F4DA",  # Books
    "Specificity": "\U0001F3AF",  # Bullseye
    "Hallucination": "\U0001F47B",  # Ghost
    "First few line(s) as tldr": "\U0001F50D",  # Magnifying glass tilted left
    "Human Like": "\U0001F440",  # Eyes
}


def display_iiw_data_with_slider_change(dataset_type, index):
    """Build the HTML fragments for one record of the selected dataset.

    Args:
        dataset_type: Key of ``_SELECTOR_TO_DATASET`` (for example
            ``"IIW-400"``). ``"LocNar_Eval"`` is read from its
            ``"validation"`` split, every other dataset from ``"test"``.
        index: Position of the record inside the split. It is coerced with
            ``int`` before being used as a row index.

    Returns:
        A 5-tuple of HTML strings ``(image_key_html, image_html, iiw_text,
        iiw_p5b_text, ratings)``. The last three are empty strings when the
        record lacks the corresponding field.
    """
    dataset_split, image_key, image_url_key = "test", "image/key", "image/url"
    if dataset_type == "LocNar_Eval":
        dataset_split = "validation"
    if dataset_type == "DOCCI_Test":
        # DOCCI records use different field names for the key and the URL.
        image_url_key = "image/thumbnail_url"
        image_key = "image"

    # NOTE(review): the slider value may arrive as a float depending on the
    # Gradio version; row lookup needs an integer, so normalise it here.
    index = int(index)

    logging.debug("SELECTION: %s : %s: %s", dataset_type, dataset_split, index)
    data = _SELECTOR_TO_DATASET[dataset_type][dataset_split][index]
    image_html = f'<img src="{data[image_url_key]}" style="width:100%; max-width:800px; height:auto;">'
    image_key_html = f"<p style='font-size: 10px'>Image Key: {data[image_key]}</p>"

    iiw_text, iiw_p5b_text, ratings = "", "", ""
    if "IIW" in data:
        iiw_text = f"<h2>IIW Human-Authored Descriptions</h2><p style='font-size: 16px'>{data['IIW']}</p>"

    if "IIW-P5B" in data:
        iiw_p5b_text = f"<h2>IIW PaLI-5B Generated Descriptions</h2><p style='font-size: 16px'>{data['IIW-P5B']}</p>"

    side_by_side = data.get('iiw-human-sxs-iiw-p5b')
    if side_by_side is not None:
        rating_rows = []
        for key, value in side_by_side.items():
            # Keep only the part after "metrics/" when the key carries it.
            metric = key.split("metrics/")[-1]
            emoji = _RATING_EMOJI.get(metric, "")
            rating_rows.append(
                f"<p style='font-size: 16px'>{emoji} <strong>{metric}</strong>: {value}</p>"
            )
        ratings = "<h2>Ratings</h2>" + "".join(rating_rows)
    return image_key_html, image_html, iiw_text, iiw_p5b_text, ratings


def display_iiw_data_with_dataset_change(dataset_type, index):
    """Build a fresh slider plus the HTML for the first record of a dataset.

    ``index`` is accepted but not used: the slider is created with value 0
    and the record at position 0 is always the one rendered.

    Returns:
        ``(slider, image_key_html, image_html, iiw_text, iiw_p5b_text,
        ratings)`` where ``slider`` spans ``0 .. max_index(dataset_type) - 1``.
    """
    last_position = max_index(dataset_type) - 1
    slider = gr.Slider(minimum=0, maximum=last_position, label="Dataset Size", value=0)
    (
        image_key_html,
        image_html,
        iiw_text,
        iiw_p5b_text,
        ratings,
    ) = display_iiw_data_with_slider_change(dataset_type, index=0)
    return (slider, image_key_html, image_html, iiw_text, iiw_p5b_text, ratings)


def max_index(dataset_type):
    """Return the number of records in the split read for ``dataset_type``.

    ``"LocNar_Eval"`` is measured on its ``"validation"`` split; every other
    dataset is measured on ``"test"``.
    """
    dataset_split = "validation" if dataset_type == "LocNar_Eval" else "test"

    logging.debug(f"SELECTION: {dataset_type} : {dataset_split}")
    return len(_SELECTOR_TO_DATASET[dataset_type][dataset_split])


# Page layout: a dataset selector and slider on top, the image on the left and
# its key, descriptions and ratings on the right.
with gr.Blocks() as demo:
    gr.Markdown("# ImageInWords: Unlocking Hyper-Detailed Image Descriptions")
    gr.Markdown("Slide across the slider to see various examples across the different IIW datasets.")

    with gr.Row():
        dataset_selector = gr.Radio(["IIW-400", "DOCCI_Test", "LocNar_Eval", "CM_3600"], value="IIW-400", label="IIW Datasets")
        # Computing the initial state here also creates the slider inside this row.
        slider, image_key_html, image_html, iiw_text, iiw_p5b_text, ratings = display_iiw_data_with_dataset_change(dataset_selector.value, index=0)

    with gr.Row():
        with gr.Column():
            image_output = gr.HTML(image_html)

        with gr.Column():
            image_key_output = gr.HTML(image_key_html)
            # Always create the three text components, even when the first
            # record has no content for them: the event handlers below list
            # them as outputs, so skipping one would raise a NameError.
            iiw_text_output = gr.HTML(iiw_text)
            iiw_p5b_text_output = gr.HTML(iiw_p5b_text)
            ratings_output = gr.HTML(ratings)

    # Moving the slider re-renders the record at the new position.
    slider.change(display_iiw_data_with_slider_change, inputs=[dataset_selector, slider], outputs=[image_key_output, image_output, iiw_text_output, iiw_p5b_text_output, ratings_output])
    # Picking another dataset replaces the slider and shows that dataset's first record.
    dataset_selector.change(display_iiw_data_with_dataset_change, inputs=[dataset_selector, slider], outputs=[slider, image_key_output, image_output, iiw_text_output, iiw_p5b_text_output, ratings_output])

demo.launch(debug=True)