In [1]:
import hydra
import os
import src.arguments
import tqdm
import pickle

  from .autonotebook import tqdm as notebook_tqdm


[2023-09-27 12:23:21,998] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [2]:
# Dataset config to browse. Alternative used previously: "data/vg-grit-local".
config_name = "data/vg-densecap-local"
with hydra.initialize(version_base=None, config_path="../../src/conf"):
    cfg = hydra.compose(config_name=config_name)

# Views instantiated with with_image=False; these are the ones scanned later to
# build the image_id -> index lookup (presumably cheaper because no image is
# loaded -- TODO confirm against the dataset implementation).
train_dataset_no_image, eval_dataset_no_image = (
    hydra.utils.instantiate(cfg.data, split=split_name, with_image=False)
    for split_name in ("train", "test")
)

# Full views using the config's default image handling.
# Original author's note on these: "10 it/s, needs 2.5h".
train_dataset, eval_dataset = (
    hydra.utils.instantiate(cfg.data, split=split_name)
    for split_name in ("train", "test")
)

Found cached dataset visual_genome-densecap-local (/home/v-xiaokhuang/segment-caption-anything-v2/.data.cache/visual_genome-densecap-local/densecap-d21508b8e9fe7010/0.0.0/5af7ab7884b0ff8c43a600fd7b27650836642710744ca83173c50ecc337b944d)
Found cached dataset visual_genome-densecap-local (/home/v-xiaokhuang/segment-caption-anything-v2/.data.cache/visual_genome-densecap-local/densecap-d21508b8e9fe7010/0.0.0/5af7ab7884b0ff8c43a600fd7b27650836642710744ca83173c50ecc337b944d)
Found cached dataset visual_genome-densecap-local (/home/v-xiaokhuang/segment-caption-anything-v2/.data.cache/visual_genome-densecap-local/densecap-92dcf1a55c11eb80/0.0.0/5af7ab7884b0ff8c43a600fd7b27650836642710744ca83173c50ecc337b944d)
Found cached dataset visual_genome-densecap-local (/home/v-xiaokhuang/segment-caption-anything-v2/.data.cache/visual_genome-densecap-local/densecap-92dcf1a55c11eb80/0.0.0/5af7ab7884b0ff8c43a600fd7b27650836642710744ca83173c50ecc337b944d)


In [3]:
def build_image_id_to_dataset_id(dataset):
    """Build a lookup from each sample's ``image_id`` to its index in ``dataset``.

    Iterates the whole dataset once, showing a tqdm progress bar.

    Args:
        dataset: Iterable of samples; each sample must support ``sample["image_id"]``.

    Returns:
        dict mapping ``image_id`` to the sample's position in iteration order.
        If an ``image_id`` occurs more than once, the last position wins.
    """
    progress = tqdm.tqdm(dataset)
    return {record["image_id"]: position for position, record in enumerate(progress)}

In [4]:
def _load_or_build_pickle(cache_path, build_fn):
    """Return the object cached at ``cache_path``, building and caching it if absent.

    Args:
        cache_path: Path of the pickle file used as the cache.
        build_fn: Zero-argument callable that computes the object; it is only
            called when ``cache_path`` does not exist.

    Returns:
        The unpickled cached object, or the freshly built one.
    """
    if os.path.exists(cache_path):
        # NOTE: pickle.load can execute arbitrary code; only point this at cache
        # files produced by this notebook.
        with open(cache_path, "rb") as f:
            return pickle.load(f)

    result = build_fn()
    # Write to a sibling temp file and rename it into place, so an interrupted
    # dump never leaves a truncated pickle that later runs would try to load.
    partial_path = f"{cache_path}.tmp"
    with open(partial_path, "wb") as f:
        pickle.dump(result, f)
    os.replace(partial_path, cache_path)
    return result


tmp_dir = "tmp/data"
# exist_ok=True already makes this a no-op when the directory exists.
os.makedirs(tmp_dir, exist_ok=True)

# Cache files are keyed by the config's base name (e.g. "vg-densecap-local").
config_name_ = os.path.basename(config_name)
plk_train_image_id_to_dataset = os.path.join(tmp_dir, f"{config_name_}.train_image_id_to_dataset.pkl")
plk_eval_image_id_to_dataset = os.path.join(tmp_dir, f"{config_name_}.eval_image_id_to_dataset.pkl")

# The lambdas defer the full dataset scan so it only runs on a cache miss.
train_image_id_to_dataset_id = _load_or_build_pickle(
    plk_train_image_id_to_dataset,
    lambda: build_image_id_to_dataset_id(train_dataset_no_image),
)
eval_image_id_to_dataset_id = _load_or_build_pickle(
    plk_eval_image_id_to_dataset,
    lambda: build_image_id_to_dataset_id(eval_dataset_no_image),
)

In [5]:
import gradio as gr
import pandas as pd
import pprint

def show_image_based_on_image_id(image_id, region_id):
    """Look up an image by id and return what the gradio demo displays for it.

    The image is searched first in the train index, then in the eval index.
    One region of the image is highlighted: the one whose ``region_id`` matches,
    or the first region when ``region_id`` is blank.

    Args:
        image_id: Image id as text (or int); converted with ``int()``.
        region_id: Region id as text (or int). ``None``, an empty string, or a
            whitespace-only string selects the first region.

    Returns:
        A 3-tuple ``(text, (image, [(bbox, phrase)]), df)``:
        ``text`` is the pretty-printed sample without its "image" and "regions"
        entries, ``bbox`` is ``(x, y, x + width, y + height)`` of the selected
        region, ``phrase`` is that region's first phrase, and ``df`` is a
        DataFrame of all regions of the image.

    Raises:
        ValueError: If ``image_id`` or a non-blank ``region_id`` is not an integer.
        KeyError: If ``image_id`` is in neither the train nor the eval index.
        IndexError: If the image has no regions, or ``region_id`` is not among them.
    """
    image_id = int(image_id)
    # Treat a missing or whitespace-only region id as "not specified" rather
    # than letting int() fail on it.
    if region_id is None or str(region_id).strip() == "":
        region_id = None
    else:
        region_id = int(region_id)

    if image_id in train_image_id_to_dataset_id:
        image_id_to_dataset_id = train_image_id_to_dataset_id
        dataset = train_dataset
    elif image_id in eval_image_id_to_dataset_id:
        image_id_to_dataset_id = eval_image_id_to_dataset_id
        dataset = eval_dataset
    else:
        raise KeyError(f"image_id {image_id} not found in the train or eval index")

    sample = dataset[image_id_to_dataset_id[image_id]]

    # Read instead of pop: the sample object returned by the dataset is left
    # untouched in case the dataset hands out shared/cached objects.
    image = sample["image"]
    df = pd.DataFrame(sample["regions"])
    metadata = {key: value for key, value in sample.items() if key not in ("image", "regions")}

    if df.empty:
        raise IndexError(f"image_id {image_id} has no regions")

    if region_id is None:
        region = df.iloc[0]
    else:
        matches = df[df["region_id"] == region_id]
        if matches.empty:
            raise IndexError(f"region_id {region_id} not found in image_id {image_id}")
        region = matches.iloc[0]

    # Only the first phrase of the region is used as the annotation label.
    phrase = region["phrases"][0]
    x = region["x"]
    y = region["y"]
    # Convert (x, y, width, height) into corner form (x1, y1, x2, y2).
    bbox = (x, y, x + region["width"], y + region["height"])

    text = pprint.pformat(metadata)

    return text, (image, [(bbox, phrase)]), df

# Two free-text inputs (image id, region id) feed the lookup function; its three
# return values are rendered as text, an annotated image, and a dataframe.
demo = gr.Interface(
    show_image_based_on_image_id,
    ["text", "text"],
    ["text", "annotatedimage", "dataframe"],
)

# Start the local gradio server for this demo.
demo.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [6]:
import pandas as pd
import pprint
# Fetch the first eval sample once and reuse it; indexing the dataset a second
# time would load the same sample (including its image) again.
sample = eval_dataset[0]
# Split the sample: regions go into a DataFrame, the image is dropped, and the
# remaining metadata is pretty-printed.
df = pd.DataFrame(sample.pop("regions"))
sample.pop("image")
pprint.pprint(sample)

{'coco_url': 'https://cs.stanford.edu/people/rak248/VG_100K/2342728.jpg',
 'file_name': 'VG_100K/2342728.jpg',
 'height': 500,
 'image_id': 2342728,
 'task_type': 'caption',
 'width': 333}


In [7]:
# Sanity check: a known region id of this image can be found in the regions frame.
df[df["region_id"].eq(3491943)].iloc[0]["region_id"]

3491943