# Hydra data config to inspect. The commented-out line is the alternative
# config kept by the author for quick switching.
# config_name = "data/vg-grit-local"
config_name = "data/vg-densecap-local"

# Compose the config while the `hydra.initialize` context is active;
# `config_path` is given relative to this notebook.
with hydra.initialize(version_base=None, config_path="../../src/conf"):
    cfg = hydra.compose(config_name=config_name)

# Instantiation order matches the original cell: train/test with
# `with_image=False` first, then train/test with the default settings.
# NOTE(review): `with_image=False` presumably skips image loading so the
# dataset can be scanned quickly for ids -- confirm against the dataset class.
train_dataset_no_image, eval_dataset_no_image = (
    hydra.utils.instantiate(cfg.data, split=split_name, with_image=False)
    for split_name in ("train", "test")
)

# Author's timing note for these two datasets: "10 it/s, needs 2.5h"
# (presumably the cost of iterating them in full -- confirm).
train_dataset, eval_dataset = (
    hydra.utils.instantiate(cfg.data, split=split_name)
    for split_name in ("train", "test")
)
# %%
def build_image_id_to_dataset_id(dataset):
    """Map every sample's ``image_id`` to its position in ``dataset``.

    Args:
        dataset: Iterable of samples that support ``sample["image_id"]``.

    Returns:
        dict: ``image_id`` -> index of the sample in iteration order. If an
        ``image_id`` occurs more than once, the last position wins.
    """
    image_id_to_dataset_id = {}
    for idx, sample in enumerate(tqdm.tqdm(dataset)):
        image_id_to_dataset_id[sample["image_id"]] = idx
    return image_id_to_dataset_id


# %%
def _load_or_build_image_id_index(cache_path, dataset):
    """Return the ``image_id`` index of ``dataset``, cached as a pickle.

    Args:
        cache_path: Pickle file holding a previously built index.
        dataset: Dataset to scan when ``cache_path`` does not exist yet.

    Returns:
        dict: The mapping produced by ``build_image_id_to_dataset_id``.
    """
    if os.path.exists(cache_path):
        # SECURITY: unpickling can execute arbitrary code. Only point this at
        # cache files written by this notebook, never at downloaded files.
        with open(cache_path, "rb") as f:
            return pickle.load(f)

    index = build_image_id_to_dataset_id(dataset)
    # Dump to a sibling file and rename afterwards, so an interrupted run
    # never leaves a truncated pickle that the next run would try to load.
    partial_path = f"{cache_path}.partial"
    with open(partial_path, "wb") as f:
        pickle.dump(index, f)
    os.replace(partial_path, cache_path)
    return index


tmp_dir = "tmp/data"
# `exist_ok=True` already covers the "directory exists" case.
os.makedirs(tmp_dir, exist_ok=True)

# Cache files are keyed by the config's base name so different data configs
# do not overwrite each other's index.
config_name_ = os.path.basename(config_name)
plk_train_image_id_to_dataset = os.path.join(tmp_dir, f"{config_name_}.train_image_id_to_dataset.pkl")
plk_eval_image_id_to_dataset = os.path.join(tmp_dir, f"{config_name_}.eval_image_id_to_dataset.pkl")

# The indexes are built from the `with_image=False` datasets.
train_image_id_to_dataset_id = _load_or_build_image_id_index(
    plk_train_image_id_to_dataset, train_dataset_no_image
)
eval_image_id_to_dataset_id = _load_or_build_image_id_index(
    plk_eval_image_id_to_dataset, eval_dataset_no_image
)
def show_image_based_on_image_id(image_id, region_id):
    """Look up one image by ``image_id`` and annotate one of its regions.

    Used as the callback of the Gradio interface (two text inputs; text,
    annotated-image and dataframe outputs). The train index is consulted
    first; ids it does not contain are looked up in the eval index.

    Args:
        image_id: Image id as text or int; converted with ``int()``.
        region_id: Region id as text or int. ``None`` or a blank string
            selects the first region of the image.

    Returns:
        tuple: ``(text, (image, [(bbox, phrase)]), df)`` where ``text`` is the
        pretty-printed sample without its ``"image"`` and ``"regions"``
        entries, ``bbox`` is ``(x, y, x + width, y + height)`` of the chosen
        region, ``phrase`` is that region's first phrase, and ``df`` is a
        DataFrame built from all regions of the image.

    Raises:
        ValueError: ``image_id`` or a non-blank ``region_id`` is not an integer.
        KeyError: ``image_id`` is in neither the train nor the eval index.
        IndexError: The image has no regions, or ``region_id`` is not one of them.
    """
    image_id = int(image_id)
    # A cleared text box may deliver "", whitespace, or None; all mean "no region chosen".
    if region_id is None or str(region_id).strip() == "":
        region_id = None
    else:
        region_id = int(region_id)

    if image_id in train_image_id_to_dataset_id:
        image_id_to_dataset_id = train_image_id_to_dataset_id
        dataset = train_dataset
    elif image_id in eval_image_id_to_dataset_id:
        image_id_to_dataset_id = eval_image_id_to_dataset_id
        dataset = eval_dataset
    else:
        raise KeyError(f"image_id {image_id} not found in the train or eval dataset")

    sample = dataset[image_id_to_dataset_id[image_id]]

    # Read instead of pop: the sample object belongs to the dataset, and
    # removing keys from it would break later lookups on datasets that hand
    # back stored dicts.
    image = sample["image"]
    df = pd.DataFrame(sample["regions"])
    metadata = {key: value for key, value in sample.items() if key not in ("image", "regions")}

    if region_id is None:
        candidates = df
    else:
        candidates = df[df["region_id"] == region_id]
    if candidates.empty:
        if region_id is None:
            raise IndexError(f"image_id {image_id} has no regions")
        raise IndexError(f"region_id {region_id} not found for image_id {image_id}")
    region = candidates.iloc[0]

    # Only the first phrase of the region is shown as the box label.
    phrase = region["phrases"][0]
    x = region["x"]
    y = region["y"]
    # Regions store (x, y, width, height); the returned box is (x1, y1, x2, y2).
    bbox = (x, y, x + region["width"], y + region["height"])

    text = pprint.pformat(metadata)

    return text, (image, [(bbox, phrase)]), df
# %%
import pandas as pd
import pprint

# Fetch the first eval sample once and reuse it for both the region table and
# the metadata printout, instead of indexing the dataset twice.
sample = eval_dataset[0]
df = pd.DataFrame(sample.pop("regions"))
# Drop the image so only the remaining metadata fields are printed.
sample.pop("image")
print(pprint.pformat(sample))

# %%
# Sanity check: filtering the region table by `region_id` returns the matching
# row. 3491943 is a region id of this sample (the recorded cell output echoes
# it); an id that is not in the table would raise IndexError at `.iloc[0]`.
df[df["region_id"] == 3491943].iloc[0]["region_id"]