# %% Imports
import os
import pickle
import warnings

import hydra
import tqdm

# Not referenced by name in the cells below.
# NOTE(review): presumably imported for its side effects (e.g. registering
# config/argument classes) — confirm before removing.
import src.arguments

# %% Compose the data config and instantiate the datasets
# Alternative config that can be swapped in: "data/vg-grit-local"
config_name = "data/vg-densecap-local"
with hydra.initialize(version_base=None, config_path="../../src/conf"):
    cfg = hydra.compose(config_name=config_name)

# The `with_image=False` variants are the ones iterated below to build the
# image-id indexes.
train_dataset_no_image = hydra.utils.instantiate(cfg.data, split="train", with_image=False)
eval_dataset_no_image = hydra.utils.instantiate(cfg.data, split="test", with_image=False)
train_dataset = hydra.utils.instantiate(cfg.data, split="train")  # 10 it/s, needs 2.5h
eval_dataset = hydra.utils.instantiate(cfg.data, split="test")  # 10 it/s, needs 2.5h


# %% Helpers: image_id -> dataset index
def build_image_id_to_dataset_id(dataset):
    """Map each sample's ``image_id`` to the sample's position in ``dataset``.

    Args:
        dataset: Iterable of samples. Each sample must support
            ``sample["image_id"]`` and the id must be hashable.

    Returns:
        dict: ``{image_id: index}`` where ``index`` is the sample's position
        in iteration order. If an ``image_id`` occurs more than once, the last
        position wins and a warning reports how many entries were overwritten.
    """
    image_id_to_dataset_id = {}
    num_overwritten = 0
    for idx, sample in enumerate(tqdm.tqdm(dataset)):
        image_id = sample["image_id"]
        if image_id in image_id_to_dataset_id:
            num_overwritten += 1
        image_id_to_dataset_id[image_id] = idx
    if num_overwritten:
        # Surface the otherwise silent loss of earlier indices.
        warnings.warn(
            f"{num_overwritten} duplicate image_id(s) found; "
            "only the last dataset index is kept for each.",
            stacklevel=2,
        )
    return image_id_to_dataset_id


def _load_or_build_image_id_to_dataset_id(cache_path, dataset):
    """Return the image-id index for ``dataset``, cached as a pickle at ``cache_path``.

    If ``cache_path`` exists it is loaded and returned; otherwise the index is
    built with ``build_image_id_to_dataset_id`` and written to ``cache_path``.
    """
    if os.path.exists(cache_path):
        # NOTE: pickle.load can execute arbitrary code. Only use this with
        # cache files written by this notebook.
        with open(cache_path, "rb") as f:
            return pickle.load(f)

    image_id_to_dataset_id = build_image_id_to_dataset_id(dataset)
    # Dump to a sibling temp file and rename it into place, so an interrupted
    # write never leaves a truncated pickle that a later run would try to load.
    partial_path = f"{cache_path}.tmp"
    with open(partial_path, "wb") as f:
        pickle.dump(image_id_to_dataset_id, f)
    os.replace(partial_path, cache_path)
    return image_id_to_dataset_id


# %% Build (or load from cache) the train/eval image-id indexes
tmp_dir = "tmp/data"
os.makedirs(tmp_dir, exist_ok=True)

# Cache files are keyed by the last path component of the config name.
config_name_ = os.path.basename(config_name)
plk_train_image_id_to_dataset = os.path.join(tmp_dir, f"{config_name_}.train_image_id_to_dataset.pkl")
plk_eval_image_id_to_dataset = os.path.join(tmp_dir, f"{config_name_}.eval_image_id_to_dataset.pkl")

train_image_id_to_dataset_id = _load_or_build_image_id_to_dataset_id(
    plk_train_image_id_to_dataset, train_dataset_no_image
)
eval_image_id_to_dataset_id = _load_or_build_image_id_to_dataset_id(
    plk_eval_image_id_to_dataset, eval_dataset_no_image
)
" ], "text/plain": [ "