DHPR commited on
Commit
f638d9c
·
1 Parent(s): df3ffa8

Upload 25 files

Browse files
.gitattributes CHANGED
@@ -32,3 +32,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ data/random_sample_test_direct_ids.json filter=lfs diff=lfs merge=lfs -text
36
+ data/random_sample_test_indirect_ids.json filter=lfs diff=lfs merge=lfs -text
37
+ results_pair_dict1.json filter=lfs diff=lfs merge=lfs -text
38
+ results_pair_dict2.json filter=lfs diff=lfs merge=lfs -text
39
+ results/results_pair_dict.json filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%
2
+ from PIL import Image
3
+ import torch
4
+ import torchvision
5
+ from torchvision import transforms
6
+ from torchvision.transforms.functional import InterpolationMode
7
+ import json
8
+ from collections import defaultdict
9
+ import numpy as np
10
+ import matplotlib.pyplot as plt
11
+ import random
12
+ from os import listdir
13
+ from os.path import isfile, join
14
+ from torchvision.io import read_image
15
+ from torchvision.utils import draw_bounding_boxes
16
+ from PIL import Image
17
+ import os
18
+ from scipy.stats import rankdata
19
+ import tqdm
20
+ import streamlit as st
21
+ import pandas as pd
22
+
23
+ # %%
24
def load_json(PATH):
    """Load a JSON file and return its contents.

    Parameters
    ----------
    PATH : str
        Path to a JSON file on disk.

    Returns
    -------
    dict
        Parsed JSON content, or an empty dict when the file is missing
        or not readable (a message is printed in that case).
    """
    if os.path.isfile(PATH) and os.access(PATH, os.R_OK):
        with open(PATH) as json_file:
            dict_data = json.load(json_file)
    else:
        # BUG FIX: previous message was ungrammatical ("The Path of X is not exist").
        print("The path", PATH, "does not exist or is not readable")
        dict_data = {}
    return dict_data
32
+
33
def get_list_folder(PATH):
    """Return the names of the immediate subdirectories of PATH."""
    entries = os.listdir(PATH)
    return [entry for entry in entries if os.path.isdir(os.path.join(PATH, entry))]
35
+
36
def get_file_only(PATH):
    """Return the names of the regular files directly inside PATH."""
    files = []
    for entry in os.listdir(PATH):
        if os.path.isfile(os.path.join(PATH, entry)):
            files.append(entry)
    return files
38
+
39
+ # %%
40
def compute_ndcg(ranks, scores, k=3):
    """Compute NDCG@k from predicted ranks and ground-truth relevance scores.

    Example
    -------
    ranks = [5, 1, 4, 2, 3]
    scores = [0.1, 0.5, 0.3, 0.95, 1.0]

    Parameters
    ----------
    ranks : sequence
        Predicted ranks (1-based, e.g. from ``scipy.stats.rankdata``),
        aligned with ``scores``.
    scores : sequence
        Relevance scores for the same items.
    k : int
        Truncation depth.

    Returns
    -------
    float
        NDCG in [0, 1]; 0.0 when the top-k relevance mass is zero
        (previously this case raised ZeroDivisionError).
    """
    rank_score_tuple = list(zip(ranks, scores))

    # Take the k items with the highest ground-truth relevance.
    top_k = sorted(rank_score_tuple, key=lambda x: x[1], reverse=True)[:k]

    # DCG discounts each relevance score by the model's predicted rank.
    dcg = sum(score / np.log2(rank + 1) for rank, score in top_k)

    # Ideal DCG places the same scores at positions 1..k.
    ideal_dcg = sum(score / np.log2(idx + 2) for idx, (_, score) in enumerate(top_k))

    # BUG FIX: when every relevance score is 0, ideal_dcg == 0 and the
    # division crashed; define NDCG as 0.0 in that degenerate case.
    if ideal_dcg == 0:
        return 0.0
    return dcg / ideal_dcg
55
+
56
def compute_ndcg_score_per_mode(pred_rank_dict, gpt_rel_scores, random_sample_dict, mode='indirect', split='test', k=200):
    """Average NDCG@k over all queries of one evaluation mode.

    Parameters
    ----------
    pred_rank_dict : dict
        Maps each query key to the predicted rank array of its candidates.
    gpt_rel_scores : dict
        Maps query key -> {candidate key: relevance score}; candidates
        without a judgement count as 0.0.
    random_sample_dict : dict
        Maps each query key to its ordered list of candidate keys.
    mode, split : str
        Only used for the progress/log message.
    k : int
        NDCG truncation depth.

    Returns
    -------
    float
        Mean NDCG@k (0.0 for empty input).

    BUG FIXES: default ``mode`` was misspelled 'indrect'; empty input
    previously raised ZeroDivisionError.
    """
    ndcg_scores = []

    for key in tqdm.tqdm(pred_rank_dict.keys(), total=len(pred_rank_dict.keys())):
        # Unjudged candidates get relevance 0.0 (dict.get instead of
        # the previous `x if k in d else 0.0` double lookup).
        gpt_scores_for_key = [gpt_rel_scores[key].get(cand_key, 0.0) for cand_key in random_sample_dict[key]]

        pred_rank_for_key = pred_rank_dict[key]

        ndcg_score = compute_ndcg(pred_rank_for_key, gpt_scores_for_key, k=k)
        ndcg_scores.append(ndcg_score)

    avg_ndcg_score = sum(ndcg_scores) / len(ndcg_scores) if ndcg_scores else 0.0
    print(f"Random split, mode={mode} ndcg score: ", avg_ndcg_score)
    return avg_ndcg_score
70
+ # %%
71
+
72
def get_score_direct(random_sample_pair_test_direct, predictions, key_pair, similarity_score_test_direct, k=200):
    """Compute direct-mode retrieval metrics (mean i2t/t2i rank and NDCG@k).

    Parameters
    ----------
    random_sample_pair_test_direct : dict
        Maps each query key to its list of 1000 candidate keys; the true
        match is the last candidate (see the ``[-1]`` rank reads below).
    predictions : dict
        Maps "img_key:txt_key" pair strings to predicted similarity scores.
    key_pair : dict
        Maps image keys to their paired text keys (assumed injective —
        it is inverted below).
    similarity_score_test_direct : dict
        ChatGPT relevance scores used as NDCG ground truth.
    k : int
        NDCG truncation depth.

    Returns
    -------
    dict
        {'direct': {'i2t rank', 't2i rank', 'ndcg score'}}.
    """
    mode = 'direct'
    i2t_ranks = []
    t2i_ranks = []
    i2t_rank_dict = {}
    results_dict = {}
    key_pair_reversed = {v: k for k, v in key_pair.items()}

    for file_key in tqdm.tqdm(random_sample_pair_test_direct.keys(), total=len(random_sample_pair_test_direct.keys())):
        # BUG FIX: the candidate loop variable was named `k`, shadowing the
        # `k` truncation parameter; renamed to `cand`.
        i2t_rank = rankdata([predictions[str(file_key) + ':' + str(key_pair[cand])] for cand in random_sample_pair_test_direct[file_key]])
        t2i_rank = rankdata([predictions[str(key_pair_reversed[key_pair[cand]]) + ':' + str(key_pair[file_key])] for cand in random_sample_pair_test_direct[file_key]])

        # The ground-truth match is the last candidate, so its rank is the metric.
        i2t_ranks.append(i2t_rank[-1])
        t2i_ranks.append(t2i_rank[-1])
        i2t_rank_dict[file_key] = i2t_rank

    assert len(i2t_ranks) == len(t2i_ranks) == 1000

    # BUG FIX: the mode label passed here was 'indirect' (wrong log label for
    # the direct path) and the depth was hard-coded 200 instead of using `k`.
    ndcg_score = compute_ndcg_score_per_mode(i2t_rank_dict, similarity_score_test_direct, random_sample_pair_test_direct, mode='direct', split='test', k=k)

    results_dict['direct'] = {
        'i2t rank': float(sum(i2t_ranks) / len(i2t_ranks)),
        't2i rank': float(sum(t2i_ranks) / len(t2i_ranks)),
        'ndcg score': float(ndcg_score),
    }
    print(f"Random split, mode={mode} i2t rank: ", sum(i2t_ranks) / len(i2t_ranks))
    print(f"Random split, mode={mode} t2i rank: ", sum(t2i_ranks) / len(t2i_ranks))
    return results_dict
100
+
101
+ # %%
102
+
103
def get_score_indirect(random_sample_pair_test_indirect, predictions, key_pair, similarity_score_test_indirect, k=200):
    """Compute indirect-mode retrieval metrics (mean i2t/t2i rank and NDCG@k).

    Parameters
    ----------
    random_sample_pair_test_indirect : dict
        Maps each query key to its list of 1000 candidate keys; the true
        match is the last candidate (see the ``[-1]`` rank reads below).
    predictions : dict
        Maps "img_key:txt_key" pair strings to predicted similarity scores.
    key_pair : dict
        Maps image keys to their paired text keys (assumed injective —
        it is inverted below).
    similarity_score_test_indirect : dict
        ChatGPT relevance scores used as NDCG ground truth.
    k : int
        NDCG truncation depth.

    Returns
    -------
    dict
        {'indirect': {'i2t rank', 't2i rank', 'ndcg score'}}.
    """
    mode = 'indirect'
    i2t_ranks = []
    t2i_ranks = []
    i2t_rank_dict = {}
    results_dict = {}
    key_pair_reversed = {v: k for k, v in key_pair.items()}

    for file_key in tqdm.tqdm(random_sample_pair_test_indirect.keys(), total=len(random_sample_pair_test_indirect.keys())):
        # BUG FIX: the candidate loop variable was named `k`, shadowing the
        # `k` truncation parameter; renamed to `cand`.
        i2t_rank = rankdata([predictions[str(file_key) + ':' + str(key_pair[cand])] for cand in random_sample_pair_test_indirect[file_key]])
        t2i_rank = rankdata([predictions[str(key_pair_reversed[key_pair[cand]]) + ':' + str(key_pair[file_key])] for cand in random_sample_pair_test_indirect[file_key]])

        # The ground-truth match is the last candidate, so its rank is the metric.
        i2t_ranks.append(i2t_rank[-1])
        t2i_ranks.append(t2i_rank[-1])
        i2t_rank_dict[file_key] = i2t_rank

    assert len(i2t_ranks) == len(t2i_ranks) == 1000

    # BUG FIX: mode was misspelled 'indrect' and the depth was hard-coded 200
    # instead of using `k`.
    ndcg_score = compute_ndcg_score_per_mode(i2t_rank_dict, similarity_score_test_indirect, random_sample_pair_test_indirect, mode='indirect', split='test', k=k)

    results_dict['indirect'] = {
        'i2t rank': float(sum(i2t_ranks) / len(i2t_ranks)),
        't2i rank': float(sum(t2i_ranks) / len(t2i_ranks)),
        'ndcg score': float(ndcg_score),
    }
    print(f"Random split, mode={mode} i2t rank: ", sum(i2t_ranks) / len(i2t_ranks))
    print(f"Random split, mode={mode} t2i rank: ", sum(t2i_ranks) / len(t2i_ranks))
    return results_dict
131
+
132
+ # %%
133
def main(json_file):
    """Run the full evaluation for one predictions dict.

    Parameters
    ----------
    json_file : dict
        Maps "img_key:txt_key" pair strings to predicted similarity scores
        (already merged from the uploaded/demo result files).

    Returns
    -------
    dict
        Merged direct + indirect metric dicts.
    """
    predictions = json_file
    root = os.environ['ROOT']

    def _read(rel_path):
        # Load one JSON data file relative to the app root.
        with open(os.path.join(root, rel_path)) as fh:
            return json.load(fh)

    key_pair = _read('data/key_pair.json')
    random_sample_pair_test_direct = _read('data/random_sample_test_direct_ids.json')
    random_sample_pair_test_indirect = _read('data/random_sample_test_indirect_ids.json')
    similarity_score_test_direct = _read('data/chatgpt_similarity_score_test_direct.json')
    similarity_score_test_indirect = _read('data/chatgpt_similarity_score_test_indirect.json')

    ### Compute scores for both evaluation modes and merge the results.
    result_direct = get_score_direct(random_sample_pair_test_direct, predictions, key_pair, similarity_score_test_direct, k=200)
    result_indirect = get_score_indirect(random_sample_pair_test_indirect, predictions, key_pair, similarity_score_test_indirect, k=200)
    return {**result_direct, **result_indirect}
173
+ # %%
174
if __name__ == '__main__':
    os.environ['ROOT'] = os.path.dirname(os.path.realpath(__file__))

    st.title("Evaluation Server for Driving Hazard Prediction and Reasoning ")
    st.image(os.path.join(os.environ['ROOT'], 'data/preview_image.jpeg'))
    st.divider()

    result_text = ''
    result_dict = {}
    json_file = None

    # NOTE(review): the label mentions .json result files but the uploader
    # only accepts .csv (and the files are parsed with read_csv below) —
    # confirm the intended upload format.
    uploaded_files = st.file_uploader(
        "Upload All Result Files Here (results_pair_dict1.json, results_pair_dict2.json)",
        type=["csv"], accept_multiple_files=True)

    if st.button('Run Evaluation with no upload files (using demo files)'):
        json_file1 = load_json(os.path.join(os.environ['ROOT'], 'results_pair_dict1.json'))
        json_file2 = load_json(os.path.join(os.environ['ROOT'], 'results_pair_dict2.json'))
        json_file = {**json_file1, **json_file2}

    # BUG FIX: st.file_uploader(..., accept_multiple_files=True) returns an
    # empty list (never None) when nothing is uploaded, so the previous
    # `if uploaded_files is not None:` branch always ran and clobbered the
    # demo-file dict with an empty one. Only parse when files are present.
    if uploaded_files:
        dataframe = pd.concat([pd.read_csv(f) for f in uploaded_files])
        # 'tight' orient exposes the raw rows; column 1 is the pair key,
        # column 2 the predicted score.
        rows = dataframe.to_dict('tight')['data']
        json_file = {str(row[1]): float(row[2]) for row in rows}

    if json_file is not None and len(json_file) >= 1:
        result_dict = main(json_file)
        result_text = json.dumps(result_dict)

    st.download_button('Download Results', result_text)
    st.json(result_dict)

    # !streamlit run app.py --server.fileWatcherType none
235
+ # %%
config/__init__.py ADDED
File without changes
config/examples.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys
import json

from omegaconf import OmegaConf
from hydra.core.global_hydra import GlobalHydra
from hydra import initialize, initialize_config_module, initialize_config_dir, compose

# NOTE(review): machine-specific absolute paths — these only resolve on the
# original author's workstation; consider reading them from the environment.
os.environ['ROOT'] = "/home/quang/workspace/traffic_var"
os.environ['DATA_ROOT'] = "/home/quang/datasets/traffic_var"

# initialize hydra config
# Clear any previously-initialized Hydra instance so re-running this module
# (e.g. inside a notebook) does not raise "GlobalHydra is already initialized".
GlobalHydra.instance().clear()
initialize(config_path="./")
# Compose example configs with the decoder variant and the original experiment
# layout, both pinned to the ViT-B/16 CLIP backbone and rationale type 0.
with_decoder_config = compose(config_name='with_decoder.yaml', overrides=["clip_model=ViT-B/16", "rationale_type=0", "val_rationale_type=0"])
original_config = compose(config_name='experiment.yaml', overrides=["clip_model=ViT-B/16", "rationale_type=0", "val_rationale_type=0"])
config/experiment.yaml ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # config.yaml
2
+
3
+ exp_name: exp1-2
4
+ wandb: 90788c79e1500570b08e5acf283e17df7e0c54b2  # SECURITY(review): W&B API key committed to the repo — revoke it and load it from an environment variable instead
5
+ root: "${oc.env:DATA_ROOT}"
6
+ overfit: False
7
+ batch_size: 4
8
+ num_workers: 4
9
+ img_size: 224
10
+ rationale_type: 0 # 0 - only rationale, 1 - randomly add entity desc, 2 - add entity desc
11
+ val_rationale_type: 0
12
+ hide_true_bbox: 8 # clues and inferences selected randomly
13
+ widescreen_processing: 1 # 0 - no widescreen, 1 - widescreen
14
+ h_flip: False
15
+ ema_decay: 0.9999
16
+
17
+ clip_model: 'ViT-B/16' # 'RN101' 'RN50x4''RN50x16' 'RN50x64' 'ViT-L/14@336px' 'ViT-B/32'
18
+ num_layers: 3
19
+ dim_hidden: 512
20
+
21
+
22
+ warmup: 1000
23
+ init_from: ''
24
+ lr: .00001
25
+ n_epochs: 15
26
+ save_every: 0
27
+ early_stop: 5
28
+ val_stat: 'loss'
29
+ device: 'cuda'
30
+ use_multi: False
31
+ local_rank: 0
32
+
33
+ hydra:
34
+ run:
35
+ dir: ./results/${exp_name}
36
+ output_subdir: ./ # directory for saving the yaml configs
37
+ job:
38
+ config:
39
+ override_dirname:
40
+ exclude_keys:
41
+ - exp.name
config/with_decoder.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # config.yaml
2
+
3
+ exp_name: exp1-2
4
+ wandb: 90788c79e1500570b08e5acf283e17df7e0c54b2  # SECURITY(review): W&B API key committed to the repo — revoke it and load it from an environment variable instead
5
+ root: "${oc.env:DATA_ROOT}"
6
+ overfit: False
7
+ batch_size: 4
8
+ num_workers: 4
9
+ img_size: 224
10
+ rationale_type: 0 # 0 - only rationale, 1 - randomly add entity desc, 2 - add entity desc
11
+ val_rationale_type: 0
12
+ hide_true_bbox: 8 # clues and inferences selected randomly
13
+ widescreen_processing: 1 # 0 - no widescreen, 1 - widescreen
14
+ h_flip: False
15
+ ema_decay: 0.9999
16
+ aux_weight: 0.2
17
+ no_hard_negative_itm: False
18
+
19
+ clip_model: 'ViT-B/16' # 'RN101' 'RN50x4''RN50x16' 'RN50x64' 'ViT-L/14@336px' 'ViT-B/32'
20
+ has_extra_txt_decoder: False
21
+ has_extra_img_decoder: False
22
+ has_extra_mix_decoder: False
23
+ has_extra_gen_decoder: False
24
+
25
+ extra_decoder:
26
+ is_decoder: True
27
+ vocab_size: 1000
28
+ d_ff: 512
29
+ d_kv: 64
30
+ d_model: 512
31
+ dropout_rate: 0.1
32
+ num_heads: 8
33
+ num_layers: 2
34
+ # eos_token_id: 1
35
+ # pad_token_id: 0
36
+ # decoder_start_token_id: 0
37
+ # n_positions: 512
38
+ relative_attention_max_distance: 128
39
+ relative_attention_num_buckets: 32
40
+
41
+ warmup: 1000
42
+ init_from: ''
43
+ lr: .00001
44
+ n_epochs: 15
45
+ save_every: 0
46
+ early_stop: 5
47
+ val_stat: 'loss'
48
+ device: 'cuda'
49
+ use_multi: False
50
+ local_rank: 0
51
+
52
+ hydra:
53
+ run:
54
+ dir: ./results/${exp_name}
55
+ output_subdir: ./ # directory for saving the yaml configs
56
+ job:
57
+ config:
58
+ override_dirname:
59
+ exclude_keys:
60
+ - exp.name
create_result_script.py ADDED
@@ -0,0 +1,531 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%
2
+
3
+ import json
4
+ import sys
5
+ import pickle
6
+ sys.path.append("../")
7
+ import collections
8
+ from models.fused_model import Model
9
+ import os
10
+ import tqdm
11
+ import time
12
+ import json
13
+ import random
14
+ from PIL import ImageFile
15
+ from PIL import Image, ImageDraw
16
+ import clip
17
+ import torch
18
+ import numpy as np
19
+ import torchvision.transforms as T
20
+ import torchvision.transforms.functional as F
21
+ from pathlib import Path
22
+ import pandas as pd
23
+
24
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
25
+
26
+ # %%
27
+ from types import SimpleNamespace
28
+ # get config
29
+ import os
30
+ from omegaconf import OmegaConf
31
+ from hydra.core.global_hydra import GlobalHydra
32
+ from hydra import initialize, initialize_config_module, initialize_config_dir, compose
33
+
34
+ os.environ['ROOT'] = os.path.dirname(os.path.realpath(__file__))
35
+ os.environ['DATA_ROOT'] = os.path.join(os.environ['ROOT'], 'data')
36
+
37
+ # initialize hydra config
38
+ GlobalHydra.instance().clear()
39
+ initialize(config_path="./config")
40
+
41
+ config = compose(config_name='with_decoder.yaml',
42
+ overrides=["clip_model=ViT-L/14@336px",
43
+ "rationale_type=0", "val_rationale_type=0"])
44
+
45
class SquarePad:
    """Pad a PIL image symmetrically with zeros so it becomes square
    (side length = max(width, height))."""

    def __call__(self, image):
        w, h = image.size
        side = max(w, h)
        # Split the deficit evenly; any odd pixel goes to the right/bottom.
        left = (side - w) // 2
        top = (side - h) // 2
        right = side - w - left
        bottom = side - h - top
        return F.pad(image, (left, top, right, bottom), 0, 'constant')
53
+
54
class VarDatasetForAuxEncoders:
    """Dataset pairing traffic-scene images with hazard-rationale text for
    training auxiliary (image/text) encoders.

    Each item bundles a bbox-highlighted image tensor, the tokenized
    rationale text, and an image-text-matching (ITM) text/label pair
    (positive or a hard negative with the same entity count).
    """

    def __init__(self, config, file_path, split="train", mode="combined", do_swap=False, tensorize=True, do_crop=True):
        # NOTE(review): `do_crop` is accepted but never used in this class.
        self.config = config
        self.mode = mode
        self.split = split
        self.do_swap = do_swap
        # Training may use a different rationale augmentation than validation.
        self.rationale_type = config.rationale_type if split == "train" else config.val_rationale_type
        self.root_path = Path(config.root)
        self.anno_path = file_path #self.root_path / f'annotations/13_05/anno_{split}_{mode}.json'
        if split == "test" and mode == "combined" and config.overfit:
            self.anno_path = self.root_path / f'annotations/13_05/anno_{split}_{mode}_overfit.json'

        self.data = json.load(open(self.anno_path))
        self.idx2name = list(self.data.keys())

        # Bucket sample keys by bounding-box count; used to draw ITM hard
        # negatives that share the same number of entities.
        if 'bounding_box' in self.data[list(self.data.keys())[0]]['details'][-1]:
            self.one_ent_keys = [k for k, v in self.data.items() if len(v['details'][-1]["bounding_box"]) == 1]
            self.two_ent_keys = [k for k, v in self.data.items() if len(v['details'][-1]["bounding_box"]) == 2]
            self.three_ent_keys = [k for k, v in self.data.items() if len(v['details'][-1]["bounding_box"]) == 3]
            self.all_ent_keys = self.one_ent_keys + self.two_ent_keys + self.three_ent_keys

            self.keys = {1: self.one_ent_keys, 2: self.two_ent_keys, 3: self.three_ent_keys}

        # widescreen_processing 0/1 -> resize+crop; otherwise square-pad first.
        if self.config.widescreen_processing in [0, 1]:
            self.resize_crop = self.get_transform(config.img_size, split == "train", padding=False)
        else:
            self.resize_crop = self.get_transform(config.img_size, split == "train", padding=True)

        self.tensorize = tensorize
        # Color jitter is a train-only augmentation; identity elsewhere.
        self.jitter_transform = T.ColorJitter(brightness=.5, hue=.3, saturation=.3) if split == "train" else lambda x: x

        # RGB conversion, then (optionally) tensor conversion + CLIP-style
        # channel normalization.
        self.final_transform = T.Compose([
            lambda image: image.convert("RGB"),
            T.ToTensor() if tensorize else lambda x: x,
            T.Normalize(
                (0.48145466, 0.4578275, 0.40821073),
                (0.26862954, 0.26130258, 0.27577711),
            ) if tensorize else lambda x: x
        ])

    def get_transform(self, n_px, training, padding=False):
        """Build the resize/crop pipeline: random crop for training,
        center crop otherwise; optional square padding first."""
        resize = T.Resize((n_px + 16, n_px + 16), interpolation=Image.BICUBIC)

        # for traning split
        if training and not padding: # train
            return T.Compose([resize, T.RandomCrop(n_px)])

        if training and padding: # train_pad
            return T.Compose([SquarePad(), resize, T.RandomCrop(n_px)])

        # for test and val split
        if not training and not padding: # test
            return T.Compose([resize, T.CenterCrop(n_px)])

        if not training and padding: # test_pad
            return T.Compose([SquarePad(), resize, T.CenterCrop(n_px)])

    def key2img_path(self, key):
        """Resolve a sample key to its image path, probing the known image
        directory layouts in order; returns None when nothing matches."""
        file_paths = [
            self.root_path / f"var_images/{key}.jpg",
            self.root_path / f"var_images/{key}.png",
            self.root_path / f"images/{key}.jpg",
            self.root_path / f"img/train/{key.split('_')[0]}/{key}.png",
            self.root_path / f"img/val/{key.split('_')[0]}/{key}.png",
            self.root_path / f"img/test/{key.split('_')[0]}/{key}.png",
            self.root_path / f"img/{key}.png",
            self.root_path / f"img/{key}.jpg",
            self.root_path / f"images/{key}.png",
            self.root_path / f"images/{key}.jpg",
        ]
        for file_path in file_paths:
            if file_path.exists():
                return file_path

    def key2img(self, key):
        """Open the image for a sample key as a PIL Image."""
        file_path = self.key2img_path(key)
        return Image.open(file_path)

    def hide_region(self, image, bboxes):
        """Draw the entity bounding boxes onto the image according to
        config.hide_true_bbox: 1 hides them, 2/5/7/8/9 highlight them with
        translucent colors, 3 blacks out everything but the boxes, and 6
        shows box positions only."""
        image = image.convert('RGBA')

        if self.config.hide_true_bbox == 1: # hide mode
            draw = ImageDraw.Draw(image, 'RGBA')

        if self.config.hide_true_bbox in [2, 5, 7, 8, 9]: #highlight mode
            overlay = Image.new('RGBA', image.size, '#00000000')
            draw = ImageDraw.Draw(overlay, 'RGBA')

        if self.config.hide_true_bbox == 3 or self.config.hide_true_bbox == 6: #blackout mode or position only mode
            overlay = Image.new('RGBA', image.size, '#7B7575ff')
            draw = ImageDraw.Draw(overlay, 'RGBA')

        color_fill_list = ['#ff05cd3c', '#00F1E83c', '#F2D4003c'] # Green, Blue, Yellow?

        for idx, bbox in enumerate(bboxes):
            # Entities without a box (padded to length 3) are skipped.
            if bbox == None:
                continue

            color_fill = color_fill_list[idx]
            x, y = bbox['left'], bbox['top']

            if self.config.hide_true_bbox == 1: # hide mode
                draw.rectangle([(x, y), (x + bbox['width'], y + bbox['height'])], fill='#7B7575')
            elif self.config.hide_true_bbox in [2, 5, 7, 8, 9]: # highlight mode
                draw.rectangle([(x, y), (x + bbox['width'], y + bbox['height'])], fill=color_fill, outline='#05ff37ff',
                width=3) # Fill with Pink 60% ##00F1E8
            elif self.config.hide_true_bbox == 3: # blackout mode
                draw.rectangle([(x, y), (x + bbox['width'], y + bbox['height'])], fill='#00000000')
            elif self.config.hide_true_bbox == 6: # position only mode
                draw.rectangle([(x, y), (x + bbox['width'], y + bbox['height'])], fill=color_fill)

        if self.config.hide_true_bbox in [2, 3, 5, 6, 7, 8, 9]:
            image = Image.alpha_composite(image, overlay)

        return image

    def get_entity_codes(self):
        """Return the entity permutation [0, 1, 2], shuffled when do_swap
        augmentation is enabled."""
        entity_codes = [0, 1, 2]
        if self.do_swap:
            random.shuffle(entity_codes)
        return entity_codes

    def swap_entities(self, bboxes, text, entity_codes):
        """Apply one entity permutation consistently to the text mentions
        and the bbox list."""
        # text
        for entity_idx, entity_code in enumerate(entity_codes):
            text = text.replace(f"Entity #{entity_idx + 1}", f"Entity #{entity_code + 1}")

        # bboxes: [1, 0, 2] -> [b[1], b[0], b[2]]
        new_boxes = [bboxes[entity_code] for entity_code in entity_codes]
        return new_boxes, text

    def get_text_from_meta(self, meta):
        """Build the rationale text, optionally inlining each entity's
        description after its first mention (rationale_type 1/2)."""
        n_boxes = len(meta['bounding_box']) # key ['1', '2', '3']

        # for rationale
        text = 'Rationale: ' + str(meta['rationale'])

        if self.rationale_type == 1 or self.rationale_type == 2:
            for box_idx in range(n_boxes):
                ent_name = f'Entity #{box_idx + 1}'
                ent_desc = f'{ent_name}, {meta[ent_name]}'
                # todo: replace randomly
                text = text.replace(ent_name, ent_desc, 1)
        return text

    def get_itm_text(self, ori_file_key):
        """Return (text, label) for image-text matching: with p=0.5 the true
        text (label 1), otherwise a negative sampled from samples with the
        same entity count (or uniformly, when no_hard_negative_itm is set)."""
        file_key = ori_file_key

        if random.random() < 0.5:
            n_boxes = len(self.data[file_key]['details'][-1]['bounding_box'])

            file_key = random.choice(self.keys[n_boxes])

            if self.config.get('no_hard_negative_itm', False):
                file_key = random.choice(self.all_ent_keys)

        # The random draw may still pick the original key, so the label is
        # derived from the outcome rather than the branch taken.
        itm_label = 1 if file_key == ori_file_key else 0
        meta = self.data[file_key]['details'][-1]

        itm_text = self.get_text_from_meta(meta)
        return itm_text, itm_label

    def get_bboxes_and_text(self, file_key, meta):
        """Assemble the (possibly entity-swapped) bboxes, rationale text and
        ITM pair for one sample."""
        text = self.get_text_from_meta(meta)
        bboxes = [meta['bounding_box'].get(str(box_idx + 1), None) for box_idx in range(3)]

        entity_codes = self.get_entity_codes()
        bboxes, text = self.swap_entities(bboxes, text, entity_codes)

        # Apply the same permutation to the ITM text so entity numbering stays
        # consistent with the image; its bboxes are irrelevant here.
        itm_text, itm_label = self.get_itm_text(file_key)
        _, itm_text = self.swap_entities([None, None, None], itm_text, entity_codes)
        return {'bboxes': bboxes, 'text': text, 'itm_text': itm_text, 'itm_label': itm_label}

    def get_image(self, file_key, bboxes):
        """Load, jitter, bbox-annotate and normalize one image."""
        image = self.key2img(file_key)
        image = self.jitter_transform(image)
        image = self.hide_region(image, bboxes)
        image = self.final_transform(self.resize_crop(image))
        return image

    def __getitem__(self, idx):
        file_key = self.idx2name[idx]

        # Select the last version of label of the sample
        meta = self.data[file_key]['details'][-1]

        # read bboxes and rationale
        outputs = self.get_bboxes_and_text(file_key, meta)
        text = clip.tokenize(outputs['text'], truncate=True).squeeze()
        itm_text = clip.tokenize(outputs['itm_text'], truncate=True).squeeze()
        itm_label = torch.tensor(outputs['itm_label'])

        image = self.get_image(file_key, outputs['bboxes'])

        return {'image': image, 'caption': text, 'raw_text': text, 'file_key': file_key, 'itm_text': itm_text, 'itm_label': itm_label}

    def __len__(self):
        # Overfit mode shrinks every split except test/combined to 16 samples.
        if self.config.overfit and not (self.split == 'test' and self.mode == 'combined'):
            return 16
        return len(self.data)
254
+
255
+ # %%
256
class VarDatasetImageOnly(VarDatasetForAuxEncoders):
    """Dataset variant that yields only the processed image and its key
    (no text/ITM fields)."""

    def __init__(self, args, file_path, split="val", mode="combined", do_swap=False):
        super().__init__(args, file_path, split=split, mode=mode, do_swap=do_swap)

    def __getitem__(self, idx):
        key = self.idx2name[idx]
        details = self.data[key]['details'][-1]
        # Pad the per-entity boxes to a fixed length of 3 (missing -> None).
        raw_boxes = [details['bounding_box'].get(str(slot + 1), None) for slot in range(3)]
        # Apply the (possibly shuffled) entity permutation to the boxes.
        order = self.get_entity_codes()
        permuted_boxes = [raw_boxes[code] for code in order]
        return {'image': self.get_image(key, permuted_boxes), 'file_key': key}
268
+
269
+ # %%
270
class VarDatasetTextOnly(VarDatasetForAuxEncoders):
    """Dataset variant that yields only the tokenized hazard text and key."""

    def __init__(self, args, file_path, split="val", mode="combined", do_swap=False):
        super().__init__(args, file_path, split=split, mode=mode, do_swap=do_swap)

    def __getitem__(self, idx):
        key = self.idx2name[idx]
        details = self.data[key]['details'][-1]
        hazard = details['hazard']

        # Infer the entity count from the highest entity mention in the text.
        if 'Entity #3' in hazard:
            n_boxes = 3
        elif 'Entity #2' in hazard:
            n_boxes = 2
        else:
            n_boxes = 1

        text = 'Rationale: ' + str(hazard)

        # Optionally inline each entity's description after its first mention.
        if self.rationale_type in (1, 2):
            for slot in range(n_boxes):
                ent_name = f'Entity #{slot + 1}'
                text = text.replace(ent_name, f'{ent_name}, {details[ent_name]}', 1)

        # Renumber entity mentions according to the (possibly shuffled) order.
        for position, code in enumerate(self.get_entity_codes()):
            text = text.replace(f"Entity #{position + 1}", f"Entity #{code + 1}")

        tokens = clip.tokenize(text, truncate=True).squeeze()
        return {'caption': tokens, 'file_key': key}
300
+
301
+ # %%
302
+ import os
303
+ import sys
304
+
305
+ sys.path.append('..')
306
+ import json
307
+ import fire
308
+ import tqdm
309
+
310
+ import clip
311
+ import torch
312
+ import sklearn
313
+ import numpy as np
314
+
315
+ from omegaconf import OmegaConf
316
+ from models.fused_model import Model
317
+ from torch.utils.data import DataLoader
318
+ # from datasets import VarDatasetForAuxEncoders
319
+
320
+ from scipy.stats import rankdata
321
+ from sklearn.metrics import ndcg_score
322
+ from sklearn.metrics import pairwise_distances
323
+
324
+
325
+ # def get_data_loader(config, split="test", mode="combined", do_swap=False):
326
+ # dataset = VarDatasetForAuxEncoders(config, split=split, mode=mode, do_swap=do_swap)
327
+ # return DataLoader(dataset, batch_size=4, shuffle=False)
328
+
329
def get_image_data_loader(config, file_path, split="test", mode="combined", do_swap=False):
    """Build a DataLoader over the image-only dataset (batch 4, no shuffle)."""
    ds = VarDatasetImageOnly(config, file_path, split=split, mode=mode, do_swap=do_swap)
    loader = DataLoader(ds, batch_size=4, shuffle=False)
    return loader
332
+
333
def get_text_data_loader(config, file_path, split="test", mode="combined", do_swap=False):
    """Build a DataLoader over the text-only dataset (batch 4, no shuffle)."""
    ds = VarDatasetTextOnly(config, file_path, split=split, mode=mode, do_swap=do_swap)
    loader = DataLoader(ds, batch_size=4, shuffle=False)
    return loader
336
+
337
+ # def get_data_loader(config, split="test", mode="combined", do_swap=False):
338
+ # dataset = VarDatasetForAuxEncoders(config, split=split, mode=mode, do_swap=do_swap)
339
+ # return DataLoader(dataset, batch_size=4, shuffle=False)
340
+
341
def compute_rand_rank(split='test', mode='spec', img_token_dict={}, txt_token_dict={}): # the dicts contain all 2000 test samples
    """Mean image-to-text / text-to-image retrieval rank on the random split.

    NOTE(review): the dict defaults are mutable; they are only read here,
    but callers should always pass both dicts explicitly.

    Returns the per-query i2t rank dict, used downstream for NDCG.
    """
    data = json.load(open( os.path.join(os.environ['ROOT'], f"data/annotations/13_05/anno_random_{split}_{mode}_ids.json")))

    i2t_ranks = []
    t2i_ranks = []
    i2t_rank_dict = {}
    t2i_rank_dict = {}

    for file_key in data.keys():
        # Query embeddings, with a batch dim for pairwise_distances.
        img_emb = (img_token_dict[file_key]).unsqueeze(0)
        txt_emb = (txt_token_dict[file_key]).unsqueeze(0)

        # 1000-candidate gallery; the query's true match is the last entry.
        txt_embs = torch.stack([txt_token_dict[k] for k in data[file_key]])
        img_embs = torch.stack([img_token_dict[k] for k in data[file_key]])
        assert txt_embs.shape[0] == img_embs.shape[0] == 1000

        # Rank candidates by cosine distance (smaller distance -> better rank).
        i2t_rank = rankdata(pairwise_distances(img_emb, txt_embs, metric='cosine', n_jobs=8), axis=1)[0]
        t2i_rank = rankdata(pairwise_distances(txt_emb, img_embs, metric='cosine', n_jobs=8), axis=1)[0]

        # The true match is the last candidate, so its rank is the metric.
        i2t_ranks.append(i2t_rank[-1])
        t2i_ranks.append(t2i_rank[-1])

        i2t_rank_dict[file_key] = i2t_rank
        t2i_rank_dict[file_key] = t2i_rank

    assert len(i2t_ranks) == len(t2i_ranks) == 1000
    print(f"Random split, mode={mode} i2t rank: ", sum(i2t_ranks) / len(i2t_ranks))
    print(f"Random split, mode={mode} t2i rank: ", sum(t2i_ranks) / len(t2i_ranks))
    # for k in i2t_rank_dict.keys():
    #     print(k, i2t_rank_dict[k])
    #     print('------------------')
    #     break
    return i2t_rank_dict # for computing the NDCG scores
374
+
375
+
376
def read_relevance_scores(anno_path="anno_random_test_obvi_ids.json", gpt_path="chatgpt_similarity_score_test_direct_combined.json"):
    """Load ChatGPT relevance scores and fill in defaults.

    Missing (anchor, candidate) pairs get relevance 0.0, and each anchor's
    self-match is forced to 1.0.
    """
    gpt_scores = json.load(open(gpt_path))
    data = json.load(open(anno_path))

    # add_missing_relevance_scores
    for anchor in tqdm.tqdm(data, total=len(data)):
        for cand in data[anchor]:
            gpt_scores[anchor].setdefault(cand, 0.0)
            if cand == anchor:
                gpt_scores[anchor][cand] = 1.0

    return gpt_scores
390
+ # %%
391
+
392
def compute_ndcg(ranks, scores, k=3):
    """Compute NDCG@k from predicted ranks and ground-truth relevance scores.

    Example
    -------
    ranks = [5, 1, 4, 2, 3]
    scores = [0.1, 0.5, 0.3, 0.95, 1.0]

    Parameters
    ----------
    ranks : sequence
        Predicted ranks (1-based, e.g. from ``scipy.stats.rankdata``),
        aligned with ``scores``.
    scores : sequence
        Relevance scores for the same items.
    k : int
        Truncation depth.

    Returns
    -------
    float
        NDCG in [0, 1]; 0.0 when the top-k relevance mass is zero
        (previously this case raised ZeroDivisionError).
    """
    rank_score_tuple = list(zip(ranks, scores))

    # Take the k items with the highest ground-truth relevance.
    top_k = sorted(rank_score_tuple, key=lambda x: x[1], reverse=True)[:k]

    # DCG discounts each relevance score by the model's predicted rank.
    dcg = sum(score / np.log2(rank + 1) for rank, score in top_k)

    # Ideal DCG places the same scores at positions 1..k.
    ideal_dcg = sum(score / np.log2(idx + 2) for idx, (_, score) in enumerate(top_k))

    # BUG FIX: all-zero relevance made ideal_dcg == 0 and crashed; define
    # NDCG as 0.0 in that degenerate case (matches the app.py copy).
    if ideal_dcg == 0:
        return 0.0
    return dcg / ideal_dcg
407
+
408
+
409
def compute_ndcg_score_per_mode(pred_rank_dict, gpt_rel_scores, mode='spec', split='test', k=200):
    """Average the NDCG@k over all queries of one annotation mode/split.

    Args:
        pred_rank_dict: query key -> array of predicted ranks, aligned with the
            candidate list stored in the annotation file for that key.
        gpt_rel_scores: query key -> {candidate key: relevance}, e.g. the output
            of read_relevance_scores().
        mode: annotation mode used to pick the annotation file (e.g. 'spec').
        split: data split name used in the annotation file path.
        k: NDCG cut-off passed through to compute_ndcg.

    Returns:
        The mean NDCG score over all keys in ``pred_rank_dict``.
    """
    # Candidate lists per query; requires the ROOT env var to point at the data root.
    data = json.load(open(os.path.join(os.environ['ROOT'],f"data/annotations/13_05/anno_random_{split}_{mode}_ids.json")))

    ndcg_scores = []

    for key in tqdm.tqdm(pred_rank_dict.keys(), total=len(pred_rank_dict.keys())):
        # Ground-truth relevances in the same candidate order as the predicted ranks.
        gpt_scores_for_key = [gpt_rel_scores[key][cand_key] for cand_key in data[key]]
        pred_rank_for_key = pred_rank_dict[key]

        ndcg_score = compute_ndcg(pred_rank_for_key, gpt_scores_for_key, k=k)
        ndcg_scores.append(ndcg_score)

    avg_ndcg_score = sum(ndcg_scores) / len(ndcg_scores)
    print(f"Random split, mode={mode} ndcg score: ", avg_ndcg_score)
    return avg_ndcg_score
424
+
425
+
426
+ # %%
427
def main():
    """Run test-set retrieval evaluation: embed all images and texts with the
    fine-tuned CLIP model, compute the full image x text cosine-distance
    matrix, and dump the pairwise scores to two CSV files.

    Requires the ROOT env var to point at the project data root, a GPU if
    config.device == 'cuda', and the checkpoint/config under results/.
    """
    # %%
    ## Load Model
    config_path= os.path.join(os.environ['ROOT'],"results/config.yaml")
    model_path= os.path.join(os.environ['ROOT'],"results/model_epoch3.pth")
    # %%
    print("Loading config from:", config_path)
    config = OmegaConf.load(config_path)
    #print(OmegaConf.to_yaml(config))
    # %%

    # load checkpoint (to CPU first; moved to config.device after load)
    checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
    print("Loaded model from:", model_path)

    clip_model, _ = clip.load(config.clip_model, jit=False)
    model = Model(clip_model, config)
    model.load_state_dict(checkpoint['model_state_dict'])

    model = model.to(config.device)
    model = model.eval()
    model = model.float()
    logit_scale = model.clip_model.logit_scale.exp()

    image_path = os.path.join(os.environ['ROOT'], "data/eval_test_image.json")
    text_path = os.path.join(os.environ['ROOT'], "data/eval_test_text.json")

    data_loader_image = get_image_data_loader(config, image_path, split='test', mode='combined' )
    data_loader_text = get_text_data_loader(config, text_path, split='test', mode='combined' )

    # %%
    # Embed all test captions; key_text_dict maps file_key -> row index in the
    # stacked embedding tensor.
    key_text_dict = {}
    text_tensor_embedding = None
    with torch.no_grad():
        for i, d in tqdm.tqdm(enumerate(data_loader_text), total=len(data_loader_text)):
            # print("d", d['file_key'])

            # with torch.amp.autocast(device_type=config.device, dtype=torch.float16):
            text_tensor_out, text_cls_out = model.var_txt_forward(d['caption'].to(config.device))
            #print("text_tensor_out", text_tensor_out[0].shape)

            # NOTE(review): `== None` works here but `is None` is the idiomatic
            # (and safer) check for a possibly-tensor variable.
            if text_tensor_embedding == None:
                text_tensor_embedding = text_cls_out
            else:
                text_tensor_embedding = torch.cat((text_tensor_embedding, text_cls_out), 0)

            # NOTE(review): the index assumes every batch has the same size as
            # the current one; a short final batch would mis-index — verify the
            # loader uses drop_last or a divisible dataset size.
            for j,key in enumerate(d['file_key']):
                key_text_dict[key] = int(i*len(d['file_key']) +j)

    # %%
    # Same pass over the images.
    key_image_dict = {}
    image_tensor_embedding = None
    with torch.no_grad():
        for i, d in tqdm.tqdm(enumerate(data_loader_image), total=len(data_loader_image)):
            image_tensor_out, img_cls_out = model.var_img_forward(d['image'].to(config.device))

            if image_tensor_embedding == None:
                image_tensor_embedding = img_cls_out
            else:
                image_tensor_embedding = torch.cat((image_tensor_embedding, img_cls_out), 0)

            for j,key in enumerate(d['file_key']):
                key_image_dict[key] = int(i*len(d['file_key']) +j)

    # Row index -> file key (dicts preserve insertion order).
    idx2img = {idx: k for idx, k in enumerate(key_image_dict)}
    idx2text = {idx: k for idx, k in enumerate(key_text_dict)}
    # %%
    image_tensor_embedding = image_tensor_embedding.to('cpu')
    text_tensor_embedding = text_tensor_embedding.to('cpu')

    # %%
    # Full pairwise cosine DISTANCE matrix (lower = more similar).
    similarity_matrix = pairwise_distances(image_tensor_embedding, text_tensor_embedding, metric='cosine', n_jobs=8)

    # %%
    results_pair_dict = {}
    ## put into matrix
    # NOTE(review): 2000 is hard-coded; presumably the test set size — confirm
    # it matches len(idx2img) / len(idx2text).
    for i in range (2000):
        for j in range (2000):
            results_pair_dict[str(idx2img[i])+':'+str(idx2text[j])] = float(similarity_matrix[i][j])

    # %%
    # Split the 4M-entry dict in half so each output file stays a manageable size.
    results_pair_dict1 = {}
    results_pair_dict2 = {}
    len_ = int(len(results_pair_dict)/2)
    for j, key in enumerate(results_pair_dict):
        if j <= len_:
            results_pair_dict1[key] = results_pair_dict[key]
        else:
            results_pair_dict2[key] = results_pair_dict[key]

    # %%
    # with open(os.path.join(os.environ['ROOT'],'results_pair_dict1.json'), 'w', encoding='utf-8') as f:
    #     json.dump(results_pair_dict1, f, ensure_ascii=False, indent=4)
    # with open(os.path.join(os.environ['ROOT'],'results_pair_dict2.json'), 'w', encoding='utf-8') as f:
    #     json.dump(results_pair_dict2, f, ensure_ascii=False, indent=4)
    df = pd.DataFrame(results_pair_dict1.items(), columns=['key_pair','score'])
    df.to_csv(os.path.join(os.environ['ROOT'],'results_pair_dict1.csv'))
    df = pd.DataFrame(results_pair_dict2.items(), columns=['key_pair','score'])
    df.to_csv(os.path.join(os.environ['ROOT'],'results_pair_dict2.csv'))

# %%
if __name__ == "__main__":
    main()

# %%
data/chatgpt_similarity_score_test_direct.json ADDED
The diff for this file is too large to render. See raw diff
 
data/chatgpt_similarity_score_test_indirect.json ADDED
The diff for this file is too large to render. See raw diff
 
data/eval_test_image.json ADDED
The diff for this file is too large to render. See raw diff
 
data/eval_test_text.json ADDED
The diff for this file is too large to render. See raw diff
 
data/images/.DS_Store ADDED
Binary file (6.15 kB). View file
 
data/key_pair.json ADDED
The diff for this file is too large to render. See raw diff
 
data/preview_image.jpeg ADDED
data/random_sample_test_direct_ids.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86c25ef7d12166f6c27fe725004a8857ddfcd4dbc8cfafd14722c189c13efbf5
3
+ size 27110039
data/random_sample_test_indirect_ids.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74851fc1e326e6c838b868062949f2c3daa6a1c3c35f6dae87549caf86acd39d
3
+ size 27109999
models/__init__.py ADDED
File without changes
models/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (154 Bytes). View file
 
models/__pycache__/fused_model.cpython-38.pyc ADDED
Binary file (16.1 kB). View file
 
models/fused_model.py ADDED
@@ -0,0 +1,544 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ import math
3
+ from typing import Optional, Tuple, Union
4
+ import numpy as np
5
+
6
+ import torch
7
+ from torch import nn
8
+ import torch.nn.functional as F
9
+ from torch.nn import CrossEntropyLoss
10
+
11
+ from transformers.activations import ACT2FN
12
+ from einops import rearrange
13
+ from transformers.models.t5.configuration_t5 import T5Config
14
+ from transformers.modeling_utils import ModuleUtilsMixin
15
+
16
+ from einops import rearrange, reduce
17
+
18
+
19
class FeedForward(nn.Module):
    """Pre-LayerNorm position-wise feed-forward sublayer with residual connection.

    Computes x + dropout(wo(dropout(gelu(wi(layer_norm(x)))))).
    Submodule names (wi/wo/dropout/act/layer_norm) follow the T5 convention
    and are part of the checkpoint state_dict layout — do not rename them.
    """

    def __init__(self, config: T5Config):
        super().__init__()
        self.wi = nn.Linear(config.d_model, config.d_ff, bias=False)
        self.wo = nn.Linear(config.d_ff, config.d_model, bias=False)
        self.dropout = nn.Dropout(config.dropout_rate)
        self.act = ACT2FN["gelu"]
        self.layer_norm = nn.LayerNorm(config.d_model)

    def forward(self, x):
        """Apply the FFN to normalized input and add the residual."""
        normed = self.layer_norm(x)
        expanded = self.dropout(self.act(self.wi(normed)))
        projected = self.wo(expanded)
        return x + self.dropout(projected)
32
+
33
+
34
class Attention(nn.Module):
    """Multi-head attention in the T5 style: no scaling before softmax and an
    optional learned relative-position bias shared across layers.

    Used both as self-attention (x_kv=None) and cross-attention (x_kv given).
    """

    def __init__(self, config: T5Config, has_relative_attention_bias=False):
        super().__init__()
        self.is_decoder = config.is_decoder
        self.has_relative_attention_bias = has_relative_attention_bias
        self.relative_attention_num_buckets = config.relative_attention_num_buckets
        self.relative_attention_max_distance = config.relative_attention_max_distance
        self.d_model = config.d_model
        self.key_value_proj_dim = config.d_kv
        self.n_heads = config.num_heads
        self.dropout = config.dropout_rate
        self.inner_dim = self.n_heads * self.key_value_proj_dim

        # Mesh TensorFlow initialization to avoid scaling before softmax
        self.q = nn.Linear(self.d_model, self.inner_dim, bias=False)
        self.k = nn.Linear(self.d_model, self.inner_dim, bias=False)
        self.v = nn.Linear(self.d_model, self.inner_dim, bias=False)
        self.o = nn.Linear(self.inner_dim, self.d_model, bias=False)

        if self.has_relative_attention_bias:
            # One bias scalar per (bucket, head); only the first layer of a
            # Stack owns this table — later layers reuse the computed bias.
            self.relative_attention_bias = nn.Embedding(self.relative_attention_num_buckets, self.n_heads)

    @staticmethod
    def _relative_position_bucket(relative_position, bidirectional=True, num_buckets=32, max_distance=128):
        """
        memory_position - query_position -> bucket_idx.
        If bidirectional=False, then positive relative positions are invalid.
        We use smaller buckets for small absolute relative_position
        and larger buckets for larger absolute relative_positions.
        * All relative positions >=max_distance map to the same bucket.
        * All relative positions <=-max_distance map to the same bucket.
        This should allow for more graceful generalization to longer sequences than the model has been trained on
        Args:
            relative_position: an int32 Tensor
            bidirectional: a boolean - whether the attention is bidirectional
            num_buckets: an integer
            max_distance: an integer
        Returns:
            a Tensor with the same shape as relative_position, containing int32 values in the range [0, num_buckets)
        """
        relative_buckets = 0
        if bidirectional:
            num_buckets //= 2
            relative_buckets += (relative_position > 0).to(torch.long) * num_buckets
            relative_position = torch.abs(relative_position)
        else:
            relative_position = -torch.min(relative_position, torch.zeros_like(relative_position))
        # now relative_position is in the range [0, inf)

        # half of the buckets are for exact increments in positions
        max_exact = num_buckets // 2
        is_small = relative_position < max_exact

        # The other half of the buckets are for logarithmically bigger bins in positions up to max_distance
        relative_position_if_large = max_exact + (torch.log(relative_position.float() / max_exact) / math.log(max_distance / max_exact) *
                                                  (num_buckets - max_exact)).to(torch.long)
        relative_position_if_large = torch.min(relative_position_if_large, torch.full_like(relative_position_if_large, num_buckets - 1))

        relative_buckets += torch.where(is_small, relative_position, relative_position_if_large)
        return relative_buckets

    def compute_bias(self, query_length, key_length, device=None):
        """Compute binned relative position bias"""
        if device is None:
            device = self.relative_attention_bias.weight.device
        context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
        memory_position = torch.arange(key_length, dtype=torch.long, device=device)[None, :]
        relative_position = memory_position - context_position  # shape (query_length, key_length)
        relative_position_bucket = self._relative_position_bucket(
            relative_position,  # shape (query_length, key_length)
            bidirectional=(not self.is_decoder),
            num_buckets=self.relative_attention_num_buckets,
            max_distance=self.relative_attention_max_distance,
        )
        values = self.relative_attention_bias(relative_position_bucket)  # shape (query_length, key_length, num_heads)
        values = values.permute([2, 0, 1]).unsqueeze(0)  # shape (1, num_heads, query_length, key_length)
        return values

    def forward(self, x, mask=None, x_kv=None, pos_bias=None):
        """
        Self-attention (if key_value_states is None) or attention over source sentence (provided by key_value_states).

        Returns (attn_output, pos_bias); the bias (with any mask already added)
        is returned so callers can reuse it in subsequent layers.
        """
        # Input is (batch_size, seq_length, dim)
        # Mask is (batch_size, key_length) (non-causal) or (batch_size, key_length, key_length)
        batch_size, seq_length = x.shape[:2]

        real_seq_length = seq_length
        key_length = real_seq_length if x_kv is None else x_kv.shape[1]

        reshape = lambda states: rearrange(states, 'b s (h d) -> b h s d', h=self.n_heads)
        unshape = lambda states: rearrange(states, 'b h s d -> b s (h d)')

        q = reshape(self.q(x))  # (batch_size, n_heads, seq_length, dim_per_head)
        k = reshape(self.k(x if x_kv is None else x_kv))
        v = reshape(self.v(x if x_kv is None else x_kv))

        # compute scores (no 1/sqrt(d) scaling — T5 convention, see init comment)
        scores = torch.matmul(q, k.transpose(3, 2))

        if pos_bias is None:
            if not self.has_relative_attention_bias:
                pos_bias = torch.zeros((1, self.n_heads, real_seq_length, key_length), device=scores.device, dtype=scores.dtype)
            else:
                pos_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)

            # mask is additive (-inf style); folded into the bias once here.
            if mask is not None:
                pos_bias = pos_bias + mask  # (batch_size, n_heads, seq_length, key_length)

        position_bias_masked = pos_bias
        scores += position_bias_masked
        attn_weights = F.softmax(scores.float(), dim=-1).type_as(scores)  # (B, H, seq_length, key_length)
        attn_weights = F.dropout(attn_weights, p=self.dropout, training=self.training)  # (B, H, seq_length, key_length)

        attn_output = unshape(torch.matmul(attn_weights, v))  # (batch_size, seq_length, dim)
        attn_output = self.o(attn_output)
        return (attn_output, pos_bias)
151
+
152
+
153
class LayerSelfAttention(nn.Module):
    """Pre-norm self-attention sublayer: x + dropout(SelfAttention(LN(x))).

    The submodule name ``SelfAttention`` follows the T5 state_dict convention
    and must not be renamed.
    """

    def __init__(self, config, has_relative_attention_bias=False):
        super().__init__()
        self.SelfAttention = Attention(config, has_relative_attention_bias=has_relative_attention_bias)
        self.layer_norm = nn.LayerNorm(config.d_model)
        self.dropout = nn.Dropout(config.dropout_rate)

    def forward(self, x, mask=None, pos_bias=None):
        """Apply self-attention with a residual connection.

        Returns a (hidden_states, pos_bias) tuple so the position bias computed
        by the first layer can be shared with later layers.
        """
        attn_out, bias = self.SelfAttention(self.layer_norm(x), mask=mask, pos_bias=pos_bias)
        return (x + self.dropout(attn_out), bias)
166
+
167
+
168
class LayerCrossAttention(nn.Module):
    """Pre-norm cross-attention sublayer: x + dropout(Attention(LN(x), x_kv)).

    The submodule name ``EncDecAttention`` follows the T5 state_dict convention
    and must not be renamed.
    """

    def __init__(self, config):
        super().__init__()
        self.EncDecAttention = Attention(config, has_relative_attention_bias=False)
        self.layer_norm = nn.LayerNorm(config.d_model)
        self.dropout = nn.Dropout(config.dropout_rate)

    def forward(self, x, x_kv, mask=None, pos_bias=None):
        """Attend from the normalized decoder states to ``x_kv`` and add the residual.

        Returns a (hidden_states, pos_bias) tuple mirroring LayerSelfAttention.
        """
        attn_out, bias = self.EncDecAttention(self.layer_norm(x), mask=mask, x_kv=x_kv, pos_bias=pos_bias)
        return (x + self.dropout(attn_out), bias)
182
+
183
+
184
class Block(nn.Module):
    """One transformer layer: self-attention, optional cross-attention
    (decoder only, when a context is given), then a feed-forward sublayer."""

    def __init__(self, config, has_relative_attention_bias=False):
        super().__init__()
        self.is_decoder = config.is_decoder
        self.layer = nn.ModuleList()
        self.layer.append(LayerSelfAttention(config, has_relative_attention_bias=has_relative_attention_bias))
        if self.is_decoder:
            self.layer.append(LayerCrossAttention(config))

        self.layer.append(FeedForward(config))

    def forward(self, x, mask=None, pos_bias=None, context=None, context_mask=None, context_pos_bias=None):
        """Run the layer.

        Returns (hidden_states, pos_bias, context_pos_bias); the biases are
        passed back in by the caller for subsequent layers (context_pos_bias is
        None when no cross-attention ran).
        """

        self_attention_outputs = self.layer[0](x, mask=mask, pos_bias=pos_bias)
        hidden_states = self_attention_outputs[0]

        # Cross-attend only when configured as a decoder AND a context is supplied.
        do_cross_attention = self.is_decoder and context is not None
        if do_cross_attention:

            cross_attention_outputs = self.layer[1](
                hidden_states,
                x_kv=context,
                mask=context_mask,
                pos_bias=context_pos_bias,
            )
            hidden_states = cross_attention_outputs[0]

        # Apply Feed Forward layer
        hidden_states = self.layer[-1](hidden_states)

        pos_bias = self_attention_outputs[1]
        context_pos_bias = cross_attention_outputs[1] if do_cross_attention else None

        return (hidden_states, pos_bias, context_pos_bias)
219
+
220
+
221
class Stack(nn.Module):
    """A stack of transformer Blocks with shared relative-position biases.

    Acts as an encoder or decoder depending on ``is_decoder``; may own a token
    embedding table (``has_embedding``) when fed ``input_ids`` instead of
    precomputed hidden states.
    """

    def __init__(self, config, is_decoder=True, has_embedding=False, generate_causal_mask=False):
        super().__init__()
        self.config = config
        if has_embedding:
            self.embed_tokens = nn.Embedding(config.vocab_size, config.d_model)

        self.is_decoder = is_decoder
        # Fixed dtype used when building additive attention masks.
        self.dtype = torch.float32

        self.generate_causal_mask = generate_causal_mask

        # Only the first block owns the relative-attention-bias table; the bias
        # it computes is reused by all later blocks.
        self.block = nn.ModuleList([Block(config, has_relative_attention_bias=bool(i == 0)) for i in range(config.num_layers)])
        self.final_layer_norm = nn.LayerNorm(config.d_model)
        self.dropout = nn.Dropout(config.dropout_rate)

    def forward(
        self,
        input_ids=None,
        dec_hidden_states=None,
        enc_hidden_states=None,
        dec_attention_mask=None,
        enc_attention_mask=None,
    ):
        """Run the stack; exactly one of input_ids / dec_hidden_states must be given.

        Returns a 1-tuple (hidden_states,) of shape [B, L, d_model].
        """
        input_shape = input_ids.size() if input_ids is not None else dec_hidden_states.shape[:-1]
        batch_size, seq_length = input_shape

        if input_ids is not None:
            input_ids = input_ids.view(-1, input_shape[-1])
            inputs_embeds = self.embed_tokens(input_ids)
        else:
            inputs_embeds = dec_hidden_states

        # required mask seq length can be calculated via length of past
        mask_seq_length = seq_length

        # Default masks: attend everywhere.
        if dec_attention_mask is None:
            dec_attention_mask = torch.ones(batch_size, mask_seq_length, device=inputs_embeds.device)
        if self.is_decoder and enc_attention_mask is None and enc_hidden_states is not None:
            encoder_seq_length = enc_hidden_states.shape[1]
            enc_attention_mask = torch.ones(batch_size, encoder_seq_length, device=inputs_embeds.device, dtype=torch.long)

        # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
        # ourselves in which case we just need to make it broadcastable to all heads.
        extended_attention_mask = self.get_extended_attention_mask(dec_attention_mask, input_shape)

        # If a 2D or 3D attention mask is provided for the cross-attention
        # we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]
        if self.is_decoder and enc_hidden_states is not None:
            encoder_batch_size, encoder_sequence_length, _ = enc_hidden_states.size()
            encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)
            if enc_attention_mask is None:
                enc_attention_mask = torch.ones(encoder_hidden_shape, device=inputs_embeds.device)
            encoder_extended_attention_mask = self.invert_attention_mask(enc_attention_mask)
        else:
            encoder_extended_attention_mask = None

        pos_bias = None
        context_pos_bias = None

        hidden_states = self.dropout(inputs_embeds)

        for i, layer_module in enumerate(self.block):

            layer_outputs = layer_module(
                hidden_states,
                mask=extended_attention_mask,  # [1, 1, 1, 1 ] [B, L]
                pos_bias=pos_bias,
                context=enc_hidden_states,
                context_mask=encoder_extended_attention_mask,
                context_pos_bias=context_pos_bias,
            )

            # layer_outputs is a tuple with:
            # (hidden_states, None placeholder, pos_bias, context_pos_bias)
            layer_outputs = layer_outputs[:1] + (None,) + layer_outputs[1:]

            hidden_states, present_key_value_state = layer_outputs[:2]  # [B, L, D], None

            # We share the position biases between the layers - the first layer store them
            pos_bias = layer_outputs[2]  # [B, H, L, L]
            if self.is_decoder and enc_hidden_states is not None:
                context_pos_bias = layer_outputs[3]

        hidden_states = self.final_layer_norm(hidden_states)
        hidden_states = self.dropout(hidden_states)

        return (hidden_states,)

    def invert_attention_mask(self, attention_mask):
        """
        Input: 1 for attend, 0 for masked/ignored
        Output: 0 for attend, -1e30 for masked/ignored.
        Then we can add it to the attention logits.
        [B, L] -> [B, 1, 1, L]
        [B, L, L] -> [B, 1, L, L]
        """
        if attention_mask.dim() == 3:
            extended_attention_mask = attention_mask[:, None, :, :]
        if attention_mask.dim() == 2:
            extended_attention_mask = attention_mask[:, None, None, :]

        extended_attention_mask = extended_attention_mask.to(dtype=self.dtype)  # fp16 compatibility
        extended_attention_mask = (1.0 - extended_attention_mask) * torch.finfo(self.dtype).min

        return extended_attention_mask

    def get_extended_attention_mask(self, attention_mask, input_shape, device=None, dtype=None):
        """
        Makes broadcastable attention and causal masks so that future and masked tokens are ignored.
        attention_mask: 1 for attend, 0 for masked/ignored
        Return: The extended attention mask: 0 for attend, -1e30 for masked/ignored
        [B, L] -> [B, 1, 1, L]
        [B, L, L] -> [B, 1, L, L]
        """
        dtype = dtype if dtype else attention_mask.dtype

        # If input [B, query_length, key_length] -> [B, 1, query_length, key_length]
        if attention_mask.dim() == 3:
            extended_attention_mask = attention_mask[:, None, :, :]
        elif attention_mask.dim() == 2:
            # Provided a padding mask of dimensions [batch_size, seq_length]
            # - if the model is a decoder, apply a causal mask in addition to the padding mask
            # - if the model is an encoder, make the mask broadcastable to [batch_size, num_heads, seq_length, seq_length]
            if self.config.is_decoder and self.generate_causal_mask:
                extended_attention_mask = ModuleUtilsMixin.create_extended_attention_mask_for_decoder(input_shape, attention_mask, device)
            else:
                extended_attention_mask = attention_mask[:, None, None, :]
        else:
            raise ValueError(f"Wrong shape for input_ids (shape {input_shape}) or attention_mask (shape {attention_mask.shape})")

        # Input: valid = 1, padding = 0
        # Output: valid = 0, padding = -1e30
        # => then we can add it to the attention logits
        extended_attention_mask = extended_attention_mask.to(dtype=dtype)  # fp16 compatibility
        extended_attention_mask = (1.0 - extended_attention_mask) * torch.finfo(dtype).min
        return extended_attention_mask
358
+
359
+
360
class Model(torch.nn.Module):
    """CLIP backbone with optional T5-style fusion decoders on top.

    Produces normalized image/text CLS embeddings for contrastive retrieval,
    plus optional image-text-matching (ITM) heads and a generation head,
    depending on the config flags.
    """

    def __init__(self, clip_model, config):
        super().__init__()
        self.clip_model = clip_model
        self.config = config

        if self.config.has_extra_txt_decoder:
            self.txt_decoder = Stack(config.extra_decoder)
            self.itm_txt_head = torch.nn.Linear(config.extra_decoder.d_model, 2)

        if self.config.has_extra_img_decoder:
            self.img_decoder = Stack(config.extra_decoder)
            self.itm_img_head = torch.nn.Linear(config.extra_decoder.d_model, 2)

        if self.config.has_extra_mix_decoder:
            self.mix_decoder = Stack(config.extra_decoder)
            self.mix_itm_head = torch.nn.Linear(config.extra_decoder.d_model, 2)

        if self.config.has_extra_gen_decoder:
            self.gen_decoder = Stack(config.extra_decoder, has_embedding=True, generate_causal_mask=True)
            self.gen_head = torch.nn.Linear(config.extra_decoder.d_model, config.vocab_size)

        # NOTE(review): redundant — self.config was already assigned above.
        self.config = config

    def img_forward(self, x: torch.Tensor):  # [N, 3, 224, 224]
        """CLIP ViT forward returning the full projected token sequence and the CLS token."""
        x = self.clip_model.visual.conv1(x)  # shape = [*, width, grid, grid]
        x = x.reshape(x.shape[0], x.shape[1], -1)  # shape = [*, width, grid ** 2]
        x = x.permute(0, 2, 1)  # shape = [*, gri d ** 2, width]
        x = torch.cat(
            [self.clip_model.visual.class_embedding.to(x.dtype) + torch.zeros(x.shape[0], 1, x.shape[-1], dtype=x.dtype, device=x.device), x],
            dim=1)  # shape = [*, grid ** 2 + 1, width]
        x = x + self.clip_model.visual.positional_embedding.to(x.dtype)
        x = self.clip_model.visual.ln_pre(x)

        x = x.permute(1, 0, 2)  # NLD -> LND
        x = self.clip_model.visual.transformer(x)
        x = x.permute(1, 0, 2)  # LND -> NLD
        x = self.clip_model.visual.ln_post(x)  # [NLD]

        # Project ALL tokens (not just CLS) into the shared embedding space.
        if self.clip_model.visual.proj is not None:
            proj = self.clip_model.visual.proj[None, :, :]
            x = (x @ proj)

        cls_token = x[:, 0, :]
        return x, cls_token

    def txt_forward(self, text):
        """CLIP text-encoder forward returning all projected tokens and the EOT token."""
        dtype = self.clip_model.dtype
        x = self.clip_model.token_embedding(text).type(dtype)  # [batch_size, n_ctx, d_model]

        x = x + self.clip_model.positional_embedding.type(dtype)
        x = x.permute(1, 0, 2)  # NLD -> LND
        x = self.clip_model.transformer(x)
        x = x.permute(1, 0, 2)  # LND -> NLD
        x = self.clip_model.ln_final(x).type(dtype)

        proj = self.clip_model.text_projection[None, :, :]
        x = (x @ proj)

        # take features from the eot embedding (eot_token is the highest number in each sequence)
        eot = x[torch.arange(x.shape[0]), text.argmax(dim=-1)]
        return x, eot  # [NLD]

    def var_img_forward(self, image):
        """Image forward that also accepts a 5-D [N, 2, C, H, W] image pair
        (features are averaged); the CLS token is L2-normalized."""
        if len(image.shape) == 5:
            img_features1, img_token1 = self.img_forward(image[:, 0, ...])
            img_features2, img_token2 = self.img_forward(image[:, 1, ...])
            img_token = (img_token1 + img_token2) / 2
            img_features = (img_features1 + img_features2) / 2
        else:
            img_features, img_token = self.img_forward(image)
        img_token = img_token / img_token.norm(dim=-1, keepdim=True)
        return img_features, img_token

    def var_txt_forward(self, text):
        """Text forward with an L2-normalized EOT token."""
        txt_features, txt_token = self.txt_forward(text)
        txt_token = txt_token / txt_token.norm(dim=-1, keepdim=True)
        return txt_features, txt_token

    def get_device(self):
        """Device of the model's parameters."""
        return next(self.parameters()).device

    def get_features(self, image=None, text_ids=None):
        """Encode image and/or text; returns a dict of features, CLS/EOT tokens, and masks."""
        outputs = {}
        if image is not None:
            img_features, img_token = self.var_img_forward(image)
            outputs['img_features'] = img_features
            outputs['img_token'] = img_token
            # All image tokens are valid (no padding).
            outputs['img_mask'] = torch.ones_like(img_features[:, :, 0])
        if text_ids is not None:
            txt_features, txt_token = self.var_txt_forward(text_ids)
            outputs['txt_features'] = txt_features
            outputs['txt_token'] = txt_token
            # Token id 0 is treated as padding.
            outputs['txt_mask'] = (text_ids != 0).to(txt_features.dtype)
        return outputs

    def get_prediction(self, img_features, txt_features, img_mask=None, txt_mask=None, decoder="txt_decoder", **kwargs):
        """Run one of the ITM fusion decoders and return its logits/probabilities."""
        outputs = {}
        if decoder == 'txt_decoder':
            hidden_states = self.txt_decoder(
                dec_hidden_states=txt_features,
                enc_hidden_states=img_features,
                enc_attention_mask=img_mask,
                dec_attention_mask=txt_mask,
            )
            # ITM classification from the first token's fused representation.
            outputs['itm_txt_logits'] = self.itm_txt_head(hidden_states[0][:, 0, :])
            outputs['itm_txt_probs'] = torch.softmax(outputs['itm_txt_logits'], dim=-1)

        if decoder == 'img_decoder':
            hidden_states = self.img_decoder(
                dec_hidden_states=img_features,
                enc_hidden_states=txt_features,
                enc_attention_mask=txt_mask,
                dec_attention_mask=img_mask,
            )
            outputs['itm_img_logits'] = self.itm_img_head(hidden_states[0][:, 0, :])
            outputs['itm_img_probs'] = torch.softmax(outputs['itm_img_logits'], dim=-1)
        return outputs

    def forward(self, image, text, itm_text=None, itm_labels=None, gen_inputs=None, gen_labels=None):  # , gen_inputs, gen_labels, **kwargs):
        """Training forward: contrastive tokens plus optional ITM/generation logits.

        NOTE(review): itm_text defaults to None but is passed to
        var_txt_forward unconditionally below — calling without itm_text would
        fail; presumably training always supplies it. Verify against callers.
        """
        img_features, img_token = self.var_img_forward(image)
        txt_features, txt_token = self.var_txt_forward(text)

        itm_txt_features, _ = self.var_txt_forward(itm_text)
        itm_txt_mask = (itm_text != 0).to(itm_txt_features.dtype)

        outputs = dict(
            img_token=img_token,
            txt_token=txt_token,
            img_features=img_features,
            txt_features=txt_features,
        )
        if self.config.has_extra_txt_decoder and itm_text is not None:
            itm_img_features = img_features
            itm_txt_states = self.txt_decoder(
                dec_hidden_states=itm_txt_features,
                enc_hidden_states=itm_img_features,
                enc_attention_mask=None,
                dec_attention_mask=itm_txt_mask,
            )
            outputs['itm_txt_logits'] = self.itm_txt_head(itm_txt_states[0][:, 0])

        if self.config.has_extra_img_decoder and itm_text is not None:
            itm_img_features = img_features
            itm_img_states = self.img_decoder(
                dec_hidden_states=itm_img_features,
                enc_hidden_states=itm_txt_features,
                enc_attention_mask=itm_txt_mask,
                dec_attention_mask=None,
            )
            outputs['itm_img_logits'] = self.itm_img_head(itm_img_states[0][:, 0])

        if self.config.has_extra_mix_decoder:
            pass

        if self.config.has_extra_gen_decoder:
            # NOTE(review): Stack.forward has no `labels` parameter — this call
            # would raise TypeError if has_extra_gen_decoder were enabled
            # (it is false in the shipped results/config.yaml).
            gen_features = self.gen_decoder(
                input_ids=gen_inputs,
                enc_hidden_states=img_features,
                enc_attention_mask=None,
                dec_attention_mask=None,
                labels=gen_labels,
            )
            outputs['gen_logits'] = self.gen_head(gen_features[0])

        return outputs
527
+
528
+
529
if __name__ == "__main__":
    # Smoke test: build the fused model from an example config on CPU.
    import sys
    from omegaconf import OmegaConf
    # NOTE(review): hard-coded developer path; only works on the author's machine.
    sys.path.append("/home/quang/workspace/traffic_var")
    from config.examples import with_decoder_config as config
    config.has_extra_txt_decoder = True
    print(OmegaConf.to_yaml(config))

    import clip

    def get_resolution(model):
        # CLIP ViT models expose input_resolution under .visual; others at the top level.
        return model.visual.input_resolution if hasattr(model, 'visual') else model.input_resolution

    model, _ = clip.load(config.clip_model, jit=False, device="cpu")
    config.img_size = get_resolution(model)
    model = Model(model, config)
models/model.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from easydict import EasyDict as edict
4
+
5
+
6
class Block(nn.Module):
    """Placeholder module that only stores its config.

    Presumably intended for the commented-out i2t/t2i encoder stacks in
    Model.__init__ below; currently unused — confirm before removing.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
11
+
12
+
13
class Model(torch.nn.Module):
    """Plain CLIP dual encoder trained with the symmetric contrastive loss,
    with optional accumulation of embeddings from previous batches."""

    def __init__(self, clip_model, config):
        super().__init__()
        self.clip_model = clip_model
        # if config.i2t_encoder_layers > 0:
        #     self.i2t_encoder = nn.ModuleList([Block(config) for _ in range(config.i2t_encoder_layers)])

        # if config.t2i_encoder_layers > 0:
        #     self.t2i_encoder = nn.ModuleList([Block(config) for _ in range(config.i2t_encoder_layers)])

        self.config = config

    def img_forward(self, x: torch.Tensor):  # [N, 3, 224, 224]
        """CLIP ViT forward; returns all post-norm tokens and the projected CLS token."""
        x = self.clip_model.visual.conv1(x)  # shape = [*, width, grid, grid]
        x = x.reshape(x.shape[0], x.shape[1], -1)  # shape = [*, width, grid ** 2]
        x = x.permute(0, 2, 1)  # shape = [*, gri d ** 2, width]
        x = torch.cat(
            [self.clip_model.visual.class_embedding.to(x.dtype) + torch.zeros(x.shape[0], 1, x.shape[-1], dtype=x.dtype, device=x.device), x],
            dim=1)  # shape = [*, grid ** 2 + 1, width]
        x = x + self.clip_model.visual.positional_embedding.to(x.dtype)
        x = self.clip_model.visual.ln_pre(x)

        x = x.permute(1, 0, 2)  # NLD -> LND
        x = self.clip_model.visual.transformer(x)
        x = x.permute(1, 0, 2)  # LND -> NLD
        x = self.clip_model.visual.ln_post(x)  # [NLD]
        # NOTE(review): ln_post is applied to x above AND again to x[:, 0, :]
        # here, i.e. the CLS token is layer-normed twice — fused_model.py's
        # img_forward applies it only once. Confirm which the checkpoint expects.
        cls_token = self.clip_model.visual.ln_post(x[:, 0, :])

        if self.clip_model.visual.proj is not None:
            cls_token = cls_token @ self.clip_model.visual.proj
        return x, cls_token

    def txt_forward(self, text):
        """CLIP text-encoder forward; returns all tokens and the projected EOT token."""
        dtype = self.clip_model.dtype
        x = self.clip_model.token_embedding(text).type(dtype)  # [batch_size, n_ctx, d_model]

        x = x + self.clip_model.positional_embedding.type(dtype)
        x = x.permute(1, 0, 2)  # NLD -> LND
        x = self.clip_model.transformer(x)
        x = x.permute(1, 0, 2)  # LND -> NLD
        x = self.clip_model.ln_final(x).type(dtype)

        # take features from the eot embedding (eot_token is the highest number in each sequence)
        eot = x[torch.arange(x.shape[0]), text.argmax(dim=-1)] @ self.clip_model.text_projection
        return x, eot  # [NLD]

    def var_img_forward(self, image):
        """Image forward that also accepts a 5-D [N, 2, C, H, W] image pair
        (features averaged); returned CLS token is L2-normalized."""
        if len(image.shape) == 5:
            img_features1, img_token1 = self.img_forward(image[:, 0, ...])
            img_features2, img_token2 = self.img_forward(image[:, 1, ...])
            img_token = (img_token1 + img_token2) / 2
            img_features = (img_features1 + img_features2) / 2
        else:
            img_features, img_token = self.img_forward(image)
        img_token = img_token / img_token.norm(dim=-1, keepdim=True)
        return img_features, img_token

    def var_txt_forward(self, text):
        """Text forward with an L2-normalized EOT token."""
        txt_features, txt_token = self.txt_forward(text)
        txt_token = txt_token / txt_token.norm(dim=-1, keepdim=True)
        return txt_features, txt_token

    def forward(self, image, text, past_img_tokens=None, past_txt_tokens=None):
        """Encode a batch and compute the symmetric CLIP contrastive loss.

        When past_*_tokens are given, the current batch's tokens are appended
        and the loss is computed over the enlarged (accumulated) batch.
        """
        # TODO: aggregate past img and txt tokens
        img_features, img_token = self.var_img_forward(image)
        txt_features, txt_token = self.var_txt_forward(text)
        logit_scale = self.clip_model.logit_scale.exp()

        if past_img_tokens is not None:
            past_img_tokens = torch.cat([past_img_tokens, img_token], dim=0)
            past_txt_tokens = torch.cat([past_txt_tokens, txt_token], dim=0)

            batch_size = past_img_tokens.shape[0]
            # Diagonal targets: the i-th image matches the i-th text.
            ground_truth = torch.arange(batch_size, dtype=torch.long, device=img_token.device)

            logits_for_imgs = logit_scale * past_img_tokens @ past_txt_tokens.t()
            logits_for_txts = logits_for_imgs.t()
            # print(f"past_img_tokens: {past_img_tokens.shape}, past_txt_tokens: {past_txt_tokens.shape}")

            # CLIP Contrastive Learning Loss Function
            # NOTE(review): ground_truth[:batch_size] is the whole tensor; the
            # slice is a no-op kept from an earlier revision.
            loss_img = torch.nn.CrossEntropyLoss()
            loss_txt = torch.nn.CrossEntropyLoss()
            loss = (loss_img(logits_for_imgs, ground_truth[:batch_size]) + loss_txt(logits_for_txts, ground_truth[:batch_size])) / 2
        else:
            batch_size = img_token.shape[0]
            ground_truth = torch.arange(batch_size, dtype=torch.long, device=img_token.device)

            logits_for_imgs = logit_scale * img_token @ txt_token.t()
            logits_for_txts = logits_for_imgs.t()

            # CLIP Contrastive Learning Loss Function
            loss_img = torch.nn.CrossEntropyLoss()
            loss_txt = torch.nn.CrossEntropyLoss()
            loss = (loss_img(logits_for_imgs, ground_truth[:batch_size]) + loss_txt(logits_for_txts, ground_truth[:batch_size])) / 2

        return dict(
            img_token=img_token,
            txt_token=txt_token,
            img_features=img_features,
            txt_features=txt_features,
            loss=loss,
            past_img_tokens=past_img_tokens,
            past_txt_tokens=past_txt_tokens,
        )
results/config.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ exp_name: expl5-3a-bs16-imgtxtdec-ema-aux1
2
+ wandb: 90788c79e1500570b08e5acf283e17df7e0c54b2
3
+ root: ${oc.env:DATA_ROOT}
4
+ overfit: false
5
+ batch_size: 16
6
+ num_workers: 4
7
+ img_size: 336
8
+ rationale_type: 0
9
+ val_rationale_type: 0
10
+ hide_true_bbox: 8
11
+ widescreen_processing: 1
12
+ h_flip: false
13
+ ema_decay: 0.9999
14
+ aux_weight: 1.0
15
+ no_hard_negative_itm: false
16
+ clip_model: ViT-L/14@336px
17
+ has_extra_txt_decoder: true
18
+ has_extra_img_decoder: true
19
+ has_extra_mix_decoder: false
20
+ has_extra_gen_decoder: false
21
+ extra_decoder:
22
+ is_decoder: true
23
+ vocab_size: 1000
24
+ d_ff: 768
25
+ d_kv: 64
26
+ d_model: 768
27
+ dropout_rate: 0.1
28
+ num_heads: 12
29
+ num_layers: 2
30
+ relative_attention_max_distance: 128
31
+ relative_attention_num_buckets: 32
32
+ warmup: 1000
33
+ init_from: ''
34
+ lr: 1.0e-05
35
+ n_epochs: 15
36
+ save_every: 0
37
+ early_stop: 5
38
+ val_stat: loss
39
+ device: cuda
40
+ use_multi: false
41
+ local_rank: 0
42
+ run_file: /home/acd13872jh/workspace/traffic_var/results/.runinfo/.sh
results/model_epoch3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a45011f16c569075fcfab80c3f0e6df273a825dbad2ebeca65a438a57a95977f
3
+ size 251753353
results/results_pair_dict.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d3fa8165cf47a2562a3aa534880df625caf3705a59b5f5b221ba40f37a524fb
3
+ size 303815155
results_pair_dict1.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a469b8b91ebec66d55cc3389731e3e9f7e11111f112f13a06ea2e668f573309
3
+ size 151976036
results_pair_dict2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59e06a5e308a71fd97b3f2f87439fde8fbac2c2834896a3ac5c54868ec9c480f
3
+ size 151839121