import json
import os
from PIL import Image
import numpy as np
from pycocotools.mask import encode, decode, frPyObjects
from tqdm import tqdm
import copy
from natsort import natsorted

if __name__ == '__main__':
    root_path = '/data/work2-gcp-europe-west4-a/yuqian_fu/Ego/data_segswap'
    # splits = ['trainval', 'test-dev']  # we only do val evaluation
    # annotation_path = os.path.join(root_path, f'2017/{splits[0]}/Annotations/480p')
    # image_path = os.path.join(root_path, f'2017/{splits[0]}/JPEGImages/480p')
    # set_path = os.path.join(root_path, f'2017/{splits[0]}/ImageSets/2017/val.txt')
    save_path = os.path.join(root_path, 'egoexo_val_psalm.json')

    # val_set = []
    # with open(set_path, 'r') as f:
    #     for line in f:
    #         val_set.append(line.strip())

    # Changed: load the take list from the SegSwap split file instead of the DAVIS image sets.
    split_path = "/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/SegSwap/data/split.json"
    with open(split_path, "r") as fp:
        data_split = json.load(fp)
    # val_set = data_split["val"]
    val_set = ["51fc36b3-e769-4617-b087-3826b280cad3"]

    new_img_id = 0
    egoexo_dataset = []

    '''
    build_DAVIS.py processes the first frame of each video first; the unique_instances and the
    height/width taken from that first frame are reused for every later frame of the video.
    Note that unique_instances holds all pixel class labels of the first frame: if a later frame
    of the video contains a pixel class that is not in unique_instances, an error is raised.
    '''
    for val_name in tqdm(val_set):
        # Root path holding the cameras (different viewpoints) of this take.
        vid_root_path = os.path.join(root_path, val_name)
        anno_path = os.path.join(vid_root_path, "annotation.json")
        with open(anno_path, 'r') as fp:
            annotations = json.load(fp)

        # Collect all objects annotated in this take.
        objs = list(annotations["masks"].keys())
        print(len(objs))
        print(f"objs:{objs}")
        # Map object names to ids, e.g. "cook": 1; ids start from 1 to keep 0 for the background.
        # TODO: check whether this should be restricted to the objects in obj_ref.
        coco_id_to_cont_id = {coco_id: cont_id + 1 for cont_id, coco_id in enumerate(objs)}

        # Separate the cameras.
        valid_cams = os.listdir(vid_root_path)
        # This line is required: annotation.json is not a camera directory.
        valid_cams.remove("annotation.json")
        # Sort the cameras so that the one whose name starts with 01 comes first;
        # cameras with smaller indices tend to cover more objects.
        valid_cams = natsorted(valid_cams)
        print(valid_cams)
        ego_cams = []
        exo_cams = []
        for vc in valid_cams:
            if 'aria' in vc:
                ego_cams.append(vc)
            else:
                exo_cams.append(vc)
        ego = ego_cams[0]
        exo = exo_cams[0]
        # print(ego, exo)

        # Ego / exo camera paths.
        vid_ego_path = os.path.join(vid_root_path, ego)
        vid_exo_path = os.path.join(vid_root_path, exo)

        # first_frame_annotation_path = os.path.join(anno_path, sorted(os.listdir(anno_path))[0])
        # first_frame_annotation_relpath = os.path.relpath(first_frame_annotation_path, root_path)

        # The setting is ego -> exo, so the ego frame serves as the first frame, i.e. the visual prompt.
        # Get the id of the first ego frame.
        # Frame indices cannot simply be taken from os.listdir, because some images in the folder
        # have no annotation; the indices stored in the annotation file are authoritative.
        # Relationship between image indices on disk and indices in the annotation file: the
        # subsample_idx in the image names is a superset of the idx in annotations, i.e. some
        # images have no matching annotation, which would otherwise cause index errors.
        ego_frames = natsorted(os.listdir(vid_ego_path))
        ego_frames = [int(f.split(".")[0]) for f in ego_frames]
        print(f"vid_exo_path:{vid_exo_path}")
        exo_frames = natsorted(os.listdir(vid_exo_path))
        # exo_frames = [int(f.split(".")[0]) for f in exo_frames]
        # exo_frames are kept as strings, e.g. ['1', '2', '3'].
        exo_frames = [f.split(".")[0] for f in exo_frames]

        # First select the objects visible in both cameras as the overall object pool,
        # then decide which of them appear in each frame of a given camera.
        objs_both_have = []
        for obj in objs:
            if ego in annotations["masks"][obj].keys() and exo in annotations["masks"][obj].keys():
                objs_both_have.append(obj)

        # Collect all indices in the ego annotations for the later intersection with exo.
        # Use the ego object with the most annotated frames as the reference frame list;
        # subsequent exo processing is driven by these reference frames, not by individual objects.
        # I.e. take all annotated frames of the object visible longest in the ego view as the reference frames.
        obj_ref = objs_both_have[0]
        for obj in objs_both_have:
            if len(list(annotations["masks"][obj_ref][ego].keys())) < len(list(annotations["masks"][obj][ego].keys())):
                obj_ref = obj
        ego_anno_frames = natsorted(list(annotations["masks"][obj_ref][ego].keys()))
        # TODO: sort frames.
        frames = natsorted(np.intersect1d(ego_anno_frames, exo_frames))
        print(f"frames:{frames}")
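        # Sanity check added for illustration (not in the original script): if none of the
        # reference object's ego-annotated frames also exists as an exo image, the per-frame
        # loop below silently produces no samples for this take.
        assert len(frames) > 0, f"no overlapping annotated frames for take {val_name}"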
        # Inspect which frames each object actually has.
        # ego_anno_frames = natsorted(list(annotations["masks"][objs[0]][ego].keys()))
        # ego_anno_frames2 = natsorted(list(annotations["masks"][objs[1]][ego].keys()))
        # print(f"ego_anno_frames3:{ego_anno_frames3}")

        # TODO: test what the result looks like; by default the keys should ideally stay strings.
        # Use the first ego frame that has an annotation as the reference image.
        all_ref_keys = np.asarray(
            natsorted(annotations["masks"][obj_ref][ego])
        ).astype(np.int64)
        # first_anno_key is the index of the first annotated ego image.
        first_anno_key = str(all_ref_keys[0])
        rgb_name = f"{first_anno_key}.jpg"
        first_frame_img_path = os.path.join(vid_ego_path, rgb_name)
        first_frame_img_relpath = os.path.relpath(first_frame_img_path, root_path)

        # first_frame_annotation_img = Image.open(first_frame_annotation_path)
        # first_frame_annotation = np.array(first_frame_annotation_img)
        # height, width = first_frame_annotation.shape

        # Debug: check whether subsample_idx and the idx in the annotation file are identical.
        # id_sort = natsorted(os.listdir(vid_ego_path))
        # all_ref_keys = natsorted(annotations["masks"][objs[0]][ego])
        # json_output_path1 = '/home/yuqian_fu/Projects/PSALM/annotation456.json'
        # json_output_path2 = '/home/yuqian_fu/Projects/PSALM/annotation123.json'
        # with open(json_output_path1, 'w') as json_file:
        #     json.dump(id_sort, json_file)
        # with open(json_output_path2, 'w') as json_file:
        #     json.dump(all_ref_keys, json_file)
        # print(first_frame_img_id, type(first_frame_img_id))
        # print(objs[0])
        # print(ego)

        # Changed: take the image size from the annotation json instead of the mask image.
        height, width = annotations["masks"][obj_ref][ego][first_anno_key]["size"]
        # print(annotations["masks"][objs[0]][ego].keys())

        # np.unique would hold all pixel classes of the frame (DAVIS-style masks).
        # unique_instances = np.unique(first_frame_annotation)
        # unique_instances = unique_instances[unique_instances != 0]
        coco_format_annotations = []
        # for semi-supervised VOS, we use first frame's GT for input
        # for instance_value in unique_instances:
        #     binary_mask = (first_frame_annotation == instance_value).astype(np.uint8)
        #     segmentation = encode(np.asfortranarray(binary_mask))
        #     # Can be taken directly from the annotation.
        #     segmentation = {
        #         'counts': segmentation['counts'].decode('ascii'),
        #         'size': segmentation['size'],
        #     }
        #     # area may need a decode to compute.
        #     area = binary_mask.sum().astype(float)
        #     coco_format_annotations.append(
        #         {
        #             'segmentation': segmentation,
        #             'area': area,
        #             'category_id': instance_value.astype(float),
        #         }
        #     )

        # Record which objects appear in each frame; here, the ego reference frame.
        # The set of tracked objects is bounded by the objects in the ego reference frame,
        # since the input masks cannot go beyond it.
        obj_list_ego = []
        for obj in objs_both_have:
            if first_anno_key in annotations["masks"][obj][ego].keys():
                obj_list_ego.append(obj)
        print(len(obj_list_ego))
        print(obj_list_ego)

        # Build the object masks for the ego reference frame.
        for obj in obj_list_ego:
            # binary_mask = (first_frame_annotation == instance_value).astype(np.uint8)
            # TODO: check whether the order of counts and size inside segmentation matters.
            segmentation = annotations["masks"][obj][ego][first_anno_key]
            # Can be taken directly from the annotation.
            # segmentation = {
            #     'counts': segmentation['counts'].decode('ascii'),
            #     'size': segmentation['size'],
            # }
            # area may need a decode to compute.
            binary_mask = decode(segmentation)  # TODO: check the binary mask.
            area = binary_mask.sum().astype(float)
            coco_format_annotations.append(
                {
                    'segmentation': segmentation,
                    'area': area,
                    'category_id': float(coco_id_to_cont_id[obj]),
                }
            )

        # Check which cameras each object appears in, since not every object appears in every camera.
        # for obj in objs:
        #     cams = list(annotations["masks"][obj].keys())
        #     print(f"{obj}:{cams}")

        # TODO
        for idx in frames[1:]:
            filename = f"{idx}.jpg"
            sample_img_path = os.path.join(vid_exo_path, filename)
            sample_img_relpath = os.path.relpath(sample_img_path, root_path)

            # Record which objects appear in this exo frame. There are two options:
            # (1) count objects from the full take-level pool objs, which can trigger the
            #     'Found new target not in the first frame' error below;
            # (2) count only objects from the reference-frame pool obj_list_ego.
            obj_list_exo = []
            for obj in obj_list_ego:
                if idx in annotations["masks"][obj][exo].keys():
                    obj_list_exo.append(obj)
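            # Guard added as an assumption (not in the original script): if none of the
            # reference-frame objects is annotated in this exo frame, obj_list_exo is empty and
            # obj_list_exo[0] below would raise an IndexError, so skip the frame instead.
            if len(obj_list_exo) == 0:
                continue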
            height, width = annotations["masks"][obj_list_exo[0]][exo][idx]["size"]
            image_info = {
                'file_name': sample_img_relpath,
                'height': height,
                'width': width,
            }

            # sample_annotation_path = os.path.join(anno_path, annfilename)
            # sample_annotation = np.array(Image.open(sample_annotation_path))
            # sample_unique_instances = np.unique(sample_annotation)
            # sample_unique_instances = sample_unique_instances[sample_unique_instances != 0]
            anns = []
            # Record the category ids present in this frame, so first_frame_anns can be
            # filtered by category_id below.
            sample_unique_instances = [float(coco_id_to_cont_id[obj]) for obj in obj_list_exo]
            print(f"sample_unique_instances:{sample_unique_instances}")
            for obj in obj_list_exo:
                # binary_mask = (sample_annotation == instance_value).astype(np.uint8)
                assert obj in obj_list_ego, 'Found new target not in the first frame'
                segmentation = annotations["masks"][obj][exo][idx]
                binary_mask = decode(segmentation)
                area = binary_mask.sum().astype(float)
                anns.append(
                    {
                        'segmentation': segmentation,
                        'area': area,
                        'category_id': float(coco_id_to_cont_id[obj]),
                    }
                )

            # deepcopy so that trimming the reference-frame annotations to match this exo frame
            # does not modify the originals.
            first_frame_anns = copy.deepcopy(coco_format_annotations)
            # Handle the case where this frame has fewer objects than the reference frame by keeping
            # only the reference annotations whose objects are present here. In practice this frame
            # could also contain more objects than the reference frame; in that case the object pool
            # used when counting the objects of each frame would have to be adjusted.
            if len(anns) < len(first_frame_anns):
                first_frame_anns = [ann for ann in first_frame_anns if ann['category_id'] in sample_unique_instances]
            assert len(anns) == len(first_frame_anns)

            sample = {
                'image': sample_img_relpath,
                'image_info': image_info,
                'anns': anns,
                'first_frame_image': first_frame_img_relpath,
                'first_frame_anns': first_frame_anns,
                'new_img_id': new_img_id,
                'video_name': val_name,
            }
            egoexo_dataset.append(sample)
            new_img_id += 1

    with open(save_path, 'w') as f:
        json.dump(egoexo_dataset, f)
    print(f'Save at {save_path}. Total sample: {len(egoexo_dataset)}')
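    # Optional sanity check, added for illustration (not part of the original conversion):
    # reload the saved json and confirm that each sample pairs an exo frame with the fixed
    # ego reference frame and that its annotation count matches the reference annotations.
    with open(save_path, 'r') as f:
        reloaded = json.load(f)
    for s in reloaded[:3]:
        assert len(s['anns']) == len(s['first_frame_anns'])
        print(s['video_name'], s['image'], s['first_frame_image'], len(s['anns']))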