"""Build a small per-scene subset of the ego-exo validation annotations.

The script reads the frame-level annotation list and a mapping from scene name
to take names, draws a random subset of samples for each scene in ``names``,
prints how many samples were kept per scene, and writes the selection to
``save_path`` as JSON.

Sampling uses the module-level ``random`` generator and is not seeded here, so
every run produces a different selection unless the caller seeds it first.
"""
import json
import random
from collections import defaultdict

json_path = "/work/yuqian_fu/Ego/data_segswap/egoexo_val_framelevel_newprompt_all_instruction.json"
save_path = "/work/yuqian_fu/Ego/data_segswap/check_text_byname_600_select_scene.json"
filter_byname_path = "/work/yuqian_fu/Ego/filter_takes_byname.json"

# An earlier revision of this list also contained "cooking" and "health".
names = ["basketball", "bike", "music", "soccer"]


def sample_up_to(population, k):
    """Return ``k`` random items from ``population`` without replacement.

    When ``population`` holds fewer than ``k`` items, all of them are returned
    (in random order) instead of raising ``ValueError``; the per-scene counts
    printed by ``main`` make such a shortfall visible.
    """
    return random.sample(population, min(k, len(population)))


def group_by_video(datas):
    """Index annotation samples by their ``"video_name"`` field.

    Args:
        datas: Iterable of sample dicts, each carrying a ``"video_name"`` key.

    Returns:
        A dict mapping every video name to the list of its samples, in the
        order they appear in ``datas``.

    Raises:
        KeyError: If a sample has no ``"video_name"`` key.
    """
    grouped = defaultdict(list)
    for data in datas:
        grouped[data["video_name"]].append(data)
    # Hand back a plain dict so later lookups never create empty entries.
    return dict(grouped)


def pooled_sample(by_video, takes_ids, n_takes, n_samples):
    """Draw ``n_takes`` takes, pool all their samples, keep ``n_samples``.

    Args:
        by_video: Mapping produced by ``group_by_video``.
        takes_ids: Take names available for one scene.
        n_takes: Number of takes to draw at random.
        n_samples: Number of samples to keep from the pooled takes.

    Returns:
        A list of at most ``n_samples`` sample dicts.
    """
    chosen_takes = sample_up_to(takes_ids, n_takes)
    pool = [data for take in chosen_takes for data in by_video.get(take, [])]
    return sample_up_to(pool, n_samples)


def per_take_sample(by_video, takes_ids, n_per_take):
    """Keep ``n_per_take`` random samples from every take in ``takes_ids``.

    Args:
        by_video: Mapping produced by ``group_by_video``.
        takes_ids: Take names for one scene; all of them are used, in order.
        n_per_take: Number of samples to keep from each take.

    Returns:
        The concatenated per-take selections.
    """
    picked = []
    for take in takes_ids:
        picked += sample_up_to(by_video.get(take, []), n_per_take)
    return picked


def build_selection(datas, take_names):
    """Select the per-scene subsets.

    Args:
        datas: List of sample dicts, each with a ``"video_name"`` key.
        take_names: Mapping from scene name to the list of its take names;
            must contain "basketball", "bike", "soccer" and "music".

    Returns:
        A dict keyed by scene name (inserted in the order basketball, bike,
        soccer, music) whose values are the selected sample lists.

    Raises:
        KeyError: If a required scene is missing from ``take_names``.
    """
    # One pass over the annotations instead of one full scan per take.
    by_video = group_by_video(datas)
    result = {}

    # basketball / bike: 10 random takes per scene, 200 samples from the pool.
    for name in ("basketball", "bike"):
        result[name] = pooled_sample(by_video, take_names[name], 10, 200)

    # soccer: every take is used, with 50 random samples from each one (the
    # original author's note says soccer has only two takes).
    result["soccer"] = per_take_sample(by_video, take_names["soccer"], 50)

    # music: 5 random takes, 100 samples from the pool.
    result["music"] = pooled_sample(by_video, take_names["music"], 5, 100)

    return result


def main():
    """Load the inputs, build the selection, report counts and save it."""
    with open(json_path, "r", encoding="utf-8") as fp:
        datas = json.load(fp)
    with open(filter_byname_path, "r", encoding="utf-8") as fp:
        take_names = json.load(fp)

    result = build_selection(datas, take_names)

    for name in names:
        print(name, len(result[name]))

    with open(save_path, "w", encoding="utf-8") as fp:
        json.dump(result, fp)


if __name__ == "__main__":
    main()