# File size: 1,959 Bytes
# 625a17f
# (file-viewer line-number gutter, 1-66, omitted)
import json
import random
# Build a small per-scene evaluation subset: group the frame-level samples by
# take ("video_name"), pick takes for each scene, draw random samples from
# them, and write the result as {scene: [sample, ...]} to ``save_path``.
json_path = "/work/yuqian_fu/Ego/data_segswap/egoexo_val_framelevel_newprompt_all_instruction.json"
save_path = "/work/yuqian_fu/Ego/data_segswap/check_text_byname_600_select_scene.json"
filter_byname_path = "/work/yuqian_fu/Ego/filter_takes_byname.json"

# Scenes reported at the end, in this order. An earlier version of this list
# also contained "cooking" and "health"; they are currently left out.
names = ["basketball", "bike", "music", "soccer"]

# Per-scene sampling plan, processed in insertion order (this is also the key
# order of the saved JSON object).
#   max_takes: number of takes drawn at random (None = use every listed take)
#   per_take:  samples drawn from each selected take (None = keep all of them)
#   total:     samples drawn from the pooled takes (None = keep the whole pool)
# basketball/bike: 10 random takes, then 200 samples from the pooled frames
# (per-take sub-sampling is disabled). soccer: every take, 50 samples each
# (the original note says soccer has only two takes). music: 5 random takes,
# then 100 samples from the pool.
SAMPLING_PLAN = {
    "basketball": {"max_takes": 10, "per_take": None, "total": 200},
    "bike": {"max_takes": 10, "per_take": None, "total": 200},
    "soccer": {"max_takes": None, "per_take": 50, "total": None},
    "music": {"max_takes": 5, "per_take": None, "total": 100},
}


def _draw(items, k, what):
    """Return ``k`` random items from ``items`` without replacement.

    ``k=None`` returns a copy of ``items`` in their original order. If fewer
    than ``k`` items exist, a warning is printed and all of them are drawn
    (in random order) instead of letting ``random.sample`` raise ValueError.
    """
    if k is None:
        return list(items)
    if k > len(items):
        print(f"warning: {what}: requested {k}, only {len(items)} available")
        k = len(items)
    return random.sample(items, k)


def index_by_take(datas):
    """Group samples by their ``"video_name"`` value, keeping input order.

    One pass over ``datas`` replaces a full rescan for every take.
    Raises KeyError if a sample has no ``"video_name"`` key.
    """
    by_take = {}
    for sample in datas:
        by_take.setdefault(sample["video_name"], []).append(sample)
    return by_take


def sample_scene(by_take, take_ids, max_takes=None, per_take=None, total=None, scene=""):
    """Select the samples of one scene.

    Args:
        by_take: mapping take name -> list of samples (see ``index_by_take``).
        take_ids: list of take names belonging to the scene.
        max_takes: how many takes to draw at random; None uses all of them.
        per_take: how many samples to draw from each chosen take; None keeps all.
        total: how many samples to draw from the pooled takes; None keeps the pool.
        scene: label used only in warning messages.

    Returns:
        A new list of samples. Takes missing from ``by_take`` contribute nothing.
    """
    chosen_takes = _draw(take_ids, max_takes, f"{scene} takes")
    pooled = []
    for take in chosen_takes:
        pooled += _draw(by_take.get(take, []), per_take, f"{scene}/{take} samples")
    return _draw(pooled, total, f"{scene} pooled samples")


def build_result(datas, take_names, plan=None):
    """Apply the sampling plan to every scene and return {scene: samples}.

    Args:
        datas: list of sample dicts, each carrying a ``"video_name"`` key.
        take_names: mapping scene name -> list of take names.
        plan: mapping scene name -> keyword limits for ``sample_scene``;
            defaults to ``SAMPLING_PLAN``.

    Raises:
        KeyError: if a planned scene is missing from ``take_names``.
    """
    if plan is None:
        plan = SAMPLING_PLAN
    by_take = index_by_take(datas)
    return {
        scene: sample_scene(by_take, take_names[scene], scene=scene, **limits)
        for scene, limits in plan.items()
    }


def main():
    """Load the inputs, sample every scene, print the counts and save the result."""
    with open(json_path, "r", encoding="utf-8") as fp:
        datas = json.load(fp)
    with open(filter_byname_path, "r", encoding="utf-8") as fp:
        take_names = json.load(fp)

    result = build_result(datas, take_names)

    for name in names:
        print(name, len(result[name]))

    with open(save_path, "w", encoding="utf-8") as fp:
        json.dump(result, fp)


if __name__ == "__main__":
    main()