File size: 1,959 Bytes
625a17f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import json
import random

# Input annotations, output location, and the per-scene take lists.
json_path = "/work/yuqian_fu/Ego/data_segswap/egoexo_val_framelevel_newprompt_all_instruction.json"
save_path = "/work/yuqian_fu/Ego/data_segswap/check_text_byname_600_select_scene.json"
filter_byname_path = "/work/yuqian_fu/Ego/filter_takes_byname.json"

# Scenes to sample. "cooking" and "health" were part of an earlier scene list
# and are currently left out. "music" and "soccer" use their own recipes (see
# build_selection); every other scene uses the default pooled recipe.
names = ["basketball", "bike", "music", "soccer"]


def _index_by_take(records):
    """Group records by their "video_name", keeping input order within a take.

    Building this index once replaces a full scan of the records for every
    take that has to be looked up.
    """
    by_take = {}
    for record in records:
        by_take.setdefault(record["video_name"], []).append(record)
    return by_take


def _draw(population, count, what):
    """Return ``count`` distinct random items from ``population``.

    Raises:
        ValueError: if ``population`` holds fewer than ``count`` items. This
            is the exception ``random.sample`` would raise as well, but the
            message says what was being drawn and how many were available.
    """
    if count > len(population):
        raise ValueError(
            "cannot draw {} {}: only {} available".format(
                count, what, len(population)
            )
        )
    return random.sample(population, count)


def _sample_pooled(scene, take_ids, by_take, n_takes, n_samples):
    """Pick ``n_takes`` takes, pool their records, and draw ``n_samples``."""
    chosen = _draw(take_ids, n_takes, "takes for scene '{}'".format(scene))
    pool = []
    for take in chosen:
        # A take without any record simply contributes nothing to the pool.
        pool.extend(by_take.get(take, []))
    return _draw(pool, n_samples, "samples for scene '{}'".format(scene))


def _sample_per_take(take_ids, by_take, n_per_take):
    """Draw ``n_per_take`` records from every take, in take order."""
    picked = []
    for take in take_ids:
        picked.extend(
            _draw(
                by_take.get(take, []),
                n_per_take,
                "samples for take '{}'".format(take),
            )
        )
    return picked


def build_selection(datas, take_names, scene_names):
    """Build the per-scene random selection of records.

    Recipes:
      * "soccer": every listed take is used and 50 records are drawn from
        each take (the original note says soccer has only two takes).
      * "music": 5 random takes are pooled and 100 records are drawn.
      * any other scene in ``scene_names``: 10 random takes are pooled and
        200 records are drawn.

    "soccer" and "music" are always processed, whether or not they appear in
    ``scene_names``.

    Args:
        datas: sequence of records; each record is indexed with
            ``"video_name"`` to find the take it belongs to.
        take_names: mapping from scene name to the list of take ids of that
            scene.
        scene_names: scene names to process with the default recipe
            ("music" and "soccer" entries are skipped here because they
            have dedicated recipes).

    Returns:
        Dict mapping scene name to the list of selected records. Keys are
        inserted in processing order: default scenes, "soccer", "music".

    Raises:
        KeyError: if a scene is missing from ``take_names``.
        ValueError: if a scene or take has fewer takes/records than its
            recipe requires.
    """
    by_take = _index_by_take(datas)
    result = {}

    for scene in scene_names:
        if scene in ("music", "soccer"):
            continue
        result[scene] = _sample_pooled(
            scene, take_names[scene], by_take, 10, 200
        )

    result["soccer"] = _sample_per_take(take_names["soccer"], by_take, 50)
    result["music"] = _sample_pooled(
        "music", take_names["music"], by_take, 5, 100
    )
    return result


def main():
    """Load the inputs, build the selection, report sizes, and save it."""
    with open(json_path, "r") as fp:
        datas = json.load(fp)

    with open(filter_byname_path, "r") as fp:
        take_names = json.load(fp)

    result = build_selection(datas, take_names, names)

    for name in names:
        print(name, len(result[name]))

    with open(save_path, "w") as fp:
        json.dump(result, fp)


if __name__ == "__main__":
    main()