File size: 1,205 Bytes
8613355
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import json

# Classification rules.
#
# Each entry is a ``(solution, substrings)`` pair: an ``audio_url`` that
# contains any of the substrings is assigned that solution label. Rules are
# tried from top to bottom and the first match wins, so order matters when
# patterns overlap.
RULES = [
    (1, [
        'data/output_GRPO_correctdata',
        # NOTE: also covered by the shorter 'output_correct' below; listed
        # explicitly to keep the known dataset directory names visible.
        'output_correct_pause_500_1000',
        'output_correct',
    ]),
    (2, [
        'output_GRPO_overlap_800',
        'output_2000_3000_wrongpause',
    ]),
    (3, [
        'output_GRPO_silence_500',
        'output_wrong_laugh',
    ]),
    (4, [
        'output_GRPO-texterror_800',
        'output_text_error_dialog_10002000',
    ]),
]

def classify(audio_url, rules=None):
    """Return the solution label of the first rule that matches ``audio_url``.

    Rules are tried in order; a rule matches when any of its substrings
    occurs in ``audio_url``.

    Args:
        audio_url: The string to classify. A non-string value (for example
            ``None`` coming from a JSON ``null``) is treated as unmatched
            instead of raising ``TypeError``.
        rules: Optional iterable of ``(solution, substrings)`` pairs. When
            omitted, the module-level ``RULES`` table is used.

    Returns:
        The solution label of the first matching rule, or ``None`` when no
        rule matches.
    """
    # Substring tests are only meaningful on strings; bail out early so that
    # malformed records do not abort a whole batch.
    if not isinstance(audio_url, str):
        return None
    if rules is None:
        rules = RULES
    for solution, substrings in rules:
        if any(sub in audio_url for sub in substrings):
            return solution
    return None  # nothing matched

def main(json_path='merged_test_list.json',
         output_path='merged_test_list_classified.json'):
    """Label every record of a JSON list with a ``solution`` and save a copy.

    Loads ``json_path``, runs ``classify`` on each record's ``audio_url``,
    stores the result under the ``solution`` key when a rule matched, and
    writes the whole list to ``output_path``. Records with no matching rule
    are written back unchanged.

    Args:
        json_path: Path of the input JSON file; iterated as a sequence of
            dict records.
        output_path: Path the annotated JSON is written to.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    for item in data:
        # ``or ''`` also covers an explicit JSON null, which ``.get`` with a
        # default would pass through as None.
        audio_url = item.get('audio_url') or ''
        solution = classify(audio_url)
        if solution is not None:
            item['solution'] = solution

    with open(output_path, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII text readable in the output.
        json.dump(data, f, ensure_ascii=False, indent=2)

# Run the classification only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()