"""Tag each entry of a JSON test list with a ``solution`` class id.

The class is chosen by looking for known directory-name fragments inside
each entry's ``audio_url``.
"""

import json
from typing import Optional

# Classification rules: (solution id, substrings that select that id).
# Rules are tried in order and the first substring found in the URL wins.
RULES = [
    (1, [
        'data/output_GRPO_correctdata',
        # Already covered by 'output_correct' below; kept to make the
        # intended data set explicit.
        'output_correct_pause_500_1000',
        'output_correct',
    ]),
    (2, [
        'output_GRPO_overlap_800',
        'output_2000_3000_wrongpause',
    ]),
    (3, [
        'output_GRPO_silence_500',
        'output_wrong_laugh',
    ]),
    (4, [
        'output_GRPO-texterror_800',
        'output_text_error_dialog_10002000',
    ]),
]


def classify(audio_url) -> Optional[int]:
    """Return the solution id whose rule matches *audio_url*.

    Args:
        audio_url: The URL/path string to inspect. Non-string values
            (e.g. ``None`` from a JSON ``null``) are treated as "no match"
            instead of raising ``TypeError``.

    Returns:
        The solution id of the first rule in ``RULES`` that has a substring
        contained in *audio_url*, or ``None`` when nothing matches.
    """
    if not isinstance(audio_url, str):
        return None
    for solution, substrings in RULES:
        if any(sub in audio_url for sub in substrings):
            return solution
    return None  # no rule matched


def main(json_path='merged_test_list.json',
         output_path='merged_test_list_classified.json'):
    """Classify every item in *json_path* and write the result to *output_path*.

    Args:
        json_path: Input JSON file; read as UTF-8 and iterated item by item.
        output_path: Destination JSON file, written as UTF-8 with
            ``ensure_ascii=False`` and ``indent=2``.

    Items whose ``audio_url`` matches a rule get a ``solution`` key; items
    with no match are written back unchanged.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    for item in data:
        solution = classify(item.get('audio_url', ''))
        if solution is not None:
            item['solution'] = solution

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)


if __name__ == '__main__':
    main()