|
|
import json |
|
|
|
|
|
|
|
|
# Classification table consumed by ``classify``.
#
# Each entry is ``(solution, substrings)``: a record receives ``solution`` when
# any of the ``substrings`` occurs anywhere in its audio URL.  Entries are
# checked in order and the first matching entry wins, so put more specific
# rules before more general ones.
#
# NOTE(review): the meaning of the labels 1-4 is not defined in this file; the
# trailing comments below are guesses from the directory names -- confirm.
RULES = [
    (1, [  # presumably "correct" samples
        'data/output_GRPO_correctdata',
        # Already covered by 'output_correct' below (it is a superstring of
        # it); kept once so the directory stays explicitly listed.
        'output_correct_pause_500_1000',
        'output_correct',
    ]),
    (2, [  # presumably overlap / wrong-pause samples
        'output_GRPO_overlap_800',
        'output_2000_3000_wrongpause',
    ]),
    (3, [  # presumably silence / wrong-laugh samples
        'output_GRPO_silence_500',
        'output_wrong_laugh',
    ]),
    (4, [  # presumably text-error samples
        'output_GRPO-texterror_800',
        'output_text_error_dialog_10002000',
    ]),
]
|
|
|
|
|
def classify(audio_url, rules=None):
    """Return the solution label of the first rule matching *audio_url*.

    Args:
        audio_url: URL or path of the audio file.  Anything that is not a
            ``str`` (for example ``None`` coming from a JSON ``null``) is
            treated as unclassifiable.
        rules: Optional iterable of ``(solution, substrings)`` pairs to match
            against.  Defaults to the module-level ``RULES`` table.

    Returns:
        The ``solution`` of the first pair for which at least one substring
        occurs in ``audio_url``, or ``None`` when nothing matches.
    """
    # ``sub in audio_url`` raises TypeError for None and other non-strings;
    # report "no match" instead of crashing the caller.
    if not isinstance(audio_url, str):
        return None
    if rules is None:
        rules = RULES
    for solution, substrings in rules:
        if any(sub in audio_url for sub in substrings):
            return solution
    return None
|
|
|
|
|
def main(json_path='merged_test_list.json',
         output_path='merged_test_list_classified.json'):
    """Add a ``solution`` label to each record of a JSON list and save a copy.

    The input file is loaded as JSON and iterated; each element is expected
    to be a dict.  For every element whose ``'audio_url'`` is a string that
    ``classify`` recognises, the returned label is stored under
    ``'solution'``.  All other elements are written back unchanged.

    Args:
        json_path: Path of the JSON file to read.
        output_path: Path of the JSON file to write.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    for item in data:
        audio_url = item.get('audio_url')
        # A missing key or a JSON ``null`` cannot be substring-matched;
        # leave such records untouched instead of failing on them.
        if not isinstance(audio_url, str):
            continue
        solution = classify(audio_url)
        if solution is not None:
            item['solution'] = solution

    # ensure_ascii=False keeps non-ASCII text readable in the output file.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
|
|
|
|
|
# Run the classification only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
|