"""Convert the stage-3 video instruction CSV into a JSON list of records.

Reads ``datasets/stage3/video_instruct_data.csv`` (columns ``video_id``,
``q`` and ``a``) and overwrites ``datasets/stage3/video_instruct_data.json``
with one object per CSV row, in row order.
"""
import json
import os

import pandas as pd

csv_path = 'datasets/stage3/video_instruct_data.csv'
output_path = 'datasets/stage3/video_instruct_data.json'

# Every entry carries the same fixed "length" value.
# NOTE(review): the meaning/unit of this field is not visible here - confirm
# against the code that consumes the JSON.
DEFAULT_LENGTH = 100

# Read every cell as raw text:
#   * dtype=str stops pandas from re-typing values (e.g. "007" -> 7, or an
#     integer ID column with a blank cell turning into floats like "123.0").
#   * keep_default_na=False stops strings such as "NA", "N/A" or "null" from
#     being parsed as missing, which str() would then render as "nan".
# Any cell that is still missing afterwards is emitted as an empty string.
df = pd.read_csv(csv_path, dtype=str, keep_default_na=False).fillna("")

# One record per row. The keys are deliberately "q" and "a" (not
# "instruction" / "answer"): per the original author's note, those are the
# names the consuming code expects.
json_data = [
    {
        "video_id": vid.strip(),
        "q": question.strip(),
        "a": answer.strip(),
        "length": DEFAULT_LENGTH,
    }
    for vid, question, answer in zip(df["video_id"], df["q"], df["a"])
]

# Overwrite any existing JSON file at the output path.
with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(json_data, f, indent=4)

print("转换完成!已重新生成符合代码要求的 JSON。")