|
|
import json |
|
|
from pathlib import Path |
|
|
|
|
|
def process_json(input_path, output_path, file_stem):
    """Convert a 4D-Bench QA JSON file into the unified VQA dataset format.

    Parameters
    ----------
    input_path : str | Path
        Source JSON: a mapping of ``"<video_id>_<suffix>"`` keys to question
        records carrying ``"(A)"``..``"(D)"`` option texts, ``"Question"``,
        ``"Category"``, and ``"Answer index"`` fields.
    output_path : str | Path
        Destination file for the converted JSON list (UTF-8, pretty-printed).
    file_stem : str
        Dataset folder name used to build each entry's relative media path.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        original_data = json.load(f)

    processed = []

    for index, (key, item) in enumerate(original_data.items()):
        # Keys look like "<video_id>_<suffix>"; the media file is addressed
        # by the video id alone.
        video_id = key.split('_')[0]
        media_path = "./" + (Path("data") / file_stem / video_id).as_posix()

        # Keep only options that actually have (non-empty) text.
        options = []
        for opt_id in ['A', 'B', 'C', 'D']:
            if text := item.get(f'({opt_id})', ''):
                options.append({"id": opt_id, "text": text.strip()})

        # "Answer index" refers to the fixed A-D letter sequence, so map it
        # to its letter directly. Indexing the *filtered* `options` list
        # would silently mis-align whenever an option is absent.
        option_ids = {opt["id"] for opt in options}
        try:
            answer_num = int(item['Answer index'])
            answer_letter = 'ABCD'[answer_num] if 0 <= answer_num < 4 else None
            answer_ids = [answer_letter] if answer_letter in option_ids else []
        except (ValueError, TypeError, KeyError):
            # Missing, None, or non-numeric answer index -> no answer recorded.
            answer_ids = []

        processed.append({
            "index": index,
            "media_type": "Video",
            "media_paths": media_path,
            "description": item.get("Category", ""),
            "task_type": "Vision-Question-Answer",
            "question": [item.get("Question", "")],
            "question_type": "multi-choice",
            "annotations": {},
            "options": options,
            "answer": answer_ids,
            "source": "4D-Bench",
            "domain": "Embodied_ai"
        })

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(processed, f, indent=2, ensure_ascii=False)
|
|
|
|
|
if __name__ == "__main__": |
|
|
|
|
|
input_path = "/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/emb_ai/4d/4D_Object_Question_Answering/data/4d_qa.json" |
|
|
output_path = "/mnt/data/users/zys/proj/vlm_reasoning/dataset/4D_Object_Question_Answering.json" |
|
|
file_stem = "4D_Object_Question_Answering" |
|
|
|
|
|
|
|
|
process_json( |
|
|
input_path=input_path, |
|
|
output_path=output_path, |
|
|
file_stem=file_stem |
|
|
) |