import json
import os


def reformat_json(input_file, output_file):
    """Rewrite a caption JSON file into a flat ``image_id``/``sentences`` list.

    The input file must contain a JSON array of objects, each with a
    ``"filename"`` key and a ``"sentences"`` key; every sentence is an object
    with a ``"tokens"`` list of strings. For each input object one output
    object is produced with:

    * ``"image_id"``: the value of ``"filename"``
    * ``"sentences"``: each token list joined with single spaces

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write (overwritten if present).

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If ``input_file`` is not valid JSON.
        KeyError: If an item lacks ``"filename"``/``"sentences"`` or a
            sentence lacks ``"tokens"``.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding,
    # which would mis-decode non-ASCII captions on e.g. Windows.
    with open(input_file, 'r', encoding="utf-8") as f:
        data = json.load(f)

    new_data = [
        {
            "image_id": item["filename"],
            "sentences": [
                " ".join(sentence["tokens"]) for sentence in item["sentences"]
            ],
        }
        for item in data
    ]

    with open(output_file, 'wt', encoding="utf-8") as f:
        json.dump(new_data, f, indent=4)


def main():
    """Reformat the train/val/test split files found in the current directory.

    Reads ``./{split}.json`` and writes ``./reformat_{split}.json`` for each
    split, then additionally writes ``./reformat_test_all.json``: a single
    JSON object mapping each test ``image_id`` to its list of sentences.
    """
    splits = ["train", "val", "test"]
    for split in splits:
        input_file = f"./{split}.json"
        output_file = f"./reformat_{split}.json"
        reformat_json(input_file, output_file)

    # Reformat test
    with open("./reformat_test.json", 'r', encoding="utf-8") as f:
        data = json.load(f)

    # Keyed by image_id; if an id repeats, the last occurrence wins.
    new_data = {item["image_id"]: item["sentences"] for item in data}

    with open("./reformat_test_all.json", "wt", encoding="utf-8") as f:
        json.dump(new_data, f)


if __name__ == "__main__":
    main()