"""Build a conversation-style image-caption dataset and write it as JSON.

For every ``.jpg`` file in ``folder_path`` that has a same-named ``.txt``
caption file next to it, one record is produced containing the image path, a
user prompt and the caption as the assistant reply.  If a same-named ``.txt``
file also exists in ``tags_folder_path``, its stripped contents are appended
to the prompt.  All records are written to ``output.json`` in ``base_folder``.
"""
import json
import os

folder_path = "d:\\Dropbox\\YandexDisk\\Dataset\\Human_Captions_done\\cleaned\\"
base_folder = "d:\\Dropbox\\YandexDisk\\Dataset\\"
tags_folder_path = "d:\\Dropbox\\YandexDisk\\Dataset\\Human_Captions_basetxt\\"

# Text files are read and written with an explicit encoding so the result
# does not depend on the locale code page of the machine running the script.
_TEXT_ENCODING = "utf-8"


def _read_text(path):
    """Return the full contents of the text file at *path*."""
    with open(path, "r", encoding=_TEXT_ENCODING) as f:
        return f.read()


def build_prompt(image_name, tags_dir):
    """Return the user prompt for the image called *image_name*.

    Args:
        image_name: Image file name without its extension.
        tags_dir: Directory searched for ``<image_name>.txt``.

    Returns:
        The prompt including the stripped tag text when the tags file exists,
        otherwise the plain prompt without tags.
    """
    tags_path = os.path.join(tags_dir, f"{image_name}.txt")
    try:
        tags_content = _read_text(tags_path).strip()
    except FileNotFoundError:
        # No tags for this image: fall back to the tag-less prompt.
        return "<ImageHere> Make a caption that describe this image"
    # The prompt wording is emitted into the dataset, so it is kept verbatim.
    return f"<ImageHere> Make a caption that describe this image. Here is the tags for this image: {tags_content}"


def build_dataset(image_dir, tags_dir):
    """Collect one record per ``.jpg`` in *image_dir* that has a caption file.

    Args:
        image_dir: Directory holding the ``.jpg`` images and their ``.txt``
            captions.
        tags_dir: Directory holding the optional per-image tag files.

    Returns:
        A list of dicts with the keys ``id`` (a string counter starting at
        "0"), ``image`` (a one-element list with the image path) and
        ``conversations`` (the user prompt followed by the assistant caption).
        Images without a caption file are skipped and do not consume an id.
    """
    records = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the ids
    # identical from one run to the next.
    for filename in sorted(os.listdir(image_dir)):
        if not filename.endswith(".jpg"):
            continue
        image_name = os.path.splitext(filename)[0]
        try:
            caption = _read_text(os.path.join(image_dir, f"{image_name}.txt"))
        except FileNotFoundError:
            # An image without a caption cannot form a training pair.
            continue
        records.append(
            {
                # Ids count only the records actually emitted.
                "id": str(len(records)),
                "image": [os.path.join(image_dir, filename)],
                "conversations": [
                    {"from": "user", "value": build_prompt(image_name, tags_dir)},
                    {"from": "assistant", "value": caption},
                ],
            }
        )
    return records


def write_dataset(records, output_path):
    """Serialize *records* to *output_path* as JSON indented by four spaces."""
    with open(output_path, "w", encoding=_TEXT_ENCODING) as f:
        json.dump(records, f, indent=4)


def main():
    """Build the dataset from the configured folders and write output.json."""
    records = build_dataset(folder_path, tags_folder_path)
    write_dataset(records, os.path.join(base_folder, "output.json"))


if __name__ == "__main__":
    main()