Upload convert.py
Browse files- convert.py +41 -0
convert.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Build a conversation-style JSON dataset from image/caption pairs.
#
# For every "<name>.jpg" in `folder_path` that has a sibling "<name>.txt"
# caption, one record is emitted containing the image path, a user prompt
# (optionally enriched with tags read from `tags_folder_path/<name>.txt`) and
# the caption as the assistant reply. All records are written to
# `base_folder/output.json`.
import os
import json

folder_path = "d:\\Dropbox\\YandexDisk\\Dataset\\Human_Captions_done\\cleaned\\"
base_folder = "d:\\Dropbox\\YandexDisk\\Dataset\\"
tags_folder_path = "d:\\Dropbox\\YandexDisk\\Dataset\\Human_Captions_basetxt\\"


def _read_text(path):
    """Return the full contents of the text file at *path*."""
    # Explicit encoding: without it the platform locale code page is used and
    # non-ASCII captions are mis-decoded.
    # NOTE(review): assumes the caption/tag files are UTF-8 - confirm.
    with open(path, "r", encoding="utf-8") as f:
        return f.read()


def _build_prompt(tags_path):
    """Return the user prompt, including tags when *tags_path* exists."""
    if os.path.exists(tags_path):
        tags_content = _read_text(tags_path).strip()
        return f"<ImageHere> Make a caption that describe this image. Here is the tags for this image: {tags_content}"
    return "<ImageHere> Make a caption that describe this image"


def build_records(images_dir, tags_dir):
    """Collect one conversation record per captioned ``.jpg`` in *images_dir*.

    Args:
        images_dir: Directory holding ``<name>.jpg`` files and their
            ``<name>.txt`` captions.
        tags_dir: Directory that may hold a ``<name>.txt`` tag file per image.

    Returns:
        A list of dicts with keys ``"id"`` (sequential index as a string),
        ``"image"`` (single-element list with the image path) and
        ``"conversations"`` (user prompt followed by the assistant caption).
        Images without a caption file are skipped and do not consume an id.
    """
    records = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # generated ids stable from one run to the next.
    for filename in sorted(os.listdir(images_dir)):
        if not filename.endswith(".jpg"):
            continue

        image_name = os.path.splitext(filename)[0]
        txt_path = os.path.join(images_dir, f"{image_name}.txt")
        if not os.path.exists(txt_path):
            continue

        # The caption is stored verbatim (not stripped), unlike the tags.
        txt_content = _read_text(txt_path)
        prompt = _build_prompt(os.path.join(tags_dir, f"{image_name}.txt"))

        records.append(
            {
                "id": str(len(records)),
                "image": [os.path.join(images_dir, filename)],
                "conversations": [
                    {"from": "user", "value": prompt},
                    {"from": "assistant", "value": txt_content},
                ],
            }
        )
    return records


def main():
    """Convert the configured dataset folders and write ``output.json``."""
    json_data = build_records(folder_path, tags_folder_path)
    with open(os.path.join(base_folder, "output.json"), "w", encoding="utf-8") as f:
        json.dump(json_data, f, indent=4)


if __name__ == "__main__":
    main()
|