Desm0nt
/

TestFinetunes

Model card Files Files and versions

Desm0nt committed on Jun 28, 2024

Commit

c4edf1f

·

verified ·

1 Parent(s): 0380c20

Upload convert.py

Files changed (1) hide show

convert.py +41 -0

convert.py ADDED Viewed

	@@ -0,0 +1,41 @@

"""Convert a folder of captioned images into a conversation-style JSON dataset.

For every ``<name>.jpg`` in ``folder_path`` that has a sibling ``<name>.txt``
caption, one record is produced containing the image path, a user prompt
(optionally enriched with tags read from ``tags_folder_path/<name>.txt``) and
the caption as the assistant reply.  The records are written to
``output.json`` inside ``base_folder``.
"""
import os
import json

# Directory holding the ``.jpg`` images and their ``.txt`` captions.
folder_path = "d:\\Dropbox\\YandexDisk\\Dataset\\Human_Captions_done\\cleaned\\"
# Directory that receives ``output.json``.
base_folder = "d:\\Dropbox\\YandexDisk\\Dataset\\"
# Directory holding optional per-image tag files (``<name>.txt``).
tags_folder_path = "d:\\Dropbox\\YandexDisk\\Dataset\\Human_Captions_basetxt\\"


def _read_text_or_none(path):
    """Return the text content of *path*, or ``None`` if the file is missing.

    Files are decoded as UTF-8 (a leading BOM is tolerated) instead of the
    platform's locale encoding, so results do not depend on the machine the
    script runs on.
    """
    try:
        with open(path, "r", encoding="utf-8-sig") as f:
            return f.read()
    except FileNotFoundError:
        return None


def build_records(image_dir, tags_dir):
    """Build the list of conversation records for the images in *image_dir*.

    Args:
        image_dir: Directory scanned for ``*.jpg`` files and their
            ``<name>.txt`` captions.
        tags_dir: Directory looked up for an optional ``<name>.txt`` tag file
            per image.

    Returns:
        A list of dicts with the keys ``"id"`` (sequential number as a
        string, starting at ``"0"``), ``"image"`` (single-element list with
        the image path) and ``"conversations"`` (user prompt followed by the
        assistant caption).  Images without a caption file are skipped and do
        not consume an id.
    """
    records = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # id <-> image assignment reproducible between runs.
    for filename in sorted(os.listdir(image_dir)):
        if not filename.endswith(".jpg"):
            continue
        image_name = os.path.splitext(filename)[0]
        caption = _read_text_or_none(
            os.path.join(image_dir, f"{image_name}.txt")
        )
        if caption is None:
            continue

        tags = _read_text_or_none(os.path.join(tags_dir, f"{image_name}.txt"))
        if tags is not None:
            # Only the tags are stripped; the caption is kept verbatim.
            tags = tags.strip()
            prompt = f"<ImageHere> Make a caption that describe this image. Here is the tags for this image: {tags}"
        else:
            prompt = "<ImageHere> Make a caption that describe this image"

        records.append(
            {
                "id": str(len(records)),
                "image": [os.path.join(image_dir, filename)],
                "conversations": [
                    {"from": "user", "value": prompt},
                    {"from": "assistant", "value": caption},
                ],
            }
        )
    return records


def main():
    """Generate ``output.json`` in ``base_folder`` from the configured folders."""
    json_data = build_records(folder_path, tags_folder_path)
    with open(
        os.path.join(base_folder, "output.json"), "w", encoding="utf-8"
    ) as f:
        json.dump(json_data, f, indent=4)


if __name__ == "__main__":
    main()