jiyatai committed on
Commit
fd9d77f
·
verified ·
1 Parent(s): d0f43fd

Delete read.py

Browse files
Files changed (1) hide show
  1. read.py +0 -19
read.py DELETED
@@ -1,19 +0,0 @@
1
- from datasets import load_dataset
2
- import json
3
- from tqdm import tqdm
4
-
5
- # capture = load_dataset("/group/40005/public_datasets/DetailCaps-4870")
6
- capture = load_dataset("parquet", data_files={"test": "/group/40005/public_datasets/DetailCaps-4870/DetailCaps-4870.parquet"})['test']
7
- print(len(capture))
8
- save_dir = "/group/40005/auroraji/CAPTURE/samples"
9
- anno = {}
10
-
11
- for i, instance in tqdm(enumerate(capture)):
12
- img_binary = instance['binary']
13
- anno[i] = [instance['GT_Caption_GPT4V'], instance['GT_Caption_GPT4O'], instance['GT_Caption_Gemini15Pro']]
14
-
15
- with open(f"{save_dir}/{i}.png", "wb") as f:
16
- f.write(img_binary)
17
-
18
- with open("annotations.json", "w") as f:
19
- json.dump(anno, f, indent=4)