jiyatai committed on
Commit
3db4484
·
verified ·
1 Parent(s): 50b146e

Upload ./read.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. read.py +19 -0
read.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Dump the DetailCaps-4870 test split to disk.

For every row of the parquet file this script writes the raw image bytes to
``<save_dir>/<index>.png`` and collects the three reference captions
(GPT-4V, GPT-4o, Gemini 1.5 Pro columns) into ``annotations.json`` in the
current working directory.
"""
import json
import os

from datasets import load_dataset
from tqdm import tqdm

# Earlier variant kept for reference: load from the dataset directory instead
# of pointing at the parquet file explicitly.
# capture = load_dataset("/group/40005/public_datasets/DetailCaps-4870")
capture = load_dataset(
    "parquet",
    data_files={"test": "/group/40005/public_datasets/DetailCaps-4870/DetailCaps-4870.parquet"},
)['test']
print(len(capture))

save_dir = "/group/40005/auroraji/CAPTURE/samples"
# Create the output directory up front; otherwise the first image write
# fails with FileNotFoundError when the directory does not exist yet.
os.makedirs(save_dir, exist_ok=True)

# Maps row index -> [GPT4V caption, GPT4O caption, Gemini15Pro caption].
# NOTE: json.dump serializes the integer keys as strings.
anno = {}

# Wrap the dataset itself (not the enumerate object) so tqdm can read its
# length and display a total / ETA.
for i, instance in enumerate(tqdm(capture)):
    # Presumably already-encoded image bytes; they are written out verbatim
    # under a .png name without re-encoding -- TODO confirm the source format.
    img_binary = instance['binary']
    anno[i] = [
        instance['GT_Caption_GPT4V'],
        instance['GT_Caption_GPT4O'],
        instance['GT_Caption_Gemini15Pro'],
    ]

    with open(os.path.join(save_dir, f"{i}.png"), "wb") as f:
        f.write(img_binary)

# Written once, after all rows are processed, relative to the current
# working directory (not save_dir), as in the original script.
with open("annotations.json", "w") as f:
    json.dump(anno, f, indent=4)