"""Export screenshots stored in GZIP-compressed TFRecord files as PNG images.

For every dataset name listed in the driver loop at the bottom of this file,
the TFRecord files matching ``dataset_directories[name]`` are read, the first
episode of each file is extracted with :func:`get_episode`, and every example
of that episode is written to ``OUTPUT_DIR`` as a PNG file.
"""
import json
import os

import tensorflow as tf
from PIL import Image

# Directory that receives the exported PNG files.
OUTPUT_DIR = './images'

# NOTE(review): `save_directories` is not referenced anywhere in this file;
# it is kept because other code may import it.
save_directories = {
    'general': './images/*',
    'google_apps': 'google_apps/*',
    'install': 'install/*',
    'single': 'single/*',
    'web_shopping': 'web_shopping/*',
}

# Glob pattern of the TFRecord files of each dataset.
dataset_directories = {
    'general': 'general/*',
    'google_apps': 'google_apps/*',
    'install': 'install/*',
    'single': 'single/*',
    'web_shopping': 'web_shopping/*',
}


def get_episode(dataset):
    """Return the leading run of examples that share the first episode id.

    Args:
        dataset: Iterable of serialized ``tf.train.Example`` records.

    Returns:
        A list of parsed ``tf.train.Example`` objects: every record from the
        start of ``dataset`` up to (not including) the first record whose
        ``episode_id`` feature differs from that of the first record.  The
        list is empty when ``dataset`` yields nothing.

    NOTE(review): iteration stops at the first record of a second episode, so
    any later episodes in the same stream are never returned.  Confirm that
    exporting only the first episode per file is intended.
    """
    episode = []
    episode_id = None
    for d in dataset:
        ex = tf.train.Example()
        ex.ParseFromString(d)
        ep_id = ex.features.feature['episode_id'].bytes_list.value[0].decode('utf-8')
        if episode_id is None:
            # The first record defines which episode is being collected.
            episode_id = ep_id
            episode.append(ex)
        elif ep_id == episode_id:
            episode.append(ex)
        else:
            break
    return episode


def _decode_image(
    example,
    image_height,
    image_width,
    image_channels,
):
    """Decode the raw image bytes of ``example`` into a 3-D uint8 tensor.

    Args:
        example: Parsed ``tf.train.Example`` holding the bytes in its
            ``image/encoded`` feature.
        image_height: Image height used for the reshape.
        image_width: Image width used for the reshape.
        image_channels: Number of channels used for the reshape.

    Returns:
        A ``tf.uint8`` tensor of shape ``(height, width, n_channels)``.
    """
    # The feature is read as a flat uint8 buffer (decode_raw), not as a
    # PNG/JPEG stream, and is then reshaped to the given dimensions.
    image = tf.io.decode_raw(
        example.features.feature['image/encoded'].bytes_list.value[0],
        out_type=tf.uint8,
    )

    height = tf.cast(image_height, tf.int32)
    width = tf.cast(image_width, tf.int32)
    n_channels = tf.cast(image_channels, tf.int32)

    return tf.reshape(image, (height, width, n_channels))


# Disabled code: builds, per dataset, the list of image file names referenced
# by the gpt4v_android_* JSON files.  Nothing in this file uses `need_files`.
# NOTE(review): in the `install` section the second loop appends to
# `google_apps_need_files` instead of `install_need_files`; this looks like a
# copy-paste slip and should be fixed before the code is re-enabled.
#
# general_need_files = []
# json_data = json.load(open('./gpt4v_android_general_detailed_caption_bbox.json'))
# for item in json_data:
#     general_need_files.append(item['image'].split('/')[-1])
# json_data = json.load(open('./gpt4v_android_general_QA_bbox.json'))
# for item in json_data:
#     if item['image'].split('/')[-1] in general_need_files:
#         continue
#     general_need_files.append(item['image'].split('/')[-1])

# google_apps_need_files = []
# json_data = json.load(open('./gpt4v_android_google_apps_detailed_caption_bbox.json'))
# for item in json_data:
#     google_apps_need_files.append(item['image'].split('/')[-1])
# json_data = json.load(open('./gpt4v_android_google_apps_QA_bbox.json'))
# for item in json_data:
#     if item['image'].split('/')[-1] in google_apps_need_files:
#         continue
#     google_apps_need_files.append(item['image'].split('/')[-1])

# install_need_files = []
# json_data = json.load(open('./gpt4v_android_install_detailed_caption_bbox.json'))
# for item in json_data:
#     install_need_files.append(item['image'].split('/')[-1])
# json_data = json.load(open('./gpt4v_android_install_QA_bbox.json'))
# for item in json_data:
#     if item['image'].split('/')[-1] in install_need_files:
#         continue
#     google_apps_need_files.append(item['image'].split('/')[-1])

# single_need_files = []
# json_data = json.load(open('./gpt4v_android_single_detailed_caption_bbox.json'))
# for item in json_data:
#     single_need_files.append(item['image'].split('/')[-1])
# json_data = json.load(open('./gpt4v_android_single_QA_bbox.json'))
# for item in json_data:
#     if item['image'].split('/')[-1] in single_need_files:
#         continue
#     single_need_files.append(item['image'].split('/')[-1])

# web_shopping_need_files = []
# json_data = json.load(open('./gpt4v_android_web_shopping_detailed_caption_bbox.json'))
# for item in json_data:
#     web_shopping_need_files.append(item['image'].split('/')[-1])
# json_data = json.load(open('./gpt4v_android_web_shopping_QA_bbox.json'))
# for item in json_data:
#     if item['image'].split('/')[-1] in web_shopping_need_files:
#         continue
#     web_shopping_need_files.append(item['image'].split('/')[-1])

# need_files = {
#     'general': general_need_files,
#     'google_apps': google_apps_need_files,
#     'install': install_need_files,
#     'single': single_need_files,
#     'web_shopping': web_shopping_need_files,
# }


def _image_path(dataset_name, example):
    """Return the PNG path for ``example``.

    The name is ``<dataset>_<episode_id>_<step_id>.png`` when the example has
    a ``step_id`` feature and ``<dataset>_<episode_id>.png`` otherwise.
    """
    features = example.features.feature
    episode_id = features['episode_id'].bytes_list.value[0].decode('utf-8')
    if 'step_id' in features:
        step_id = features['step_id'].int64_list.value[0]
        return f'{OUTPUT_DIR}/{dataset_name}_{episode_id}_{step_id}.png'
    return f'{OUTPUT_DIR}/{dataset_name}_{episode_id}.png'


def _save_episode_images(dataset_name, episode):
    """Write every example of ``episode`` to ``OUTPUT_DIR`` as a PNG file."""
    for example in episode:
        features = example.features.feature
        image_height = features['image/height'].int64_list.value[0]
        image_width = features['image/width'].int64_list.value[0]
        image_channels = features['image/channels'].int64_list.value[0]

        image = _decode_image(example, image_height, image_width, image_channels)
        pil_img = tf.keras.utils.array_to_img(image)

        # Build the path once so the saved file and the log line cannot drift.
        path = _image_path(dataset_name, example)
        pil_img.save(path)
        print('saving ', path)


# Saving fails with FileNotFoundError when the target directory is missing,
# so create it up front.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for dataset_name in ['web_shopping']:
    filenames = tf.io.gfile.glob(dataset_directories[dataset_name])
    for filename in filenames:
        raw_dataset = tf.data.TFRecordDataset(
            filename, compression_type='GZIP'
        ).as_numpy_iterator()
        # Only the first episode of each file is exported (see get_episode).
        episode = get_episode(raw_dataset)
        _save_episode_images(dataset_name, episode)

print('Done.')