import os
import json
import cv2
import lmdb
import yaml
from PIL import Image
import io
import numpy as np
def file_to_binary(file_path):
    """Return the content of ``file_path`` as a ``bytes`` object.

    Paths ending in ``.npy`` are loaded with ``np.load`` and serialised with
    ``tobytes()``, so the result holds the array's element data only.
    Any other path is read verbatim in binary mode.
    """
    # Guard clause: everything that is not a NumPy dump is copied as-is.
    if not file_path.endswith('.npy'):
        with open(file_path, 'rb') as stream:
            return stream.read()

    array = np.load(file_path)
    return array.tobytes()
def create_lmdb_dataset(source_folder, lmdb_path, dataset_name, map_size):
    """Pack every file found under ``source_folder`` into an LMDB database.

    The tree is walked recursively, following symbolic links. Each file is
    stored under the UTF-8 encoded key
    ``"<dataset_name>/<path relative to source_folder>"`` with the bytes
    returned by ``file_to_binary`` as its value. All records are written
    inside a single write transaction.

    Args:
        source_folder: Root directory whose files are imported.
        lmdb_path: Location handed to ``lmdb.open``.
        dataset_name: Prefix prepended to every key.
        map_size: Maximum database size in bytes, forwarded to ``lmdb.open``.
    """
    # open LMDB file, create dataset
    db = lmdb.open(lmdb_path, map_size=map_size)
    try:
        with db.begin(write=True) as txn:
            for root, _dirs, files in os.walk(source_folder, followlinks=True):
                print(root)
                # NOTE: the test is a substring match on the whole walked path,
                # so it also triggers when ``source_folder`` itself contains
                # 'video'.
                if 'video' in root:
                    continue
                for file in files:
                    print(file)
                    image_path = os.path.join(root, file)
                    # The relative part keeps the separator of the running OS.
                    relative_path = f"{dataset_name}/" + os.path.relpath(image_path, source_folder)
                    print("relative_path:", relative_path)
                    key = relative_path.encode('utf-8')
                    print("image_path:", image_path)
                    value = file_to_binary(image_path)
                    # write dataset
                    txn.put(key, value)
    finally:
        # Close the environment even when a read or a put fails part-way.
        db.close()
def read_lmdb(lmdb_dir_path):
    """Spot-check a generated LMDB by decoding one hard-coded record.

    Looks up the key ``'npy_test\\000_003\\000.npy'`` and interprets its value
    as ``uint32`` data reshaped to ``(81, 2)``.

    Args:
        lmdb_dir_path: Location handed to ``lmdb.open``.

    Returns:
        The decoded ``(81, 2)`` array.

    Raises:
        KeyError: If the validation key is not present in the database.
        ValueError: If the stored value cannot be reshaped to ``(81, 2)``.
    """
    # key for validation
    key = 'npy_test\\000_003\\000.npy'
    env = lmdb.open(lmdb_dir_path)
    try:
        with env.begin(write=False) as txn:
            binary = txn.get(key.encode())
            if binary is None:
                raise KeyError(f"key {key!r} not found in LMDB at {lmdb_dir_path!r}")
            # NOTE(review): dtype and shape are assumptions about how this
            # particular entry was written -- confirm against the source .npy.
            data = np.frombuffer(binary, dtype=np.uint32).reshape((81, 2))
    finally:
        # Release the environment handle whether or not the lookup succeeded.
        env.close()
    return data
# Usage example: run this file as a script and pass --dataset_size <GB>.
if __name__ == '__main__':
    # Argument parsing lives under the guard so that importing this module
    # (e.g. to reuse file_to_binary) does not consume sys.argv or exit.
    import argparse

    # Create the ArgumentParser object
    parser = argparse.ArgumentParser(description='Process some inputs.')
    # --dataset_size is mandatory; the former default of 25 could never take
    # effect next to required=True, so it has been removed.
    parser.add_argument('--dataset_size', type=int, required=True,
                        help='lmdb requires pre-specifying the total dataset size (GB)')
    # Parse the arguments
    args = parser.parse_args()
    if args.dataset_size <= 0:
        parser.error('--dataset_size must be a positive number of GB')

    # load parameters from the yaml config
    yaml_path = './config_DFo.yaml'
    try:
        with open(yaml_path, 'r') as f:
            config = yaml.safe_load(f)
    except yaml.YAMLError as e:
        # Stop here: continuing would fail later on the undefined ``config``.
        print("YAML file parsing error:", e)
        raise SystemExit(1)

    config = config['to_lmdb']
    dataset_name = config['dataset_name']['default']
    dataset_size = args.dataset_size
    dataset_root_path = config['dataset_root_path']['default']
    output_lmdb_dir = config['output_lmdb_dir']['default']
    os.makedirs(output_lmdb_dir, exist_ok=True)

    dataset_dir_path = f"{dataset_root_path}/{dataset_name}"
    lmdb_path = f"{output_lmdb_dir}/{dataset_name}_lmdb"
    # map_size is expressed in bytes; the CLI value is in GB.
    create_lmdb_dataset(dataset_dir_path, lmdb_path, dataset_name,
                        map_size=int(dataset_size) * 1024 * 1024 * 1024)
    # read_lmdb(lmdb_path)