# File size: 3,307 Bytes

# 8bc3305

import os
import json
import cv2
import lmdb
import yaml
from PIL import Image
import io
import numpy as np
def file_to_binary(file_path):
    """Return the contents of ``file_path`` as a ``bytes`` object.

    Args:
        file_path: Path of the file to read, as ``str`` or any
            ``os.PathLike`` object (e.g. ``pathlib.Path``).

    Returns:
        bytes: For paths ending in ``.npy`` the array is loaded with
        ``np.load`` and only its raw element buffer (``ndarray.tobytes()``)
        is returned, so the ``.npy`` header (shape and dtype) is NOT part of
        the result and the reader must already know both. Every other file
        is returned byte-for-byte as stored on disk.
    """
    # Normalise PathLike input; calling ``.endswith`` on a ``pathlib.Path``
    # would raise AttributeError.
    path = os.fspath(file_path)
    if path.endswith('.npy'):
        return np.load(path).tobytes()
    with open(path, 'rb') as f:
        return f.read()


def create_lmdb_dataset(source_folder, lmdb_path, dataset_name, map_size):
    """Pack every file under ``source_folder`` into an LMDB environment.

    The folder is walked recursively (following symlinks). Each file is
    stored under the UTF-8 encoded key
    ``"<dataset_name>/<path relative to source_folder>"`` with the bytes
    returned by ``file_to_binary`` as its value. All writes happen inside a
    single write transaction.

    Sub-directories whose name contains ``'video'`` are skipped together with
    everything below them. The check is made on directory names below
    ``source_folder`` only, so a ``source_folder`` that itself lives under a
    path containing "video" is still processed.

    Args:
        source_folder: Root directory of the dataset to pack.
        lmdb_path: Location of the LMDB environment to open/create.
        dataset_name: Prefix placed in front of every key.
        map_size: Maximum size of the LMDB map, in bytes.

    Note:
        The relative part of the key comes from ``os.path.relpath`` and
        therefore uses the path separator of the platform that builds the
        database.
    """
    db = lmdb.open(lmdb_path, map_size=map_size)
    try:
        with db.begin(write=True) as txn:
            for root, dirs, files in os.walk(source_folder, followlinks=True):
                print(root)
                # Prune in place so os.walk never descends into video
                # directories; a path component is the only place the
                # substring can occur, so this equals a relative-path test.
                dirs[:] = [d for d in dirs if 'video' not in d]
                for file in files:
                    print(file)
                    image_path = os.path.join(root, file)
                    relative_path = f"{dataset_name}/" + os.path.relpath(image_path, source_folder)
                    print("relative_path:", relative_path)
                    key = relative_path.encode('utf-8')
                    print("image_path:", image_path)
                    value = file_to_binary(image_path)

                    # write dataset
                    txn.put(key, value)
    finally:
        # Release the environment even if reading or writing a file failed
        # (the transaction context manager has already aborted by then).
        db.close()
 

def read_lmdb(lmdb_dir_path, key='npy_test\\000_003\\000.npy',
              dtype=np.uint32, shape=(81, 2)):
    """Read one entry from a generated LMDB and decode it as an array.

    Intended as a sanity check of the keys and values written by
    ``create_lmdb_dataset``.

    Args:
        lmdb_dir_path: Path of the LMDB environment to inspect.
        key: Key to look up; it is encoded with ``str.encode()`` before the
            lookup.
        dtype: Element type used to interpret the stored raw buffer.
        shape: Shape the decoded buffer is reshaped to.

    Returns:
        numpy.ndarray: Read-only array view over the stored bytes.

    Raises:
        KeyError: If ``key`` is not present in the database.

    Note:
        NOTE(review): the default key uses backslashes throughout, whereas
        ``create_lmdb_dataset`` joins the dataset name with ``'/'`` -- confirm
        the default matches the keys of the database being validated.
        Encoded images can be decoded instead with
        ``cv2.imdecode(np.frombuffer(value, dtype=np.uint8), cv2.IMREAD_COLOR)``.
    """
    # Read-only: a validation helper must not create or modify the database.
    env = lmdb.open(lmdb_dir_path, readonly=True)
    try:
        with env.begin(write=False) as txn:
            # With the default (non-buffer) mode ``get`` returns a bytes copy,
            # so the value stays valid after the environment is closed.
            binary = txn.get(key.encode())
    finally:
        env.close()

    if binary is None:
        raise KeyError(f"key {key!r} not found in LMDB at {lmdb_dir_path!r}")
    return np.frombuffer(binary, dtype=dtype).reshape(shape)


# Usage example:
#   python <this script> --dataset_size 25
# Dataset name, dataset root and output directory are read from the
# ``to_lmdb`` section of ./config_DFo.yaml.
import argparse

# Building the parser is side-effect free, so it stays at module level;
# the command line itself is only parsed when the file runs as a script.
parser = argparse.ArgumentParser(description='Process some inputs.')

# --dataset_size is mandatory, so no default is given (a default next to
# required=True can never take effect).
parser.add_argument('--dataset_size', type=int, required=True,
                    help='lmdb requires pre-specifying the total dataset size (GB)')

if __name__ == '__main__':
    args = parser.parse_args()
    if args.dataset_size <= 0:
        parser.error('--dataset_size must be a positive number of GB')

    # from config.yaml load parameters
    yaml_path = './config_DFo.yaml'
    try:
        with open(yaml_path, 'r') as f:
            config = yaml.safe_load(f)
    except yaml.YAMLError as e:
        # Stop here: continuing would only fail later with a NameError on
        # ``config``. YAMLError also covers scanner errors, not just
        # parser errors.
        raise SystemExit(f"YAML file parsing error: {e}")

    config = config['to_lmdb']
    dataset_name = config['dataset_name']['default']
    dataset_size = args.dataset_size
    dataset_root_path = config['dataset_root_path']['default']
    output_lmdb_dir = config['output_lmdb_dir']['default']
    os.makedirs(output_lmdb_dir, exist_ok=True)

    dataset_dir_path = f"{dataset_root_path}/{dataset_name}"
    lmdb_path = f"{output_lmdb_dir}/{dataset_name}_lmdb"
    # LMDB needs the maximum map size up front; convert GB to bytes.
    create_lmdb_dataset(dataset_dir_path, lmdb_path, dataset_name,
                        map_size=int(dataset_size) * 1024 * 1024 * 1024)
    # read_lmdb(lmdb_path)