import cv2
import lmdb
import sys
from multiprocessing import Pool
from os import path as osp
from tqdm import tqdm


def make_lmdb_from_imgs(data_path,
                        lmdb_path,
                        img_path_list,
                        keys,
                        batch=5000,
                        compress_level=1,
                        multiprocessing_read=False,
                        n_thread=40,
                        map_size=None):
| | """Make lmdb from images. |
| | |
| | Contents of lmdb. The file structure is: |
| | example.lmdb |
| | βββ data.mdb |
| | βββ lock.mdb |
| | βββ meta_info.txt |
| | |
| | The data.mdb and lock.mdb are standard lmdb files and you can refer to |
| | https://lmdb.readthedocs.io/en/release/ for more details. |
| | |
| | The meta_info.txt is a specified txt file to record the meta information |
| | of our datasets. It will be automatically created when preparing |
| | datasets by our provided dataset tools. |
| | Each line in the txt file records 1)image name (with extension), |
| | 2)image shape, and 3)compression level, separated by a white space. |
| | |
| | For example, the meta information could be: |
| | `000_00000000.png (720,1280,3) 1`, which means: |
| | 1) image name (with extension): 000_00000000.png; |
| | 2) image shape: (720,1280,3); |
| | 3) compression level: 1 |
| | |
| | We use the image name without extension as the lmdb key. |
| | |
| | If `multiprocessing_read` is True, it will read all the images to memory |
| | using multiprocessing. Thus, your server needs to have enough memory. |
| | |
    Args:
        data_path (str): Data path for reading images.
        lmdb_path (str): Lmdb save path.
        img_path_list (list[str]): Image path list.
        keys (list[str]): Used for lmdb keys.
        batch (int): After processing batch images, lmdb commits.
            Default: 5000.
        compress_level (int): Compress level when encoding images. Default: 1.
        multiprocessing_read (bool): Whether to use multiprocessing to read all
            the images to memory. Default: False.
        n_thread (int): Thread number for multiprocessing reading. Default: 40.
        map_size (int | None): Map size for lmdb env. If None, use the
            estimated size from images. Default: None
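
    Example:
        A minimal usage sketch (the paths and keys below are illustrative
        only):

            img_path_list = ['000_00000000.png', '000_00000001.png']
            keys = ['000_00000000', '000_00000001']
            make_lmdb_from_imgs('path/to/images', 'path/to/save.lmdb',
                                img_path_list, keys)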
| | """ |
| |
|
| | assert len(img_path_list) == len(keys), ('img_path_list and keys should have the same length, ' |
| | f'but got {len(img_path_list)} and {len(keys)}') |
| | print(f'Create lmdb for {data_path}, save to {lmdb_path}...') |
| | print(f'Totoal images: {len(img_path_list)}') |
| | if not lmdb_path.endswith('.lmdb'): |
| | raise ValueError("lmdb_path must end with '.lmdb'.") |
| | if osp.exists(lmdb_path): |
| | print(f'Folder {lmdb_path} already exists. Exit.') |
| | sys.exit(1) |
| |
|
    if multiprocessing_read:
        # read all the images to memory with multiprocessing
        dataset = {}  # encoded image bytes, indexed by lmdb key
        shapes = {}  # image shapes, indexed by lmdb key
        print(f'Read images with multiprocessing, #thread: {n_thread} ...')
        pbar = tqdm(total=len(img_path_list), unit='image')

        def callback(arg):
            """Collect the read image data and update pbar."""
            key, dataset[key], shapes[key] = arg
            pbar.update(1)
            pbar.set_description(f'Read {key}')

        pool = Pool(n_thread)
        for path, key in zip(img_path_list, keys):
            pool.apply_async(read_img_worker, args=(osp.join(data_path, path), key, compress_level), callback=callback)
        pool.close()
        pool.join()
        pbar.close()
        print(f'Finish reading {len(img_path_list)} images.')

    # create lmdb environment
    if map_size is None:
        # estimate the map size from the encoded size of the first image
        img = cv2.imread(osp.join(data_path, img_path_list[0]), cv2.IMREAD_UNCHANGED)
        _, img_byte = cv2.imencode('.png', img, [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
        data_size_per_img = img_byte.nbytes
        print('Data size per image is: ', data_size_per_img)
        data_size = data_size_per_img * len(img_path_list)
        map_size = data_size * 10

    env = lmdb.open(lmdb_path, map_size=map_size)

    # write data to lmdb
    pbar = tqdm(total=len(img_path_list), unit='chunk')
    txn = env.begin(write=True)
    txt_file = open(osp.join(lmdb_path, 'meta_info.txt'), 'w')
    for idx, (path, key) in enumerate(zip(img_path_list, keys)):
        pbar.update(1)
        pbar.set_description(f'Write {key}')
        key_byte = key.encode('ascii')
        if multiprocessing_read:
            img_byte = dataset[key]
            h, w, c = shapes[key]
        else:
            _, img_byte, img_shape = read_img_worker(osp.join(data_path, path), key, compress_level)
            h, w, c = img_shape

        txn.put(key_byte, img_byte)
        # write meta information
        txt_file.write(f'{key}.png ({h},{w},{c}) {compress_level}\n')
        if idx % batch == 0:
            txn.commit()
            txn = env.begin(write=True)
    pbar.close()
    txn.commit()
    env.close()
    txt_file.close()
    print('\nFinish writing lmdb.')


def read_img_worker(path, key, compress_level):
    """Read image worker.

    Args:
        path (str): Image path.
        key (str): Image key.
        compress_level (int): Compress level when encoding images.

    Returns:
        str: Image key.
        bytes: Encoded image bytes (PNG).
        tuple[int]: Image shape.
    """

    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if img.ndim == 2:
        # grayscale image
        h, w = img.shape
        c = 1
    else:
        h, w, c = img.shape
    _, img_byte = cv2.imencode('.png', img, [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
    return (key, img_byte, (h, w, c))
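

# A minimal read-back sketch (not part of the original utilities): it assumes
# an lmdb created by make_lmdb_from_imgs above and a key such as
# '000_00000000'. numpy is imported locally only for this illustration, e.g.
# img = _read_img_from_lmdb_example('path/to/save.lmdb', '000_00000000')
def _read_img_from_lmdb_example(lmdb_path, key):
    """Decode one image from an lmdb produced by make_lmdb_from_imgs."""
    import numpy as np
    env = lmdb.open(lmdb_path, readonly=True, lock=False, readahead=False)
    with env.begin(write=False) as txn:
        img_byte = txn.get(key.encode('ascii'))
    env.close()
    # the stored value is PNG-encoded, so decode it back to an image array
    return cv2.imdecode(np.frombuffer(img_byte, np.uint8), cv2.IMREAD_UNCHANGED)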


class LmdbMaker:
    """LMDB Maker.

    Args:
        lmdb_path (str): Lmdb save path.
        map_size (int): Map size for lmdb env. Default: 1024 ** 4, 1TB.
        batch (int): After processing batch images, lmdb commits.
            Default: 5000.
        compress_level (int): Compress level when encoding images. Default: 1.
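
    Example:
        A minimal usage sketch (the paths, keys, and the use of
        read_img_worker to produce the encoded bytes are illustrative only):

            img_path_list = ['000_00000000.png']
            keys = ['000_00000000']
            maker = LmdbMaker('path/to/save.lmdb')
            for path, key in zip(img_path_list, keys):
                _, img_byte, img_shape = read_img_worker(path, key, 1)
                maker.put(img_byte, key, img_shape)
            maker.close()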
| | """ |
| |
|
| | def __init__(self, lmdb_path, map_size=1024**4, batch=5000, compress_level=1): |
| | if not lmdb_path.endswith('.lmdb'): |
| | raise ValueError("lmdb_path must end with '.lmdb'.") |
| | if osp.exists(lmdb_path): |
| | print(f'Folder {lmdb_path} already exists. Exit.') |
| | sys.exit(1) |
| |
|
| | self.lmdb_path = lmdb_path |
| | self.batch = batch |
| | self.compress_level = compress_level |
| | self.env = lmdb.open(lmdb_path, map_size=map_size) |
| | self.txn = self.env.begin(write=True) |
| | self.txt_file = open(osp.join(lmdb_path, 'meta_info.txt'), 'w') |
| | self.counter = 0 |
| |
|
| | def put(self, img_byte, key, img_shape): |
| | self.counter += 1 |
| | key_byte = key.encode('ascii') |
| | self.txn.put(key_byte, img_byte) |
| | |
| | h, w, c = img_shape |
| | self.txt_file.write(f'{key}.png ({h},{w},{c}) {self.compress_level}\n') |
| | if self.counter % self.batch == 0: |
| | self.txn.commit() |
| | self.txn = self.env.begin(write=True) |
| |
|
| | def close(self): |
| | self.txn.commit() |
| | self.env.close() |
| | self.txt_file.close() |
| |
|