| | |
| | |
| |
|
| | import io |
| | import os |
| | import struct |
| | from PIL import Image |
| |
|
| | class BigFileMemoryLoader(object): |
| | def __load_bigfile(self): |
| | print('start load bigfile (%0.02f GB) into memory' % (os.path.getsize(self.file_path)/1024/1024/1024)) |
| | with open(self.file_path, 'rb') as fid: |
| | self.img_num = struct.unpack('i', fid.read(4))[0] |
| | self.img_names = [] |
| | self.img_bytes = [] |
| | print('find total %d images' % self.img_num) |
| | for i in range(self.img_num): |
| | img_name_len = struct.unpack('i', fid.read(4))[0] |
| | img_name = fid.read(img_name_len).decode('utf-8') |
| | self.img_names.append(img_name) |
| | img_bytes_len = struct.unpack('i', fid.read(4))[0] |
| | self.img_bytes.append(fid.read(img_bytes_len)) |
| | if i % 5000 == 0: |
| | print('load %d images done' % i) |
| | print('load all %d images done' % self.img_num) |
| |
|
| | def __init__(self, file_path): |
| | super(BigFileMemoryLoader, self).__init__() |
| | self.file_path = file_path |
| | self.__load_bigfile() |
| |
|
| | def __getitem__(self, index): |
| | try: |
| | img = Image.open(io.BytesIO(self.img_bytes[index])).convert('RGB') |
| | return self.img_names[index], img |
| | except Exception: |
| | print('Image read error for index %d: %s' % (index, self.img_names[index])) |
| | return self.__getitem__((index+1)%self.img_num) |
| |
|
| |
|
| | def __len__(self): |
| | return self.img_num |
| |
|