# Page header captured together with this file (not part of the program):
#   MSherbinii's picture
#   Add IPAD model implementation
#   0f5deb2 verified
import numpy as np
from collections import OrderedDict
import os
import glob
import cv2
import torch.utils.data as data
import random
from PIL import Image
# Module-level NumPy random generator created with a fixed seed (2020).
# NOTE(review): nothing in the visible code references `rng`; the loaders below
# draw from `random` and `np.random` directly — confirm whether other code uses it.
rng = np.random.RandomState(2020)
def np_load_frame(filename, resize_height, resize_width, grayscale=False):
    """
    Load an image file, resize it and return it as a normalized numpy.ndarray.

    The image is decoded with ``cv2.imread`` using its default flag, so the
    color channels are in BGR order. Pixel values are converted to float32 and
    mapped from [0, 255] to [-1, 1].

    :param filename: the full path of the image
    :param resize_height: resized height
    :param resize_width: resized width
    :param grayscale: accepted for backward compatibility but ignored. The
        original implementation reset this flag to False before using it, so
        frames have always been decoded as color even when callers pass
        ``grayscale=True``; that behavior is kept.
    :return: numpy.ndarray of dtype float32
    :raises OSError: if ``cv2.imread`` cannot read ``filename``
    """
    image_decoded = cv2.imread(filename)
    if image_decoded is None:
        # cv2.imread signals a missing/undecodable file by returning None;
        # fail here with the path instead of with an opaque error in cv2.resize.
        raise OSError('cv2.imread could not read image: {!r}'.format(filename))

    # cv2.resize takes the target size as (width, height).
    image_resized = cv2.resize(image_decoded, (resize_width, resize_height))
    image_resized = image_resized.astype(dtype=np.float32)
    # 127.5 is half of 255, so 0 maps to -1 and 255 maps to 1.
    return (image_resized / 127.5) - 1.0
class Reconstruction3DDataLoader(data.Dataset):
    """
    Dataset of fixed-length clips of consecutive video frames.

    ``video_folder`` is scanned for sub-directories; each sub-directory is
    treated as one video and its files ending in ``img_extension`` as that
    video's frames (sorted by file name). Every sample is identified by the
    path of the first frame of a window of ``num_frames`` frames.
    """

    def __init__(self, video_folder, transform, resize_height, resize_width, num_frames=16,
                 img_extension='.jpg', dataset='ped2', jump=None, hold=None, return_normal_seq=False):
        """
        :param video_folder: directory containing one sub-directory per video
        :param transform: callable applied to every loaded frame, or None to
            keep the array returned by ``np_load_frame``
        :param resize_height: height every frame is resized to
        :param resize_width: width every frame is resized to
        :param num_frames: number of frames in one sample
        :param img_extension: file name suffix used to find frame files
        :param dataset: dataset name; ``'shanghai'`` enables the frame-number
            adjustment in ``__getitem__``
        :param jump: stored on the instance as ``self.jump``; defaults to ``[2]``
        :param hold: stored on the instance as ``self.hold``; defaults to ``[2]``
        :param return_normal_seq: stored on the instance as ``self.return_normal_seq``
        """
        self.dir = video_folder
        self.transform = transform
        self.videos = OrderedDict()
        self._resize_height = resize_height
        self._resize_width = resize_width
        self._num_frames = num_frames
        self.extension = img_extension
        self.dataset = dataset
        self.setup()
        self.samples, self.background_models = self.get_all_samples()
        # A fresh list per instance: a literal list default in the signature
        # would be one object shared by every instance that relies on it.
        self.jump = [2] if jump is None else jump
        self.hold = [2] if hold is None else hold
        self.return_normal_seq = return_normal_seq  # for fast and slow moving

    def setup(self):
        """Scan ``self.dir`` and record path, sorted frame list and length of every video."""
        for video in sorted(glob.glob(os.path.join(self.dir, '*/'))):
            print(video)
            # `video` ends with a path separator; normpath drops it so that
            # basename yields the directory name on any platform.
            video_name = os.path.basename(os.path.normpath(video))
            frame_paths = sorted(glob.glob(os.path.join(video, '*' + self.extension)))
            self.videos[video_name] = {
                'path': video,
                'frame': frame_paths,
                'length': len(frame_paths),
            }

    def get_all_samples(self):
        """
        Enumerate the first-frame path of every window of ``num_frames`` frames.

        :return: tuple ``(frames, background_models)``; ``background_models``
            is always an empty list
        """
        frames = []
        background_models = []
        # self.videos was filled in sorted directory order by setup(), so
        # iterating it reproduces that order without globbing again.
        for info in self.videos.values():
            # Videos shorter than one window contribute nothing; max() keeps a
            # negative count from turning into a from-the-end slice.
            num_windows = max(0, len(info['frame']) - self._num_frames + 1)
            frames.extend(info['frame'][:num_windows])
        return frames, background_models

    def __getitem__(self, index):
        """
        Load the clip that starts at sample ``index``.

        :return: OrderedDict with ``'batch'`` (the frames stacked along axis 1)
            and ``'index'`` (the start frame number scaled by 200 and divided
            by the video's frame count)
        """
        sample_path = self.samples[index]
        video_name = os.path.basename(os.path.dirname(sample_path))
        # The number in the file name is used directly as the position in the
        # video's sorted frame list.
        # NOTE(review): this assumes frame files are numbered contiguously
        # from 0 — confirm against the dataset layout.
        frame_name = int(os.path.basename(sample_path).split('.')[-2])
        if self.dataset == 'shanghai' and 'training' in sample_path:
            # Frame numbers of 'shanghai' training paths are shifted down by one.
            frame_name -= 1

        frame_paths = self.videos[video_name]['frame']
        batch = []
        for offset in range(self._num_frames):
            image = np_load_frame(frame_paths[frame_name + offset], self._resize_height,
                                  self._resize_width, grayscale=True)
            # Without a transform keep the raw frame; skipping it would leave
            # `batch` empty and make np.stack fail.
            batch.append(image if self.transform is None else self.transform(image))

        # Original author's note: batch has len 16, batch[0] is torch(3,256,256).
        img = OrderedDict()
        img['batch'] = np.stack(batch, axis=1)
        img['index'] = frame_name * 200 // len(frame_paths)
        return img

    def __len__(self):
        """Return the number of samples (clip start positions)."""
        return len(self.samples)
class Reconstruction3DDataLoaderJump(Reconstruction3DDataLoader):
    """
    Variant of ``Reconstruction3DDataLoader`` that samples frames with a stride.

    For every item a stride is drawn from ``self.jump`` and ``num_frames``
    frames are taken that many positions apart. When ``self.return_normal_seq``
    is set, the stride-1 clip from the same start position is returned as well.
    """

    def _load_clip(self, frame_paths, start, step):
        """Load ``num_frames`` frames from ``start`` every ``step`` positions, clamped to the last frame."""
        last_index = len(frame_paths) - 1
        clip = []
        for i in range(self._num_frames):
            image = np_load_frame(frame_paths[min(start + i * step, last_index)],
                                  self._resize_height, self._resize_width, grayscale=True)
            # Without a transform keep the raw frame; skipping it would leave
            # the clip empty and make np.stack fail.
            clip.append(image if self.transform is None else self.transform(image))
        return clip

    def __getitem__(self, index):
        """
        Load a strided clip for sample ``index``.

        :return: tuple ``(batch, normal_batch)``; both are frames stacked along
            axis 1 when ``self.return_normal_seq`` is true, otherwise
            ``normal_batch`` is an empty list
        """
        sample_path = self.samples[index]
        video_name = os.path.basename(os.path.dirname(sample_path))
        # The number in the file name is used directly as the position in the
        # video's sorted frame list.
        frame_name = int(os.path.basename(sample_path).split('.')[-2])
        if self.dataset == 'shanghai' and 'training' in sample_path:  # bcos my shanghai's start from 1
            frame_name -= 1

        frame_paths = self.videos[video_name]['frame']
        jump = random.choice(self.jump)
        span = (self._num_frames - 1) * jump  # distance from first to last sampled frame

        retry = 0
        # The last sampled position is frame_name + span and must be a valid
        # index, i.e. strictly below len(frame_paths). The previous `<` check
        # accepted a window overrunning by exactly one frame, which the clamp
        # in _load_clip then hid by repeating the final frame.
        while len(frame_paths) <= frame_name + span and retry < 10:
            # reselect the frame_name
            frame_name = np.random.randint(len(frame_paths))
            retry += 1

        batch = self._load_clip(frame_paths, frame_name, jump)
        if self.return_normal_seq:
            normal_batch = self._load_clip(frame_paths, frame_name, 1)
            return np.stack(batch, axis=1), np.stack(normal_batch, axis=1)
        return np.stack(batch, axis=1), []