s3_net / scripts /model.py

Upload folder using huggingface_hub

d9c5371 verified 2 days ago

17.2 kB

	#!/usr/bin/env python
	#
	# file: $ISIP_EXP/SOGMP/scripts/model.py
	#
	# revision history: xzt
	# 20220824 (TE): first version
	#
	# usage:
	#
	# This script hold the model architecture
	#------------------------------------------------------------------------------

	# import pytorch modules
	#
	from __future__ import print_function
	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	import numpy as np
	from collections import OrderedDict

	# import modules
	#
	import os
	import random

	# for reproducibility, we seed the rng
	#
	# SEED1: seed constant (not referenced elsewhere in the visible part of
	# this file)
	SEED1 = 1337
	# NEW_LINE: line separator for text that is split into lines
	NEW_LINE = "\n"

	# run on the GPU when CUDA is available, otherwise fall back to the CPU
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	#-----------------------------------------------------------------------------
	#
	# helper functions are listed here
	#
	#-----------------------------------------------------------------------------

	# function: set_seed
	#
	# arguments: seed - the seed for the rng (currently unused, see below)
	#
	# returns: none
	#
	# this method switches cuDNN to deterministic mode. the calls that would
	# seed the torch / python rngs are commented out, so `seed` has no effect
	#
	def set_seed(seed):
	    """Put cuDNN into deterministic mode.

	    Args:
	        seed: accepted for interface compatibility; it is not used because
	            the seeding calls below are commented out.

	    Returns:
	        None. Only the global ``torch.backends.cudnn`` flags are changed.
	    """
	    # seeding calls are disabled in this version (kept for reference):
	    #torch.manual_seed(seed)
	    #torch.cuda.manual_seed_all(seed)
	    #random.seed(seed)
	    #os.environ['PYTHONHASHSEED'] = str(seed)

	    # select deterministic cuDNN kernels and disable the auto-tuner
	    torch.backends.cudnn.deterministic = True
	    torch.backends.cudnn.benchmark = False
	#
	# end of method

	# calculate the angle of incidence of the lidar ray:
	def angle_incidence_calculation(b, c, alpha, last_ray=False):
	    """Compute the angle of incidence of lidar rays from neighbouring ranges.

	    Two neighbouring rays with ranges ``b`` and ``c`` and the angle ``alpha``
	    between them form a triangle. Its third side ``a`` (obtained with the
	    law of cosines) joins the two hit points. The returned value is
	    ``|pi/2 - phi|``, where ``phi`` is the triangle's interior angle at one
	    of the hit points.

	    Args:
	        b: range of the first ray (scalar or numpy array).
	        c: range of the following ray (same shape as ``b``).
	        alpha: angle between the two rays, in radians.
	        last_ray: if False, ``phi`` is the angle between sides ``a`` and
	            ``b`` (hit point of ray ``b``); if True, it is the angle between
	            sides ``a`` and ``c`` (hit point of ray ``c``).

	    Returns:
	        numpy array with one extra leading axis of length 1 (the cosine is
	        wrapped in a one-element list before ``arccos``): shape ``(1,)`` for
	        scalar inputs and ``(1, N)`` for length-N arrays.

	    Invalid ranges (NaN/Inf/0) are not sanitised here; they propagate into
	    the result and have to be handled by the caller.
	    """
	    # the law of cosines: side opposite to alpha
	    a = np.sqrt(b * b + c * c - 2 * b * c * np.cos(alpha))

	    if last_ray:  # the last ray
	        cos_phi = (a * a + c * c - b * b) / (2 * a * c)
	    else:
	        cos_phi = (a * a + b * b - c * c) / (2 * a * b)

	    # the list wrapper is kept on purpose: callers rely on the extra axis
	    phi = np.arccos([cos_phi])
	    theta = np.abs(np.pi / 2 - phi)

	    return theta

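	# class: VaeTestDataset
	#
	# arguments: img_path - root path of the dataset; it is joined to file
	#                       names by plain string concatenation
	#            file_name - name (without ".txt") of the list file read in
	#                        every data folder
	#
	# returns: each item is a dict with the 'scan', 'intensity',
	#          'angle_incidence' and 'label' tensors
	#
	# this dataset loads the lidar scans, intensities and semantic labels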
	POINTS = 1081  # number of range readings in one lidar scan


	class VaeTestDataset(torch.utils.data.Dataset):
	    """Dataset of lidar scans, intensities and per-point semantic labels.

	    ``img_path + 'dataset.txt'`` lists the data folders (only lines that
	    contain a '-' are used). Every folder holds ``<file_name>.txt`` listing
	    ``.npy`` file names that are looked up in the ``scans_lidar``,
	    ``intensities_lidar`` and ``semantic_label`` sub-folders.

	    Args:
	        img_path: root path of the dataset. It is concatenated as a plain
	            string, so it must already end with a path separator.
	        file_name: name of the per-folder list file, without ``.txt``.
	    """

	    def __init__(self, img_path, file_name):
	        # paths of the samples, index-aligned across the three lists
	        self.scan_file_names = []
	        self.intensity_file_names = []
	        #self.vel_file_names = []
	        self.label_file_names = []

	        # normalisation statistics (mean, std) of scan, intensity and
	        # angle of incidence
	        self.s_mu = 4.518406
	        self.s_std = 8.2914915
	        self.i_mu = 3081.8167
	        self.i_std = 1529.4413
	        self.a_mu = 0.5959513
	        self.a_std = 0.4783924

	        # read the list of data folders:
	        with open(img_path + 'dataset.txt', 'r') as fp_folder:
	            folder_lines = fp_folder.read().split("\n")

	        for folder_line in folder_lines:
	            # strip stray '\r' / blanks so they do not end up in the paths
	            folder_path = folder_line.strip()
	            if '-' not in folder_path:
	                continue

	            # read the list of samples of this folder:
	            list_path = img_path + folder_path + '/' + file_name + '.txt'
	            with open(list_path, 'r') as fp_file:
	                sample_lines = fp_file.read().split("\n")

	            for sample_line in sample_lines:
	                line = sample_line.strip()
	                if '.npy' not in line:
	                    continue
	                self.scan_file_names.append(img_path + folder_path + '/scans_lidar/' + line)
	                self.intensity_file_names.append(img_path + folder_path + '/intensities_lidar/' + line)
	                #self.vel_file_names.append(img_path+folder_path+'/velocities/'+line)
	                self.label_file_names.append(img_path + folder_path + '/semantic_label/' + line)

	        self.length = len(self.scan_file_names)

	        print("dataset length: ", self.length)

	    def __len__(self):
	        """Return the number of samples found by ``__init__``."""
	        return self.length

	    @staticmethod
	    def _zero_non_finite(values):
	        """Set NaN and +/-Inf entries of ``values`` to 0 in place and return it."""
	        values[~np.isfinite(values)] = 0.
	        return values

	    def __getitem__(self, idx):
	        """Load, clean and normalise the sample with index ``idx``.

	        Returns:
	            dict with float tensors under the keys 'scan', 'intensity',
	            'angle_incidence' (all three standardised with the statistics
	            set in ``__init__``) and 'label' (not standardised).
	        """
	        # load the raw arrays:
	        intensity = np.load(self.intensity_file_names[idx])
	        scan = np.load(self.scan_file_names[idx])
	        label = np.load(self.label_file_names[idx])

	        # angle between two neighbouring rays: 270 degrees spread over
	        # POINTS - 1 gaps
	        alpha_step = (270 * np.pi / 180) / (POINTS - 1)

	        # angle of incidence of every ray but the last one, computed from
	        # the ray and its successor (on the raw, not yet cleaned ranges):
	        b = scan[:-1]
	        c = scan[1:]
	        alpha = np.ones(POINTS - 1) * alpha_step
	        theta = angle_incidence_calculation(b, c, alpha)
	        # last ray: it has no successor, so the last pair is reused
	        theta_last = angle_incidence_calculation(scan[-2], scan[-1], alpha_step, last_ray=True)
	        # theta carries an extra leading axis of length 1, hence theta[0]
	        angle_incidence = np.concatenate((theta[0], theta_last), axis=0)

	        # replace invalid values (NaN / Inf) by 0:
	        scan = self._zero_non_finite(scan)
	        intensity = self._zero_non_finite(intensity)
	        angle_incidence = self._zero_non_finite(angle_incidence)
	        label = self._zero_non_finite(label)

	        # data normalization (standardization):
	        scan = (scan - self.s_mu) / self.s_std
	        intensity = (intensity - self.i_mu) / self.i_std
	        angle_incidence = (angle_incidence - self.a_mu) / self.a_std

	        # transfer to pytorch tensors:
	        data = {
	            'scan': torch.FloatTensor(scan),
	            'intensity': torch.FloatTensor(intensity),
	            'angle_incidence': torch.FloatTensor(angle_incidence),
	            'label': torch.FloatTensor(label),
	        }

	        return data

	#
	# end of function


	#------------------------------------------------------------------------------
	#
	# the model is defined here
	#
	#------------------------------------------------------------------------------

	# define the PyTorch VAE model
	#
	# define a VAE
	# Residual blocks:
	class Residual(nn.Module):
	    """1-D residual block: ReLU, 3-wide conv, BN, ReLU, 1-wide conv, BN, plus skip.

	    Args:
	        in_channels: number of channels of the input.
	        num_hiddens: number of channels of the block output. The skip
	            connection adds input and output, so it has to match
	            ``in_channels``.
	        num_residual_hiddens: number of channels between the two convolutions.
	    """

	    def __init__(self, in_channels, num_hiddens, num_residual_hiddens):
	        super(Residual, self).__init__()
	        # NOTE: the order of the layers fixes the state_dict keys
	        # (_block.1, _block.2, _block.4, _block.5); do not reorder.
	        self._block = nn.Sequential(
	            nn.ReLU(True),
	            nn.Conv1d(in_channels=in_channels,
	                      out_channels=num_residual_hiddens,
	                      kernel_size=3, stride=1, padding=1, bias=False),
	            nn.BatchNorm1d(num_residual_hiddens),
	            nn.ReLU(True),
	            nn.Conv1d(in_channels=num_residual_hiddens,
	                      out_channels=num_hiddens,
	                      kernel_size=1, stride=1, bias=False),
	            nn.BatchNorm1d(num_hiddens)
	        )

	    def forward(self, x):
	        """Return ``x + block(x)``; the sequence length is unchanged.

	        The first layer of the block is an in-place ReLU, so ``x`` itself is
	        rectified before the addition: the result equals
	        ``relu(x) + block(relu(x))`` and the caller's tensor is modified.
	        """
	        return x + self._block(x)

	class ResidualStack(nn.Module):
	    """Sequence of ``num_residual_layers`` Residual blocks followed by a ReLU.

	    Args:
	        in_channels: number of input channels of every block.
	        num_hiddens: number of output channels of every block.
	        num_residual_layers: number of Residual blocks in the stack.
	        num_residual_hiddens: number of inner channels of every block.
	    """

	    def __init__(self, in_channels, num_hiddens, num_residual_layers, num_residual_hiddens):
	        super(ResidualStack, self).__init__()
	        self._num_residual_layers = num_residual_layers
	        self._layers = nn.ModuleList(
	            [Residual(in_channels, num_hiddens, num_residual_hiddens)
	             for _ in range(self._num_residual_layers)])

	    def forward(self, x):
	        """Apply the blocks one after the other, then a final ReLU."""
	        for layer in self._layers:
	            x = layer(x)
	        return F.relu(x)

	# Encoder & Decoder Architecture:
	# Encoder:
	class Encoder(nn.Module):
	    """Convolutional encoder: two stride-2 convolutions and a residual stack.

	    Args:
	        in_channels: number of channels of the input signal.
	        num_hiddens: number of channels of the encoder output (the first
	            convolution produces ``num_hiddens // 2`` channels).
	        num_residual_layers: number of blocks in the residual stack.
	        num_residual_hiddens: inner channels of the residual blocks.
	    """

	    def __init__(self, in_channels, num_hiddens, num_residual_layers, num_residual_hiddens):
	        super(Encoder, self).__init__()
	        # first down-sampling stage (stride 2): conv -> BN -> ReLU
	        self._conv_1 = nn.Sequential(
	            nn.Conv1d(in_channels=in_channels,
	                      out_channels=num_hiddens//2,
	                      kernel_size=4,
	                      stride=2,
	                      padding=1),
	            nn.BatchNorm1d(num_hiddens//2),
	            nn.ReLU(True)
	        )
	        # second down-sampling stage (stride 2): conv -> BN, no activation
	        # (the residual stack that follows starts with a ReLU)
	        self._conv_2 = nn.Sequential(
	            nn.Conv1d(in_channels=num_hiddens//2,
	                      out_channels=num_hiddens,
	                      kernel_size=4,
	                      stride=2,
	                      padding=1),
	            nn.BatchNorm1d(num_hiddens)
	            #nn.ReLU(True)
	        )
	        self._residual_stack = ResidualStack(in_channels=num_hiddens,
	                                             num_hiddens=num_hiddens,
	                                             num_residual_layers=num_residual_layers,
	                                             num_residual_hiddens=num_residual_hiddens)

	    def forward(self, inputs):
	        """Encode ``inputs``; the output has ``num_hiddens`` channels."""
	        x = self._conv_1(inputs)
	        x = self._conv_2(x)
	        x = self._residual_stack(x)
	        return x

	# Decoder:
	class Decoder(nn.Module):
	    """Convolutional decoder: residual stack, two stride-2 transposed
	    convolutions and a final 3-wide convolution.

	    Args:
	        out_channels: number of channels of the decoder output.
	        num_hiddens: number of channels of the decoder input.
	        num_residual_layers: number of blocks in the residual stack.
	        num_residual_hiddens: inner channels of the residual blocks.
	    """

	    def __init__(self, out_channels, num_hiddens, num_residual_layers, num_residual_hiddens):
	        super(Decoder, self).__init__()

	        self._residual_stack = ResidualStack(in_channels=num_hiddens,
	                                             num_hiddens=num_hiddens,
	                                             num_residual_layers=num_residual_layers,
	                                             num_residual_hiddens=num_residual_hiddens)

	        # first up-sampling stage (stride 2): ReLU -> deconv -> BN -> ReLU
	        self._conv_trans_2 = nn.Sequential(
	            nn.ReLU(True),
	            nn.ConvTranspose1d(in_channels=num_hiddens,
	                               out_channels=num_hiddens//2,
	                               kernel_size=4,
	                               stride=2,
	                               padding=1),
	            nn.BatchNorm1d(num_hiddens//2),
	            nn.ReLU(True)
	        )

	        # second up-sampling stage (stride 2, output_padding=1 adds one
	        # extra output sample), then the projection to out_channels;
	        # no activation is applied to the output
	        self._conv_trans_1 = nn.Sequential(
	            nn.ConvTranspose1d(in_channels=num_hiddens//2,
	                               out_channels=num_hiddens//2,
	                               kernel_size=4,
	                               stride=2,
	                               padding=1,
	                               output_padding=1),
	            nn.BatchNorm1d(num_hiddens//2),
	            nn.ReLU(True),
	            nn.Conv1d(in_channels=num_hiddens//2,
	                      out_channels=out_channels,
	                      kernel_size=3,
	                      stride=1,
	                      padding=1),
	            #nn.Sigmoid()
	        )

	    def forward(self, inputs):
	        """Decode ``inputs``; the output has ``out_channels`` channels."""
	        x = self._residual_stack(inputs)
	        x = self._conv_trans_2(x)
	        x = self._conv_trans_1(x)
	        return x

	class VAE_Encoder(nn.Module):
	    """Encoder that maps the input to the mean and log standard deviation
	    of the latent distribution.

	    Args:
	        input_channel: number of channels of the input.
	        num_hiddens: number of channels produced by the convolutional encoder.
	        num_residual_layers: number of blocks in the residual stack.
	        num_residual_hiddens: inner channels of the residual blocks.
	        embedding_dim: number of channels of the two latent outputs.
	    """

	    def __init__(self, input_channel, num_hiddens, num_residual_layers, num_residual_hiddens, embedding_dim):
	        super(VAE_Encoder, self).__init__()
	        # parameters:
	        self.input_channels = input_channel
	        # values previously hard-coded here (now passed in as arguments):
	        #   num_hiddens = 128, num_residual_hiddens = 64,
	        #   num_residual_layers = 2, embedding_dim = 2

	        # encoder:
	        in_channels = input_channel
	        self._encoder = Encoder(in_channels,
	                                num_hiddens,
	                                num_residual_layers,
	                                num_residual_hiddens)

	        # z latent variable: two 1x1 convolutions on the encoder output
	        self._encoder_z_mu = nn.Conv1d(in_channels=num_hiddens,
	                                       out_channels=embedding_dim,
	                                       kernel_size=1,
	                                       stride=1)
	        self._encoder_z_log_sd = nn.Conv1d(in_channels=num_hiddens,
	                                           out_channels=embedding_dim,
	                                           kernel_size=1,
	                                           stride=1)

	    def forward(self, x):
	        """Return ``(z_mu, z_log_sd)`` for the input ``x``.

	        ``x`` is first reshaped to ``(-1, input_channels, POINTS)``.
	        """
	        # input reshape:
	        x = x.reshape(-1, self.input_channels, POINTS)
	        # Encoder:
	        encoder_out = self._encoder(x)
	        # get `mu` and `log_sd`:
	        z_mu = self._encoder_z_mu(encoder_out)
	        z_log_sd = self._encoder_z_log_sd(encoder_out)
	        return z_mu, z_log_sd

	# our proposed model:
	class S3Net(nn.Module):
	    """VAE-style network that maps lidar scan, intensity and angle of
	    incidence to per-point semantic class scores.

	    Args:
	        input_channels: number of channels given to the encoder. ``forward``
	            concatenates three single-channel inputs, so this is expected to
	            be 3.
	        output_channels: number of channels produced by the decoder.
	    """

	    def __init__(self, input_channels, output_channels):
	        super(S3Net, self).__init__()
	        # parameters:
	        self.input_channels = input_channels
	        # length of the latent sequence; the reshapes below require the
	        # encoder output to hold exactly this many values per sample
	        self.latent_dim = 270
	        self.output_channels = output_channels

	        # Constants
	        num_hiddens = 64 #128
	        num_residual_hiddens = 32 #64
	        num_residual_layers = 2
	        embedding_dim = 1 #2

	        # prediction encoder:
	        self._encoder = VAE_Encoder(self.input_channels,
	                                    num_hiddens,
	                                    num_residual_layers,
	                                    num_residual_hiddens,
	                                    embedding_dim)

	        # decoder:
	        self._decoder_z_mu = nn.ConvTranspose1d(in_channels=embedding_dim,
	                                                out_channels=num_hiddens,
	                                                kernel_size=1,
	                                                stride=1)
	        self._decoder = Decoder(self.output_channels,
	                                num_hiddens,
	                                num_residual_layers,
	                                num_residual_hiddens)

	        # softmax over the channel axis
	        self.softmax = nn.Softmax(dim=1)

	    def vae_reparameterize(self, z_mu, z_log_sd):
	        """Sample the latent variable and compute the KL loss.

	        :param z_mu: mean from the encoder's latent space
	        :param z_log_sd: log standard deviation from the encoder's latent space
	        :output: reparameterized latent variable z of shape
	                 (-1, latent_dim, 1) and the Monte Carlo KL loss (scalar)
	        """
	        # reshape:
	        z_mu = z_mu.reshape(-1, self.latent_dim, 1)
	        z_log_sd = z_log_sd.reshape(-1, self.latent_dim, 1)
	        # define the z probabilities (in this case Normal for both)
	        # p(z): N(z|0,I)
	        pz = torch.distributions.Normal(loc=torch.zeros_like(z_mu), scale=torch.ones_like(z_log_sd))
	        # q(z|x,phi): N(z|mu, exp(log_sd))
	        qz_x = torch.distributions.Normal(loc=z_mu, scale=torch.exp(z_log_sd))

	        # reparameterization trick: z = z_mu + xi (*) exp(z_log_sd), xi~N(xi|0,I)
	        z = qz_x.rsample()
	        # single-sample Monte Carlo estimate: log(p(z)) - log(q(z|x,phi)),
	        # summed over the latent dim, leaves the batch dim
	        kl_divergence = (pz.log_prob(z) - qz_x.log_prob(z)).sum(dim=1)
	        # the sign flip turns it into log q - log p, averaged over the batch
	        kl_loss = -kl_divergence.mean()

	        return z, kl_loss

	    def forward(self, x_s, x_i, x_a):
	        """Forward pass of scan, intensity and angle of incidence.

	        Args:
	            x_s: lidar scan; reshaped to (-1, 1, POINTS).
	            x_i: lidar intensity; reshaped to (-1, 1, POINTS).
	            x_a: angle of incidence; reshaped to (-1, 1, POINTS).

	        Returns:
	            tuple ``(semantic_scan, semantic_channels, kl_loss)``: the
	            softmax over the channel axis of the decoder output, the raw
	            decoder output, and the KL loss of the latent sample.
	        """
	        # input reshape:
	        x_s = x_s.reshape(-1, 1, POINTS)
	        x_i = x_i.reshape(-1, 1, POINTS)
	        x_a = x_a.reshape(-1, 1, POINTS)
	        # concatenate along channel axis
	        x = torch.cat([x_s, x_i, x_a], dim=1)

	        # encode:
	        z_mu, z_log_sd = self._encoder(x)

	        # get the latent vector through reparameterization:
	        z, kl_loss = self.vae_reparameterize(z_mu, z_log_sd)

	        # decode:
	        # reshape to one channel of latent_dim samples:
	        z = z.reshape(-1, 1, self.latent_dim)
	        x_d = self._decoder_z_mu(z)
	        semantic_channels = self._decoder(x_d)

	        # per-point class probabilities:
	        semantic_scan = self.softmax(semantic_channels)

	        return semantic_scan, semantic_channels, kl_loss

	#
	# end of class

	#
	# end of file