Spaces:

rawanessam
/

DeepFloorPlan2

Runtime error

App Files Files Community

DeepFloorPlan2 / net.py

rawanessam

Upload 3 files

03804fe verified 7 months ago

raw

history blame contribute delete

15.1 kB

	import numpy as np
	import tensorflow.compat.v1 as tf
	tf.disable_v2_behavior() # using tf 1.10.1

	from tf_slim.nets import vgg

	import os
	import sys
	import glob
	import time
	import random

	from scipy import ndimage
	import imageio
	from PIL import Image

	# Extend the module search path with ./utils/ (resolved against the current
	# working directory) before the project-local imports below.
	# NOTE(review): presumably rgb_ind_convertor, util and tf_record live in
	# ./utils/ -- confirm; if so these imports only work when the process is
	# started from the directory that contains utils/.
	sys.path.append('./utils/')
	from rgb_ind_convertor import *
	from util import fast_hist
	from tf_record import read_record, read_bd_rm_record

	# GPU identifier kept as a string. It is not referenced anywhere in the
	# visible part of this file -- presumably read by the training/inference
	# scripts that import this module (TODO confirm).
	GPU_ID = '0'

	def data_loader_bd_rm_from_tfrecord(batch_size=1):
	    """Create the TFRecord loader and compute the number of batches per epoch.

	    Args:
	        batch_size: number of samples per batch. It is forwarded to
	            ``read_bd_rm_record`` and used to divide the length of the
	            training list.

	    Returns:
	        A ``(loader_dict, num_batch)`` tuple: the object returned by
	        ``read_bd_rm_record`` for ``'../dataset/r3d.tfrecords'`` (read with
	        ``size=512``) and ``len(paths) // batch_size``, where ``paths`` are
	        the lines of ``'../dataset/r3d_train.txt'``.
	    """
	    # Read the sample list inside a context manager so the file handle is
	    # closed deterministically instead of being left to the garbage collector.
	    with open('../dataset/r3d_train.txt', 'r') as list_file:
	        paths = list_file.read().splitlines()

	    loader_dict = read_bd_rm_record('../dataset/r3d.tfrecords', batch_size=batch_size, size=512)

	    # Floor division: a trailing partial batch is not counted.
	    num_batch = len(paths) // batch_size

	    return loader_dict, num_batch

	class Network(object):
	    """Two-headed fully convolutional network on a shared VGG16-style encoder.

	    ``forward`` builds three variable scopes:

	    * ``FNet``  -- the shared encoder (conv1_1 ... conv5_3 with five poolings),
	      optionally initialised from a VGG16 checkpoint.
	    * ``CWNet`` -- a decoder producing 3-channel logits (``logits_cw``).
	    * ``RNet``  -- a decoder for room type detection producing 9-channel
	      logits (``logits_r``); it is guided by the CWNet features through
	      ``_non_local_context``.

	    Variable and scope names are part of the checkpoint format and must not
	    be renamed.
	    """

	    def __init__(self, dtype=tf.float32):
	        """Store the variable dtype and build the VGG16 restore map.

	        Args:
	            dtype: dtype used for every variable created by this object.
	        """
	        print('Initial nn network object...')
	        self.dtype = dtype

	        # {'checkpoint_scope_var_name': 'current_scope_var_name'}, shapes must
	        # be the same, e.g.
	        #   'vgg_16/conv1/conv1_1/weights' -> 'FNet/conv1_1/W'
	        #   'vgg_16/conv1/conv1_1/biases'  -> 'FNet/conv1_1/b'
	        # Generated for the five VGG16 blocks holding 2, 2, 3, 3, 3 conv layers.
	        self.pre_train_restore_map = {}
	        for block, depth in enumerate((2, 2, 3, 3, 3), start=1):
	            for idx in range(1, depth + 1):
	                layer = 'conv{}_{}'.format(block, idx)
	                ckpt_prefix = 'vgg_16/conv{}/{}'.format(block, layer)
	                self.pre_train_restore_map[ckpt_prefix + '/weights'] = 'FNet/' + layer + '/W'
	                self.pre_train_restore_map[ckpt_prefix + '/biases'] = 'FNet/' + layer + '/b'

	    def convert_one_hot_to_image(self, one_hot, dtype='float', act=None):
	        """Collapse per-class scores into a single-channel label map.

	        This method was moved from MODEL in main.py for inference
	        compatibility.

	        Args:
	            one_hot: 4-D tensor ``[n, h, w, c]`` of per-class scores; the
	                static shape is used for the final reshape.
	            dtype: ``'int'`` casts the result to ``tf.uint8``; any other value
	                casts it to ``tf.float32``.
	            act: when ``'softmax'``, a softmax over the last axis is applied
	                before the argmax.

	        Returns:
	            Tensor of shape ``[n, h, w, 1]`` holding the argmax class index.
	        """
	        if act == 'softmax':
	            one_hot = tf.nn.softmax(one_hot, axis=-1)
	        [n, h, w, c] = one_hot.shape.as_list()
	        im = tf.reshape(tf.argmax(one_hot, axis=-1), [n, h, w, 1])
	        if dtype == 'int':
	            im = tf.cast(im, dtype=tf.uint8)
	        else:
	            im = tf.cast(im, dtype=tf.float32)
	        return im

	    # basic layer
	    def _he_uniform(self, shape, regularizer=None, trainable=None, name=None):
	        """Create a kernel variable drawn uniformly from ``[-s, s]``.

	        ``s = sqrt(1 / fan_in)`` with ``fan_in = in_dim * k_h * k_w``.

	        Args:
	            shape: kernel shape ``(k_h, k_w, in_dim, out_dim)``.
	            regularizer: forwarded to ``tf.get_variable``.
	            trainable: forwarded to ``tf.get_variable``.
	            name: layer name; the variable is called ``name + '/W'`` (or
	                ``'W'`` when ``name`` is None).

	        Returns:
	            The created (or reused) variable.
	        """
	        name = 'W' if name is None else name+'/W'

	        # size = (k_h, k_w, in_dim, out_dim)
	        kernel_size = np.prod(shape[:2]) # k_h*k_w
	        fan_in = shape[-2] * kernel_size # fan_out = shape[-1]*kernel_size

	        # compute the scale value
	        s = np.sqrt(1. / fan_in)

	        # create the variable in the current variable scope
	        w = tf.get_variable(name, shape, dtype=self.dtype,
	                            initializer=tf.random_uniform_initializer(minval=-s, maxval=s),
	                            regularizer=regularizer, trainable=trainable)

	        return w

	    def _constant(self, shape, value=0, regularizer=None, trainable=None, name=None):
	        """Create a constant-initialised variable (used for biases).

	        Args:
	            shape: variable shape.
	            value: constant every element is initialised to.
	            regularizer: forwarded to ``tf.get_variable``.
	            trainable: forwarded to ``tf.get_variable``.
	            name: layer name; the variable is called ``name + '/b'`` (or
	                ``'b'`` when ``name`` is None).

	        Returns:
	            The created (or reused) variable.
	        """
	        name = 'b' if name is None else name+'/b'

	        b = tf.get_variable(name, shape, dtype=self.dtype,
	                            initializer=tf.constant_initializer(value=value),
	                            regularizer=regularizer, trainable=trainable)

	        return b

	    def _conv2d(self, tensor, dim, size=3, stride=1, rate=1, pad='SAME', act='relu', norm='none', G=16, bias=True, name='conv'):
	        """pre activate => norm => conv

	        The activation is applied to the *input*, optionally followed by group
	        normalization, and only then the convolution (plus bias) is computed.

	        Args:
	            tensor: 4-D input ``[bs, h, w, c]``.
	            dim: number of output channels.
	            size: kernel size, an int or ``[k_h, k_w]``.
	            stride: int, or a full 4-element strides list.
	            rate: dilation, int or a full 4-element dilations list.
	            pad: padding mode passed to ``tf.nn.conv2d``.
	            act: ``'relu'``, ``'sigmoid'``, ``'softplus'`` or ``'leaky_relu'``;
	                any other value (e.g. ``'linear'``) means no activation and
	                also disables normalization.
	            norm: ``'gn'`` enables group normalization after the activation.
	            G: requested number of groups (clamped to the channel count).
	            bias: add a zero-initialised bias when True.
	            name: layer name, prefix of every variable/op created here.

	        Returns:
	            The convolution output tensor.
	        """
	        in_dim = tensor.shape.as_list()[-1]
	        size = size if isinstance(size, (tuple, list)) else [size, size]
	        stride = stride if isinstance(stride, (tuple, list)) else [1, stride, stride, 1]
	        rate = rate if isinstance(rate, (tuple, list)) else [1, rate, rate, 1]
	        kernel_shape = [size[0], size[1], in_dim, dim]

	        w = self._he_uniform(kernel_shape, name=name)
	        b = self._constant(dim, name=name) if bias else 0

	        # The op name suffix equals the activation key, e.g. name + '/relu'.
	        activations = {
	            'relu': tf.nn.relu,
	            'sigmoid': tf.nn.sigmoid,
	            'softplus': tf.nn.softplus,
	            'leaky_relu': tf.nn.leaky_relu,
	        }
	        if act in activations:
	            tensor = activations[act](tensor, name=name + '/' + act)
	        else:
	            # no activation => normalization is skipped as well
	            norm = 'none'

	        if norm == 'gn': # group normalization after activation
	            # normalize
	            # transpose: [bs, h, w, c] to [bs, c, h, w] following the paper
	            x = tf.transpose(tensor, [0, 3, 1, 2])
	            N, C, H, W = x.get_shape().as_list()
	            G = min(G, C)
	            x = tf.reshape(x, [-1, G, C // G, H, W])
	            mean, var = tf.nn.moments(x, [2, 3, 4], keep_dims=True)
	            x = (x - mean) / tf.sqrt(var + 1e-6)

	            # per channel gamma and beta
	            gamma = tf.get_variable(name+'/gamma', [C], dtype=self.dtype, initializer=tf.constant_initializer(1.0))
	            beta = tf.get_variable(name+'/beta', [C], dtype=self.dtype, initializer=tf.constant_initializer(0.0))
	            gamma = tf.reshape(gamma, [1, C, 1, 1])
	            beta = tf.reshape(beta, [1, C, 1, 1])

	            tensor = tf.reshape(x, [-1, C, H, W]) * gamma + beta
	            # transpose: [bs, c, h, w] back to [bs, h, w, c]
	            tensor = tf.transpose(tensor, [0, 2, 3, 1])

	        # b is the python int 0 when bias=False
	        out = tf.nn.conv2d(tensor, w, strides=stride, padding=pad, dilations=rate, name=name) + b

	        return out

	    def _upconv2d(self, tensor, dim, size=4, stride=2, pad='SAME', act='relu', name='upconv'):
	        """Transposed convolution followed by an optional activation.

	        Args:
	            tensor: 4-D input ``[batch, h, w, in_dim]`` with static ``h``/``w``.
	            dim: number of output channels.
	            size: kernel size, an int or ``[k_h, k_w]``.
	            stride: int, or a full 4-element strides list.
	            pad: ``'SAME'`` gives an output of ``h*stride x w*stride``; any
	                other value uses ``(h-1)*stride + k_h`` / ``(w-1)*stride + k_w``.
	            act: ``'relu'`` or ``'sigmoid'``; anything else means no activation.
	            name: layer name, prefix of every variable/op created here.

	        Returns:
	            The upsampled tensor with its static shape set.
	        """
	        [batch_size, h, w, in_dim] = tensor.shape.as_list()

	        size = size if isinstance(size, (tuple, list)) else [size, size]
	        stride = stride if isinstance(stride, (tuple, list)) else [1, stride, stride, 1]

	        # conv2d_transpose kernels are laid out as [k_h, k_w, out_dim, in_dim]
	        kernel_shape = [size[0], size[1], dim, in_dim]
	        W = self._he_uniform(kernel_shape, name=name)

	        if pad == 'SAME':
	            out_hw = [h * stride[1], w * stride[2]]
	        else:
	            out_hw = [(h-1)*stride[1]+size[0],
	                      (w-1)*stride[2]+size[1]]
	        out_shape = [batch_size] + out_hw + [dim]

	        # tf.stack cannot pack None, so fall back to the runtime batch size
	        # when the static batch dimension is unknown.
	        run_batch = tf.shape(tensor)[0] if batch_size is None else batch_size

	        out = tf.nn.conv2d_transpose(tensor, W, output_shape=tf.stack([run_batch] + out_hw + [dim]),
	                                     strides=stride, padding=pad, name=name)

	        # reset shape information
	        out.set_shape(out_shape)

	        if act == 'relu':
	            out = tf.nn.relu(out, name=name+'/relu')
	        elif act == 'sigmoid':
	            out = tf.nn.sigmoid(out, name=name+'/sigmoid')

	        return out

	    def _max_pool2d(self, tensor, size=2, stride=2, pad='VALID'):
	        """Max pooling accepting int, 2-element or 4-element size/stride.

	        Args:
	            tensor: 4-D input tensor.
	            size: window size; int, ``[k_h, k_w]`` or a full 4-element list.
	            stride: stride; int, ``[s_h, s_w]`` or a full 4-element list.
	            pad: padding mode passed to ``tf.nn.max_pool``.

	        Returns:
	            The pooled tensor.
	        """
	        size = size if isinstance(size, (tuple, list)) else [1, size, size, 1]
	        stride = stride if isinstance(stride, (tuple, list)) else [1, stride, stride, 1]
	        # expand 2-element specs to the 4-element form
	        size = [1, size[0], size[1], 1] if len(size)==2 else size
	        stride = [1, stride[0], stride[1], 1] if len(stride)==2 else stride

	        out = tf.nn.max_pool(tensor, size, stride, pad)

	        return out

	    # following three function used for combining context features
	    def _constant_kernel(self, shape, value=1.0, diag=False, flip=False, regularizer=None, trainable=None, name=None):
	        """Create a fixed kernel variable: constant-filled or (flipped) identity.

	        Args:
	            shape: kernel shape ``[k_h, k_w, in_dim, out_dim]``.
	            value: fill value used when ``diag`` is False.
	            diag: when True the kernel is ``tf.eye(k_h, k_w)`` reshaped to
	                ``shape`` (which only fits when ``in_dim * out_dim == 1``).
	            flip: with ``diag``, mirror the identity left-to-right.
	            regularizer: forwarded to ``tf.get_variable``.
	            trainable: forwarded to ``tf.get_variable``.
	            name: layer name; the variable is called ``name + '/fixed_w'``
	                (or ``'fixed_w'`` when ``name`` is None).

	        Returns:
	            The created (or reused) variable.
	        """
	        name = 'fixed_w' if name is None else name+'/fixed_w'

	        if not diag:
	            k = tf.get_variable(name, shape, dtype=self.dtype,
	                                initializer=tf.constant_initializer(value=value),
	                                regularizer=regularizer, trainable=trainable)
	        else:
	            w = tf.eye(shape[0], num_columns=shape[1])
	            if flip:
	                # flip_left_right needs a channel axis, hence the reshape
	                w = tf.reshape(w, (shape[0], shape[1], 1))
	                w = tf.image.flip_left_right(w)
	            w = tf.reshape(w, shape)
	            # a tensor initializer already carries the shape, so none is passed
	            k = tf.get_variable(name, None, dtype=self.dtype,
	                                initializer=w,
	                                regularizer=regularizer, trainable=trainable)

	        return k

	    def _context_conv2d(self, tensor, dim=1, size=7, diag=False, flip=False, stride=1, name='cconv'):
	        """Convolve with a fixed, non-trainable kernel to combine neighbours.

	        Implemented with a constant / identity kernel and no bias; currently
	        only an input tensor of depth 1 is expected.

	        Args:
	            tensor: 4-D input tensor (depth supposed to be 1).
	            dim: number of output channels.
	            size: kernel size, an int or ``[k_h, k_w]``.
	            diag: use a diagonal identity kernel instead of an all-ones one.
	            flip: with ``diag``, mirror the diagonal kernel left-to-right.
	            stride: int, or a full 4-element strides list.
	            name: layer name, prefix of the kernel variable and the op.

	        Returns:
	            The ``'SAME'``-padded convolution output.
	        """
	        in_dim = tensor.shape.as_list()[-1] # suppose to be 1
	        size = size if isinstance(size, (tuple, list)) else [size, size]
	        stride = stride if isinstance(stride, (tuple, list)) else [1, stride, stride, 1]
	        kernel_shape = [size[0], size[1], in_dim, dim]

	        w = self._constant_kernel(kernel_shape, diag=diag, flip=flip, trainable=False, name=name)
	        out = tf.nn.conv2d(tensor, w, strides=stride, padding='SAME', name=name)

	        return out

	    def _non_local_context(self, tensor1, tensor2, stride=4, name='non_local_context'):
	        """Inject direction-aware context, gated by attention from ``tensor1``.

	        An attention map is computed from ``tensor1``; ``tensor2`` is reduced
	        to one channel, gated by the attention, aggregated with fixed
	        horizontal, vertical and two diagonal kernels whose extent is about
	        ``1/stride`` of the feature-map size, gated again, expanded back to
	        ``C`` channels and fused with ``tensor2``.

	        Args:
	            tensor1: 4-D tensor the attention map is derived from.
	            tensor2: 4-D tensor of identical static shape to be enriched.
	            stride: defines the neighbour size (feature size // stride).
	            name: scope prefix of every variable/op created here.

	        Returns:
	            ``(out, None)``: the fused ``C``-channel tensor; the second slot
	            (formerly the attention map) is always None.
	        """
	        assert tensor1.shape.as_list() == tensor2.shape.as_list(), "input tensor should have same shape"

	        [N, H, W, C] = tensor1.shape.as_list()

	        hs = H // stride if (H // stride) > 1 else (stride-1)
	        vs = W // stride if (W // stride) > 1 else (stride-1)

	        # Round each extent up to the next odd number. Each axis is handled
	        # on its own; previously an odd vs was replaced by hs, which only
	        # gave the intended kernel for square feature maps.
	        hs = hs if (hs%2!=0) else hs+1
	        vs = vs if (vs%2!=0) else vs+1

	        # compute attention map
	        a = self._conv2d(tensor1, dim=C, name=name+'/fa1')
	        a = self._conv2d(a, dim=C, name=name+'/fa2')
	        a = self._conv2d(a, dim=1, size=1, act='linear', norm=None, name=name+'/a')
	        a = tf.nn.sigmoid(a, name=name+'/a_sigmoid')

	        # reduce the tensor depth
	        x = self._conv2d(tensor2, dim=C, name=name+'/fx1')
	        x = self._conv2d(x, dim=1, size=1, act='linear', norm=None, name=name+'/x')

	        # pre attention, prevent the text
	        x = a*x

	        h = self._context_conv2d(x, size=[hs, 1], name=name+'/cc_h') # h
	        v = self._context_conv2d(x, size=[1, vs], name=name+'/cc_v') # v
	        d1 = self._context_conv2d(x, size=[hs, vs], diag=True, name=name+'/cc_d1') # d
	        d2 = self._context_conv2d(x, size=[hs, vs], diag=True, flip=True, name=name+'/cc_d2') # d_t

	        # double attention, prevent blurring
	        c1 = a*(h+v+d1+d2)

	        # expand to dim
	        c1 = self._conv2d(c1, dim=C, size=1, act='linear', norm=None, name=name+'/expand')

	        # further convolution to learn richer feature
	        features = tf.concat([tensor2, c1], axis=3, name=name+'/in_context_concat')
	        out = self._conv2d(features, dim=C, name=name+'/conv2')

	        # the attention map `a` is intentionally not returned
	        return out, None

	    def _up_bilinear(self, tensor, dim, shape, name='upsample'):
	        """Project to ``dim`` channels with a 1x1 conv, then resize to ``shape``.

	        Args:
	            tensor: 4-D input tensor.
	            dim: number of output channels.
	            shape: target ``(height, width)`` passed to
	                ``tf.image.resize_images`` (default resize method).
	            name: layer name prefix for the 1x1 convolution.

	        Returns:
	            The resized tensor.
	        """
	        out = self._conv2d(tensor, dim=dim, size=1, act='linear', name=name+'/1x1_conv')
	        return tf.image.resize_images(out, shape)

	    def forward(self, inputs, init_with_pretrain_vgg=False, pre_trained_model='./vgg16/vgg_16.ckpt'):
	        """Build the full graph and return both logit tensors.

	        Each of the three scopes is created on the first call and reused on
	        later calls (detected by looking for existing global variables whose
	        name starts with the scope name).

	        Args:
	            inputs: 4-D image tensor ``[n, h, w, c]``; the static ``h`` and
	                ``w`` are the size the logits are resized to.
	            init_with_pretrain_vgg: when True, initialise the FNet variables
	                from ``pre_trained_model`` via ``self.pre_train_restore_map``.
	            pre_trained_model: path of the VGG16 checkpoint.

	        Returns:
	            ``(logits_r, logits_cw)``: 9-channel logits from RNet and
	            3-channel logits from CWNet, both of spatial size ``(h, w)``.
	        """
	        # feature extraction part and also the share network
	        reuse_fnet = len([v for v in tf.global_variables() if v.name.startswith('FNet')]) > 0
	        with tf.variable_scope('FNet', reuse=reuse_fnet):
	            # feature extraction
	            self.conv1_1 = self._conv2d(inputs, dim=64, name='conv1_1')
	            self.conv1_2 = self._conv2d(self.conv1_1, dim=64, name='conv1_2')
	            self.pool1 = self._max_pool2d(self.conv1_2) # 256 => /2

	            self.conv2_1 = self._conv2d(self.pool1, dim=128, name='conv2_1')
	            self.conv2_2 = self._conv2d(self.conv2_1, dim=128, name='conv2_2')
	            self.pool2 = self._max_pool2d(self.conv2_2) # 128 => /4

	            self.conv3_1 = self._conv2d(self.pool2, dim=256, name='conv3_1')
	            self.conv3_2 = self._conv2d(self.conv3_1, dim=256, name='conv3_2')
	            self.conv3_3 = self._conv2d(self.conv3_2, dim=256, name='conv3_3')
	            self.pool3 = self._max_pool2d(self.conv3_3) # 64 => /8

	            self.conv4_1 = self._conv2d(self.pool3, dim=512, name='conv4_1')
	            self.conv4_2 = self._conv2d(self.conv4_1, dim=512, name='conv4_2')
	            self.conv4_3 = self._conv2d(self.conv4_2, dim=512, name='conv4_3')
	            self.pool4 = self._max_pool2d(self.conv4_3) # 32 => /16

	            self.conv5_1 = self._conv2d(self.pool4, dim=512, name='conv5_1')
	            self.conv5_2 = self._conv2d(self.conv5_1, dim=512, name='conv5_2')
	            self.conv5_3 = self._conv2d(self.conv5_2, dim=512, name='conv5_3')
	            self.pool5 = self._max_pool2d(self.conv5_3) # 16 => /32

	            # init feature extraction part from pre-train vgg16
	            if init_with_pretrain_vgg:
	                tf.train.init_from_checkpoint(pre_trained_model, self.pre_train_restore_map)

	        # input size for logits predict
	        [n, h, w, c] = inputs.shape.as_list()

	        reuse_cw_net = len([v for v in tf.global_variables() if v.name.startswith('CWNet')]) > 0
	        with tf.variable_scope('CWNet', reuse=reuse_cw_net):
	            # upsample, each step adds a skip connection from the encoder
	            up2 = (self._upconv2d(self.pool5, dim=256, act='linear', name='up2_1') # 32 => /16
	                   + self._conv2d(self.pool4, dim=256, act='linear', name='pool4_s'))
	            self.up2_cw = self._conv2d(up2, dim=256, name='up2_3')

	            up4 = (self._upconv2d(self.up2_cw, dim=128, act='linear', name='up4_1') # 64 => /8
	                   + self._conv2d(self.pool3, dim=128, act='linear', name='pool3_s'))
	            self.up4_cw = self._conv2d(up4, dim=128, name='up4_3')

	            up8 = (self._upconv2d(self.up4_cw, dim=64, act='linear', name='up8_1') # 128 => /4
	                   + self._conv2d(self.pool2, dim=64, act='linear', name='pool2_s'))
	            self.up8_cw = self._conv2d(up8, dim=64, name='up8_2')

	            up16 = (self._upconv2d(self.up8_cw, dim=32, act='linear', name='up16_1') # 256 => /2
	                    + self._conv2d(self.pool1, dim=32, act='linear', name='pool1_s'))
	            self.up16_cw = self._conv2d(up16, dim=32, name='up16_2')

	            # predict logits
	            logits_cw = self._up_bilinear(self.up16_cw, dim=3, shape=(h, w), name='logits')

	        # decode network for room type detection
	        reuse_rnet = len([v for v in tf.global_variables() if v.name.startswith('RNet')]) > 0
	        with tf.variable_scope('RNet', reuse=reuse_rnet):
	            # upsample, each level is refined with context from the CWNet features
	            up2 = (self._upconv2d(self.pool5, dim=256, act='linear', name='up2_1') # 32 => /16
	                   + self._conv2d(self.pool4, dim=256, act='linear', name='pool4_s'))
	            up2 = self._conv2d(up2, dim=256, name='up2_2')
	            up2, _ = self._non_local_context(self.up2_cw, up2, name='context_up2')

	            up4 = (self._upconv2d(up2, dim=128, act='linear', name='up4_1') # 64 => /8
	                   + self._conv2d(self.pool3, dim=128, act='linear', name='pool3_s'))
	            up4 = self._conv2d(up4, dim=128, name='up4_2')
	            up4, _ = self._non_local_context(self.up4_cw, up4, name='context_up4')

	            up8 = (self._upconv2d(up4, dim=64, act='linear', name='up8_1') # 128 => /4
	                   + self._conv2d(self.pool2, dim=64, act='linear', name='pool2_s'))
	            up8 = self._conv2d(up8, dim=64, name='up8_2')
	            up8, _ = self._non_local_context(self.up8_cw, up8, name='context_up8')

	            up16 = (self._upconv2d(up8, dim=32, act='linear', name='up16_1') # 256 => /2
	                    + self._conv2d(self.pool1, dim=32, act='linear', name='pool1_s'))
	            up16 = self._conv2d(up16, dim=32, name='up16_2')
	            # self.a receives the (currently always None) attention slot
	            self.up16_r, self.a = self._non_local_context(self.up16_cw, up16, name='context_up16')

	            # predict logits
	            logits_r = self._up_bilinear(self.up16_r, dim=9, shape=(h, w), name='logits')

	        return logits_r, logits_cw