import argparse
from net import *
import os
import time
import random
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import imageio
from PIL import Image

# os.environ['CUDA_VISIBLE_DEVICES'] = GPU_ID # Removed for CPU compatibility
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

seed = 8964

# command-line interface: choose training or inference
parser = argparse.ArgumentParser()
parser.add_argument('--phase', type=str, default='Test',
                    help='Train/Test network.')


def _imread(path, mode='RGB'):
    """Read an image file and return it as a numpy array.

    Args:
        path: image file path.
        mode: PIL conversion mode, e.g. 'RGB' or 'L' (grayscale).

    NOTE(fix): the previous code called imageio.imread(..., mode=...) and
    imageio.imresize(...). imageio has no `imresize`, and its readers do not
    accept a `mode` keyword — those were scipy.misc APIs removed in SciPy 1.3.
    PIL (already imported here) is used instead.
    """
    return np.asarray(Image.open(path).convert(mode))


def _imresize(arr, height, width, resample=Image.NEAREST):
    """Resize an image array to (height, width) and return a numpy array.

    PIL's Image.resize takes (width, height), so the arguments are swapped
    here on purpose. NEAREST is the default resampling because most arrays
    resized in this file are categorical label / colour maps, where
    interpolation would invent pixel values that belong to no class.
    """
    return np.asarray(Image.fromarray(arr).resize((width, height), resample))


class MODEL(Network):
    """Multi-task floorplan segmentation model (rooms + boundaries).

    Wraps the shared `Network` backbone (from net.py) with training,
    inference, and evaluation drivers. Two heads are produced by
    `self.forward`: logits1 for room types and logits2 for
    boundaries (walls / doors+windows).
    """

    def __init__(self):
        Network.__init__(self)
        self.log_dir = 'pretrained'                  # checkpoint directory
        self.eval_file = './dataset/r3d_test.txt'    # TSV: image \t ... \t labels
        self.loss_type = 'balanced'                  # 'balanced' or plain softmax CE

    def convert_one_hot_to_image(self, one_hot, dtype='float', act=None):
        """Collapse a one-hot / logit tensor to a single-channel label image.

        Args:
            one_hot: NHWC tensor of per-class scores or one-hot labels.
            dtype: 'int' -> uint8 output, anything else -> float32.
            act: 'softmax' to apply softmax first (harmless for argmax, kept
                 for parity with callers that pass logits).

        Returns:
            [n, h, w, 1] tensor of argmax class indices.
        """
        if act == 'softmax':
            one_hot = tf.nn.softmax(one_hot, axis=-1)
        [n, h, w, c] = one_hot.shape.as_list()
        im = tf.reshape(tf.argmax(one_hot, axis=-1), [n, h, w, 1])
        if dtype == 'int':
            im = tf.cast(im, dtype=tf.uint8)
        else:
            im = tf.cast(im, dtype=tf.float32)
        return im

    def cross_two_tasks_weight(self, y1, y2):
        """Return loss weights (w1, w2) balancing the two tasks.

        Each task is weighted by the OTHER task's foreground mass, so the
        task with fewer positive pixels gets the larger weight; w1 + w2 == 1.
        """
        p1 = tf.reduce_sum(y1)
        p2 = tf.reduce_sum(y2)
        w1 = p2 / (p1 + p2)
        w2 = p1 / (p1 + p2)
        return w1, w2

    def balanced_entropy(self, x, y):
        """Class-balanced cross entropy between logits `x` and one-hot `y`.

        Each class's contribution is weighted by (total - n_c) / sum_i(total - n_i),
        i.e. rarer classes (small pixel count n_c) receive larger weights.
        """
        # clip softmax output away from 0/1 so log() stays finite
        eps = 1e-6
        z = tf.nn.softmax(x)
        cliped_z = tf.clip_by_value(z, eps, 1 - eps)
        log_z = tf.log(cliped_z)

        num_classes = y.shape.as_list()[-1]
        ind = tf.argmax(y, -1, output_type=tf.int32)
        total = tf.reduce_sum(y)  # total foreground pixels

        m_c = []  # per-class index mask
        n_c = []  # per-class foreground pixel count
        for c in range(num_classes):
            m_c.append(tf.cast(tf.equal(ind, c), dtype=tf.int32))
            n_c.append(tf.cast(tf.reduce_sum(m_c[-1]), dtype=tf.float32))

        # complement counts: large when the class is rare
        c = []
        for i in range(num_classes):
            c.append(total - n_c[i])
        tc = tf.add_n(c)

        loss = 0.
        for i in range(num_classes):
            w = c[i] / tc
            m_c_one_hot = tf.one_hot((i * m_c[i]), num_classes, axis=-1)
            y_c = m_c_one_hot * y
            # NOTE(review): axis=1 sums over the height dimension only, not the
            # class axis — looks suspicious but is kept byte-identical; confirm
            # against the reference implementation before changing.
            loss += w * tf.reduce_mean(-tf.reduce_sum(y_c * log_z, axis=1))
        return (loss / num_classes)  # mean over classes

    def train(self, loader_dict, num_batch, max_step=40000):
        """Train the two-head network from a tfrecord loader.

        Args:
            loader_dict: dict with 'images', 'label_rooms', 'label_boundaries'
                         queue-runner tensors.
            num_batch: batches per epoch.
            max_step: total optimisation steps; epochs = max_step // num_batch.
        """
        images = loader_dict['images']
        labels_r_hot = loader_dict['label_rooms']
        labels_cw_hot = loader_dict['label_boundaries']

        max_ep = max_step // num_batch
        print('max_step = {}, max_ep = {}, num_batch = {}'.format(max_step, max_ep, num_batch))

        logits1, logits2 = self.forward(images, init_with_pretrain_vgg=False)
        if self.loss_type == 'balanced':
            # in-task (per-class) loss balance
            loss1 = self.balanced_entropy(logits1, labels_r_hot)
            loss2 = self.balanced_entropy(logits2, labels_cw_hot)
        else:
            loss1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits1, labels=labels_r_hot, name='bce1'))
            loss2 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits2, labels=labels_cw_hot, name='bce2'))

        # cross-task loss balance weight
        w1, w2 = self.cross_two_tasks_weight(labels_r_hot, labels_cw_hot)
        loss = (w1 * loss1 + w2 * loss2)

        # gradient ops assigned to the same device as the forward ops
        optim = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(
            loss, colocate_gradients_with_ops=True)

        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True  # prevent occupying all GPU memory
        with tf.Session(config=config) as sess:
            sess.run(tf.group(tf.global_variables_initializer(),
                              tf.local_variables_initializer()))
            saver = tf.train.Saver(max_to_keep=10)

            # coordinator + queue runners feed the input pipeline
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            print("Start Training!")
            total_times = 0
            for ep in range(max_ep):
                for n in range(num_batch):
                    tic = time.time()
                    [loss_value, _] = sess.run([loss, optim])
                    duration = time.time() - tic
                    total_times += duration
                    step = int(ep * num_batch + n)
                    print('step {}: loss = {:.3}; {:.2} data/sec, executed {} minutes'.format(
                        step, loss_value, 1.0 / duration, int(total_times / 60)))

                # checkpoint + evaluation every 2 epochs
                if ep % 2 == 0:
                    saver.save(sess, self.log_dir + '/model', global_step=ep)
                    self.evaluate(sess=sess, epoch=ep)
            saver.save(sess, self.log_dir + '/model', global_step=max_ep)
            self.evaluate(sess=sess, epoch=max_ep)

            coord.request_stop()
            coord.join(threads)
            sess.close()

    def infer(self, save_dir='out', resize=True, merge=True):
        """Run inference over eval_file and save colour-mapped predictions.

        Args:
            save_dir: output root; room / boundary maps go in subdirectories.
            resize: resize predictions back to the source image size.
            merge: additionally save rooms fused with boundary classes (9/10).
        """
        print("generating test set of {}.... will save to [./{}]".format(self.eval_file, save_dir))
        room_dir = os.path.join(save_dir, 'room')
        close_wall_dir = os.path.join(save_dir, 'boundary')
        for d in (save_dir, room_dir, close_wall_dir):
            os.makedirs(d, exist_ok=True)

        x = tf.placeholder(shape=[1, 512, 512, 3], dtype=tf.float32)
        logits1, logits2 = self.forward(x, init_with_pretrain_vgg=False)
        rooms = self.convert_one_hot_to_image(logits1, act='softmax', dtype='int')
        close_walls = self.convert_one_hot_to_image(logits2, act='softmax', dtype='int')

        config = tf.ConfigProto(allow_soft_placement=True)
        sess = tf.Session(config=config)
        sess.run(tf.group(tf.global_variables_initializer(),
                          tf.local_variables_initializer()))

        # restore all parameters from the latest checkpoint
        saver = tf.train.Saver()
        saver.restore(sess, save_path=tf.train.latest_checkpoint(self.log_dir))

        # first TSV column is the image path
        with open(self.eval_file, 'r') as f:
            paths = [line.split('\t')[0] for line in f.read().splitlines()]

        for p in paths:
            im = _imread(p, mode='RGB')
            # resize to network input size and normalize to [0, 1]
            im_x = _imresize(im, 512, 512, resample=Image.BILINEAR) / 255.
            im_x = np.reshape(im_x, (1, 512, 512, 3))
            [out1, out2] = sess.run([rooms, close_walls], feed_dict={x: im_x})

            out1_rgb = ind2rgb(np.squeeze(out1))
            out2_rgb = ind2rgb(np.squeeze(out2), color_map=floorplan_boundary_map)
            if resize:
                # resize colour maps back to the original image size
                out1_rgb = _imresize(out1_rgb, im.shape[0], im.shape[1])
                out2_rgb = _imresize(out2_rgb, im.shape[0], im.shape[1])

            if merge:
                # overlay boundary classes onto the room map (9=doors, 10=walls)
                out1 = np.squeeze(out1)
                out2 = np.squeeze(out2)
                out1[out2 == 2] = 10
                out1[out2 == 1] = 9
                out3_rgb = ind2rgb(out1, color_map=floorplan_fuse_map)

            name = os.path.basename(p)
            stem = os.path.splitext(name)[0]
            imageio.imwrite(os.path.join(room_dir, stem + '_rooms.png'), out1_rgb)
            imageio.imwrite(os.path.join(close_wall_dir, stem + '_bd_rm.png'), out2_rgb)
            if merge:
                imageio.imwrite(os.path.join(save_dir, stem + '_rooms.png'), out3_rgb)
            print('Saving prediction: {}'.format(name))

    def evaluate(self, sess, epoch, num_of_classes=11):
        """Compute overall / per-class pixel accuracy over eval_file.

        Ground-truth room, door and close-wall maps are fused into a single
        label image (9=doors, 10=walls) and compared against the fused
        prediction via a confusion histogram. Results are printed and
        appended to 'EVAL_<log_dir>'.
        """
        x = tf.placeholder(shape=[1, 512, 512, 3], dtype=tf.float32)
        logits1, logits2 = self.forward(x, init_with_pretrain_vgg=False)
        predict_bd = self.convert_one_hot_to_image(logits2, act='softmax', dtype='int')
        predict_room = self.convert_one_hot_to_image(logits1, act='softmax', dtype='int')

        with open(self.eval_file, 'r') as f:
            paths = f.read().splitlines()
        image_paths = [p.split('\t')[0] for p in paths]   # input image
        gt2_paths = [p.split('\t')[2] for p in paths]     # 2 denotes doors (and windows)
        gt3_paths = [p.split('\t')[3] for p in paths]     # 3 denotes rooms
        gt4_paths = [p.split('\t')[-1] for p in paths]    # last one denotes close wall

        n = len(paths)
        hist = np.zeros((num_of_classes, num_of_classes))
        for i in range(n):
            im = _imread(image_paths[i], 'RGB')
            dd = _imread(gt2_paths[i], 'L')
            rr = _imread(gt3_paths[i], 'RGB')
            cw = _imread(gt4_paths[i], 'L')

            # normalize input image to [0, 1]
            im = _imresize(im, 512, 512, resample=Image.BILINEAR) / 255.
            im = np.reshape(im, (1, 512, 512, 3))

            # fuse ground-truth labels: rooms + walls (10) + doors (9)
            rr = _imresize(rr, 512, 512)
            rr_ind = rgb2ind(rr)
            cw = _imresize(cw, 512, 512) / 255
            dd = _imresize(dd, 512, 512) / 255
            cw = (cw > 0.5).astype(np.uint8)
            dd = (dd > 0.5).astype(np.uint8)
            rr_ind[cw == 1] = 10
            rr_ind[dd == 1] = 9

            # fuse prediction the same way
            rm_ind, bd_ind = sess.run([predict_room, predict_bd], feed_dict={x: im})
            rm_ind = np.squeeze(rm_ind)
            bd_ind = np.squeeze(bd_ind)
            rm_ind[bd_ind == 2] = 10
            rm_ind[bd_ind == 1] = 9

            hist += fast_hist(rm_ind.flatten(), rr_ind.flatten(), num_of_classes)

        overall_acc = np.diag(hist).sum() / hist.sum()
        mean_acc = np.diag(hist) / (hist.sum(1) + 1e-6)
        # mean over the 9 evaluated classes (7 & 8 are ignored below)
        mean_acc9 = (np.nansum(mean_acc[:7]) + mean_acc[-2] + mean_acc[-1]) / 9.

        # NOTE(fix): this file was previously opened for append but never
        # written to; the summary is now persisted as well as printed.
        with open('EVAL_' + self.log_dir, 'a') as log_file:
            summary = 'Model at epoch {}: overall accuracy = {:.4}, mean_acc = {:.4}'.format(
                epoch, overall_acc, mean_acc9)
            print(summary)
            log_file.write(summary + '\n')
            for i in range(mean_acc.shape[0]):
                if i not in [7, 8]:  # ignore class 7 & 8
                    line = '\t\tepoch {}: {}th label: accuracy = {:.4}'.format(epoch, i, mean_acc[i])
                    print(line)
                    log_file.write(line + '\n')


def main(args):
    """Entry point: seed RNGs, then train or run inference per --phase."""
    tf.set_random_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    model = MODEL()
    if args.phase.lower() == 'train':
        loader_dict, num_batch = data_loader_bd_rm_from_tfrecord(batch_size=1)
        tic = time.time()
        model.train(loader_dict, num_batch)
        toc = time.time()
        print('total training + evaluation time = {} minutes'.format((toc - tic) / 60))
    elif args.phase.lower() == 'test':
        model.infer()
    else:
        pass


if __name__ == '__main__':
    FLAGS, unparsed = parser.parse_known_args()
    main(FLAGS)