| """Evaluation script for the DeepLab model.
|
|
|
| See model.py for more details and usage.
|
| """
|
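# Example invocation (the flag values and paths below are illustrative only,
# not prescriptive; model_variant and image_pyramid are defined in
# deeplab/common.py and imported via the `common` module):
#
#   python deeplab/eval.py \
#     --eval_split="val" \
#     --model_variant="xception_65" \
#     --atrous_rates=6 \
#     --atrous_rates=12 \
#     --atrous_rates=18 \
#     --output_stride=16 \
#     --eval_crop_size="513,513" \
#     --checkpoint_dir=/path/to/train_logdir \
#     --eval_logdir=/path/to/eval_logdir \
#     --dataset_dir=/path/to/dataset/tfrecord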
|
|
import numpy as np
import six
import tensorflow as tf
from tensorflow.contrib import metrics as contrib_metrics
from tensorflow.contrib import quantize as contrib_quantize
from tensorflow.contrib import tfprof as contrib_tfprof
from tensorflow.contrib import training as contrib_training
from deeplab import common
from deeplab import model
from deeplab.datasets import data_generator
|
flags = tf.app.flags
FLAGS = flags.FLAGS

flags.DEFINE_string('master', '', 'BNS name of the tensorflow server')
|
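# Settings for log directories.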
flags.DEFINE_string('eval_logdir', None, 'Where to write the event logs.')

flags.DEFINE_string('checkpoint_dir', None, 'Directory of model checkpoints.')
|
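# Settings for evaluating the model.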
flags.DEFINE_integer('eval_batch_size', 1,
                     'The number of images in each batch during evaluation.')

flags.DEFINE_list('eval_crop_size', '513,513',
                  'Image crop size [height, width] for evaluation.')

flags.DEFINE_integer('eval_interval_secs', 60 * 5,
                     'How often (in seconds) to run evaluation.')
|
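# For `xception_65`, use atrous_rates = [12, 24, 36] if output_stride = 8, or
# rates = [6, 12, 18] if output_stride = 16. For `mobilenet_v2`, use None. Note
# one could use different atrous_rates/output_stride during training/evaluation.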
flags.DEFINE_multi_integer('atrous_rates', None,
                           'Atrous rates for atrous spatial pyramid pooling.')

flags.DEFINE_integer('output_stride', 16,
                     'The ratio of input to output spatial resolution.')
|
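# Change to [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] for multi-scale test.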
flags.DEFINE_multi_float('eval_scales', [1.0],
                         'The scales to resize images for evaluation.')
|
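# Change to True for adding flipped images during test.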
flags.DEFINE_bool('add_flipped_images', False,
                  'Add flipped images for evaluation or not.')

flags.DEFINE_integer(
    'quantize_delay_step', -1,
    'Steps to start quantized training. If < 0, will not quantize model.')
|
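# Dataset settings.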
flags.DEFINE_string('dataset', 'pascal_voc_seg',
                    'Name of the segmentation dataset.')

flags.DEFINE_string('eval_split', 'val',
                    'Which split of the dataset is used for evaluation.')

flags.DEFINE_string('dataset_dir', None, 'Where the dataset resides.')

flags.DEFINE_integer('max_number_of_evaluations', 0,
                     'Maximum number of eval iterations. Will loop '
                     'indefinitely upon nonpositive values.')
|
|
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  dataset = data_generator.Dataset(
      dataset_name=FLAGS.dataset,
      split_name=FLAGS.eval_split,
      dataset_dir=FLAGS.dataset_dir,
      batch_size=FLAGS.eval_batch_size,
      crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
      min_resize_value=FLAGS.min_resize_value,
      max_resize_value=FLAGS.max_resize_value,
      resize_factor=FLAGS.resize_factor,
      model_variant=FLAGS.model_variant,
      num_readers=2,
      is_training=False,
      should_shuffle=False,
      should_repeat=False)

  tf.gfile.MakeDirs(FLAGS.eval_logdir)
  tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

  with tf.Graph().as_default():
    samples = dataset.get_one_shot_iterator().get_next()

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_of_classes},
        crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)
|
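    # Set shape in order for tf.contrib.tfprof.model_analyzer to work properly.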
    samples[common.IMAGE].set_shape(
        [FLAGS.eval_batch_size,
         int(FLAGS.eval_crop_size[0]),
         int(FLAGS.eval_crop_size[1]),
         3])
    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(samples[common.IMAGE], model_options,
                                         image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      if FLAGS.quantize_delay_step >= 0:
        raise ValueError(
            'Quantize mode is not supported with multi-scale test.')

      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]
    predictions = tf.reshape(predictions, shape=[-1])
    labels = tf.reshape(samples[common.LABEL], shape=[-1])
    weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))
|
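    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # range of labels = [0, dataset.num_classes). Note the ignore_label regions
    # are not evaluated since the corresponding regions contain weights = 0.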
    labels = tf.where(
        tf.equal(labels, dataset.ignore_label), tf.zeros_like(labels), labels)

    predictions_tag = 'miou'
    for eval_scale in FLAGS.eval_scales:
      predictions_tag += '_' + str(eval_scale)
    if FLAGS.add_flipped_images:
      predictions_tag += '_flipped'
|
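    # Define the evaluation metric.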
    metric_map = {}
    num_classes = dataset.num_of_classes
    metric_map['eval/%s_overall' % predictions_tag] = tf.metrics.mean_iou(
        labels=labels, predictions=predictions, num_classes=num_classes,
        weights=weights)
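    # IoU for each class.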
    one_hot_predictions = tf.one_hot(predictions, num_classes)
    one_hot_predictions = tf.reshape(one_hot_predictions, [-1, num_classes])
    one_hot_labels = tf.one_hot(labels, num_classes)
    one_hot_labels = tf.reshape(one_hot_labels, [-1, num_classes])
    for c in range(num_classes):
      predictions_tag_c = '%s_class_%d' % (predictions_tag, c)
      tp, tp_op = tf.metrics.true_positives(
          labels=one_hot_labels[:, c], predictions=one_hot_predictions[:, c],
          weights=weights)
      fp, fp_op = tf.metrics.false_positives(
          labels=one_hot_labels[:, c], predictions=one_hot_predictions[:, c],
          weights=weights)
      fn, fn_op = tf.metrics.false_negatives(
          labels=one_hot_labels[:, c], predictions=one_hot_predictions[:, c],
          weights=weights)
      tp_fp_fn_op = tf.group(tp_op, fp_op, fn_op)
      iou = tf.where(tf.greater(tp + fn, 0.0),
                     tp / (tp + fn + fp),
                     tf.constant(np.NaN))
      metric_map['eval/%s' % predictions_tag_c] = (iou, tp_fp_fn_op)

    (metrics_to_values,
     metrics_to_updates) = contrib_metrics.aggregate_metric_map(metric_map)
|
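    # Create one scalar summary per metric; tf.Print also logs each metric
    # value whenever the corresponding summary op is evaluated.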
    summary_ops = []
    for metric_name, metric_value in six.iteritems(metrics_to_values):
      op = tf.summary.scalar(metric_name, metric_value)
      op = tf.Print(op, [metric_value], metric_name)
      summary_ops.append(op)

    summary_op = tf.summary.merge(summary_ops)
    summary_hook = contrib_training.SummaryAtEndHook(
        log_dir=FLAGS.eval_logdir, summary_op=summary_op)
    hooks = [summary_hook]
|
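    # num_eval_iters stays None (evaluate indefinitely) unless a positive
    # max_number_of_evaluations is given.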
    num_eval_iters = None
    if FLAGS.max_number_of_evaluations > 0:
      num_eval_iters = FLAGS.max_number_of_evaluations

    if FLAGS.quantize_delay_step >= 0:
      contrib_quantize.create_eval_graph()
|
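    # Print the trainable parameter counts and FLOPs of the graph before
    # starting the evaluation loop.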
    contrib_tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=contrib_tfprof.model_analyzer
        .TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
    contrib_tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=contrib_tfprof.model_analyzer.FLOAT_OPS_OPTIONS)
    contrib_training.evaluate_repeatedly(
        checkpoint_dir=FLAGS.checkpoint_dir,
        master=FLAGS.master,
        eval_ops=list(metrics_to_updates.values()),
        max_number_of_evaluations=num_eval_iters,
        hooks=hooks,
        eval_interval_secs=FLAGS.eval_interval_secs)
|
|
if __name__ == '__main__':
  flags.mark_flag_as_required('checkpoint_dir')
  flags.mark_flag_as_required('eval_logdir')
  flags.mark_flag_as_required('dataset_dir')
  tf.app.run()