Spaces:
Configuration error
Configuration error
| import os | |
| import sys | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) | |
| from eval.video_depth.tools import depth_evaluation, group_by_directory | |
| import numpy as np | |
| import cv2 | |
| from tqdm import tqdm | |
| import glob | |
| from PIL import Image | |
| import argparse | |
| import json | |
| from eval.video_depth.metadata import dataset_metadata | |
def get_args_parser():
    """Build the command-line parser for the depth evaluation script.

    Options:
        --output_dir: string, defaults to the empty string.
        --eval_dataset: one of the keys of ``dataset_metadata``,
            defaults to ``"nyu"``.
        --align: ``"scale&shift"`` (default), ``"scale"`` or ``"metric"``.

    Returns:
        The configured ``argparse.ArgumentParser``; the caller is
        responsible for calling ``parse_args()`` on it.
    """
    # Valid dataset names come from the project's metadata table.
    dataset_names = list(dataset_metadata.keys())
    alignment_modes = ["scale&shift", "scale", "metric"]

    cli = argparse.ArgumentParser()
    cli.add_argument("--output_dir", default="", type=str, help="value for outdir")
    cli.add_argument(
        "--eval_dataset", default="nyu", type=str, choices=dataset_names
    )
    cli.add_argument(
        "--align", default="scale&shift", type=str, choices=alignment_modes
    )
    return cli
# Tag value expected at the very start of a Sintel ``.dpt`` depth file.
_SINTEL_TAG_FLOAT = 202021.25

# When more than this many prediction files are found, ground truth is taken
# from every Sintel training sequence; otherwise only from
# ``_SINTEL_SUBSET_SEQS``.
_SINTEL_SUBSET_MAX_FRAMES = 643

# Sintel sequences evaluated when the prediction set is not the full one.
_SINTEL_SUBSET_SEQS = (
    "alley_2",
    "ambush_4",
    "ambush_5",
    "ambush_6",
    "cave_2",
    "cave_4",
    "market_2",
    "market_5",
    "market_6",
    "shaman_3",
    "sleeping_1",
    "sleeping_2",
    "temple_2",
    "temple_3",
)

# Bonn sequences whose ``depth_110`` folders are used as ground truth.
_BONN_SEQS = ("balloon2", "crowd2", "crowd3", "person_tracking2", "synchronous")

# Datasets this script knows how to evaluate.
_SUPPORTED_DATASETS = ("sintel", "bonn", "kitti")

# Maps the ``--align`` choice to the boolean keyword passed to
# ``depth_evaluation``.
_ALIGN_FLAGS = {
    "scale&shift": "align_with_lad2",
    "scale": "align_with_scale",
    "metric": "metric_scale",
}


def _alignment_kwargs(align):
    """Return the ``depth_evaluation`` keyword selecting alignment *align*.

    Raises:
        ValueError: if *align* is not one of the keys of ``_ALIGN_FLAGS``.
    """
    try:
        return {_ALIGN_FLAGS[align]: True}
    except KeyError:
        raise ValueError(
            f"Unknown alignment {align!r}; expected one of {sorted(_ALIGN_FLAGS)}"
        ) from None


def _read_sintel_depth(filename):
    """Read a Sintel ``.dpt`` file and return a (height, width) float32 array.

    The file starts with a float32 tag, followed by int32 width and int32
    height; everything after that is read as float32 depth values.

    Raises:
        ValueError: if the tag or the stored dimensions are invalid.
    """
    # The context manager guarantees the handle is closed on error paths too.
    with open(filename, "rb") as f:
        check = np.fromfile(f, dtype=np.float32, count=1)[0]
        if check != _SINTEL_TAG_FLOAT:
            raise ValueError(
                " depth_read:: Wrong tag in flow file (should be: {0}, is: {1}). Big-endian machine? ".format(
                    _SINTEL_TAG_FLOAT, check
                )
            )
        # Convert to Python ints so width * height cannot wrap around int32.
        width = int(np.fromfile(f, dtype=np.int32, count=1)[0])
        height = int(np.fromfile(f, dtype=np.int32, count=1)[0])
        size = width * height
        if not (width > 0 and height > 0 and 1 < size < 100000000):
            raise ValueError(
                " depth_read:: Wrong input size (width = {0}, height = {1}).".format(
                    width, height
                )
            )
        return np.fromfile(f, dtype=np.float32, count=-1).reshape((height, width))


def _read_bonn_depth(filename):
    """Load a Bonn depth PNG: raw values divided by 5000, zero pixels set to -1.

    Raises:
        ValueError: if the maximum raw value is not above 255, i.e. the image
            does not look like a 16-bit depth map.
    """
    with Image.open(filename) as img:
        depth_png = np.asarray(img)
    # Make sure we have a proper 16-bit depth map here, not an 8-bit one.
    if not np.max(depth_png) > 255:
        raise ValueError(
            f"Expected a 16-bit depth map, but the maximum value in {filename} is <= 255"
        )
    depth = depth_png.astype(np.float64) / 5000.0
    depth[depth_png == 0] = -1.0
    return depth


def _read_kitti_depth(filename):
    """Load a KITTI depth PNG: raw values divided by 256, zero pixels set to -1.

    Raises:
        ValueError: if the maximum raw value is not above 255, i.e. the image
            does not look like a 16-bit depth map.
    """
    with Image.open(filename) as img:
        depth_png = np.array(img, dtype=int)
    # Make sure we have a proper 16-bit depth map here, not an 8-bit one.
    if not np.max(depth_png) > 255:
        raise ValueError(
            f"Expected a 16-bit depth map, but the maximum value in {filename} is <= 255"
        )
    depth = depth_png.astype(float) / 256.0
    depth[depth_png == 0] = -1.0
    return depth


def _evaluate_sequences(path_pairs, depth_read, eval_kwargs):
    """Run ``depth_evaluation`` on every sequence and collect the metric dicts.

    Args:
        path_pairs: list of ``(prediction_paths, ground_truth_paths)``, one
            entry per sequence.
        depth_read: callable that loads one ground-truth depth file into a
            2-D array.
        eval_kwargs: extra keyword arguments forwarded to
            ``depth_evaluation`` (max depth, alignment flag, ...).

    Returns:
        A list with the first element of each ``depth_evaluation`` result,
        in the order of *path_pairs*.
    """
    gathered_depth_metrics = []
    for pd_pathes, gt_pathes in tqdm(path_pairs):
        gt_depth = np.stack([depth_read(gt_path) for gt_path in gt_pathes], axis=0)
        # cv2.resize takes (width, height); predictions are resized to the
        # ground-truth resolution before stacking.
        target_size = (gt_depth.shape[2], gt_depth.shape[1])
        pr_depth = np.stack(
            [
                cv2.resize(
                    np.load(pd_path), target_size, interpolation=cv2.INTER_CUBIC
                )
                for pd_path in pd_pathes
            ],
            axis=0,
        )
        depth_results, _error_map, _depth_predict, _depth_gt = depth_evaluation(
            pr_depth, gt_depth, use_gpu=True, **eval_kwargs
        )
        gathered_depth_metrics.append(depth_results)
    return gathered_depth_metrics


def _write_average_metrics(gathered_depth_metrics, output_dir, align):
    """Average per-sequence metrics, print them and write them as JSON.

    Every metric except ``"valid_pixels"`` is averaged across sequences,
    weighted by each sequence's ``"valid_pixels"``. The result is written to
    ``{output_dir}/result_{align}.json``.

    Raises:
        ValueError: if *gathered_depth_metrics* is empty.
    """
    if not gathered_depth_metrics:
        raise ValueError(
            "No sequences were evaluated; check that prediction files exist "
            f"under {output_dir!r}"
        )
    weights = [metrics["valid_pixels"] for metrics in gathered_depth_metrics]
    average_metrics = {
        key: np.average(
            [metrics[key] for metrics in gathered_depth_metrics], weights=weights
        )
        for key in gathered_depth_metrics[0]
        if key != "valid_pixels"
    }
    print("Average depth evaluation metrics:", average_metrics)
    depth_log_path = f"{output_dir}/result_{align}.json"
    with open(depth_log_path, "w") as f:
        f.write(json.dumps(average_metrics))


def main(args):
    """Evaluate predicted depth maps against ground truth for one dataset.

    Predictions are ``.npy`` files found under ``args.output_dir``, one
    sub-directory per sequence. Ground truth is read from fixed paths under
    ``data/``. Per-sequence metrics are averaged (weighted by valid pixels),
    printed, and written to ``{args.output_dir}/result_{args.align}.json``.

    Args:
        args: namespace with ``eval_dataset``, ``output_dir`` and ``align``
            attributes (see ``get_args_parser``).

    Raises:
        ValueError: if ``args.align`` is unknown, a ground-truth file is
            malformed, or no sequence could be evaluated.

    Only "sintel", "bonn" and "kitti" are handled; for any other dataset a
    notice is printed to stderr and nothing is evaluated.
    """
    dataset = args.eval_dataset
    if dataset not in _SUPPORTED_DATASETS:
        print(
            f"eval_dataset {dataset!r} is not handled by this script "
            f"(supported: {', '.join(_SUPPORTED_DATASETS)}); nothing to do.",
            file=sys.stderr,
        )
        return

    eval_kwargs = _alignment_kwargs(args.align)

    if dataset == "sintel":
        depth_read = _read_sintel_depth
        pred_pathes = sorted(
            glob.glob(f"{args.output_dir}/*/frame_*.npy")
        )  # TODO: update the path to your prediction
        if len(pred_pathes) > _SINTEL_SUBSET_MAX_FRAMES:
            depth_pathes = glob.glob("data/sintel/training/depth/*/*.dpt")
        else:
            depth_pathes = [
                path
                for seq in _SINTEL_SUBSET_SEQS
                for path in glob.glob(f"data/sintel/training/depth/{seq}/*.dpt")
            ]
        depth_pathes = sorted(depth_pathes)

        grouped_pred_depth = group_by_directory(pred_pathes)
        grouped_gt_depth = group_by_directory(depth_pathes)
        # Prediction folders may carry a "_pred_depth" suffix that the
        # ground-truth folders lack.
        path_pairs = [
            (pd_pathes, grouped_gt_depth[key.replace("_pred_depth", "")])
            for key, pd_pathes in grouped_pred_depth.items()
        ]
        eval_kwargs.update(max_depth=70, post_clip_max=70)

    elif dataset == "bonn":
        depth_read = _read_bonn_depth
        depth_pathes = sorted(
            path
            for seq in _BONN_SEQS
            for path in glob.glob(
                f"data/bonn/rgbd_bonn_dataset/rgbd_bonn_{seq}/depth_110/*.png"
            )
        )
        pred_pathes = sorted(
            glob.glob(f"{args.output_dir}/*/frame*.npy")
        )  # TODO: update the path to your prediction

        grouped_pred_depth = group_by_directory(pred_pathes)
        grouped_gt_depth = group_by_directory(depth_pathes, idx=-2)
        # key[10:] drops the first 10 characters of the ground-truth key,
        # which is the length of "rgbd_bonn_".
        # NOTE(review): assumes idx=-2 yields the rgbd_bonn_<seq> directory
        # name -- confirm against group_by_directory.
        path_pairs = [
            (grouped_pred_depth[key[10:]], gt_pathes)
            for key, gt_pathes in grouped_gt_depth.items()
        ]
        eval_kwargs.update(max_depth=70)

    else:  # dataset == "kitti"
        depth_read = _read_kitti_depth
        depth_pathes = sorted(
            glob.glob(
                "data/kitti/depth_selection/val_selection_cropped/groundtruth_depth_gathered/*/*.png"
            )
        )
        pred_pathes = sorted(
            glob.glob(f"{args.output_dir}/*/frame_*.npy")
        )  # TODO: update the path to your prediction

        grouped_pred_depth = group_by_directory(pred_pathes)
        grouped_gt_depth = group_by_directory(depth_pathes)
        path_pairs = [
            (pd_pathes, grouped_gt_depth[key])
            for key, pd_pathes in grouped_pred_depth.items()
        ]
        eval_kwargs.update(max_depth=None)

    gathered_depth_metrics = _evaluate_sequences(path_pairs, depth_read, eval_kwargs)
    _write_average_metrics(gathered_depth_metrics, args.output_dir, args.align)
if __name__ == "__main__":
    # Script entry point: parse the command line, then run the evaluation.
    cli_args = get_args_parser().parse_args()
    main(cli_args)