# NOTE(review): removed scrape artifacts ("Spaces:" / "Runtime error" x2) that
# preceded the module and were not part of the Python source.
| from pathlib import Path | |
| from PIL import Image | |
| try: | |
| from tqdm import trange | |
| except: | |
| from builtins import range as trange | |
| import torch, gc | |
| import cv2 | |
| import os.path | |
| import numpy as np | |
| import copy | |
| import platform | |
| import math | |
| # Our code | |
| from src.misc import * | |
| from src.common_constants import GenerationOptions as go | |
| from src.common_constants import * | |
| from src.stereoimage_generation import create_stereoimages | |
| from src.normalmap_generation import create_normalmap | |
| from src.depthmap_generation import ModelHolder | |
| from src import backbone | |
| try: | |
| # 3d-photo-inpainting imports | |
| from inpaint.mesh import write_mesh, read_mesh, output_3d_photo | |
| from inpaint.networks import Inpaint_Color_Net, Inpaint_Depth_Net, Inpaint_Edge_Net | |
| from inpaint.utils import path_planning | |
| from inpaint.bilateral_filtering import sparse_bilateral_filtering | |
| except Exception as e: | |
| print('Impaint import failed. Impaint will not work.') | |
| import traceback | |
| traceback.print_exc() | |
# Module-level cache of the most recently loaded inpainting mesh, so repeated
# video generation from the same mesh file does not re-read it from disk.
# NOTE(review): `global` at module level is a no-op statement; kept as-is.
global video_mesh_data, video_mesh_fn
video_mesh_data = None  # parsed mesh tuple, populated by run_3dphoto_videos
video_mesh_fn = None  # filename the cached mesh was read from

# Singleton that owns loading/offloading of the depth-estimation models.
model_holder = ModelHolder()
def convert_to_i16(arr):
    """Convert a float depthmap scaled to [0; 1) into a single-channel
    16-bit image array.

    Some precision is inevitably lost. The uint16 conversion rounds down,
    therefore values are clipped to stay inside [0; 2**16).
    """
    bits = 16
    top = 2 ** bits
    # +0.0001 nudges values upward before the round-down cast; clipping to
    # top - 0.1 leaves headroom so the cast cannot overflow past 65535.
    clipped = np.clip(arr * top + 0.0001, 0, top - 0.1)
    return clipped.astype("uint16")
def convert_i16_to_rgb(image, like):
    """Broadcast a single-channel 16-bit depthmap into a three-channel,
    8-bits-per-channel array shaped (and typed) like `like`."""
    output = np.zeros_like(like)
    # Dropping the low byte maps the 16-bit range onto 8 bits; the assignment
    # into `output` casts to its dtype.
    scaled = image / 256.0
    for channel in (0, 1, 2):
        output[:, :, channel] = scaled
    return output
class CoreGenerationFunnelInp:
    """This class takes a dictionary and creates a core_generation_funnel inp.
    Non-applicable parameters are silently discarded (no error)"""

    def __init__(self, values):
        # Accept another funnel-input object and unwrap its raw dict.
        if isinstance(values, CoreGenerationFunnelInp):
            values = values.values
        # Normalize keys: GenerationOptions members become their lowercase names,
        # plain string keys are lowercased.
        normalized = {}
        for key, value in values.items():
            if isinstance(key, GenerationOptions):
                key = key.name
            normalized[key.lower()] = value
        # Keep only known settings; anything missing falls back to its default.
        self.values = {}
        for setting in GenerationOptions:
            name = setting.name.lower()
            self.values[name] = normalized.get(name, setting.df)

    def __getitem__(self, item):
        # Both GenerationOptions members and plain strings work as keys.
        key = item.name.lower() if isinstance(item, GenerationOptions) else item
        return self.values[key]

    def __getattr__(self, item):
        # Attribute access falls through to item access: inp.boost == inp['boost'].
        return self[item]
def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp, ops=None):
    """Generator producing every requested depthmap-derived output.

    For each input image a depthmap is either computed by a model or taken
    from `inputdepthmaps`, then the enabled outputs are yielded as tuples
    (input_index, result_type, result) — e.g. 'depth', 'concat_depth',
    'heatmap', 'normalmap', a stereo mode name, 'simple_mesh',
    'inpainted_mesh'.

    Args:
        outpath: directory where meshes are written (see TODO below).
        inputimages: list of PIL images.
        inputdepthmaps: optional list of custom depthmaps (PIL images or
            arrays in [0; 1]); None entries are predicted by the model.
        inputnames: original filenames, used to name inpainted meshes.
        inp: settings, anything accepted by CoreGenerationFunnelInp.
        ops: backbone options dict; gathered from the backbone when None.
    """
    if len(inputimages) == 0 or inputimages[0] is None:
        return
    if inputdepthmaps is None or len(inputdepthmaps) == 0:
        inputdepthmaps: list[Image] = [None for _ in range(len(inputimages))]
    # True when every image already has a user-supplied depthmap — in that
    # case no depth model needs to be loaded at all.
    inputdepthmaps_complete = all([x is not None for x in inputdepthmaps])

    inp = CoreGenerationFunnelInp(inp)

    if ops is None:
        ops = backbone.gather_ops()
    model_holder.update_settings(**ops)

    # TODO: ideally, run_depthmap should not save meshes - that makes the function not pure
    print(SCRIPT_FULL_NAME)

    print(f'Backbone: {backbone.USED_BACKBONE.name}')
    backbone.unload_sd_model()

    # TODO: this still should not be here
    background_removed_images = []
    # remove on base image before depth calculation
    if inp[go.GEN_REMBG]:
        if inp[go.PRE_DEPTH_BACKGROUND_REMOVAL]:
            # Depth will be computed on the background-removed images.
            inputimages = batched_background_removal(inputimages, inp[go.REMBG_MODEL])
            background_removed_images = inputimages
        else:
            background_removed_images = batched_background_removal(inputimages, inp[go.REMBG_MODEL])

    # init torch device
    if inp[go.COMPUTE_DEVICE] == 'GPU':
        if torch.cuda.is_available():
            device = torch.device("cuda")
        else:
            print('WARNING: Cuda device was not found, cpu will be used')
            device = torch.device("cpu")
    else:
        device = torch.device("cpu")
    print("device: %s" % device)

    # TODO: This should not be here
    # Images/depths accumulated for the 3d-photo-inpainting second pass.
    inpaint_imgs = []
    inpaint_depths = []

    try:
        if not inputdepthmaps_complete:
            print("Loading model(s) ..")
            model_holder.ensure_models(inp[go.MODEL_TYPE], device, inp[go.BOOST], inp[go.TILING_MODE])
        print("Computing output(s) ..")
        # iterate over input images
        for count in trange(0, len(inputimages)):
            # Convert single channel input (PIL) images to rgb
            if inputimages[count].mode == 'I':
                # NOTE(review): the result of .point() is discarded here, which
                # makes this line look like a no-op — kept as-is; verify intent.
                inputimages[count].point(lambda p: p * 0.0039063096, mode='RGB')
                inputimages[count] = inputimages[count].convert('RGB')

            raw_prediction = None
            """Raw prediction, as returned by a model. None if input depthmap is used."""
            raw_prediction_invert = False
            """True if near=dark on raw_prediction"""
            out = None  # depthmap as float array, scaled to [0; 1]

            if inputdepthmaps is not None and inputdepthmaps[count] is not None:
                # use custom depthmap
                dp = inputdepthmaps[count]
                if isinstance(dp, Image.Image):
                    if dp.width != inputimages[count].width or dp.height != inputimages[count].height:
                        try:  # LANCZOS may fail on some formats
                            dp = dp.resize((inputimages[count].width, inputimages[count].height), Image.Resampling.LANCZOS)
                        except:
                            dp = dp.resize((inputimages[count].width, inputimages[count].height))
                    # Trying desperately to rescale image to [0;1) without actually normalizing it
                    # Normalizing is avoided, because we want to preserve the scale of the original depthmaps
                    # (batch mode, video mode).
                    if len(dp.getbands()) == 1:
                        out = np.asarray(dp, dtype="float")
                        out_max = out.max()
                        # Guess the bit depth from the largest pixel value.
                        if out_max < 256:
                            bit_depth = 8
                        elif out_max < 65536:
                            bit_depth = 16
                        else:
                            bit_depth = 32
                        out /= 2.0 ** bit_depth
                    else:
                        # Multi-channel image: use the first channel only.
                        out = np.asarray(dp, dtype="float")[:, :, 0]
                        out /= 256.0
                else:
                    # Should be in interval [0; 1], values outside of this range will be clipped.
                    out = np.asarray(dp, dtype="float")
                    assert inputimages[count].height == out.shape[0], "Custom depthmap height mismatch"
                    assert inputimages[count].width == out.shape[1], "Custom depthmap width mismatch"
            else:
                # override net size (size may be different for different images)
                if inp[go.NET_SIZE_MATCH]:
                    # Round up to a multiple of 32 to avoid potential issues
                    # TODO: buggs for Depth Anything
                    net_width = (inputimages[count].width + 31) // 32 * 32
                    net_height = (inputimages[count].height + 31) // 32 * 32
                else:
                    net_width = inp[go.NET_WIDTH]
                    net_height = inp[go.NET_HEIGHT]
                raw_prediction, raw_prediction_invert = \
                    model_holder.get_raw_prediction(inputimages[count], net_width, net_height)

                # output
                # A (near-)constant prediction is treated as broken (see else).
                if abs(raw_prediction.max() - raw_prediction.min()) > np.finfo("float").eps:
                    out = np.copy(raw_prediction)
                    # TODO: some models may output negative values, maybe these should be clamped to zero.
                    if raw_prediction_invert:
                        out *= -1
                    if inp[go.DO_OUTPUT_DEPTH_PREDICTION]:
                        yield count, 'depth_prediction', np.copy(out)
                    if inp[go.CLIPDEPTH]:
                        if inp[go.CLIPDEPTH_MODE] == 'Range':
                            out = (out - out.min()) / (out.max() - out.min())  # normalize to [0; 1]
                            out = np.clip(out, inp[go.CLIPDEPTH_FAR], inp[go.CLIPDEPTH_NEAR])
                        elif inp[go.CLIPDEPTH_MODE] == 'Outliers':
                            # Clip to the far/near percentiles instead of fixed bounds.
                            fb, nb = np.percentile(out, [inp[go.CLIPDEPTH_FAR] * 100.0, inp[go.CLIPDEPTH_NEAR] * 100.0])
                            out = np.clip(out, fb, nb)
                    out = (out - out.min()) / (out.max() - out.min())  # normalize to [0; 1]
                else:
                    # Regretfully, the depthmap is broken and will be replaced with a black image
                    out = np.zeros(raw_prediction.shape)

            # Maybe we should not use img_output for everything, since we get better accuracy from
            # the raw_prediction. However, it is not always supported. We maybe would like to achieve
            # reproducibility, so depthmap of the image should be the same as generating the depthmap one more time.
            img_output = convert_to_i16(out)
            """Depthmap (near=bright), as uint16"""

            # if 3dinpainting, store maps for processing in second pass
            if inp[go.GEN_INPAINTED_MESH]:
                inpaint_imgs.append(inputimages[count])
                inpaint_depths.append(img_output)

            # applying background masks after depth
            if inp[go.GEN_REMBG]:
                print('applying background masks')
                background_removed_image = background_removed_images[count]
                # maybe a threshold cut would be better on the line below.
                background_removed_array = np.array(background_removed_image)
                # A pixel counts as background when RGB is all-zero and alpha is ~0.
                bg_mask = (background_removed_array[:, :, 0] == 0) & (background_removed_array[:, :, 1] == 0) & (
                        background_removed_array[:, :, 2] == 0) & (background_removed_array[:, :, 3] <= 0.2)
                img_output[bg_mask] = 0  # far value

                yield count, 'background_removed', background_removed_image

                if inp[go.SAVE_BACKGROUND_REMOVAL_MASKS]:
                    bg_array = (1 - bg_mask.astype('int8')) * 255
                    mask_array = np.stack((bg_array, bg_array, bg_array, bg_array), axis=2)
                    mask_image = Image.fromarray(mask_array.astype(np.uint8))

                    yield count, 'foreground_mask', mask_image

            # A weird quirk: if user tries to save depthmap, whereas custom depthmap is used,
            # custom depthmap will be outputed
            if inp[go.DO_OUTPUT_DEPTH]:
                img_depth = cv2.bitwise_not(img_output) if inp[go.OUTPUT_DEPTH_INVERT] else img_output
                if inp[go.OUTPUT_DEPTH_COMBINE]:
                    # Side-by-side (or stacked) view of the input and its depthmap.
                    axis = 1 if inp[go.OUTPUT_DEPTH_COMBINE_AXIS] == 'Horizontal' else 0
                    img_concat = Image.fromarray(np.concatenate(
                        (inputimages[count], convert_i16_to_rgb(img_depth, inputimages[count])),
                        axis=axis))
                    yield count, 'concat_depth', img_concat
                else:
                    yield count, 'depth', Image.fromarray(img_depth)

            if inp[go.GEN_STEREO]:
                # print("Generating stereoscopic image(s)..")
                stereoimages = create_stereoimages(
                    inputimages[count], img_output,
                    inp[go.STEREO_DIVERGENCE], inp[go.STEREO_SEPARATION],
                    inp[go.STEREO_MODES],
                    inp[go.STEREO_BALANCE], inp[go.STEREO_OFFSET_EXPONENT], inp[go.STEREO_FILL_ALGO])
                for c in range(0, len(stereoimages)):
                    yield count, inp[go.STEREO_MODES][c], stereoimages[c]

            if inp[go.GEN_NORMALMAP]:
                normalmap = create_normalmap(
                    img_output,
                    inp[go.NORMALMAP_PRE_BLUR_KERNEL] if inp[go.NORMALMAP_PRE_BLUR] else None,
                    inp[go.NORMALMAP_SOBEL_KERNEL] if inp[go.NORMALMAP_SOBEL] else None,
                    inp[go.NORMALMAP_POST_BLUR_KERNEL] if inp[go.NORMALMAP_POST_BLUR] else None,
                    inp[go.NORMALMAP_INVERT]
                )
                yield count, 'normalmap', normalmap

            if inp[go.GEN_HEATMAP]:
                from dzoedepth.utils.misc import colorize
                heatmap = Image.fromarray(colorize(img_output, cmap='inferno'))
                yield count, 'heatmap', heatmap

            # gen mesh
            if inp[go.GEN_SIMPLE_MESH]:
                print(f"\nGenerating (occluded) mesh ..")
                basename = 'depthmap'
                meshsimple_fi = get_uniquefn(outpath, basename, 'obj', 'simple')

                # Prefer the raw model prediction for accuracy; fall back to
                # the processed depthmap when a custom depthmap was used.
                depthi = raw_prediction if raw_prediction is not None else out
                depthi_min, depthi_max = depthi.min(), depthi.max()
                # try to map output to sensible values for non zoedepth models, boost, or custom maps
                if inp[go.MODEL_TYPE] not in [7, 8, 9] or inp[go.BOOST] or inputdepthmaps[count] is not None:
                    # invert if midas
                    if inp[go.MODEL_TYPE] > 0 or inputdepthmaps[count] is not None:  # TODO: Weird
                        depthi = depthi_max - depthi + depthi_min
                        depth_max = depthi.max()
                        depth_min = depthi.min()
                    # make positive
                    if depthi_min < 0:
                        depthi = depthi - depthi_min
                        depth_max = depthi.max()
                        depth_min = depthi.min()
                    # scale down
                    if depthi.max() > 10.0:
                        depthi = 4.0 * (depthi - depthi_min) / (depthi_max - depthi_min)
                    # offset
                    depthi = depthi + 1.0

                mesh = create_mesh(inputimages[count], depthi, keep_edges=not inp[go.SIMPLE_MESH_OCCLUDE],
                                   spherical=(inp[go.SIMPLE_MESH_SPHERICAL]))
                mesh.export(meshsimple_fi)
                yield count, 'simple_mesh', meshsimple_fi
        print("Computing output(s) done.")
    except Exception as e:
        import traceback
        if 'out of memory' in str(e).lower():
            print(str(e))
            suggestion = "out of GPU memory, could not generate depthmap! " \
                         "Here are some suggestions to work around this issue:\n"
            if inp[go.BOOST]:
                suggestion += " * Disable BOOST (generation will be faster, but the depthmap will be less detailed)\n"
            if backbone.USED_BACKBONE != backbone.BackboneType.STANDALONE:
                suggestion += " * Run DepthMap in the standalone mode - without launching the SD WebUI\n"
            if device != torch.device("cpu"):
                suggestion += " * Select CPU as the processing device (this will be slower)\n"
            if inp[go.MODEL_TYPE] != 6:
                suggestion += \
                    " * Use a different model (generally, more memory-consuming models produce better depthmaps)\n"
            if not inp[go.BOOST]:
                suggestion += " * Reduce net size (this could reduce quality)\n"
            print('Fail.\n')
            raise Exception(suggestion)
        else:
            print('Fail.\n')
            raise e
    finally:
        # Always release / offload the depth models, even on failure.
        if backbone.get_opt('depthmap_script_keepmodels', True):
            model_holder.offload()  # Swap to CPU memory
        else:
            model_holder.unload_models()
        gc.collect()
        backbone.torch_gc()

    # TODO: This should not be here
    # Second pass: 3d-photo-inpainting over all collected image/depth pairs.
    if inp[go.GEN_INPAINTED_MESH]:
        try:
            mesh_fi = run_3dphoto(device, inpaint_imgs, inpaint_depths, inputnames, outpath,
                                  inp[go.GEN_INPAINTED_MESH_DEMOS],
                                  1, "mp4")
            yield 0, 'inpainted_mesh', mesh_fi
        except Exception as e:
            print(f'{str(e)}, some issue with generating inpainted mesh')

    backbone.reload_sd_model()
    print("All done.\n")
def get_uniquefn(outpath, basename, ext, suffix=''):
    """Return a not-yet-existing path of the form
    ``{outpath}/{basename}-NNNN[-suffix].{ext}``.

    The starting sequence number comes from the backbone; up to 500
    consecutive candidates are probed for a free name.
    """
    basecount = backbone.get_next_sequence_number(outpath, basename)
    if basecount > 0:
        basecount -= 1
    if suffix != '':
        suffix = f'-{suffix}'  # Dash is important for selecting unique filenames (see get_next_sequence_number)
    for i in range(500):
        fullfn = os.path.join(outpath, f"{basename}-{basecount + i:04}{suffix}.{ext}")
        if not os.path.exists(fullfn):
            return fullfn
    # Fallback, should never be executed.
    # Fix: previously this returned a bare filename without joining outpath,
    # unlike every other return path, so the file would land in the CWD.
    return os.path.join(outpath, f"{basename}-99999{suffix}.{ext}")
def run_3dphoto(device, img_rgb, img_depth, inputnames, outpath, gen_inpainted_mesh_demos, vid_ssaa, vid_format):
    """Run 3d-photo-inpainting over image/depth pairs and write .obj meshes.

    Downloads the three inpainting checkpoints on first use, loads the edge,
    depth and color networks onto `device`, writes one inpainted mesh per
    input (optionally rendering demo videos), and returns the path of the
    last mesh written ('' when there were no inputs).
    """
    mesh_fi = ''
    try:
        print("Running 3D Photo Inpainting .. ")
        edgemodel_path = './models/3dphoto/edge_model.pth'
        depthmodel_path = './models/3dphoto/depth_model.pth'
        colormodel_path = './models/3dphoto/color_model.pth'
        # create paths to model if not present
        os.makedirs('./models/3dphoto/', exist_ok=True)
        # Each download is verified against a sha256 checksum.
        ensure_file_downloaded(
            edgemodel_path,
            ["https://huggingface.co/spaces/Epoching/3D_Photo_Inpainting/resolve/e389e564fd2a55cf/checkpoints/edge-model.pth",
             "https://filebox.ece.vt.edu/~jbhuang/project/3DPhoto/model/edge-model.pth"],
            "b1d768bd008ad5fe9f540004f870b8c3d355e4939b2009aa4db493fd313217c9")
        ensure_file_downloaded(
            depthmodel_path,
            ["https://huggingface.co/spaces/Epoching/3D_Photo_Inpainting/resolve/e389e564fd2a55cf/checkpoints/depth-model.pth",
             "https://filebox.ece.vt.edu/~jbhuang/project/3DPhoto/model/depth-model.pth"],
            "2d0e63e89a22762ddfa8bc8c9f8c992e5532b140123274ffc6e4171baa1b76f8")
        ensure_file_downloaded(
            colormodel_path,
            ["https://huggingface.co/spaces/Epoching/3D_Photo_Inpainting/resolve/e389e564fd2a55cf/checkpoints/color-model.pth",
             "https://filebox.ece.vt.edu/~jbhuang/project/3DPhoto/model/color-model.pth"],
            "383c9b1db70097907a6f9c8abb0303e7056f50d5456a36f34ab784592b8b2c20"
        )

        print("Loading edge model ..")
        depth_edge_model = Inpaint_Edge_Net(init_weights=True)
        depth_edge_weight = torch.load(edgemodel_path, map_location=torch.device(device))
        depth_edge_model.load_state_dict(depth_edge_weight)
        depth_edge_model = depth_edge_model.to(device)
        depth_edge_model.eval()

        print("Loading depth model ..")
        depth_feat_model = Inpaint_Depth_Net()
        depth_feat_weight = torch.load(depthmodel_path, map_location=torch.device(device))
        depth_feat_model.load_state_dict(depth_feat_weight, strict=True)
        depth_feat_model = depth_feat_model.to(device)
        depth_feat_model.eval()
        depth_feat_model = depth_feat_model.to(device)

        print("Loading rgb model ..")
        rgb_model = Inpaint_Color_Net()
        rgb_feat_weight = torch.load(colormodel_path, map_location=torch.device(device))
        rgb_model.load_state_dict(rgb_feat_weight)
        rgb_model.eval()
        rgb_model = rgb_model.to(device)

        # Configuration for the inpainting pipeline (mostly upstream defaults).
        config = {}
        config["gpu_ids"] = 0
        config['extrapolation_thickness'] = 60
        config['extrapolate_border'] = True
        config['depth_threshold'] = 0.04
        config['redundant_number'] = 12
        config['ext_edge_threshold'] = 0.002
        config['background_thickness'] = 70
        config['context_thickness'] = 140
        config['background_thickness_2'] = 70
        config['context_thickness_2'] = 70
        config['log_depth'] = True
        config['depth_edge_dilate'] = 10
        config['depth_edge_dilate_2'] = 5
        config['largest_size'] = 512
        config['repeat_inpaint_edge'] = True
        config['ply_fmt'] = "bin"
        config['save_ply'] = backbone.get_opt('depthmap_script_save_ply', False)
        config['save_obj'] = True

        if device == torch.device("cpu"):
            config["gpu_ids"] = -1

        for count in trange(0, len(img_rgb)):
            # Name the mesh after the input file when a name is available.
            basename = 'depthmap'
            if inputnames is not None:
                if inputnames[count] is not None:
                    p = Path(inputnames[count])
                    basename = p.stem

            mesh_fi = get_uniquefn(outpath, basename, 'obj')

            print(f"\nGenerating inpainted mesh .. (go make some coffee) ..")

            # from inpaint.utils.get_MiDaS_samples
            W = img_rgb[count].width
            H = img_rgb[count].height
            # Normalized pinhole intrinsics (focal = max(H, W), principal point at center).
            int_mtx = np.array([[max(H, W), 0, W // 2], [0, max(H, W), H // 2], [0, 0, 1]]).astype(np.float32)
            if int_mtx.max() > 1:
                int_mtx[0, :] = int_mtx[0, :] / float(W)
                int_mtx[1, :] = int_mtx[1, :] / float(H)

            # how inpaint.utils.read_MiDaS_depth() imports depthmap
            disp = img_depth[count].astype(np.float32)
            disp = disp - disp.min()
            disp = cv2.blur(disp / disp.max(), ksize=(3, 3)) * disp.max()
            disp = (disp / disp.max()) * 3.0
            # Disparity -> depth, clamped to avoid division blow-up near zero.
            depth = 1. / np.maximum(disp, 0.05)

            # rgb input
            img = np.asarray(img_rgb[count])
            if len(img.shape) > 2 and img.shape[2] == 4:
                # convert the image from RGBA2RGB
                img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

            # run sparse bilateral filter
            config['sparse_iter'] = 5
            config['filter_size'] = [7, 7, 5, 5, 5]
            config['sigma_s'] = 4.0
            config['sigma_r'] = 0.5
            vis_photos, vis_depths = sparse_bilateral_filtering(depth.copy(), img.copy(), config,
                                                               num_iter=config['sparse_iter'], spdb=False)
            depth = vis_depths[-1]

            # bilat_fn = os.path.join(outpath, basename +'_bilatdepth.png')
            # cv2.imwrite(bilat_fn, depth)

            # NOTE(review): depth_edge_model is intentionally passed twice below —
            # this mirrors the upstream 3d-photo-inpainting call signature; verify
            # against inpaint.mesh.write_mesh before changing.
            rt_info = write_mesh(img,
                                 depth,
                                 int_mtx,
                                 mesh_fi,
                                 config,
                                 rgb_model,
                                 depth_edge_model,
                                 depth_edge_model,
                                 depth_feat_model)

            if rt_info is not False and gen_inpainted_mesh_demos:
                run_3dphoto_videos(mesh_fi, basename, outpath, 300, 40,
                                   [0.03, 0.03, 0.05, 0.03],
                                   ['double-straight-line', 'double-straight-line', 'circle', 'circle'],
                                   [0.00, 0.00, -0.015, -0.015],
                                   [0.00, 0.00, -0.015, -0.00],
                                   [-0.05, -0.05, -0.05, -0.05],
                                   ['dolly-zoom-in', 'zoom-in', 'circle', 'swing'], False, vid_format, vid_ssaa)

            backbone.torch_gc()
    finally:
        # Free the three networks regardless of success.
        # NOTE(review): if loading fails before these names are assigned, the
        # `del` below raises NameError from the finally block — confirm intended.
        del rgb_model
        rgb_model = None
        del depth_edge_model
        depth_edge_model = None
        del depth_feat_model
        depth_feat_model = None
        backbone.torch_gc()

    return mesh_fi
def run_3dphoto_videos(mesh_fi, basename, outpath, num_frames, fps, crop_border, traj_types, x_shift_range,
                       y_shift_range, z_shift_range, video_postfix, vid_dolly, vid_format, vid_ssaa):
    """Render camera-trajectory videos from an inpainted mesh file.

    Each parallel list (traj_types / x_shift_range / y_shift_range /
    z_shift_range / video_postfix) describes one video. The parsed mesh is
    cached in module globals so repeated calls on the same file skip
    re-reading it. Returns the list of saved video filenames.
    """
    import vispy
    # Pick a vispy rendering backend appropriate for the platform; fall back
    # to trying each known backend in turn if the first choice fails.
    try:
        if platform.system() == 'Windows':
            vispy.use(app='PyQt5')
        elif platform.system() == 'Darwin':
            vispy.use('PyQt6')
        else:
            vispy.use(app='egl')
    except:
        import traceback
        print(traceback.format_exc())
        print('Trying an alternative...')
        for u in ['PyQt5', 'PyQt6', 'egl']:
            try:
                vispy.use(app=u)
                break
            except:
                print(f'On {u}')
                print(traceback.format_exc())
        # Honestly, I don't know if it actually helps at all

    # read ply
    global video_mesh_data, video_mesh_fn
    if video_mesh_fn is None or video_mesh_fn != mesh_fi:
        # Different mesh requested: drop the cached one and load from disk.
        try:
            del video_mesh_data
        except:
            print("del video_mesh_data failed")
        video_mesh_fn = mesh_fi
        video_mesh_data = read_mesh(mesh_fi)

    verts, colors, faces, Height, Width, hFov, vFov, mean_loc_depth = video_mesh_data

    original_w = output_w = W = Width
    original_h = output_h = H = Height
    # Normalized pinhole intrinsics, same construction as in run_3dphoto.
    int_mtx = np.array([[max(H, W), 0, W // 2], [0, max(H, W), H // 2], [0, 0, 1]]).astype(np.float32)
    if int_mtx.max() > 1:
        int_mtx[0, :] = int_mtx[0, :] / float(W)
        int_mtx[1, :] = int_mtx[1, :] / float(H)

    config = {}
    config['video_folder'] = outpath
    config['num_frames'] = num_frames
    config['fps'] = fps
    config['crop_border'] = crop_border
    config['traj_types'] = traj_types
    config['x_shift_range'] = x_shift_range
    config['y_shift_range'] = y_shift_range
    config['z_shift_range'] = z_shift_range
    config['video_postfix'] = video_postfix
    config['ssaa'] = vid_ssaa

    # from inpaint.utils.get_MiDaS_samples
    generic_pose = np.eye(4)
    assert len(config['traj_types']) == len(config['x_shift_range']) == \
           len(config['y_shift_range']) == len(config['z_shift_range']) == len(config['video_postfix']), \
        "The number of elements in 'traj_types', 'x_shift_range', 'y_shift_range', 'z_shift_range' and \
        'video_postfix' should be equal."
    tgt_pose = [[generic_pose * 1]]
    tgts_poses = []
    # One list of per-frame camera poses per requested trajectory.
    for traj_idx in range(len(config['traj_types'])):
        tgt_poses = []
        sx, sy, sz = path_planning(config['num_frames'], config['x_shift_range'][traj_idx],
                                   config['y_shift_range'][traj_idx],
                                   config['z_shift_range'][traj_idx], path_type=config['traj_types'][traj_idx])
        for xx, yy, zz in zip(sx, sy, sz):
            # Identity pose with the planned (x, y, z) translation applied.
            tgt_poses.append(generic_pose * 1.)
            tgt_poses[-1][:3, -1] = np.array([xx, yy, zz])
        tgts_poses += [tgt_poses]
    tgt_pose = generic_pose * 1

    # seems we only need the depthmap to calc mean_loc_depth, which is only used when doing 'dolly'
    # width and height are already in the ply file in the comments ..
    # might try to add the mean_loc_depth to it too
    # did just that
    # mean_loc_depth = img_depth[img_depth.shape[0]//2, img_depth.shape[1]//2]

    print("Generating videos ..")

    normal_canvas, all_canvas = None, None
    videos_poses, video_basename = copy.deepcopy(tgts_poses), basename
    # Crop window derived from the normalized principal point.
    top = (original_h // 2 - int_mtx[1, 2] * output_h)
    left = (original_w // 2 - int_mtx[0, 2] * output_w)
    down, right = top + output_h, left + output_w
    border = [int(xx) for xx in [top, down, left, right]]
    normal_canvas, all_canvas, fn_saved = output_3d_photo(verts.copy(), colors.copy(), faces.copy(),
                                                          copy.deepcopy(Height), copy.deepcopy(Width),
                                                          copy.deepcopy(hFov), copy.deepcopy(vFov),
                                                          copy.deepcopy(tgt_pose), config['video_postfix'],
                                                          copy.deepcopy(generic_pose),
                                                          copy.deepcopy(config['video_folder']),
                                                          None, copy.deepcopy(int_mtx), config, None,
                                                          videos_poses, video_basename, original_h, original_w,
                                                          border=border, depth=None, normal_canvas=normal_canvas,
                                                          all_canvas=all_canvas,
                                                          mean_loc_depth=mean_loc_depth, dolly=vid_dolly,
                                                          fnExt=vid_format)
    return fn_saved
def run_makevideo(fn_mesh, vid_numframes, vid_fps, vid_traj, vid_shift, vid_border, dolly, vid_format, vid_ssaa,
                  outpath=None, basename=None):
    """Validate UI parameters and render a single video from a mesh file.

    Raises on a missing mesh, malformed shift (needs 3 comma-separated
    numbers) or border (needs 4). Returns (video_path, video_path, '') —
    presumably shaped for a UI with two outputs plus a status string.
    """
    if len(fn_mesh) == 0 or not os.path.exists(fn_mesh):
        raise Exception("Could not open mesh.")

    vid_ssaa = int(vid_ssaa)

    # traj type
    if vid_traj == 0:
        vid_traj = ['straight-line']
    elif vid_traj == 1:
        vid_traj = ['double-straight-line']
    elif vid_traj == 2:
        vid_traj = ['circle']

    num_fps = int(vid_fps)
    num_frames = int(vid_numframes)
    shifts = vid_shift.split(',')
    if len(shifts) != 3:
        raise Exception("Translate requires 3 elements.")
    x_shift_range = [float(shifts[0])]
    y_shift_range = [float(shifts[1])]
    z_shift_range = [float(shifts[2])]

    borders = vid_border.split(',')
    if len(borders) != 4:
        raise Exception("Crop Border requires 4 elements.")
    crop_border = [float(borders[0]), float(borders[1]), float(borders[2]), float(borders[3])]

    if not outpath:
        outpath = backbone.get_outpath()
    if not basename:
        # output path and filename mess ..
        basename = Path(fn_mesh).stem
        # unique filename
        basecount = backbone.get_next_sequence_number(outpath, basename)
        if basecount > 0: basecount = basecount - 1
        fullfn = None
        for i in range(500):
            fn = f"{basecount + i:05}" if basename == '' else f"{basename}-{basecount + i:04}"
            fullfn = os.path.join(outpath, f"{fn}_." + vid_format)
            if not os.path.exists(fullfn):
                break
        basename = Path(fullfn).stem
        # The probed name ends with '_'; strip it so run_3dphoto_videos can
        # append its own postfix.
        basename = basename[:-1]

    print("Loading mesh ..")

    fn_saved = run_3dphoto_videos(fn_mesh, basename, outpath, num_frames, num_fps, crop_border, vid_traj, x_shift_range,
                                  y_shift_range, z_shift_range, [''], dolly, vid_format, vid_ssaa)
    return fn_saved[-1], fn_saved[-1], ''
def unload_models():
    """Fully release the depth models held by the global model holder."""
    model_holder.unload_models()
| # TODO: code borrowed from the internet to be marked as such and to reside in separate files | |
def batched_background_removal(inimages, model_name):
    """Run rembg background removal over a list of PIL images.

    Returns a new list of images with the background made transparent, one
    per input, in the same order.
    """
    from rembg import new_session, remove
    print('creating background masks')

    # model path and name
    bg_model_dir = Path.joinpath(Path().resolve(), "models/rem_bg")
    os.makedirs(bg_model_dir, exist_ok=True)
    os.environ["U2NET_HOME"] = str(bg_model_dir)

    # starting a session
    background_removal_session = new_session(model_name)
    outimages = [
        Image.fromarray(np.array(remove(img, session=background_removal_session)))
        for img in inimages
    ]
    # The line below might be redundant
    del background_removal_session
    return outimages
def pano_depth_to_world_points(depth):
    """
    360 depth to world points
    given 2D depth is an equirectangular projection of a spherical image
    Treat depth as radius
    longitude : -pi to pi
    latitude : -pi/2 to pi/2
    """
    # Each depth sample is the radius along its viewing ray.
    radius = depth.flatten()

    # Angular grid of the equirectangular projection: one (lon, lat) per pixel.
    lon_grid, lat_grid = np.meshgrid(
        np.linspace(-np.pi, np.pi, depth.shape[1]),
        np.linspace(-np.pi / 2, np.pi / 2, depth.shape[0]))
    lon = lon_grid.flatten()
    lat = lat_grid.flatten()

    # Spherical -> cartesian coordinates.
    cos_lat = np.cos(lat)
    x = radius * cos_lat * np.cos(lon)
    y = radius * cos_lat * np.sin(lon)
    z = radius * np.sin(lat)

    return np.stack([x, y, z], axis=1)
def depth_edges_mask(depth):
    """Returns a mask of edges in the depth map.
    Args:
    depth: 2D numpy array of shape (H, W) with dtype float32.
    Returns:
    mask: 2D numpy array of shape (H, W) with dtype bool.
    """
    # Per-axis gradients of the depth map.
    grad_a, grad_b = np.gradient(depth)
    # A pixel is an edge when the gradient magnitude exceeds the threshold.
    magnitude = np.sqrt(grad_a ** 2 + grad_b ** 2)
    return magnitude > 0.05
def create_mesh(image, depth, keep_edges=False, spherical=False):
    """Build a vertex-colored trimesh from an image and its depthmap.

    Args:
        image: PIL image. NOTE: mutated in place — it is downscaled via
            thumbnail() to the configured maximum mesh size.
        depth: 2D depth array aligned with the image.
            NOTE(review): depth is not resized alongside the thumbnail here;
            presumably callers pass matching sizes — confirm.
        keep_edges: when False, triangles spanning depth discontinuities
            (per depth_edges_mask) are dropped to occlude stretched faces.
        spherical: treat depth as an equirectangular 360 panorama.

    Returns:
        A trimesh.Trimesh with per-vertex colors taken from the image.
    """
    import trimesh
    from dzoedepth.utils.geometry import depth_to_points, create_triangles
    maxsize = backbone.get_opt('depthmap_script_mesh_maxsize', 2048)

    # limit the size of the input image
    image.thumbnail((maxsize, maxsize))

    if not spherical:
        pts3d = depth_to_points(depth[None])
    else:
        pts3d = pano_depth_to_world_points(depth)
    pts3d = pts3d.reshape(-1, 3)

    verts = pts3d.reshape(-1, 3)
    image = np.array(image)
    if keep_edges:
        triangles = create_triangles(image.shape[0], image.shape[1])
    else:
        # Mask out triangles that cross steep depth changes.
        triangles = create_triangles(image.shape[0], image.shape[1], mask=~depth_edges_mask(depth))
    colors = image.reshape(-1, 3)
    mesh = trimesh.Trimesh(vertices=verts, faces=triangles, vertex_colors=colors)

    # rotate 90deg over X when spherical
    if spherical:
        angle = math.pi / 2
        direction = [1, 0, 0]
        center = [0, 0, 0]
        rot_matrix = trimesh.transformations.rotation_matrix(angle, direction, center)
        mesh.apply_transform(rot_matrix)

    return mesh