Spaces:
Runtime error
Runtime error
| import cv2 | |
| import torch | |
| import torch.nn.functional as F | |
| import numpy as np | |
| from PIL import Image | |
| from modules import devices, masking | |
| from modules.shared import opts | |
class DepthAnythingDetector:
    """Depth-map preprocessor that wraps a Depth-Anything model.

    Upstream project: https://github.com/LiheYoung/Depth-Anything

    Build an instance with :meth:`from_pretrained`, then call it with an image
    to get a per-pixel depth map scaled to the 0..255 range.
    """

    def __init__(self, model):
        """Store *model* and build the preprocessing pipeline fed to it.

        Args:
            model: The depth network. It is called with a single batched
                tensor in ``__call__`` and must support ``.to(device)``.
        """
        # Function-local imports: only needed once a detector is constructed.
        from torchvision.transforms import Compose
        from modules.control.proc.depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet

        self.model = model
        self.transform = Compose([
            Resize(
                width=518,
                height=518,
                resize_target=False,
                keep_aspect_ratio=True,
                ensure_multiple_of=14,
                resize_method="lower_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ])

    @classmethod
    def from_pretrained(cls, pretrained_model_or_path: str, cache_dir: str) -> "DepthAnythingDetector":
        """Download the checkpoint from the Hugging Face Hub and build a detector.

        Args:
            pretrained_model_or_path: Passed to ``hf_hub_download`` as the
                ``repo_id``; the file ``pytorch_model.bin`` is fetched from it.
            cache_dir: Directory used by ``hf_hub_download`` as its cache.

        Returns:
            A detector wrapping a ``DPT_DINOv2`` model in eval mode on
            ``devices.device`` with the downloaded weights loaded.
        """
        from modules.control.proc.depth_anything.dpt import DPT_DINOv2
        import huggingface_hub as hf

        model = DPT_DINOv2(
            encoder="vitl",
            features=256,
            out_channels=[256, 512, 1024, 1024],
            localhub=False,
        )
        model = model.to(devices.device).eval()
        model_path = hf.hf_hub_download(
            repo_id=pretrained_model_or_path,
            filename="pytorch_model.bin",
            cache_dir=cache_dir,
        )
        # map_location="cpu" lets a checkpoint saved from CUDA tensors load on
        # hosts without CUDA; load_state_dict then copies the values into the
        # parameters that already live on devices.device.
        # NOTE(security): torch.load unpickles the file, so only point this at
        # repositories you trust.
        model_dict = torch.load(model_path, map_location="cpu")
        model.load_state_dict(model_dict)
        return cls(model)

    @staticmethod
    def _depth_to_uint8(depth):
        """Min-max scale a depth tensor to 0..255 and return a uint8 NumPy array."""
        low = depth.min()
        span = depth.max() - low
        if span > 0:
            scaled = (depth - low) / span * 255.0
        else:
            # A constant depth map has zero range; dividing by it would yield
            # NaN and an undefined uint8 cast, so emit an all-zero map instead.
            scaled = torch.zeros_like(depth)
        return scaled.cpu().numpy().astype(np.uint8)

    def __call__(self, image, color_map: str = "none", output_type: str = 'pil'):
        """Estimate depth for *image* and return it at the input resolution.

        Args:
            image: A PIL image (converted with ``np.array``) or an object that
                already exposes ``.shape`` and is accepted by ``cv2.cvtColor``.
            color_map: ``'none'`` to keep the single-channel map; otherwise a
                name looked up with ``masking.COLORMAP.index`` and applied via
                ``cv2.applyColorMap``.
            output_type: ``"pil"`` returns a ``PIL.Image``; any other value
                returns the NumPy array.

        Returns:
            The depth map with the same height and width as the input.
        """
        self.model.to(devices.device)
        if isinstance(image, Image.Image):
            image = np.array(image)
        h, w = image.shape[:2]
        # NOTE(review): PIL input is normally already RGB, so this conversion
        # swaps the channels. Kept as-is because changing it would alter the
        # model's outputs; confirm the intended channel order.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
        image = self.transform({"image": image})["image"]
        image = torch.from_numpy(image).unsqueeze(0).to(devices.device)
        with devices.inference_context():
            depth = self.model(image)
        if opts.control_move_processor:
            # Move the model off the compute device once inference is done.
            self.model.to('cpu')
        # Bilinear interpolation to a 2-element size needs a 4-D input, so a
        # leading axis is added (presumably the model returns (1, H, W) --
        # confirm) and stripped again with [0, 0] after resizing to (h, w).
        depth = F.interpolate(depth[None], (h, w), mode="bilinear", align_corners=False)[0, 0]
        depth = self._depth_to_uint8(depth)
        if color_map != 'none':
            # Reverse the channel axis of the colour-mapped result
            # (OpenCV colour maps are produced in BGR order).
            depth = cv2.applyColorMap(depth, masking.COLORMAP.index(color_map))[:, :, ::-1]
        if output_type == "pil":
            depth = Image.fromarray(depth)
        return depth
| # def unload_model(self): | |
| # self.model.to("cpu") | |