| |
| |
| |
| |
| |
| |
|
|
| import copy |
|
|
| import cv2 |
| import numpy as np |
| import torch |
|
|
|
|
class BaseModel:
    """Thin wrapper that forwards device/dtype/mode switches to ``self.model``.

    The class itself never assigns ``self.model``; a subclass (or the caller)
    is expected to set it before any of the forwarding methods are used.
    Every forwarding method returns the wrapper itself so calls can be chained.
    """

    def _forward_to_model(self, method_name, *args):
        """Call ``self.model.<method_name>(*args)`` and return this wrapper."""
        getattr(self.model, method_name)(*args)
        return self

    def cuda(self):
        """Forward ``cuda()`` to the wrapped model and return ``self``."""
        return self._forward_to_model("cuda")

    def cpu(self):
        """Forward ``cpu()`` to the wrapped model and return ``self``."""
        return self._forward_to_model("cpu")

    def float(self):
        """Forward ``float()`` to the wrapped model and return ``self``."""
        return self._forward_to_model("float")

    def to(self, device):
        """Forward ``to(device)`` to the wrapped model and return ``self``."""
        return self._forward_to_model("to", device)

    def eval(self):
        """Forward ``eval()`` to the wrapped model and return ``self``."""
        return self._forward_to_model("eval")

    def train(self):
        """Forward ``train()`` to the wrapped model and return ``self``."""
        return self._forward_to_model("train")

    def __call__(self, x):
        """Run the model on ``x``; subclasses must override this."""
        raise NotImplementedError

    def __repr__(self):
        """Return a two-line description containing the wrapped model's repr."""
        return f"model: \n{self.model}"
|
|
|
|
def get_dtype_string(arr):
    """Return the label used in this module for the dtype of ``arr``.

    Args:
        arr: object exposing a numpy-style ``dtype`` attribute.

    Returns:
        ``"uint8"``, ``"float32"`` or ``"float"`` (for float64); any other
        dtype yields the fallback label ``"unknow"``.
    """
    known_labels = (
        (np.uint8, "uint8"),
        (np.float32, "float32"),
        (np.float64, "float"),
    )
    for candidate, label in known_labels:
        if arr.dtype == candidate:
            return label
    return "unknow"
|
|
|
|
class BaseSeg(BaseModel):
    """Placeholder subclass of BaseModel whose constructor does nothing."""

    def __init__(self):
        """Create the instance without setting any attributes."""
|
|
|
|
class Bbox:
    """Box of four numbers stored in one of two layouts.

    * ``"whwh"``: ``[left, top, right, bottom]``
    * ``"xywh"``: ``[center_x, center_y, width, height]``
    """

    def __init__(self, box, mode="whwh"):
        """Store ``box`` (a length-4 sequence) together with its layout ``mode``."""
        assert len(box) == 4
        assert mode in ["whwh", "xywh"]
        self.box = box
        self.mode = mode

    def to_xywh(self):
        """Return the box in center/size layout (``self`` if already ``"xywh"``)."""
        if self.mode != "whwh":
            return self

        left, top, right, bottom = self.box
        # The center uses true division, so it may be fractional.
        converted = [
            (left + right) / 2,
            (top + bottom) / 2,
            right - left,
            bottom - top,
        ]
        return Bbox(converted, mode="xywh")

    def to_whwh(self):
        """Return the box in corner layout (``self`` if already ``"whwh"``)."""
        if self.mode == "whwh":
            return self

        cx, cy, w, h = self.box
        # Floor-divided half extents: the low edge gets the smaller half.
        half_w = w // 2
        half_h = h // 2
        corners = [cx - half_w, cy - half_h, cx + w - half_w, cy + h - half_h]
        return Bbox(corners, mode="whwh")

    def area(self):
        """Return width times height of the box."""
        *_, box_w, box_h = self.to_xywh().box
        return box_w * box_h

    def get_box(self):
        """Return the four stored values converted with ``int``."""
        return [int(value) for value in self.box]

    def scale(self, scale, width, height):
        """Return a new ``"whwh"`` box resized about its center and clipped.

        Args:
            scale: factor applied to the box width and height.
            width: upper bound for the right edge.
            height: upper bound for the bottom edge.

        Returns:
            Bbox: integer corner box with left/top clamped to at least 0 and
            right/bottom clamped to at most ``width``/``height``.
        """
        # get_box() truncates center and size to ints before scaling.
        cx, cy, w, h = self.to_xywh().get_box()
        w, h = w * scale, h * scale
        half_w, half_h = w // 2, h // 2

        left = int(max(cx - half_w, 0))
        top = int(max(cy - half_h, 0))
        right = int(min(cx + w - half_w, width))
        bottom = int(min(cy + h - half_h, height))
        return Bbox([left, top, right, bottom], mode="whwh")

    def __repr__(self):
        """Describe the box by its corner coordinates."""
        l, t, r, b = self.to_whwh().box
        return f"BBox(left={l}, top={t}, right={r}, bottom={b})"
|
|
|
|
class Image:
    """Image held as a numpy array with a recorded channel order and dtype label.

    Only 3-channel images are supported. uint8 data is treated as being in
    ``[0, 255]`` and floating-point data as being in ``[0, 1]``; conversions
    between the two rescale by 255.

    NOTE: the original author flagged this class as "TODO need to debug".
    """

    # dtype labels accepted as ``type_mode``.
    TYPE_ORDER = ["uint8", "float32", "float"]
    # Channel orders accepted as ``order``.
    ORDER = ["RGB", "BGR"]
    MODE = ["numpy"]

    # Mapping from dtype label to the numpy dtype it stands for.
    _DTYPES = {"uint8": np.uint8, "float32": np.float32, "float": np.float64}

    def __init__(self, input, order="RGB", type_mode="uint8"):
        """Build an image from a file path, a numpy array or another Image.

        Args:
            input: path of an image file (``str``), an ``np.ndarray`` assumed
                to be in RGB order, or an existing ``Image``.
            order: channel order the data is stored in (``"RGB"`` or ``"BGR"``).
            type_mode: dtype label the data is stored as (see ``TYPE_ORDER``).

        Raises:
            ValueError: if ``type_mode`` is not a known dtype label.
            NotImplementedError: if ``input`` is of an unsupported type.
        """
        if type_mode not in self._DTYPES:
            raise ValueError(f"Unknown type_mode {type_mode}")

        if isinstance(input, str):
            loaded = self.read_image(input, type_mode, order)
            # read_image yields float32 for both float labels; cast so the
            # stored array really has the dtype that ``type_mode`` names.
            self.data = self._convert(loaded, order, type_mode, order)
        else:
            self.data = self.get_image(input, type_mode, order)

        self.order = order
        self.type_mode = type_mode

    @staticmethod
    def _to_uint8(data):
        """Map floating-point data in [0, 1] to uint8 with rounding and clipping."""
        return np.clip(np.rint(data * 255.0), 0, 255).astype(np.uint8)

    @classmethod
    def _convert(cls, data, src_order, type_mode, order):
        """Return a fresh array with the requested channel order and dtype.

        The result never shares memory with ``data`` and is C-contiguous, so
        it can be handed to ``torch.from_numpy``.
        """
        if type_mode not in cls._DTYPES:
            raise ValueError(f"Unknown type_mode {type_mode}")
        if data.dtype != np.uint8 and not np.issubdtype(data.dtype, np.floating):
            raise TypeError(f"Unsupported image dtype {data.dtype}")

        if order != src_order:
            # Reverse the channel axis; copy() removes the negative stride.
            out = data[..., ::-1].copy()
        else:
            out = data.copy()

        target = np.dtype(cls._DTYPES[type_mode])
        if out.dtype == target:
            return out
        if target == np.uint8:
            return cls._to_uint8(out)
        if out.dtype == np.uint8:
            return (out / 255.0).astype(target)
        # float <-> float: same value range, only the precision changes.
        return out.astype(target)

    def get_image(self, input, type_mode, order):
        """Convert an ``Image`` or ``np.ndarray`` to the requested layout.

        A raw array is assumed to be in RGB order. The instance state is not
        modified; the converted array is returned.

        Raises:
            NotImplementedError: if ``input`` is neither an Image nor an array.
        """
        if isinstance(input, Image):
            return input.to_numpy(type_mode, order)
        if isinstance(input, np.ndarray):
            return self._convert(input, "RGB", type_mode, order)
        raise NotImplementedError

    def to_numpy(self, type_mode="uint8", order="RGB"):
        """Return a copy of the pixel data in the given dtype and channel order.

        Args:
            type_mode: dtype label of the result (see ``TYPE_ORDER``).
            order: channel order of the result; the channel axis is reversed
                when it differs from the stored order.

        Returns:
            np.ndarray: new contiguous array; uint8 results are in
            ``[0, 255]``, float results in ``[0, 1]``.

        Raises:
            ValueError: if ``type_mode`` is not a known dtype label.
            TypeError: if the stored data is neither uint8 nor floating point.
        """
        return self._convert(self.data, self.order, type_mode, order)

    def to_tensor(self, order):
        """Return the image as a float32 torch tensor in channel order ``order``."""
        data = self.to_numpy(type_mode="float32", order=order)
        return torch.from_numpy(data)

    def read_image(
        self,
        path,
        mode,
        order,
    ):
        """Read an image file with OpenCV into a numpy array.

        Args:
            path (str): path to the image file.
            mode (str): returned array format.
                ``"uint8"``: uint8 array, range [0, 255];
                ``"float"`` / ``"float32"``: float32 array, range [0, 1]
                (a file that is already non-uint8 is returned unchanged).
            order (str): channel order, one of ``"RGB"``, ``"RGBA"``,
                ``"BGR"``, ``"BGRA"``.

        Note:
            A 4-channel image is blended onto a white background and reduced
            to 3 channels unless ``order`` contains ``"A"``. The blend assumes
            8-bit channel values.

        Returns:
            np.ndarray: the image array.

        Raises:
            ValueError: if ``mode`` is not supported. ``"pil"`` is rejected
                because PIL is not imported by this module and the name
                ``Image`` refers to this class.
            OSError: if OpenCV cannot read ``path``.
        """
        if mode == "pil":
            raise ValueError(
                "read_image mode 'pil' is not supported: PIL is not imported "
                "by this module"
            )
        if mode not in ("uint8", "float", "float32"):
            raise ValueError(f"Unknown read_image mode {mode}")

        img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise OSError(f"Could not read image file: {path}")

        # Grayscale (2-D) arrays have no channel axis and are left untouched.
        if img.ndim == 3:
            if order in ["RGB", "RGBA"]:
                if img.shape[-1] == 4:
                    img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA)
                elif img.shape[-1] == 3:
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # Blend the alpha channel onto a white background.
            if img.shape[-1] == 4 and "A" not in order:
                img = img.astype(np.float32) / 255
                img = img[..., :3] * img[..., 3:] + (1 - img[..., 3:])

        if mode == "uint8":
            if img.dtype != np.uint8:
                img = self._to_uint8(img)
        elif img.dtype == np.uint8:
            img = img.astype(np.float32) / 255

        return img
|
|