i4ata commited on
Commit
4fcc913
·
1 Parent(s): 7e91fbc
app.py CHANGED
@@ -1,36 +1,44 @@
1
  import gradio as gr
2
  from PIL import Image
3
  import os
4
-
5
  import torch
6
  import numpy as np
 
 
 
7
 
8
- from model import SegmentationModel
9
  from custom_unet import CustomUnet
10
- from unet import Unet
11
 
12
- from typing import Dict, Union, Tuple, List
13
 
14
  class GradioApp:
15
 
16
  def __init__(self) -> None:
17
 
18
- self.models: Dict[str, Union[str, SegmentationModel]] = {
19
- 'Custom': 'custom_unet',
20
- 'Pretrained': 'unet'
 
 
 
 
 
 
 
 
21
  }
22
 
23
  def predict(self, img_file: str, model_name: str) -> Tuple[str, List[Tuple[np.ndarray, str]]]:
24
 
25
- # Lazy loading of models
26
- if isinstance(self.models[model_name], str):
27
- model_class = CustomUnet if model_name == 'Custom' else Unet
28
- self.models[model_name] = model_class(self.models[model_name], from_file=True, device='cpu')
29
- self.models[model_name].eval()
30
-
31
- prediction = self.models[model_name].predict(img_file, option='mask')[0] * 1
32
- return img_file, [(prediction, 'person')]
33
-
34
  def launch(self):
35
 
36
  examples_list = [['examples/' + example] for example in os.listdir('examples')]
 
1
  import gradio as gr
2
  from PIL import Image
3
  import os
 
4
  import torch
5
  import numpy as np
6
+ import torchvision.transforms as transforms
7
+ from torchvision.transforms.functional import resize
8
+ from typing import Tuple, List
9
 
 
10
  from custom_unet import CustomUnet
11
+ from utils import val_transform, get_pretrained_unet
12
 
 
13
 
14
  class GradioApp:
15
 
16
  def __init__(self) -> None:
17
 
18
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
19
+
20
+ custom = CustomUnet().to(device).eval()
21
+ custom.load_state_dict(torch.load('models/custom_unet.pt', map_location=device))
22
+
23
+ pretrained = get_pretrained_unet().to(device).eval()
24
+ pretrained.load_state_dict(torch.load('models/pretrained_unet.pt', map_location=device))
25
+
26
+ self.models = {
27
+ 'Custom': custom,
28
+ 'Pretrained': pretrained
29
  }
30
 
31
  def predict(self, img_file: str, model_name: str) -> Tuple[str, List[Tuple[np.ndarray, str]]]:
32
 
33
+ image = image=np.asarray(Image.open(img_file))
34
+ h,w = image.shape[:-1]
35
+ image = torch.from_numpy(val_transform(image=image)['image']).float().permute(2,0,1) / 255.
36
+ with torch.inference_mode():
37
+ prediction = self.models[model_name](image.to(self.device).unsqueeze(0))[0].sigmoid().round().cpu()
38
+ mask = resize(img=prediction, size=(h,w), interpolation=transforms.InterpolationMode.NEAREST)[0].numpy()
39
+
40
+ return img_file, [(mask, 'person')]
41
+
42
  def launch(self):
43
 
44
  examples_list = [['examples/' + example] for example in os.listdir('examples')]
custom_unet.py CHANGED
@@ -7,46 +7,30 @@ Additional things: https://towardsdatascience.com/understanding-u-net-61276b10f3
7
 
8
  import torch
9
  import torch.nn as nn
10
- from torchinfo import summary
11
-
12
- from model import SegmentationModel
13
- from early_stopper import EarlyStopper
14
-
15
- from typing import Tuple, Union, Optional
16
-
17
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
18
-
19
- class DiceLoss(nn.Module):
20
-
21
- def forward(self, logits: torch.Tensor, mask_true: torch.Tensor):
22
- logits = torch.sigmoid(logits) > .5
23
- intersection = (logits * mask_true).sum()
24
- union = logits.sum() + mask_true.sum()
25
- return 2 * intersection / union
26
 
27
  class DoubleConv(nn.Module):
28
 
29
  def __init__(self, in_channels: int, out_channels: int) -> None:
30
 
31
  super().__init__()
32
- self.relu = nn.ReLU()
33
  self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding='same')
34
  self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding='same')
35
 
36
  def forward(self, x: torch.Tensor):
37
- return self.relu(self.conv2(self.relu(self.conv1(x))))
38
 
39
  class Up(nn.Module):
40
 
41
- def __init__(self, in_channels, out_channels) -> None:
42
  super().__init__()
43
  self.upconv = nn.ConvTranspose2d(in_channels=in_channels, out_channels=out_channels, kernel_size=2, stride=2)
44
  self.conv = DoubleConv(in_channels=in_channels, out_channels=out_channels)
45
 
46
- def forward(self, x_left, x_right):
47
  return self.conv(torch.cat((x_left, self.upconv(x_right)), dim=1))
48
 
49
- class UnetModel(nn.Module):
50
 
51
  def __init__(self, in_channels: int = 3, depth: int = 3, start_channels: int = 16) -> None:
52
 
@@ -65,8 +49,6 @@ class UnetModel(nn.Module):
65
  start_channels //= 2
66
 
67
  self.output_conv = nn.Conv2d(start_channels, 1, kernel_size=1)
68
-
69
- self.pool = nn.MaxPool2d(2, 2)
70
 
71
  def forward(self, x: torch.Tensor) -> torch.Tensor:
72
 
@@ -74,62 +56,10 @@ class UnetModel(nn.Module):
74
  xs = [x]
75
 
76
  for encoding_layer in self.encoder_layers:
77
- x = encoding_layer(self.pool(x))
78
  xs.append(x)
79
 
80
  for decoding_layer, x_left in zip(self.decoder_layers, reversed(xs[:-1])):
81
  x = decoding_layer(x_left, x)
82
 
83
  return self.output_conv(x)
84
-
85
- class CustomUnet(SegmentationModel):
86
-
87
- def __init__(self,
88
- name: str = 'default_name',
89
- from_file: bool = True,
90
- image_size: Tuple[int, int] = (320, 320),
91
- in_channels: int = 3,
92
- start_channels: int = 16,
93
- encoder_depth: int = 5,
94
- device: str = 'cuda' if torch.cuda.is_available() else 'cpu') -> None:
95
-
96
- super().__init__()
97
-
98
- assert image_size[0] % (2**encoder_depth) == 0
99
- assert image_size[1] % (2**encoder_depth) == 0
100
-
101
- self.name = name
102
- self.image_size = image_size
103
- self.in_channels = in_channels
104
- self.device = device
105
-
106
- self.save_path = f'models/{name}.pth'
107
-
108
- if from_file:
109
- self.unet = torch.load(self.save_path, map_location=device)
110
- else:
111
- self.unet = UnetModel(in_channels=in_channels, depth=encoder_depth, start_channels=start_channels).to(device)
112
-
113
- self.bce_loss = nn.BCEWithLogitsLoss()
114
- self.dice_loss = DiceLoss()
115
- self.loss_fn = lambda logits, masks: self.bce_loss(logits, masks) + self.dice_loss(logits, masks)
116
-
117
- def configure_optimizers(self, **kwargs):
118
- self.optimizer = torch.optim.Adam(params=self.unet.parameters(), lr=kwargs['lr'])
119
- self.early_stopper = EarlyStopper(patience=kwargs['patience'])
120
-
121
- def forward(self, images: torch.Tensor, masks: Optional[torch.Tensor] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
122
- logits = self.unet(images)
123
- if masks is None:
124
- return logits
125
- return logits, self.loss_fn(logits, masks)
126
-
127
- def save(self) -> None:
128
- # Save the whole model, not only the state dict, so that it will work for different unets
129
- torch.save(self.unet, self.save_path)
130
-
131
- def print_summary(self, batch_size: int = 16) -> None:
132
-
133
- print(summary(self.unet, input_size=(batch_size, self.in_channels, *self.image_size),
134
- col_names=['input_size', 'output_size', 'num_params'],
135
- row_settings=['var_names']))
 
7
 
8
  import torch
9
  import torch.nn as nn
10
+ import torch.nn.functional as F
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
class DoubleConv(nn.Module):
    """Two consecutive 3x3 same-padding convolutions, each followed by ReLU."""

    def __init__(self, in_channels: int, out_channels: int) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding='same')
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding='same')

    def forward(self, x: torch.Tensor):
        # Apply the two conv+ReLU stages one after the other.
        out = F.relu(self.conv1(x))
        out = F.relu(self.conv2(out))
        return out
22
 
23
class Up(nn.Module):
    """Decoder stage: upsample, concatenate the skip connection, run DoubleConv."""

    def __init__(self, in_channels: int, out_channels: int) -> None:
        super().__init__()
        # 2x2 stride-2 transposed conv doubles the spatial resolution.
        self.upconv = nn.ConvTranspose2d(in_channels=in_channels, out_channels=out_channels, kernel_size=2, stride=2)
        self.conv = DoubleConv(in_channels=in_channels, out_channels=out_channels)

    def forward(self, x_left: torch.Tensor, x_right: torch.Tensor) -> torch.Tensor:
        upsampled = self.upconv(x_right)
        # Channel-wise concat of the encoder skip (x_left) with the upsampled path.
        merged = torch.cat((x_left, upsampled), dim=1)
        return self.conv(merged)
32
 
33
+ class CustomUnet(nn.Module):
34
 
35
  def __init__(self, in_channels: int = 3, depth: int = 3, start_channels: int = 16) -> None:
36
 
 
49
  start_channels //= 2
50
 
51
  self.output_conv = nn.Conv2d(start_channels, 1, kernel_size=1)
 
 
52
 
53
  def forward(self, x: torch.Tensor) -> torch.Tensor:
54
 
 
56
  xs = [x]
57
 
58
  for encoding_layer in self.encoder_layers:
59
+ x = encoding_layer(F.max_pool2d(x, 2))
60
  xs.append(x)
61
 
62
  for decoding_layer, x_left in zip(self.decoder_layers, reversed(xs[:-1])):
63
  x = decoding_layer(x_left, x)
64
 
65
  return self.output_conv(x)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
early_stopper.py DELETED
@@ -1,23 +0,0 @@
1
- """This module contains a class that implements early stopping regularization technique"""
2
-
3
- class EarlyStopper:
4
-
5
- def __init__(self, patience: int = 2):
6
-
7
- self.patience = patience
8
- self.best_loss = float('inf')
9
- self.counter = 0
10
- self.save_model = False
11
-
12
- def check(self, validation_loss: float) -> bool:
13
-
14
- self.save_model = False
15
- if validation_loss > self.best_loss:
16
- self.counter += 1
17
- if self.counter == self.patience:
18
- return True
19
- else:
20
- self.best_loss = validation_loss
21
- self.counter = 0
22
- self.save_model = True
23
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model.py DELETED
@@ -1,130 +0,0 @@
1
- """This module contains the base class for segmentation models"""
2
-
3
- import torch
4
- import torch.nn as nn
5
- from torch.utils.data import DataLoader
6
- from torchvision.utils import draw_segmentation_masks
7
- from torchvision.transforms.functional import resize
8
- from torch.utils.tensorboard.writer import SummaryWriter
9
-
10
- import numpy as np
11
- import cv2 as cv
12
- import albumentations as A
13
-
14
- from typing import Optional, Union, Tuple, Literal
15
-
16
- from early_stopper import EarlyStopper
17
-
18
- class SegmentationModel(nn.Module):
19
-
20
- name: str = "base name"
21
- device: Literal['cpu', 'cuda'] = None
22
-
23
- optimizer: torch.optim.Optimizer = None
24
- early_stopper: EarlyStopper = None
25
- lr_scheduler: torch.optim.lr_scheduler.LRScheduler = None
26
- save_path: str = None
27
- image_size: Tuple[int, int] = None
28
-
29
- def configure_optimizers(self, **kwargs) -> None:
30
- raise NotImplementedError()
31
-
32
- def forward(self, images: torch.Tensor, masks: Optional[torch.Tensor] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
33
-
34
- raise NotImplementedError()
35
-
36
- def _train_step(self, data_loader: DataLoader) -> float:
37
-
38
- self.train()
39
- total_loss = 0.
40
- for images, masks in data_loader:
41
- images, masks = images.to(self.device), masks.to(self.device)
42
-
43
- self.optimizer.zero_grad()
44
- logits, loss = self(images, masks)
45
- loss.backward()
46
- self.optimizer.step()
47
-
48
- total_loss += loss.item()
49
-
50
- return total_loss / len(data_loader)
51
-
52
- def _test_step(self, data_loader: DataLoader) -> float:
53
-
54
- self.eval()
55
- total_loss = 0.
56
- with torch.inference_mode():
57
- for images, masks in data_loader:
58
- images, masks = images.to(self.device), masks.to(self.device)
59
- logits, loss = self(images, masks)
60
- total_loss += loss.item()
61
- return total_loss / len(data_loader)
62
-
63
- def train_model(self, train_loader: DataLoader, test_loader: DataLoader, epochs: int, log_dir: str) -> None:
64
-
65
- writer = SummaryWriter(log_dir=f'{log_dir}/{self.name}')
66
-
67
- for i in range(epochs):
68
- train_loss = self._train_step(train_loader)
69
- test_loss = self._test_step(test_loader)
70
-
71
- if self.early_stopper is not None:
72
- if self.early_stopper.check(test_loss):
73
- print(f'Model stopped early due to risk of overfitting')
74
- break
75
-
76
- if self.early_stopper.save_model:
77
- self.save()
78
- print('saved model')
79
-
80
- if self.lr_scheduler is not None:
81
- self.lr_scheduler.step()
82
-
83
- print(f'{i}: Train loss: {train_loss :.2} | Test loss: {test_loss :.2}')
84
-
85
- writer.add_scalars(main_tag='Loss over time',
86
- tag_scalar_dict={'train loss': train_loss, 'test loss': test_loss},
87
- global_step=i)
88
-
89
- else:
90
- if self.early_stopper is not None:
91
- print('Model did not converge. Possibility of underfitting')
92
- self.save()
93
- writer.close()
94
-
95
- def save(self) -> None:
96
- raise NotImplementedError()
97
-
98
- def predict(self,
99
- test_image_path: str,
100
- option: Literal['mask', 'image_with_mask', 'mask_and_image_with_mask'] = 'image_with_mask'
101
- ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
102
-
103
- self.eval()
104
- input_resizer = A.Resize(*self.image_size)
105
-
106
- original_image = cv.cvtColor(cv.imread(test_image_path), cv.COLOR_BGR2RGB)
107
- original_image_tensor = torch.from_numpy(original_image).permute(2,0,1).type(torch.uint8)
108
- resized_image_tensor = (torch.from_numpy(input_resizer(image=original_image)['image']).float() / 255.).permute(2,0,1)
109
-
110
- with torch.inference_mode():
111
- logits = self(resized_image_tensor.unsqueeze(0).to(self.device)).squeeze(0).cpu().detach()
112
- probs = torch.sigmoid(logits)
113
- resized_mask_tensor = probs > .5
114
-
115
- original_mask_tensor = resize(resized_mask_tensor, size=original_image.shape[:-1], antialias=True)
116
-
117
- image_with_mask = draw_segmentation_masks(image=original_image_tensor,
118
- masks=original_mask_tensor,
119
- alpha=.5,
120
- colors='white')
121
-
122
- if option == 'mask':
123
- return original_mask_tensor.numpy()
124
- if option == 'image_with_mask':
125
- return image_with_mask.permute(1,2,0).numpy()
126
- if option == 'mask_and_image_with_mask':
127
- return original_mask_tensor.numpy(), image_with_mask.permute(1,2,0).numpy()
128
-
129
- def print_summary(self) -> None:
130
- raise NotImplementedError()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/{custom_unet.pth → custom_unet.pt} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eeca616e3026a77a2125e4c880f5335e1efa4a2c53b1ad4dad0082e227e49b85
3
- size 7812958
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a227b749031ac42b97c9833bd18e8c37b5104bb94546f2063310a73a9b912fe5
3
+ size 1941386
models/{unet.pth → pretrained_unet.pt} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d26354f766301bc980c66f3984599491a4c3dc35706dc3e31a95f115e30a74c6
3
- size 25378610
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b3fe6a191c10e927901529593049ba76ccbab708ec62a8d25fa2f8b46cb4ac2
3
+ size 25339050
requirements.txt CHANGED
@@ -1,9 +1,8 @@
1
  torch
2
- torchinfo
3
  segmentation-models-pytorch
4
  albumentations
5
  opencv-python
6
  gradio
7
  numpy
8
  matplotlib
9
- tensorboard
 
1
  torch
 
2
  segmentation-models-pytorch
3
  albumentations
4
  opencv-python
5
  gradio
6
  numpy
7
  matplotlib
8
+ tensorboard
unet.py DELETED
@@ -1,67 +0,0 @@
1
- """This module defines a Unet architecture"""
2
-
3
- import torch.nn as nn
4
- import torch
5
- from torchinfo import summary
6
- import segmentation_models_pytorch as smp
7
- from early_stopper import EarlyStopper
8
-
9
- from model import SegmentationModel
10
-
11
- from typing import Optional, Union, Tuple
12
-
13
- class Unet(SegmentationModel):
14
-
15
- def __init__(self,
16
- name: str = 'default_name',
17
- from_file: bool = True,
18
- image_size: Tuple[int, int] = (320, 320),
19
- encoder_name: str = 'timm-efficientnet-b0',
20
- pretrained: bool = True,
21
- in_channels: int = 3,
22
- encoder_depth: int = 5,
23
- device: str = 'cuda' if torch.cuda.is_available() else 'cpu') -> None:
24
-
25
- super().__init__()
26
-
27
- self.name = name
28
- self.image_size = image_size
29
- self.in_channels = in_channels
30
- self.device = device
31
-
32
- self.save_path = f'models/{name}.pth'
33
-
34
- if from_file:
35
- self.unet = torch.load(self.save_path, map_location=device)
36
- else:
37
- self.unet = smp.Unet(
38
- encoder_name=encoder_name,
39
- encoder_weights='imagenet' if pretrained else None,
40
- in_channels=in_channels,
41
- encoder_depth=encoder_depth,
42
- classes=1,
43
- activation=None
44
- ).to(device)
45
-
46
- bce_loss_fn = nn.BCEWithLogitsLoss()
47
- dice_loss_fn = smp.losses.DiceLoss(mode='binary')
48
- self.loss_fn = lambda logits, masks: bce_loss_fn(logits, masks) + dice_loss_fn(logits, masks)
49
-
50
- def configure_optimizers(self, **kwargs):
51
- self.optimizer = torch.optim.Adam(params=self.unet.parameters(), lr=kwargs['lr'])
52
- self.early_stopper = EarlyStopper(patience=kwargs['patience'])
53
-
54
- def forward(self, images: torch.Tensor, masks: Optional[torch.Tensor] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
55
- logits = self.unet(images)
56
- if masks is None:
57
- return logits
58
- return logits, self.loss_fn(logits, masks)
59
-
60
- def save(self) -> None:
61
- # Save the whole model, not only the state dict, so that it will work for different unets
62
- torch.save(self.unet, self.save_path)
63
-
64
- def print_summary(self, batch_size: int = 16) -> None:
65
- print(summary(self.unet, input_size=(batch_size, self.in_channels, *self.image_size),
66
- col_names=['input_size', 'output_size', 'num_params'],
67
- row_settings=['var_names']))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import albumentations as A
2
+ from segmentation_models_pytorch import Unet
3
+
4
# Validation/inference preprocessing: resize every input image to the
# models' fixed 320x320 input size.
# NOTE(review): is_check_shapes=False presumably because no mask is passed
# alongside the image at inference time — confirm against callers.
val_transform = A.Compose(
    transforms=[
        A.Resize(320, 320)
    ],
    is_check_shapes=False
)
10
+
11
def get_pretrained_unet() -> Unet:
    """Build the single-class smp U-Net with an ImageNet-pretrained
    timm-efficientnet-b0 encoder (logit output, no activation)."""
    return Unet(
        encoder_name='timm-efficientnet-b0',
        encoder_weights='imagenet',
        in_channels=3,
        encoder_depth=5,
        classes=1,
        activation=None
    )