i4ata committed on
Commit
ed1f711
·
1 Parent(s): fb9b166
__pycache__/custom_unet.cpython-310.pyc DELETED
Binary file (5.68 kB)
 
__pycache__/early_stopper.cpython-310.pyc DELETED
Binary file (956 Bytes)
 
__pycache__/model.cpython-310.pyc DELETED
Binary file (4.59 kB)
 
__pycache__/unet.cpython-310.pyc DELETED
Binary file (2.81 kB)
 
app.py CHANGED
@@ -1,15 +1,16 @@
1
  import gradio as gr
2
  from PIL import Image
3
- import os
4
  import torch
5
  import numpy as np
6
  import torchvision.transforms as transforms
7
  from torchvision.transforms.functional import resize
 
 
8
  from typing import Tuple, List
 
 
9
 
10
  from custom_unet import CustomUnet
11
- from utils import val_transform, get_pretrained_unet
12
-
13
 
14
  class GradioApp:
15
 
@@ -17,45 +18,39 @@ class GradioApp:
17
 
18
  self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
19
 
20
- custom = CustomUnet().to(self.device).eval()
21
- custom.load_state_dict(torch.load('models/custom_unet.pt', map_location=self.device))
22
 
23
- pretrained = get_pretrained_unet().to(self.device).eval()
24
- pretrained.load_state_dict(torch.load('models/pretrained_unet.pt', map_location=self.device))
25
 
26
- self.models = {
27
- 'Custom': custom,
28
- 'Pretrained': pretrained
29
- }
30
 
31
  def predict(self, img_file: str, model_name: str) -> Tuple[str, List[Tuple[np.ndarray, str]]]:
32
 
33
- image = image=np.asarray(Image.open(img_file))
34
  h,w = image.shape[:-1]
35
- image = torch.from_numpy(val_transform(image=image)['image']).float().permute(2,0,1) / 255.
36
  with torch.inference_mode():
37
  prediction = self.models[model_name](image.to(self.device).unsqueeze(0))[0].sigmoid().round().cpu()
38
  mask = resize(img=prediction, size=(h,w), interpolation=transforms.InterpolationMode.NEAREST)[0].numpy()
39
-
40
  return img_file, [(mask, 'person')]
41
 
42
  def launch(self):
43
-
44
- examples_list = [['examples/' + example] for example in os.listdir('examples')]
45
-
46
  demo = gr.Interface(
47
  fn=self.predict,
48
  inputs=[
49
  gr.Image(type='filepath', label='Input image to segment'),
50
- gr.Radio(choices=('Custom', 'Pretrained'), label='Available models')
51
  ],
52
  outputs=gr.AnnotatedImage(label='Model predictions'),
53
- examples=examples_list,
54
  cache_examples=False,
55
  title='Person Segmentation',
56
  description=f'This model performs segmentation on people in images. A Unet neural network architecture is used. \
57
  The dataset can be found [here](https://github.com/VikramShenoy97/Human-Segmentation-Dataset) \
58
- and the source code is on [GitHub](https://github.com/i4ata/UnetSegmentation).',
59
  )
60
  demo.launch()
61
 
 
1
  import gradio as gr
2
  from PIL import Image
 
3
  import torch
4
  import numpy as np
5
  import torchvision.transforms as transforms
6
  from torchvision.transforms.functional import resize
7
+ import albumentations as A
8
+ from segmentation_models_pytorch import Unet
9
  from typing import Tuple, List
10
+ import os
11
+ from glob import glob
12
 
13
  from custom_unet import CustomUnet
 
 
14
 
15
  class GradioApp:
16
 
 
18
 
19
  self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
20
 
21
+ custom = CustomUnet(in_channels=3, depth=3, start_channels=16).to(self.device).eval()
22
+ custom.load_state_dict(torch.load(os.path.join('models', 'custom_unet.pt'), map_location=self.device, weights_only=False))
23
 
24
+ pretrained = Unet(encoder_name='timm-efficientnet-b0', in_channels=3, encoder_depth=5, classes=1).to(self.device).eval()
25
+ pretrained.load_state_dict(torch.load(os.path.join('models', 'pretrained_unet.pt'), map_location=self.device, weights_only=False))
26
 
27
+ self.models = {'Custom': custom, 'Pretrained': pretrained}
28
+ self.transform = A.Compose(transforms=[A.Resize(320, 320)])
 
 
29
 
30
  def predict(self, img_file: str, model_name: str) -> Tuple[str, List[Tuple[np.ndarray, str]]]:
31
 
32
+ image = np.asarray(Image.open(img_file))
33
  h,w = image.shape[:-1]
34
+ image = torch.from_numpy(self.transform(image=image)['image']).float().permute(2,0,1) / 255.
35
  with torch.inference_mode():
36
  prediction = self.models[model_name](image.to(self.device).unsqueeze(0))[0].sigmoid().round().cpu()
37
  mask = resize(img=prediction, size=(h,w), interpolation=transforms.InterpolationMode.NEAREST)[0].numpy()
 
38
  return img_file, [(mask, 'person')]
39
 
40
  def launch(self):
 
 
 
41
  demo = gr.Interface(
42
  fn=self.predict,
43
  inputs=[
44
  gr.Image(type='filepath', label='Input image to segment'),
45
+ gr.Radio(choices=('Custom', 'Pretrained'), label='Available models', value='Custom')
46
  ],
47
  outputs=gr.AnnotatedImage(label='Model predictions'),
48
+ examples=[[example_path] for example_path in glob('examples/*.jpg')],
49
  cache_examples=False,
50
  title='Person Segmentation',
51
  description=f'This model performs segmentation on people in images. A Unet neural network architecture is used. \
52
  The dataset can be found [here](https://github.com/VikramShenoy97/Human-Segmentation-Dataset) \
53
+ and the source code is on [GitHub](https://github.com/i4ata/UnetSegmentation).'
54
  )
55
  demo.launch()
56
 
custom_unet.py CHANGED
@@ -1,18 +1,10 @@
1
- """This python module impements the Unet architecture as defined in https://arxiv.org/pdf/1505.04597.
2
- Only, I use padded convolutions. That way, there is no need for center cropping and the output mask
3
- is the same shape as the input image.
4
-
5
- Additional things: https://towardsdatascience.com/understanding-u-net-61276b10f360
6
- """
7
-
8
  import torch
9
  import torch.nn as nn
10
  import torch.nn.functional as F
11
 
12
  class DoubleConv(nn.Module):
13
-
14
  def __init__(self, in_channels: int, out_channels: int) -> None:
15
-
16
  super().__init__()
17
  self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding='same')
18
  self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding='same')
@@ -21,45 +13,36 @@ class DoubleConv(nn.Module):
21
  return F.relu(self.conv2(F.relu(self.conv1(x))))
22
 
23
  class Up(nn.Module):
24
-
25
  def __init__(self, in_channels: int, out_channels: int) -> None:
26
  super().__init__()
27
  self.upconv = nn.ConvTranspose2d(in_channels=in_channels, out_channels=out_channels, kernel_size=2, stride=2)
28
  self.conv = DoubleConv(in_channels=in_channels, out_channels=out_channels)
29
-
30
  def forward(self, x_left: torch.Tensor, x_right: torch.Tensor) -> torch.Tensor:
31
  return self.conv(torch.cat((x_left, self.upconv(x_right)), dim=1))
32
 
33
  class CustomUnet(nn.Module):
34
 
35
- def __init__(self, in_channels: int = 3, depth: int = 3, start_channels: int = 16) -> None:
36
-
37
  super().__init__()
38
-
39
  self.input_conv = DoubleConv(in_channels, start_channels)
40
-
41
  self.encoder_layers = nn.ModuleList()
42
  for i in range(depth):
43
  self.encoder_layers.append(DoubleConv(start_channels, start_channels * 2))
44
  start_channels *= 2
45
-
46
  self.decoder_layers = nn.ModuleList()
47
  for i in range(depth):
48
  self.decoder_layers.append(Up(start_channels, start_channels // 2))
49
  start_channels //= 2
50
-
51
  self.output_conv = nn.Conv2d(start_channels, 1, kernel_size=1)
52
 
53
  def forward(self, x: torch.Tensor) -> torch.Tensor:
54
-
55
  x = self.input_conv(x)
56
  xs = [x]
57
-
58
  for encoding_layer in self.encoder_layers:
59
  x = encoding_layer(F.max_pool2d(x, 2))
60
- xs.append(x)
61
-
62
- for decoding_layer, x_left in zip(self.decoder_layers, reversed(xs[:-1])):
63
  x = decoding_layer(x_left, x)
64
-
65
  return self.output_conv(x)
 
 
 
 
 
 
 
 
1
  import torch
2
  import torch.nn as nn
3
  import torch.nn.functional as F
4
 
5
  class DoubleConv(nn.Module):
6
+
7
  def __init__(self, in_channels: int, out_channels: int) -> None:
 
8
  super().__init__()
9
  self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding='same')
10
  self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding='same')
 
13
  return F.relu(self.conv2(F.relu(self.conv1(x))))
14
 
15
  class Up(nn.Module):
16
+
17
  def __init__(self, in_channels: int, out_channels: int) -> None:
18
  super().__init__()
19
  self.upconv = nn.ConvTranspose2d(in_channels=in_channels, out_channels=out_channels, kernel_size=2, stride=2)
20
  self.conv = DoubleConv(in_channels=in_channels, out_channels=out_channels)
21
+
22
  def forward(self, x_left: torch.Tensor, x_right: torch.Tensor) -> torch.Tensor:
23
  return self.conv(torch.cat((x_left, self.upconv(x_right)), dim=1))
24
 
25
  class CustomUnet(nn.Module):
26
 
27
+ def __init__(self, in_channels: int, depth: int, start_channels: int) -> None:
 
28
  super().__init__()
 
29
  self.input_conv = DoubleConv(in_channels, start_channels)
 
30
  self.encoder_layers = nn.ModuleList()
31
  for i in range(depth):
32
  self.encoder_layers.append(DoubleConv(start_channels, start_channels * 2))
33
  start_channels *= 2
 
34
  self.decoder_layers = nn.ModuleList()
35
  for i in range(depth):
36
  self.decoder_layers.append(Up(start_channels, start_channels // 2))
37
  start_channels //= 2
 
38
  self.output_conv = nn.Conv2d(start_channels, 1, kernel_size=1)
39
 
40
  def forward(self, x: torch.Tensor) -> torch.Tensor:
 
41
  x = self.input_conv(x)
42
  xs = [x]
 
43
  for encoding_layer in self.encoder_layers:
44
  x = encoding_layer(F.max_pool2d(x, 2))
45
+ xs.append(x)
46
+ for decoding_layer, x_left in zip(self.decoder_layers, reversed(xs[:-1]), strict=True):
 
47
  x = decoding_layer(x_left, x)
 
48
  return self.output_conv(x)
utils.py DELETED
@@ -1,20 +0,0 @@
1
- import albumentations as A
2
- from segmentation_models_pytorch import Unet
3
-
4
- val_transform = A.Compose(
5
- transforms=[
6
- A.Resize(320, 320)
7
- ],
8
- is_check_shapes=False
9
- )
10
-
11
- def get_pretrained_unet() -> Unet:
12
- unet = Unet(
13
- encoder_name='timm-efficientnet-b0',
14
- encoder_weights='imagenet',
15
- in_channels=3,
16
- encoder_depth=5,
17
- classes=1,
18
- activation=None
19
- )
20
- return unet