KuunVo commited on
Commit
b77fd1a
·
1 Parent(s): ea2173c

First Commit

Browse files
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ __pycache__/*
2
+ temp/*
3
+ models/__pycache__/*
4
+ ui/__pycache__/*
main.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
from ui import upscaler_ui, enhancer_ui

st.set_page_config(layout="wide")

# One top-level tab per tool; each ui module draws its own widgets in its tab.
pages = {"Upscaler": upscaler_ui, "Enhancer": enhancer_ui}
for tab, module in zip(st.tabs(list(pages)), pages.values()):
    with tab:
        module.ui()
14
+
models/base_model.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
class AttentionBlock(nn.Module):
    """Single-head self-attention over spatial positions, with a residual add.

    Input/output shape is (B, C, H, W); the H*W positions are the tokens and
    each token's channel vector is its embedding.
    """

    def __init__(self, in_ch):
        super().__init__()
        self.group_norm = nn.GroupNorm(32, in_ch)
        # 1x1 convolutions act as per-position linear projections.
        self.proj_q = nn.Conv2d(in_ch, in_ch, 1, stride=1, padding=0)
        self.proj_k = nn.Conv2d(in_ch, in_ch, 1, stride=1, padding=0)
        self.proj_v = nn.Conv2d(in_ch, in_ch, 1, stride=1, padding=0)
        self.proj = nn.Conv2d(in_ch, in_ch, 1, stride=1, padding=0)

    def forward(self, x):
        batch, channels, height, width = x.shape
        tokens = height * width

        normed = self.group_norm(x)
        # Queries/values laid out (B, HW, C); keys (B, C, HW) for the matmul.
        q = self.proj_q(normed).permute(0, 2, 3, 1).reshape(batch, tokens, channels)
        k = self.proj_k(normed).reshape(batch, channels, tokens)
        v = self.proj_v(normed).permute(0, 2, 3, 1).reshape(batch, tokens, channels)

        # Scaled dot-product attention over positions.
        weights = torch.bmm(q, k) * (channels ** -0.5)
        weights = F.softmax(weights, dim=-1)

        attended = torch.bmm(weights, v)
        assert list(attended.shape) == [batch, tokens, channels]
        attended = attended.reshape(batch, height, width, channels).permute(0, 3, 1, 2)

        # Residual connection around the whole attention computation.
        return x + self.proj(attended)
34
+
35
+
36
class ResidualBlock(nn.Module):
    """Pre-activation residual block: (GroupNorm -> SiLU -> Conv) twice,
    a projected skip connection, and optional trailing self-attention.
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 dropout: float,
                 n_groups: int = 32,
                 has_attn: bool = False):
        super().__init__()

        # First stage: may change the channel count.
        self.norm1 = nn.GroupNorm(n_groups, in_channels)
        self.act1 = nn.SiLU()
        self.conv1 = nn.Conv2d(in_channels, out_channels,
                               kernel_size=(3, 3), padding=(1, 1))

        # Second stage: keeps the channel count, with dropout before the conv.
        self.norm2 = nn.GroupNorm(n_groups, out_channels)
        self.act2 = nn.SiLU()
        self.conv2 = nn.Conv2d(out_channels, out_channels,
                               kernel_size=(3, 3), padding=(1, 1))

        # 1x1 projection so the skip path matches the output channels.
        self.shortcut = (nn.Conv2d(in_channels, out_channels, kernel_size=(1, 1))
                         if in_channels != out_channels else nn.Identity())

        self.attn = AttentionBlock(out_channels) if has_attn else nn.Identity()

        self.dropout = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor):
        out = self.conv1(self.act1(self.norm1(x)))
        out = self.act2(self.norm2(out))
        out = self.conv2(self.dropout(out))
        return self.attn(out + self.shortcut(x))
72
+
73
+
74
class DownBlock(nn.Module):
    """Encoder-side stage unit: a single ResidualBlock (optionally with attention).

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by the residual block.
        has_attn: whether the residual block ends with self-attention.
        dropout: dropout probability (annotation fixed from ``int`` —
            callers pass a float such as 0.1).
    """

    def __init__(self, in_channels: int, out_channels: int, has_attn: bool, dropout: float):
        super().__init__()
        self.res = ResidualBlock(
            in_channels, out_channels, dropout=dropout, has_attn=has_attn)

    def forward(self, x: torch.Tensor):
        return self.res(x)
82
+
83
+
84
class UpBlock(nn.Module):
    """Decoder-side stage unit: a single ResidualBlock (optionally with attention).

    Args:
        in_channels: channels of the incoming feature map (doubled after a
            skip-connection concatenation in the UNET decoder).
        out_channels: channels produced by the residual block.
        has_attn: whether the residual block ends with self-attention.
        dropout: dropout probability (annotation fixed from ``int`` —
            callers pass a float such as 0.1).
    """

    def __init__(self, in_channels: int, out_channels: int, has_attn: bool, dropout: float):
        super().__init__()
        self.res = ResidualBlock(
            in_channels, out_channels, dropout=dropout, has_attn=has_attn)

    def forward(self, x: torch.Tensor):
        return self.res(x)
92
+
93
+
94
class MiddleBlock(nn.Module):
    """Bottleneck between encoder and decoder: an attention-equipped residual
    block followed by a plain one, both at the bottleneck width.

    Args:
        n_channels: channel count at the bottleneck.
        dropout: dropout probability (annotation fixed from ``int`` —
            callers pass a float such as 0.1).
    """

    def __init__(self, n_channels: int, dropout: float):
        super().__init__()
        self.res1 = ResidualBlock(
            n_channels, n_channels, dropout=dropout, has_attn=True)
        self.res2 = ResidualBlock(n_channels, n_channels, dropout=dropout)

    def forward(self, x: torch.Tensor):
        x = self.res1(x)
        x = self.res2(x)
        return x
105
+
106
+
107
class Downsample(nn.Module):
    """Halve the spatial resolution with a stride-2 3x3 convolution."""

    def __init__(self, n_channels):
        super().__init__()
        # Same channel count in and out; stride 2 does the downsampling.
        self.conv = nn.Conv2d(n_channels, n_channels,
                              kernel_size=3, stride=2, padding=1)

    def forward(self, feature_map: torch.Tensor):
        return self.conv(feature_map)
115
+
116
+
117
class Upsample(nn.Module):
    """Double the spatial resolution: a stride-2 transposed convolution
    followed by a 3x3 convolution that smooths the result."""

    def __init__(self, n_channels):
        super().__init__()
        self.convT = nn.ConvTranspose2d(
            n_channels, n_channels, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.conv = nn.Conv2d(n_channels, n_channels,
                              kernel_size=3, stride=1, padding=1)

    def forward(self, feature_map: torch.Tensor):
        upsampled = self.convT(feature_map)
        return self.conv(upsampled)
129
+
130
+
131
class MeanShift(nn.Conv2d):
    """Frozen 1x1 convolution that shifts an RGB image by a fixed mean.

    With the default sign=-1 it subtracts ``rgb_range * rgb_mean`` (scaled by
    ``rgb_std``); with sign=+1 it adds it back. The default mean is the one
    hard-coded here (0.4488, 0.4371, 0.4040). All parameters are frozen.
    """

    def __init__(
            self, rgb_range,
            rgb_mean=(0.4488, 0.4371, 0.4040), rgb_std=(1.0, 1.0, 1.0), sign=-1):

        super().__init__(3, 3, kernel_size=1)
        std = torch.tensor(rgb_std)
        # Diagonal weight = identity divided by the per-channel std.
        self.weight.data = torch.eye(3).view(3, 3, 1, 1) / std.view(3, 1, 1, 1)
        self.bias.data = sign * rgb_range * torch.tensor(rgb_mean) / std
        # Fixed transform: never trained.
        self.requires_grad_(False)
142
+
143
+
144
class UNET(nn.Module):
    """U-Net refinement model for single-image super-resolution.

    The caller feeds an image in [0, 1] that has already been bicubic-resized
    to the target resolution; this network refines it at constant spatial
    size. Internally the input is scaled to [0, 255] and mean-shifted via
    MeanShift, then shifted back and rescaled to [0, 1] on output.

    NOTE(review): two architectural quirks are kept deliberately — the shipped
    pretrained checkpoints were trained with them, and changing the module
    layout would break ``load_state_dict``:
      * ``left_unet`` appends a Downsample after *every* stage (its guard is
        always true; presumably ``len(...) - 1`` was intended).
      * ``right_unet`` indexes ``is_attn_layers[i - 1]``, which wraps to the
        last entry when i == 0.
    """

    def __init__(self,
                 in_channels: int = 3,
                 out_channels: int = 3,
                 n_features: int = 64,
                 dropout: float = 0.1,
                 block_out_channels=(64, 128, 128, 256),
                 layers_per_block=4,
                 is_attn_layers=(False, False, True, False),
                 ):
        """Build the encoder, bottleneck and decoder.

        Args:
            in_channels / out_channels: image channel counts (RGB by default).
            n_features: width of the shallow feature extractor.
            dropout: dropout probability for every residual block
                (annotation fixed from ``int``).
            block_out_channels: channel width per encoder/decoder stage.
                Default changed from a mutable list to an equivalent tuple
                (mutable default argument); it is only read, never mutated.
            layers_per_block: residual blocks per stage.
            is_attn_layers: per-stage flag enabling self-attention.
        """
        super().__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.n_features = n_features
        self.dropout = dropout
        self.block_out_channels = block_out_channels
        self.layers_per_block = layers_per_block
        self.is_attn_layers = is_attn_layers

        # Frozen RGB mean subtraction / re-addition on the 0-255 scale.
        self.sub_mean = MeanShift(255)
        self.add_mean = MeanShift(255, sign=1)

        self.shallow_feature_extraction = nn.Conv2d(
            in_channels, n_features, kernel_size=3, padding=1)
        # (sic) attribute name kept as-is for checkpoint compatibility.
        self.image_rescontruction = nn.Conv2d(
            n_features, in_channels, kernel_size=3, padding=1)

        self.left_model = self.left_unet()
        self.middle_model = MiddleBlock(
            block_out_channels[-1], dropout=self.dropout)
        self.right_model = self.right_unet()

    def left_unet(self):
        """Encoder: per stage, a Sequential of DownBlocks then a Downsample."""
        left_model = []

        in_channel = out_channel = self.n_features
        for i in range(len(self.block_out_channels)):
            out_channel = self.block_out_channels[i]

            down_block = [DownBlock(in_channel, out_channel, dropout=self.dropout, has_attn=self.is_attn_layers[i])] \
                + [DownBlock(out_channel, out_channel, dropout=self.dropout,
                             has_attn=self.is_attn_layers[i])] * (self.layers_per_block - 1)
            in_channel = out_channel
            left_model.append(nn.Sequential(*down_block))
            # NOTE(review): always true, so the last stage also gets a
            # Downsample; kept as trained (see class docstring).
            if i < len(self.block_out_channels):
                left_model.append(Downsample(out_channel))

        return nn.ModuleList(left_model)

    def right_unet(self):
        """Decoder: per stage, a Sequential of UpBlocks and an Upsample,
        plus a final stage consuming the outermost skip connection."""
        right_unet = []

        in_channel = out_channel = self.block_out_channels[-1]
        for i in reversed(range(len(self.block_out_channels))):

            out_channel = self.block_out_channels[i]

            # NOTE(review): [i - 1] wraps to the last entry when i == 0;
            # kept as trained (see class docstring).
            up_block = [UpBlock(in_channel, out_channel, dropout=self.dropout, has_attn=self.is_attn_layers[i - 1])] \
                + [UpBlock(out_channel, out_channel, dropout=self.dropout, has_attn=self.is_attn_layers[i - 1])
                   ] * (self.layers_per_block - 1)

            # After the following Upsample, a skip connection is concatenated
            # in forward(), doubling the channels seen by the next stage.
            in_channel = out_channel * 2
            right_unet.append(nn.Sequential(*up_block))
            right_unet.append(Upsample(out_channel))

        in_channel, out_channel = self.block_out_channels[0] * \
            2, self.n_features
        up_block = [UpBlock(in_channel, out_channel, dropout=self.dropout, has_attn=self.is_attn_layers[0])] \
            + [UpBlock(out_channel, out_channel, dropout=self.dropout, has_attn=self.is_attn_layers[0])
               ] * (self.layers_per_block - 1)
        right_unet.append(nn.Sequential(*up_block))
        return nn.ModuleList(right_unet)

    def forward(self, x):
        """Refine ``x`` (B, C, H, W, values in [0, 1]); same shape/range out.

        H and W must be multiples of the total downsampling factor —
        presumably 2**4 given four Downsample stages; see utils.find_padding.
        """
        x = x * 255
        x = self.sub_mean(x)

        feature_maps = self.shallow_feature_extraction(x)
        # Skip connections: one feature map per non-Downsample step.
        feature_x = [feature_maps]
        feature_block = feature_maps
        for block in self.left_model:
            feature_block = block(feature_block)
            if not isinstance(block, Downsample):
                feature_x.append(feature_block)

        bottleneck = self.middle_model(feature_block)

        # Consume the skips deepest-first on the way back up.
        feature_x.reverse()

        recover = bottleneck
        d = 0
        for block in self.right_model:
            if isinstance(block, Upsample):
                recover = block(recover)
                recover = torch.cat([recover, feature_x[d]], 1)
                d += 1
            else:
                recover = block(recover)

        recover = self.image_rescontruction(recover)
        recover = self.add_mean(recover) / 255
        return recover
pretrained/SRUNET_scale_x2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5868beb1c314915e26a20aa21d6a3b4583c1bd30f9a761998e5bce1deea67f40
3
+ size 56710930
pretrained/SRUNET_scale_x234.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ced8f065681ddf24272fbfce70ae55b0c3c3afe601bdeaf6012ed01f4ddb907
3
+ size 56711450
pretrained/SRUNET_scale_x3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cb821b0b33d54dc092b18b7a03fbcf889213c02230a70459bd6d2b2d5877acb
3
+ size 56710930
pretrained/SRUNET_scale_x4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aee39dc4142754c6f883e417cda5e68c0a56de0bfe81d20619496bbdd44fc6ae
3
+ size 56710930
requirement.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch
+ torchvision
+ torchaudio
+ streamlit
+ streamlit-image-comparison
+ Pillow
+ requests
ui/enhancer_ui.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
def ui():
    """Render the Enhancer tab. Not implemented yet — draws nothing."""
    return None
ui/upscaler_ui.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import requests
4
+ from io import BytesIO
5
+ from streamlit_image_comparison import image_comparison
6
+ from utils import upscale_image
7
+
8
+
9
def ui():
    """Render the Upscaler tab.

    Layout: a row of three selectboxes (image source, model, scale factor),
    an input widget for the chosen source, then — after Submit — a size
    check, a preview of the original, and a bicubic-vs-model comparison
    slider.
    """
    img = None
    url_text = None
    uploaded = None

    input_area = st.columns([2, 1, 1])

    with input_area[0]:
        source_choice = st.selectbox(
            "How do you want to provide the image?",
            ("Fetch from URL", "Upload from local machine")
        )

    with input_area[2]:
        scale_choice = st.selectbox(
            "Which factors do you want to upscale?",
            (2, 3, 4)
        )

    with input_area[1]:
        model_choice = st.selectbox(
            "Which model do you want to use?",
            ('SRUNET_x2', 'SRUNET_x3', 'SRUNET_x4', 'SRUNET_x234')
        )

    preview_area = st.columns([2, 2], vertical_alignment="top")
    with preview_area[0]:
        if source_choice == "Upload from local machine":
            uploaded = st.file_uploader(
                "Choose an image...", type=["jpg", "jpeg", "png"])
        elif source_choice == "Fetch from URL":
            url_text = st.text_input("Enter the image URL")

    if st.button("Submit"):
        if source_choice == "Upload from local machine" and uploaded is not None:
            try:
                img = Image.open(uploaded)
            except Exception as e:
                st.error(f"Error opening image: {e}")
        elif source_choice == "Fetch from URL" and url_text:
            try:
                response = requests.get(url_text)
                response.raise_for_status()
                img = Image.open(BytesIO(response.content))
            except requests.exceptions.RequestException as e:
                st.error(f"Error fetching image: {e}")

    # Reject images whose upscaled size would exceed the 1000x1000 cap.
    if img:
        w, h = img.size
        scale = int(scale_choice)
        if w * scale > 1000 or h * scale > 1000:
            st.error(
                "Unable to upscale. The size of upscaled image should be less than 1000x1000")
            img = None

    if img:
        st.header('Results')
        w, h = img.size
        scale = int(scale_choice)
        preview_area[1].text(
            f"Image size: {w}x{h} --> {w*scale}x{h*scale}")
        preview_area[1].image(
            img, caption="Original", use_column_width=True)

        # Baseline bicubic enlargement vs the model's prediction.
        bicubic = img.resize((w * scale, h * scale), Image.BICUBIC)
        upscaled = upscale_image(img, model_choice, scale)

        image_comparison(
            img1=bicubic,
            img2=upscaled,
        )
utils.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import torch.nn.functional as F
4
+ import torchvision.transforms.v2 as transforms
5
+ from models.base_model import UNET
6
+
7
def find_padding(img, depth=2**4):
    """Bottom/right padding needed to make H and W multiples of ``depth``.

    Args:
        img: 4-D tensor shaped (B, C, H, W).
        depth: alignment factor; 2**4 presumably matches the UNET's four
            2x downsampling stages — confirm against the model config.

    Returns:
        (h_pad, w_pad) — each zero when that dimension is already aligned.
    """
    _, _, height, width = img.shape
    # (-x) % d == (d - x % d) % d: distance up to the next multiple of d.
    return (-height) % depth, (-width) % depth
13
+
14
def get_pretrained_path(model_name):
    """Return the absolute path to the pretrained checkpoint for ``model_name``.

    Supported names: 'SRUNET_x2', 'SRUNET_x3', 'SRUNET_x4', 'SRUNET_x234'.

    Raises:
        ValueError: for an unknown model name. (Was a bare ``Exception``;
        ValueError subclasses Exception, so existing broad handlers still
        catch it, and the message now includes the offending name.)
    """
    checkpoints = {
        'SRUNET_x2': 'SRUNET_scale_x2.pt',
        'SRUNET_x3': 'SRUNET_scale_x3.pt',
        'SRUNET_x4': 'SRUNET_scale_x4.pt',
        'SRUNET_x234': 'SRUNET_scale_x234.pt',
    }
    if model_name not in checkpoints:
        raise ValueError(f'Model not found: {model_name!r}')
    # Normalize to forward slashes so the path works the same on Windows.
    current_path = os.path.dirname(os.path.abspath(__file__)).replace("\\", "/")
    return current_path + '/pretrained/' + checkpoints[model_name]
32
+
33
+
34
def upscale_image(img, model_name, scale_factor):
    """Upscale a PIL image by ``scale_factor`` using the named SRUNET model.

    The image is bicubic-resized to the target size first; the network then
    refines it at constant resolution (CPU only). The result is converted
    back to the input image's original mode.

    Args:
        img: PIL.Image of any mode (converted to RGB for the model).
        model_name: key accepted by ``get_pretrained_path``.
        scale_factor: integer upscaling factor.

    Returns:
        A PIL.Image of size (width * scale_factor, height * scale_factor).
    """
    width, height = img.size
    img_mode = img.mode
    if img.mode != "RGB":
        img = img.convert("RGB")

    out_h, out_w = height * scale_factor, width * scale_factor
    transform = transforms.Compose([
        transforms.Resize((out_h, out_w),
                          interpolation=transforms.InterpolationMode.BICUBIC),
        transforms.ToImage(),
        transforms.ToDtype(torch.float32, scale=True),
    ])

    # Load model.
    # TODO(review): the ~57 MB checkpoint is reloaded on every call —
    # consider caching the model per model_name.
    checkpoint = torch.load(get_pretrained_path(
        model_name), map_location=torch.device('cpu'))
    model = UNET()
    model.load_state_dict(checkpoint['best_model_state_dict'])
    model.eval()

    data = transform(img).clamp(0, 1).unsqueeze(0)
    # Reflect-pad bottom/right so H and W are multiples of the network's
    # downsampling factor (see find_padding).
    h_pad, w_pad = find_padding(data)
    data = F.pad(data, (0, w_pad, 0, h_pad), mode='reflect')

    with torch.no_grad():
        img_scale_pred = model(data).clamp(0, 1)

    # The network preserves spatial size, so cropping to the exact target
    # dimensions removes the padding in one step (replaces the previous
    # four-way h_pad/w_pad branching, which included a no-op else branch).
    img_scale_pred = img_scale_pred[..., :out_h, :out_w]

    img_scale_pred = img_scale_pred.squeeze(0)
    return transforms.ToPILImage()(img_scale_pred).convert(img_mode)
75
+