Spaces:

jiachenjiang
/

Cat-AIR

Paused

App Files Files Community

jiachen committed on Aug 6, 2024

Commit

0e9d4e8

1 Parent(s): 42558c6

promptxrestormer

Browse files

Files changed (9) hide show

__pycache__/app.cpython-38.pyc +0 -0
app.py +113 -4
ckpt/promptxrestormer_epoch=64-step=578630.ckpt +3 -0
flagged/log.csv +3 -0
net/__pycache__/prompt_xrestormer.cpython-38.pyc +0 -0
net/prompt_xrestormer.py +559 -0
output.png +0 -0
test.py +133 -0
test_images/rain-070.png +0 -0

__pycache__/app.cpython-38.pyc ADDED Viewed

Binary file (361 Bytes). View file

app.py CHANGED Viewed

@@ -1,7 +1,116 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

+import numpy as np
 import gradio as gr
+import numpy as np
+import torch
+from PIL import Image
+from torchvision.transforms import ToTensor
+from net.prompt_xrestormer import PromptXRestormer
+import lightning.pytorch as pl
+# crop an image to the multiple of base
+def crop_img(image, base=64):
+    h = image.shape[0]
+    w = image.shape[1]
+    crop_h = h % base
+    crop_w = w % base
+    return image[crop_h // 2:h - crop_h + crop_h // 2, crop_w // 2:w - crop_w + crop_w // 2, :]
+class PromptXRestormerIRModel(pl.LightningModule):
+    def __init__(self):
+        super().__init__()
+        self.net = PromptXRestormer(
+                    inp_channels=3,
+                    out_channels=3,
+                    dim = 48,
+                    num_blocks = [2,4,4,4],
+                    num_refinement_blocks = 4,
+                    channel_heads= [1,1,1,1],
+                    spatial_heads= [1,2,4,8],
+                    overlap_ratio= [0.5, 0.5, 0.5, 0.5],
+                    ffn_expansion_factor = 2.66,
+                    bias = False,
+                    LayerNorm_type = 'WithBias',   ## Other option 'BiasFree'
+                    dual_pixel_task = False,        ## True for dual-pixel defocus deblurring only. Also set inp_channels=6
+                    scale = 1,prompt = True
+                    )
+    def forward(self,x):
+        return self.net(x)
+def np_to_pil(img_np):
+    """
+    Converts image in np.array format to PIL image.
+    From C x W x H [0..1] to  W x H x C [0...255]
+    :param img_np:
+    :return:
+    """
+    ar = np.clip(img_np * 255, 0, 255).astype(np.uint8)
+    if img_np.shape[0] == 1:
+        ar = ar[0]
+    else:
+        assert img_np.shape[0] == 3, img_np.shape
+        ar = ar.transpose(1, 2, 0)
+    return Image.fromarray(ar)
+def torch_to_np(img_var):
+    """
+    Converts an image in torch.Tensor format to np.array.
+    From 1 x C x W x H [0..1] to  C x W x H [0..1]
+    :param img_var:
+    :return:
+    """
+    return img_var.detach().cpu().numpy()[0]
+def restore_image(input_img):
+    np.random.seed(0)
+    torch.manual_seed(0)
+    torch.cuda.set_device(0)
+    ckpt_path = "/home/jiachen/MyGradio/ckpt/promptxrestormer_epoch=64-step=578630.ckpt"
+    print("CKPT name : {}".format(ckpt_path))
+    net  = PromptXRestormerIRModel().load_from_checkpoint(ckpt_path).cuda()
+    net.eval()
+    #degraded_path = "/home/jiachen/MyGradio/test_images/rain-070.png"
+    degraded_img = crop_img(input_img.convert('RGB'), base=16)
+    toTensor = ToTensor()
+    degraded_img = toTensor(degraded_img)
+    print(degraded_img.shape)
+    with torch.no_grad():
+        degraded_img = degraded_img.unsqueeze(0).cuda()
+        _, _, H_old, W_old = degraded_img.shape
+        h_pad = (H_old // 64 + 1) * 64 - H_old
+        w_pad = (W_old // 64 + 1) * 64 - W_old
+        degrad_img = torch.cat([degraded_img, torch.flip(degraded_img, [2])], 2)[:,:,:H_old+h_pad,:]
+        degrad_img = torch.cat([degraded_img, torch.flip(degraded_img, [3])], 3)[:,:,:,:W_old+w_pad]
+        print(degrad_img.shape)
+        restored = net(degrad_img)
+        restored = restored[:,:,:H_old:,:W_old]
+        restored_image = torch_to_np(restored)
+    return restored_image
+demo = gr.Interface(restore_image, gr.Image(), "image")
+demo.launch()

ckpt/promptxrestormer_epoch=64-step=578630.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31eeeab21dace516dec55e5d51e97f4ce30c0fcce86ce36b729ca480175e23c7
+size 424348801

flagged/log.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+name,output,flag,username,timestamp
+jiachen  fdsf,Hello jiachen  fdsf!!,,,2024-08-06 11:38:35.977480
+jiachen  fdsf,Hello jiachen  fdsf!!,,,2024-08-06 11:38:38.296024

net/__pycache__/prompt_xrestormer.cpython-38.pyc ADDED Viewed

Binary file (19.1 kB). View file

net/prompt_xrestormer.py ADDED Viewed

	@@ -0,0 +1,559 @@

+import torch
+import torch.nn as nn
+from torch import einsum
+import torch.nn.functional as F
+from pdb import set_trace as stx
+import numbers
+from einops import rearrange
+import math
+def to(x):
+    return {'device': x.device, 'dtype': x.dtype}
+def pair(x):
+    return (x, x) if not isinstance(x, tuple) else x
+def expand_dim(t, dim, k):
+    t = t.unsqueeze(dim = dim)
+    expand_shape = [-1] * len(t.shape)
+    expand_shape[dim] = k
+    return t.expand(*expand_shape)
+def rel_to_abs(x):
+    # Re-index the last axis of a (b, l, m) tensor with a pad-and-reshape shift.
+    # The result has shape (b, l, r) with r = (m + 1) // 2.
+    b, l, m = x.shape
+    r = (m + 1) // 2
+    col_pad = torch.zeros((b, l, 1), **to(x))
+    x = torch.cat((x, col_pad), dim = 2)  # (b, l, m + 1)
+    flat_x = rearrange(x, 'b l c -> b (l c)')  # (b, l * (m + 1))
+    flat_pad = torch.zeros((b, m - l), **to(x))
+    flat_x_padded = torch.cat((flat_x, flat_pad), dim = 1)  # (b, (l + 1) * m)
+    # Rows are now m wide instead of m + 1, so each row is offset by one more
+    # element than the row before it.
+    final_x = flat_x_padded.reshape(b, l + 1, m)
+    final_x = final_x[:, :l, -r:]  # drop the extra row, keep the last r columns
+    return final_x
+def relative_logits_1d(q, rel_k):
+    # q is unpacked as a 4-D tensor (b, h, w, d); rel_k is a 2-D table whose
+    # second axis is contracted against d.
+    b, h, w, _ = q.shape
+    r = (rel_k.shape[0] + 1) // 2
+    logits = einsum('b x y d, r d -> b x y r', q, rel_k)  # every query against every table row
+    logits = rearrange(logits, 'b x y r -> (b x) y r')
+    logits = rel_to_abs(logits)
+    logits = logits.reshape(b, h, w, r)
+    logits = expand_dim(logits, dim = 2, k = r)  # broadcast axis of size r inserted at position 2
+    return logits
+class RelPosEmb(nn.Module):
+    # Learned relative-position term: two tables (one per spatial axis) are
+    # turned into logits by relative_logits_1d and summed.
+    def __init__(
+        self,
+        block_size,
+        rel_size,
+        dim_head
+    ):
+        super().__init__()
+        height = width = rel_size
+        scale = dim_head ** -0.5
+        self.block_size = block_size
+        # One row per relative offset: 2 * rel_size - 1 rows of dim_head values.
+        self.rel_height = nn.Parameter(torch.randn(height * 2 - 1, dim_head) * scale)
+        self.rel_width = nn.Parameter(torch.randn(width * 2 - 1, dim_head) * scale)
+    def forward(self, q):
+        block = self.block_size
+        # Split the flattened token axis into a grid with `block` rows.
+        q = rearrange(q, 'b (x y) c -> b x y c', x = block)
+        rel_logits_w = relative_logits_1d(q, self.rel_width)
+        rel_logits_w = rearrange(rel_logits_w, 'b x i y j-> b (x y) (i j)')
+        # Swap the two grid axes and repeat the computation with the height table.
+        q = rearrange(q, 'b x y d -> b y x d')
+        rel_logits_h = relative_logits_1d(q, self.rel_height)
+        rel_logits_h = rearrange(rel_logits_h, 'b x i y j -> b (y x) (j i)')
+        return rel_logits_w + rel_logits_h
+##########################################################################
+## Layer Norm
+def to_3d(x):
+    return rearrange(x, 'b c h w -> b (h w) c')
+def to_4d(x,h,w):
+    return rearrange(x, 'b (h w) c -> b c h w',h=h,w=w)
+class BiasFree_LayerNorm(nn.Module):
+    def __init__(self, normalized_shape):
+        super(BiasFree_LayerNorm, self).__init__()
+        if isinstance(normalized_shape, numbers.Integral):
+            normalized_shape = (normalized_shape,)
+        normalized_shape = torch.Size(normalized_shape)
+        assert len(normalized_shape) == 1
+        self.weight = nn.Parameter(torch.ones(normalized_shape))
+        self.normalized_shape = normalized_shape
+    def forward(self, x):
+        sigma = x.var(-1, keepdim=True, unbiased=False)
+        return x / torch.sqrt(sigma+1e-5) * self.weight
+class WithBias_LayerNorm(nn.Module):
+    def __init__(self, normalized_shape):
+        super(WithBias_LayerNorm, self).__init__()
+        if isinstance(normalized_shape, numbers.Integral):
+            normalized_shape = (normalized_shape,)
+        normalized_shape = torch.Size(normalized_shape)
+        assert len(normalized_shape) == 1
+        self.weight = nn.Parameter(torch.ones(normalized_shape))
+        self.bias = nn.Parameter(torch.zeros(normalized_shape))
+        self.normalized_shape = normalized_shape
+    def forward(self, x):
+        mu = x.mean(-1, keepdim=True)
+        sigma = x.var(-1, keepdim=True, unbiased=False)
+        return (x - mu) / torch.sqrt(sigma+1e-5) * self.weight + self.bias
+class LayerNorm(nn.Module):
+    def __init__(self, dim, LayerNorm_type):
+        super(LayerNorm, self).__init__()
+        if LayerNorm_type =='BiasFree':
+            self.body = BiasFree_LayerNorm(dim)
+        else:
+            self.body = WithBias_LayerNorm(dim)
+    def forward(self, x):
+        h, w = x.shape[-2:]
+        return to_4d(self.body(to_3d(x)), h, w)
+##########################################################################
+## Gated-Dconv Feed-Forward Network (GDFN)
+class FeedForward(nn.Module):
+    def __init__(self, dim, ffn_expansion_factor, bias):
+        super(FeedForward, self).__init__()
+        hidden_features = int(dim*ffn_expansion_factor)
+        self.project_in = nn.Conv2d(dim, hidden_features*2, kernel_size=1, bias=bias)
+        self.dwconv = nn.Conv2d(hidden_features*2, hidden_features*2, kernel_size=3, stride=1, padding=1, groups=hidden_features*2, bias=bias)
+        self.project_out = nn.Conv2d(hidden_features, dim, kernel_size=1, bias=bias)
+    def forward(self, x):
+        x = self.project_in(x)
+        x1, x2 = self.dwconv(x).chunk(2, dim=1)
+        x = F.gelu(x1) * x2
+        x = self.project_out(x)
+        return x
+##########################################################################
+## Multi-DConv Head Transposed Self-Attention (MDTA)
+class ChannelAttention(nn.Module):
+    def __init__(self, dim, num_heads, bias):
+        super(ChannelAttention, self).__init__()
+        self.num_heads = num_heads
+        self.temperature = nn.Parameter(torch.ones(num_heads, 1, 1))
+        self.qkv = nn.Conv2d(dim, dim*3, kernel_size=1, bias=bias)
+        self.qkv_dwconv = nn.Conv2d(dim*3, dim*3, kernel_size=3, stride=1, padding=1, groups=dim*3, bias=bias)
+        self.project_out = nn.Conv2d(dim, dim, kernel_size=1, bias=bias)
+    def forward(self, x):
+        b,c,h,w = x.shape
+        qkv = self.qkv_dwconv(self.qkv(x))
+        q,k,v = qkv.chunk(3, dim=1)
+        q = rearrange(q, 'b (head c) h w -> b head c (h w)', head=self.num_heads)
+        k = rearrange(k, 'b (head c) h w -> b head c (h w)', head=self.num_heads)
+        v = rearrange(v, 'b (head c) h w -> b head c (h w)', head=self.num_heads)
+        q = torch.nn.functional.normalize(q, dim=-1)
+        k = torch.nn.functional.normalize(k, dim=-1)
+        attn = (q @ k.transpose(-2, -1)) * self.temperature
+        attn = attn.softmax(dim=-1)
+        out = (attn @ v)
+        out = rearrange(out, 'b head c (h w) -> b (head c) h w', head=self.num_heads, h=h, w=w)
+        out = self.project_out(out)
+        return out
+##########################################################################
+## Overlapping Cross-Attention (OCA)
+class OCAB(nn.Module):
+    # Windowed spatial attention: queries come from non-overlapping
+    # window_size x window_size windows, keys/values from larger windows
+    # extracted with nn.Unfold at the same stride.
+    def __init__(self, dim, window_size, overlap_ratio, num_heads, dim_head, bias):
+        super(OCAB, self).__init__()
+        self.num_spatial_heads = num_heads
+        self.dim = dim
+        self.window_size = window_size
+        # Key/value window = query window enlarged by int(window_size * overlap_ratio).
+        self.overlap_win_size = int(window_size * overlap_ratio) + window_size
+        self.dim_head = dim_head
+        self.inner_dim = self.dim_head * self.num_spatial_heads
+        self.scale = self.dim_head**-0.5
+        # Stride equals window_size; padding is half of the enlargement.
+        self.unfold = nn.Unfold(kernel_size=(self.overlap_win_size, self.overlap_win_size), stride=window_size, padding=(self.overlap_win_size-window_size)//2)
+        self.qkv = nn.Conv2d(self.dim, self.inner_dim*3, kernel_size=1, bias=bias)
+        self.project_out = nn.Conv2d(self.inner_dim, dim, kernel_size=1, bias=bias)
+        self.rel_pos_emb = RelPosEmb(
+            block_size = window_size,
+            rel_size = window_size + (self.overlap_win_size - window_size),
+            dim_head = self.dim_head
+        )
+    def forward(self, x):
+        # NOTE(review): the rearrange patterns below split h and w by
+        # window_size, so both are presumably required to be multiples of it.
+        b, c, h, w = x.shape
+        qkv = self.qkv(x)
+        qs, ks, vs = qkv.chunk(3, dim=1)
+        # spatial attention
+        # Queries: one row of window_size**2 tokens per window.
+        qs = rearrange(qs, 'b c (h p1) (w p2) -> (b h w) (p1 p2) c', p1 = self.window_size, p2 = self.window_size)
+        # Keys/values: one enlarged patch per unfold position.
+        ks, vs = map(lambda t: self.unfold(t), (ks, vs))
+        ks, vs = map(lambda t: rearrange(t, 'b (c j) i -> (b i) j c', c = self.inner_dim), (ks, vs))
+        # print(f'qs.shape:{qs.shape}, ks.shape:{ks.shape}, vs.shape:{vs.shape}')
+        #split heads
+        qs, ks, vs = map(lambda t: rearrange(t, 'b n (head c) -> (b head) n c', head = self.num_spatial_heads), (qs, ks, vs))
+        # attention
+        #print(f'qs.shape:{qs.shape}, ks.shape:{ks.shape}, vs.shape:{vs.shape}')
+        qs = qs * self.scale
+        spatial_attn = (qs @ ks.transpose(-2, -1))
+        # The relative-position term is computed from the already scaled queries.
+        spatial_attn += self.rel_pos_emb(qs)
+        spatial_attn = spatial_attn.softmax(dim=-1)
+        out = (spatial_attn @ vs)
+        # Undo the window/head flattening to get back a (b, inner_dim, h, w) map.
+        out = rearrange(out, '(b h w head) (p1 p2) c -> b (head c) (h p1) (w p2)', head = self.num_spatial_heads, h = h // self.window_size, w = w // self.window_size, p1 = self.window_size, p2 = self.window_size)
+        # merge spatial and channel
+        out = self.project_out(out)
+        return out
+##########################################################################
+class TransformerBlock(nn.Module):
+    def __init__(self, dim, window_size, overlap_ratio, num_channel_heads, num_spatial_heads, spatial_dim_head, ffn_expansion_factor, bias, LayerNorm_type):
+        super(TransformerBlock, self).__init__()
+        self.spatial_attn = OCAB(dim, window_size, overlap_ratio, num_spatial_heads, spatial_dim_head, bias)
+        self.channel_attn = ChannelAttention(dim, num_channel_heads, bias)
+        self.norm1 = LayerNorm(dim, LayerNorm_type)
+        self.norm2 = LayerNorm(dim, LayerNorm_type)
+        self.norm3 = LayerNorm(dim, LayerNorm_type)
+        self.norm4 = LayerNorm(dim, LayerNorm_type)
+        self.channel_ffn = FeedForward(dim, ffn_expansion_factor, bias)
+        self.spatial_ffn = FeedForward(dim, ffn_expansion_factor, bias)
+    def forward(self, x):
+        x = x + self.channel_attn(self.norm1(x))
+        x = x + self.channel_ffn(self.norm2(x))
+        x = x + self.spatial_attn(self.norm3(x))
+        x = x + self.spatial_ffn(self.norm4(x))
+        return x
+##########################################################################
+class ChannelTransformerBlock(nn.Module):
+    def __init__(self, dim, num_channel_heads, ffn_expansion_factor, bias, LayerNorm_type):
+        super(ChannelTransformerBlock, self).__init__()
+        self.channel_attn = ChannelAttention(dim, num_channel_heads, bias)
+        self.norm1 = LayerNorm(dim, LayerNorm_type)
+        self.norm2 = LayerNorm(dim, LayerNorm_type)
+        self.channel_ffn = FeedForward(dim, ffn_expansion_factor, bias)
+    def forward(self, x):
+        x = x + self.channel_attn(self.norm1(x))
+        x = x + self.channel_ffn(self.norm2(x))
+        return x
+##########################################################################
+## Overlapped image patch embedding with 3x3 Conv
+class OverlapPatchEmbed(nn.Module):
+    def __init__(self, in_c=3, embed_dim=48, bias=False):
+        super(OverlapPatchEmbed, self).__init__()
+        self.proj = nn.Conv2d(in_c, embed_dim, kernel_size=3, stride=1, padding=1, bias=bias)
+    def forward(self, x):
+        x = self.proj(x)
+        return x
+##########################################################################
+## Resizing modules
+class Downsample(nn.Module):
+    def __init__(self, n_feat):
+        super(Downsample, self).__init__()
+        self.body = nn.Sequential(nn.Conv2d(n_feat, n_feat//2, kernel_size=3, stride=1, padding=1, bias=False),
+                                  nn.PixelUnshuffle(2))
+    def forward(self, x):
+        return self.body(x)
+class Upsample(nn.Module):
+    def __init__(self, n_feat):
+        super(Upsample, self).__init__()
+        self.body = nn.Sequential(nn.Conv2d(n_feat, n_feat*2, kernel_size=3, stride=1, padding=1, bias=False),
+                                  nn.PixelShuffle(2))
+    def forward(self, x):
+        return self.body(x)
+class SR_Upsample(nn.Sequential):
+    """SR_Upsample module.
+    Args:
+        scale (int): Scale factor. Supported scales: 2^n and 3.
+        num_feat (int): Channel number of features.
+    """
+    def __init__(self, scale, num_feat):
+        m = []
+        if (scale & (scale - 1)) == 0:  # scale = 2^n
+            for _ in range(int(math.log(scale, 2))):
+                m.append(nn.Conv2d(num_feat, 4 * num_feat, kernel_size = 3, stride = 1, padding = 1))
+                m.append(nn.PixelShuffle(2))
+        elif scale == 3:
+            m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1))
+            m.append(nn.PixelShuffle(3))
+        else:
+            raise ValueError(f'scale {scale} is not supported. ' 'Supported scales: 2^n and 3.')
+        super(SR_Upsample, self).__init__(*m)
+##---------- Prompt Module -----------------------
+class PromptBlock(nn.Module):
+    def __init__(self,  window_size, overlap_ratio, num_channel_heads, num_spatial_heads,
+                 spatial_dim_head, ffn_expansion_factor, bias, LayerNorm_type,
+                 prompt_dim=128,prompt_len=5,prompt_size = 96,lin_dim = 192,
+                ):
+        super(PromptBlock,self).__init__()
+        # prompt generation
+        self.prompt_param = nn.Parameter(torch.rand(1,prompt_len,prompt_dim,prompt_size,prompt_size))
+        self.linear_layer = nn.Linear(lin_dim,prompt_len)
+        self.conv3x3 = nn.Conv2d(prompt_dim,prompt_dim,kernel_size=3,stride=1,padding=1,bias=False)
+        # prompt interaction
+        self.attn = ChannelTransformerBlock(dim=lin_dim + prompt_dim, window_size = window_size,
+                                     overlap_ratio=overlap_ratio,  num_channel_heads=num_channel_heads,
+                                     num_spatial_heads=num_spatial_heads, spatial_dim_head = spatial_dim_head,
+                                     ffn_expansion_factor=ffn_expansion_factor, bias=bias,
+                                     LayerNorm_type=LayerNorm_type)
+        self.conv = nn.Conv2d(prompt_dim+lin_dim,lin_dim,kernel_size=3,stride=1,padding=1,bias=False)
+    def forward(self,x):
+        # input x shape is [B, HW, C]
+        B, C, H, W = x.shape
+        # prompt generation
+        emb = x.mean(dim=(-2,-1))
+        prompt_weights = F.softmax(self.linear_layer(emb),dim=1)
+        prompt = prompt_weights.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1) * self.prompt_param.unsqueeze(0).repeat(B,1,1,1,1,1).squeeze(1)
+        prompt = torch.sum(prompt,dim=1)
+        prompt = F.interpolate(prompt,(H,W),mode="bilinear", align_corners=True)
+        prompt = self.conv3x3(prompt)
+        # x shape [B, C + C_p, H, W]
+        x = torch.cat([x, prompt], 1)
+        x = self.attn(x)
+        x = self.conv(x)
+        return x
+##---------- Prompt Gen Module -----------------------
+class PromptGenBlock(nn.Module):
+    def __init__(self,prompt_dim=128,prompt_len=5,prompt_size = 96,lin_dim = 192):
+        super(PromptGenBlock,self).__init__()
+        self.prompt_param = nn.Parameter(torch.rand(1,prompt_len,prompt_dim,prompt_size,prompt_size))
+        self.linear_layer = nn.Linear(lin_dim,prompt_len)
+        self.conv3x3 = nn.Conv2d(prompt_dim,prompt_dim,kernel_size=3,stride=1,padding=1,bias=False)
+    def forward(self,x):
+        B,C,H,W = x.shape
+        emb = x.mean(dim=(-2,-1))
+        prompt_weights = F.softmax(self.linear_layer(emb),dim=1)
+        prompt = prompt_weights.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1) * self.prompt_param.unsqueeze(0).repeat(B,1,1,1,1,1).squeeze(1)
+        prompt = torch.sum(prompt,dim=1)
+        prompt = F.interpolate(prompt,(H,W),mode="bilinear")
+        prompt = self.conv3x3(prompt)
+        return prompt
+##########################################################################
+class PromptXRestormer(nn.Module):
+    # Four-level encoder/decoder built from TransformerBlocks, with optional
+    # prompt blocks (PromptGenBlock + ChannelTransformerBlock + 1x1 conv)
+    # inserted before each decoder upsampling step.
+    # `dual_pixel_task` is accepted but not referenced anywhere in this class.
+    def __init__(self,
+        inp_channels=3,
+        out_channels=3,
+        dim = 48,
+        num_blocks = [4,6,6,8],
+        num_refinement_blocks = 4,
+        channel_heads = [1,2,4,8],
+        spatial_heads = [2,2,3,4],
+        overlap_ratio=[0.5, 0.5, 0.5, 0.5],
+        window_size = 8,
+        spatial_dim_head = 16,
+        bias = False,
+        ffn_expansion_factor = 2.66,
+        LayerNorm_type = 'WithBias',   ## Other option 'BiasFree'
+        dual_pixel_task = False,        ## True for dual-pixel defocus deblurring only. Also set inp_channels=6
+        scale = 1,
+        prompt = True
+    ):
+        super(PromptXRestormer, self).__init__()
+        print("Initializing XRestormer")
+        self.scale = scale
+        self.patch_embed = OverlapPatchEmbed(inp_channels, dim)
+        self.encoder_level1 = nn.Sequential(*[TransformerBlock(dim=dim, window_size = window_size, overlap_ratio=overlap_ratio[0],  num_channel_heads=channel_heads[0], num_spatial_heads=spatial_heads[0], spatial_dim_head = spatial_dim_head, ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[0])])
+        self.down1_2 = Downsample(dim) ## From Level 1 to Level 2
+        self.encoder_level2 = nn.Sequential(*[TransformerBlock(dim=int(dim*2**1), window_size = window_size, overlap_ratio=overlap_ratio[1],  num_channel_heads=channel_heads[1], num_spatial_heads=spatial_heads[1], spatial_dim_head = spatial_dim_head, ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[1])])
+        self.down2_3 = Downsample(int(dim*2**1)) ## From Level 2 to Level 3
+        self.encoder_level3 = nn.Sequential(*[TransformerBlock(dim=int(dim*2**2), window_size = window_size, overlap_ratio=overlap_ratio[2],  num_channel_heads=channel_heads[2], num_spatial_heads=spatial_heads[2], spatial_dim_head = spatial_dim_head, ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[2])])
+        self.down3_4 = Downsample(int(dim*2**2)) ## From Level 3 to Level 4
+        self.latent = nn.Sequential(*[TransformerBlock(dim=int(dim*2**3), window_size = window_size, overlap_ratio=overlap_ratio[3],  num_channel_heads=channel_heads[3], num_spatial_heads=spatial_heads[3], spatial_dim_head = spatial_dim_head, ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[3])])
+        # NOTE(review): up4_3 takes int(dim*2**2) input channels. That matches the
+        # output of reduce_noise_level3 (prompt path); with prompt=False the latent
+        # still has int(dim*2**3) channels - confirm that path is ever used.
+        self.up4_3 = Upsample(int(dim*2**2)) ## From Level 4 to Level 3
+        # The literal 192 (and 64 / 224 / 512 plus the PromptGenBlock sizes below)
+        # line up with the other layers when dim = 48.
+        self.reduce_chan_level3 = nn.Conv2d(int(dim*2**1) + 192, int(dim*2**2), kernel_size=1, bias=bias)
+        self.decoder_level3 = nn.Sequential(*[TransformerBlock(dim=int(dim*2**2), window_size = window_size, overlap_ratio=overlap_ratio[2],  num_channel_heads=channel_heads[2], num_spatial_heads=spatial_heads[2], spatial_dim_head = spatial_dim_head, ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[2])])
+        self.up3_2 = Upsample(int(dim*2**2)) ## From Level 3 to Level 2
+        self.reduce_chan_level2 = nn.Conv2d(int(dim*2**2), int(dim*2**1), kernel_size=1, bias=bias)
+        self.decoder_level2 = nn.Sequential(*[TransformerBlock(dim=int(dim*2**1), window_size = window_size, overlap_ratio=overlap_ratio[1],  num_channel_heads=channel_heads[1], num_spatial_heads=spatial_heads[1], spatial_dim_head = spatial_dim_head, ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[1])])
+        self.up2_1 = Upsample(int(dim*2**1))  ## From Level 2 to Level 1  (NO 1x1 conv to reduce channels)
+        self.decoder_level1 = nn.Sequential(*[TransformerBlock(dim=int(dim*2**1), window_size = window_size, overlap_ratio=overlap_ratio[0],  num_channel_heads=channel_heads[0], num_spatial_heads=spatial_heads[0], spatial_dim_head = spatial_dim_head, ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[0])])
+        self.refinement = nn.Sequential(*[TransformerBlock(dim=int(dim*2**1), window_size = window_size, overlap_ratio=overlap_ratio[0],  num_channel_heads=channel_heads[0], num_spatial_heads=spatial_heads[0], spatial_dim_head = spatial_dim_head, ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_refinement_blocks)])
+        self.output = nn.Conv2d(int(dim*2**1), out_channels, kernel_size=3, stride=1, padding=1, bias=bias)
+        self.prompt = prompt
+        if prompt:
+            # promptN generates the prompt, noise_levelN mixes features + prompt,
+            # reduce_noise_levelN is a 1x1 conv back to the decoder width.
+            self.prompt1 = PromptGenBlock(prompt_dim=64,prompt_len=5,prompt_size = 64,lin_dim = 96)
+            self.prompt2 = PromptGenBlock(prompt_dim=128,prompt_len=5,prompt_size = 32,lin_dim = 192)
+            self.prompt3 = PromptGenBlock(prompt_dim=320,prompt_len=5,prompt_size = 16,lin_dim = 384)
+            self.noise_level1 = ChannelTransformerBlock(dim=int(dim*2**1)+64, num_channel_heads = 1, ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type)
+            self.reduce_noise_level1 = nn.Conv2d(int(dim*2**1)+64,int(dim*2**1),kernel_size=1,bias=bias)
+            self.noise_level2 = ChannelTransformerBlock(dim=int(dim*2**1) + 224,  num_channel_heads = 1, ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type)
+            self.reduce_noise_level2 = nn.Conv2d(int(dim*2**1)+224,int(dim*2**2),kernel_size=1,bias=bias)
+            self.noise_level3 = ChannelTransformerBlock(dim=int(dim*2**2) + 512,  num_channel_heads = 1, ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type)
+            self.reduce_noise_level3 = nn.Conv2d(int(dim*2**2)+512,int(dim*2**2),kernel_size=1,bias=bias)
+    def forward(self, inp_img):
+        # Optional bilinear upsampling of the input when scale > 1.
+        if self.scale > 1:
+            inp_img = F.interpolate(inp_img, scale_factor=self.scale, mode='bilinear', align_corners=False)
+        # Encoder: three Downsample steps, each halving the spatial size.
+        inp_enc_level1 = self.patch_embed(inp_img)
+        out_enc_level1 = self.encoder_level1(inp_enc_level1)
+        inp_enc_level2 = self.down1_2(out_enc_level1)
+        out_enc_level2 = self.encoder_level2(inp_enc_level2)
+        inp_enc_level3 = self.down2_3(out_enc_level2)
+        out_enc_level3 = self.encoder_level3(inp_enc_level3)
+        inp_enc_level4 = self.down3_4(out_enc_level3)
+        latent = self.latent(inp_enc_level4)
+        #print(latent.shape)
+        if self.prompt:
+            # Concatenate the generated prompt, mix, then reduce channels.
+            dec3_param = self.prompt3(latent)
+            latent = torch.cat([latent, dec3_param], 1)
+            latent = self.noise_level3(latent)
+            latent = self.reduce_noise_level3(latent)
+        #print(latent.shape)
+        # Decoder level 3: upsample, concatenate the encoder skip, reduce channels.
+        inp_dec_level3 = self.up4_3(latent)
+        inp_dec_level3 = torch.cat([inp_dec_level3, out_enc_level3], 1)
+        inp_dec_level3 = self.reduce_chan_level3(inp_dec_level3)
+        out_dec_level3 = self.decoder_level3(inp_dec_level3)
+        if self.prompt:
+            dec2_param = self.prompt2(out_dec_level3)
+            out_dec_level3 = torch.cat([out_dec_level3, dec2_param], 1)
+            out_dec_level3 = self.noise_level2(out_dec_level3)
+            out_dec_level3 = self.reduce_noise_level2(out_dec_level3)
+        inp_dec_level2 = self.up3_2(out_dec_level3)
+        inp_dec_level2 = torch.cat([inp_dec_level2, out_enc_level2], 1)
+        inp_dec_level2 = self.reduce_chan_level2(inp_dec_level2)
+        out_dec_level2 = self.decoder_level2(inp_dec_level2)
+        if self.prompt:
+            dec1_param = self.prompt1(out_dec_level2)
+            out_dec_level2 = torch.cat([out_dec_level2, dec1_param], 1)
+            out_dec_level2 = self.noise_level1(out_dec_level2)
+            out_dec_level2 = self.reduce_noise_level1(out_dec_level2)
+        # Level 1 keeps the concatenated channels (no 1x1 reduction).
+        inp_dec_level1 = self.up2_1(out_dec_level2)
+        inp_dec_level1 = torch.cat([inp_dec_level1, out_enc_level1], 1)
+        out_dec_level1 = self.decoder_level1(inp_dec_level1)
+        out_dec_level1 = self.refinement(out_dec_level1)
+        # Global residual: the network output is added to the input image.
+        out_dec_level1 = self.output(out_dec_level1) + inp_img
+        return out_dec_level1
+if __name__ == "__main__":
+    model = PromptXRestormer(
+        inp_channels=3,
+        out_channels=3,
+        dim = 48,
+        num_blocks = [2,4,4,4],
+        num_refinement_blocks = 4,
+        channel_heads= [1,1,1,1],
+        spatial_heads= [1,2,4,8],
+        overlap_ratio= [0.5, 0.5, 0.5, 0.5],
+        ffn_expansion_factor = 2.66,
+        bias = False,
+        LayerNorm_type = 'WithBias',   ## Other option 'BiasFree'
+        dual_pixel_task = False,        ## True for dual-pixel defocus deblurring only. Also set inp_channels=6
+        scale = 1,prompt = True
+        )
+    # torchstat
+    x = torch.randn(1, 3, 320, 512)
+    y = model(x)
+    print(y.shape)
+    # print('# model_restoration parameters: %.2f M'%(sum(param.numel() for param in model.parameters())/ 1e6))
+    # # stat(model, (3, 512, 512))
+    # from fvcore.nn import FlopCountAnalysis, flop_count_table
+    # input = torch.randn(1,3,64,64)
+    # flops = FlopCountAnalysis(model, input)
+    # print(flop_count_table(flops))
+    # print(flops.total()/1e9)

output.png ADDED Viewed

test.py ADDED Viewed

	@@ -0,0 +1,133 @@

+import argparse
+import subprocess
+from tqdm import tqdm
+import numpy as np
+import torch
+from torch.utils.data import DataLoader
+import os
+import torch.nn as nn
+# from utils.dataset_utils import DenoiseTestDataset, DerainDehazeDataset
+# from utils.val_utils import AverageMeter, compute_psnr_ssim
+# from utils.image_io import save_image_tensor
+from PIL import Image
+from torchvision.transforms import ToTensor
+import lightning.pytorch as pl
+import torch.nn.functional as F
+from net.prompt_xrestormer import PromptXRestormer
+import json
+# crop an image to the multiple of base
+def crop_img(image, base=64):
+    h = image.shape[0]
+    w = image.shape[1]
+    crop_h = h % base
+    crop_w = w % base
+    return image[crop_h // 2:h - crop_h + crop_h // 2, crop_w // 2:w - crop_w + crop_w // 2, :]
+class PromptXRestormerIRModel(pl.LightningModule):
+    def __init__(self):
+        super().__init__()
+        self.net = PromptXRestormer(
+                    inp_channels=3,
+                    out_channels=3,
+                    dim = 48,
+                    num_blocks = [2,4,4,4],
+                    num_refinement_blocks = 4,
+                    channel_heads= [1,1,1,1],
+                    spatial_heads= [1,2,4,8],
+                    overlap_ratio= [0.5, 0.5, 0.5, 0.5],
+                    ffn_expansion_factor = 2.66,
+                    bias = False,
+                    LayerNorm_type = 'WithBias',   ## Other option 'BiasFree'
+                    dual_pixel_task = False,        ## True for dual-pixel defocus deblurring only. Also set inp_channels=6
+                    scale = 1,prompt = True
+                    )
+        self.loss_fn  = nn.L1Loss()
+    def forward(self,x):
+        return self.net(x)
+def np_to_pil(img_np):
+    """
+    Converts image in np.array format to PIL image.
+    From C x W x H [0..1] to  W x H x C [0...255]
+    :param img_np:
+    :return:
+    """
+    ar = np.clip(img_np * 255, 0, 255).astype(np.uint8)
+    if img_np.shape[0] == 1:
+        ar = ar[0]
+    else:
+        assert img_np.shape[0] == 3, img_np.shape
+        ar = ar.transpose(1, 2, 0)
+    return Image.fromarray(ar)
+def torch_to_np(img_var):
+    """
+    Converts an image in torch.Tensor format to np.array.
+    From 1 x C x W x H [0..1] to  C x W x H [0..1]
+    :param img_var:
+    :return:
+    """
+    return img_var.detach().cpu().numpy()[0]
+def save_image_tensor(image_tensor, output_path="output/"):
+    image_np = torch_to_np(image_tensor)
+    # print(image_np.shape)
+    p = np_to_pil(image_np)
+    p.save(output_path)
+if __name__ == '__main__':
+    np.random.seed(0)
+    torch.manual_seed(0)
+    torch.cuda.set_device(0)
+    ckpt_path = "/home/jiachen/MyGradio/ckpt/promptxrestormer_epoch=64-step=578630.ckpt"
+    print("CKPT name : {}".format(ckpt_path))
+    net  = PromptXRestormerIRModel().load_from_checkpoint(ckpt_path).cuda()
+    net.eval()
+    degraded_path = "/home/jiachen/MyGradio/test_images/rain-070.png"
+    degraded_img = crop_img(np.array(Image.open(degraded_path).convert('RGB')), base=16)
+    toTensor = ToTensor()
+    degraded_img = toTensor(degraded_img)
+    print(degraded_img.shape)
+    with torch.no_grad():
+        degraded_img = degraded_img.unsqueeze(0).cuda()
+        _, _, H_old, W_old = degraded_img.shape
+        h_pad = (H_old // 64 + 1) * 64 - H_old
+        w_pad = (W_old // 64 + 1) * 64 - W_old
+        degrad_img = torch.cat([degraded_img, torch.flip(degraded_img, [2])], 2)[:,:,:H_old+h_pad,:]
+        degrad_img = torch.cat([degraded_img, torch.flip(degraded_img, [3])], 3)[:,:,:,:W_old+w_pad]
+        print(degrad_img.shape)
+        restored = net(degrad_img)
+        restored = restored[:,:,:H_old:,:W_old]
+        save_image_tensor(restored, "output.png")

test_images/rain-070.png ADDED Viewed