Upload 89 files

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- .gitattributes +9 -0
- RDNet-main/RDNet-main/.gitignore +1 -0
- RDNet-main/RDNet-main/README.md +99 -0
- RDNet-main/RDNet-main/VOC2012_224_train_png.txt +0 -0
- RDNet-main/RDNet-main/data/VOC2012_224_train_png.txt +0 -0
- RDNet-main/RDNet-main/data/__pycache__/dataset_sir.cpython-38.pyc +0 -0
- RDNet-main/RDNet-main/data/__pycache__/image_folder.cpython-38.pyc +0 -0
- RDNet-main/RDNet-main/data/__pycache__/torchdata.cpython-38.pyc +0 -0
- RDNet-main/RDNet-main/data/__pycache__/transforms.cpython-38.pyc +0 -0
- RDNet-main/RDNet-main/data/dataset_sir.py +332 -0
- RDNet-main/RDNet-main/data/image_folder.py +51 -0
- RDNet-main/RDNet-main/data/real_test.txt +20 -0
- RDNet-main/RDNet-main/data/torchdata.py +67 -0
- RDNet-main/RDNet-main/data/transforms.py +301 -0
- RDNet-main/RDNet-main/engine.py +178 -0
- RDNet-main/RDNet-main/figures/Input_car.jpg +0 -0
- RDNet-main/RDNet-main/figures/Input_class.png +3 -0
- RDNet-main/RDNet-main/figures/Input_green.png +3 -0
- RDNet-main/RDNet-main/figures/Ours_car.png +3 -0
- RDNet-main/RDNet-main/figures/Ours_class.png +3 -0
- RDNet-main/RDNet-main/figures/Ours_green.png +3 -0
- RDNet-main/RDNet-main/figures/Ours_white.png +3 -0
- RDNet-main/RDNet-main/figures/Title.png +0 -0
- RDNet-main/RDNet-main/figures/input_white.jpg +0 -0
- RDNet-main/RDNet-main/figures/net.png +3 -0
- RDNet-main/RDNet-main/figures/result.png +3 -0
- RDNet-main/RDNet-main/figures/vis.png +3 -0
- RDNet-main/RDNet-main/models/__init__.py +11 -0
- RDNet-main/RDNet-main/models/__pycache__/__init__.cpython-38.pyc +0 -0
- RDNet-main/RDNet-main/models/__pycache__/base_model.cpython-38.pyc +0 -0
- RDNet-main/RDNet-main/models/__pycache__/cls_model_eval_nocls_reg.cpython-38.pyc +0 -0
- RDNet-main/RDNet-main/models/__pycache__/losses.cpython-38.pyc +0 -0
- RDNet-main/RDNet-main/models/__pycache__/networks.cpython-38.pyc +0 -0
- RDNet-main/RDNet-main/models/__pycache__/vgg.cpython-38.pyc +0 -0
- RDNet-main/RDNet-main/models/__pycache__/vit_feature_extractor.cpython-38.pyc +0 -0
- RDNet-main/RDNet-main/models/arch/NAFNET.py +480 -0
- RDNet-main/RDNet-main/models/arch/RDnet_.py +202 -0
- RDNet-main/RDNet-main/models/arch/__pycache__/RDnet_.cpython-38.pyc +0 -0
- RDNet-main/RDNet-main/models/arch/__pycache__/classifier.cpython-38.pyc +0 -0
- RDNet-main/RDNet-main/models/arch/__pycache__/focalnet.cpython-38.pyc +0 -0
- RDNet-main/RDNet-main/models/arch/__pycache__/modules_sig.cpython-38.pyc +0 -0
- RDNet-main/RDNet-main/models/arch/__pycache__/reverse_function.cpython-38.pyc +0 -0
- RDNet-main/RDNet-main/models/arch/classifier.py +49 -0
- RDNet-main/RDNet-main/models/arch/decode.py +36 -0
- RDNet-main/RDNet-main/models/arch/focalnet.py +589 -0
- RDNet-main/RDNet-main/models/arch/modules_sig.py +304 -0
- RDNet-main/RDNet-main/models/arch/reverse_function.py +153 -0
- RDNet-main/RDNet-main/models/arch/vgg.py +90 -0
- RDNet-main/RDNet-main/models/base_model.py +71 -0
- RDNet-main/RDNet-main/models/cls_model_eval_nocls_reg.py +517 -0
.gitattributes
CHANGED

```
@@ -33,3 +33,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+RDNet-main/RDNet-main/figures/Input_class.png filter=lfs diff=lfs merge=lfs -text
+RDNet-main/RDNet-main/figures/Input_green.png filter=lfs diff=lfs merge=lfs -text
+RDNet-main/RDNet-main/figures/net.png filter=lfs diff=lfs merge=lfs -text
+RDNet-main/RDNet-main/figures/Ours_car.png filter=lfs diff=lfs merge=lfs -text
+RDNet-main/RDNet-main/figures/Ours_class.png filter=lfs diff=lfs merge=lfs -text
+RDNet-main/RDNet-main/figures/Ours_green.png filter=lfs diff=lfs merge=lfs -text
+RDNet-main/RDNet-main/figures/Ours_white.png filter=lfs diff=lfs merge=lfs -text
+RDNet-main/RDNet-main/figures/result.png filter=lfs diff=lfs merge=lfs -text
+RDNet-main/RDNet-main/figures/vis.png filter=lfs diff=lfs merge=lfs -text
```
RDNet-main/RDNet-main/.gitignore
ADDED

```
.DS_Store
```
RDNet-main/RDNet-main/README.md
ADDED

<p align="center">
<img src="https://github.com/lime-j/RDNet/blob/main/figures/Title.png?raw=true" width=95%>
</p>

# Reversible Decoupling Network for Single Image Reflection Removal

<div align="center">

[](https://paperswithcode.com/sota/reflection-removal-on-sir-2-objects?p=reversible-decoupling-network-for-single)
[](https://paperswithcode.com/sota/reflection-removal-on-sir-2-wild?p=reversible-decoupling-network-for-single)
[](https://paperswithcode.com/sota/reflection-removal-on-sir-2-postcard?p=reversible-decoupling-network-for-single)
[](https://paperswithcode.com/sota/reflection-removal-on-nature?p=reversible-decoupling-network-for-single)
[](https://paperswithcode.com/sota/reflection-removal-on-real20?p=reversible-decoupling-network-for-single)

</div>
<p align="center" style="font-size: larger;">
<a href="https://arxiv.org/abs/2410.08063">Reversible Decoupling Network for Single Image Reflection Removal</a>
</p>
<p align="center">
<a href="https://github.com/WHiTEWoLFJ">Hao Zhao</a> ⚔️,
<a href="https://github.com/lime-j">Mingjia Li</a> ⚔️,
<a href="https://github.com/mingcv">Qiming Hu</a>,
<a href="https://sites.google.com/view/xjguo">Xiaojie Guo</a> 🦅
</p>
<p align="center">(⚔️: equal contribution, 🦅: corresponding author)</p>

<p align="center">
<img src="https://github.com/lime-j/RDNet/blob/main/figures/net.png?raw=true" width=95%>
</p>

**Our work has been accepted by CVPR 2025! See you at the conference!**

<details>
<summary>Click for the Abstract of RDNet</summary>
We present a Reversible Decoupling Network (RDNet), which employs a reversible encoder to secure valuable information while flexibly decoupling transmission- and reflection-relevant features during the forward pass. Furthermore, we customize a transmission-rate-aware prompt generator to dynamically calibrate features, further boosting performance. Extensive experiments demonstrate the superiority of RDNet over existing SOTA methods on five widely adopted benchmark datasets.
</details>

## 🚀 Todo

- [ ] Release the training code of RDNet.

## 🌠 Gallery

<table class="center">
<tr>
<td><p style="text-align: center">Class Room</p></td>
<td><p style="text-align: center">White Wall Chamber</p></td>
</tr>
<tr>
<td>
<div style="width: 100%; max-width: 600px; position: relative;">
<img src="https://github.com/lime-j/RDNet/blob/main/figures/Input_class.png?raw=true" style="width: 100%; height: 300px; display: block;">
<img src="https://github.com/lime-j/RDNet/blob/main/figures/Ours_class.png?raw=true" style="width: 100%; height: 300px; display: block; position: absolute; top: 0; left: 0; opacity: 0; transition: opacity 0.5s;" onmouseover="this.style.opacity=1;" onmouseout="this.style.opacity=0;">
</div>
</td>
<td>
<div style="width: 100%; max-width: 600px; position: relative;">
<img src="https://github.com/lime-j/RDNet/blob/main/figures/input_white.jpg?raw=true" style="width: 100%; height: 300px; display: block;">
<img src="https://github.com/lime-j/RDNet/blob/main/figures/Ours_white.png?raw=true" style="width: 100%; height: 300px; display: block; position: absolute; top: 0; left: 0; opacity: 0; transition: opacity 0.5s;" onmouseover="this.style.opacity=1;" onmouseout="this.style.opacity=0;">
</div>
</td>
</tr>
<tr>
<td><p style="text-align: center">Car Window</p></td>
<td><p style="text-align: center">Very Green Office</p></td>
</tr>
<tr>
<td>
<div style="width: 100%; max-width: 600px; position: relative;">
<img src="https://github.com/lime-j/RDNet/blob/main/figures/Input_car.jpg?raw=true" style="width: 100%; height: 300px; display: block;">
<img src="https://github.com/lime-j/RDNet/blob/main/figures/Ours_car.png?raw=true" style="width: 100%; height: 300px; display: block; position: absolute; top: 0; left: 0; opacity: 0; transition: opacity 0.5s;" onmouseover="this.style.opacity=1;" onmouseout="this.style.opacity=0;">
</div>
</td>
<td>
<div style="width: 100%; max-width: 600px; position: relative;">
<img src="https://github.com/lime-j/RDNet/blob/main/figures/Input_green.png?raw=true" style="width: 100%; height: 300px; display: block;">
<img src="https://github.com/lime-j/RDNet/blob/main/figures/Ours_green.png?raw=true" style="width: 100%; height: 300px; display: block; position: absolute; top: 0; left: 0; opacity: 0; transition: opacity 0.5s;" onmouseover="this.style.opacity=1;" onmouseout="this.style.opacity=0;">
</div>
</td>
</tr>
</table>

## Requirements

We recommend torch 2.x for our code, but it should work fine with most modern versions.

```
pip install "torch>=2.0" torchvision
pip install einops ema-pytorch fsspec fvcore huggingface-hub matplotlib numpy opencv-python omegaconf pytorch-msssim scikit-image scikit-learn scipy tensorboard tensorboardx wandb timm
```

# Testing

The checkpoint for the main network is available at https://checkpoints.mingjia.li/rdnet.pth, and the checkpoint for cls_model is at https://checkpoints.mingjia.li/cls_model.pth. Please put cls_model.pth under the "pretrained" folder.

```
python3 test_sirs.py --icnn_path <path to the main checkpoint> --resume
```
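For convenience, here is a minimal Python sketch for fetching both checkpoints (this assumes the two URLs above remain live; `urllib` is from the standard library):

```python
# Hypothetical helper: download the two released checkpoints.
import os
import urllib.request

os.makedirs('pretrained', exist_ok=True)
urllib.request.urlretrieve('https://checkpoints.mingjia.li/rdnet.pth', 'rdnet.pth')
urllib.request.urlretrieve('https://checkpoints.mingjia.li/cls_model.pth',
                           os.path.join('pretrained', 'cls_model.pth'))
```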
# Training

The training script will be released in a few days.
RDNet-main/RDNet-main/VOC2012_224_train_png.txt
ADDED
The diff for this file is too large to render. See the raw diff.

RDNet-main/RDNet-main/data/VOC2012_224_train_png.txt
ADDED
The diff for this file is too large to render. See the raw diff.
RDNet-main/RDNet-main/data/__pycache__/dataset_sir.cpython-38.pyc
ADDED
Binary file (10.9 kB)

RDNet-main/RDNet-main/data/__pycache__/image_folder.cpython-38.pyc
ADDED
Binary file (1.58 kB)

RDNet-main/RDNet-main/data/__pycache__/torchdata.cpython-38.pyc
ADDED
Binary file (2.86 kB)

RDNet-main/RDNet-main/data/__pycache__/transforms.cpython-38.pyc
ADDED
Binary file (9.37 kB)
RDNet-main/RDNet-main/data/dataset_sir.py
ADDED

```python
import math
import os.path
import random
from os.path import join

import cv2
import numpy as np
import torch.utils.data
import torchvision.transforms.functional as TF
from PIL import Image
from scipy.signal import convolve2d

from data.image_folder import make_dataset
from data.torchdata import Dataset as BaseDataset
from data.transforms import to_tensor


def __scale_width(img, target_width):
    ow, oh = img.size
    if ow == target_width:
        return img
    w = target_width
    h = int(target_width * oh / ow)
    h = math.ceil(h / 2.) * 2  # round up to even
    return img.resize((w, h), Image.BICUBIC)


def __scale_height(img, target_height):
    ow, oh = img.size
    if oh == target_height:
        return img
    h = target_height
    w = int(target_height * ow / oh)
    w = math.ceil(w / 2.) * 2  # round up to even
    return img.resize((w, h), Image.BICUBIC)


def paired_data_transforms(img_1, img_2, unaligned_transforms=False):
    def get_params(img, output_size):
        w, h = img.size
        th, tw = output_size
        if w == tw and h == th:
            return 0, 0, h, w

        i = random.randint(0, h - th)
        j = random.randint(0, w - tw)
        return i, j, th, tw

    # Resize so the shorter side is an even number in [320, 640], then apply
    # shared flips/rotations and a 320x320 crop to both images.
    target_size = int(random.randint(320, 640) / 2.) * 2
    ow, oh = img_1.size
    if ow >= oh:
        img_1 = __scale_height(img_1, target_size)
        img_2 = __scale_height(img_2, target_size)
    else:
        img_1 = __scale_width(img_1, target_size)
        img_2 = __scale_width(img_2, target_size)

    if random.random() < 0.5:
        img_1 = TF.hflip(img_1)
        img_2 = TF.hflip(img_2)

    if random.random() < 0.5:
        angle = random.choice([90, 180, 270])
        img_1 = TF.rotate(img_1, angle)
        img_2 = TF.rotate(img_2, angle)

    i, j, h, w = get_params(img_1, (320, 320))
    img_1 = TF.crop(img_1, i, j, h, w)

    if unaligned_transforms:
        # random shift of the second crop to simulate misalignment
        i_shift = random.randint(-10, 10)
        j_shift = random.randint(-10, 10)
        i += i_shift
        j += j_shift

    img_2 = TF.crop(img_2, i, j, h, w)

    return img_1, img_2


class ReflectionSynthesis(object):
    def __init__(self):
        # Kernel sizes and sampling probabilities of the Gaussian blur
        self.kernel_sizes = [5, 7, 9, 11]
        self.kernel_probs = [0.1, 0.2, 0.3, 0.4]

        # Sigma of the Gaussian blur
        self.sigma_range = [2, 5]
        self.alpha_range = [0.8, 1.0]
        self.beta_range = [0.4, 1.0]

    def __call__(self, T_, R_):
        T_ = np.asarray(T_, np.float32) / 255.
        R_ = np.asarray(R_, np.float32) / 255.

        kernel_size = np.random.choice(self.kernel_sizes, p=self.kernel_probs)
        sigma = np.random.uniform(self.sigma_range[0], self.sigma_range[1])
        kernel = cv2.getGaussianKernel(kernel_size, sigma)
        kernel2d = np.dot(kernel, kernel.T)
        for i in range(3):
            R_[..., i] = convolve2d(R_[..., i], kernel2d, mode='same')

        a = np.random.uniform(self.alpha_range[0], self.alpha_range[1])
        b = np.random.uniform(self.beta_range[0], self.beta_range[1])
        T, R = a * T_, b * R_

        if random.random() < 0.7:
            I = T + R - T * R
        else:
            I = T + R
            if np.max(I) > 1:
                m = I[I > 1]
                m = (np.mean(m) - 1) * 1.3
                I = np.clip(T + np.clip(R - m, 0, 1), 0, 1)

        return T_, R_, I


class DataLoader(torch.utils.data.DataLoader):
    def __init__(self, dataset, batch_size, shuffle, *args, **kwargs):
        super(DataLoader, self).__init__(dataset, batch_size, shuffle, *args, **kwargs)
        self.shuffle = shuffle

    def reset(self):
        if self.shuffle:
            print('Reset Dataset...')
            self.dataset.reset()


class DSRDataset(BaseDataset):
    def __init__(self, datadir, fns=None, size=None, enable_transforms=True):
        super(DSRDataset, self).__init__()
        self.size = size
        self.datadir = datadir
        self.enable_transforms = enable_transforms
        sortkey = lambda key: os.path.split(key)[-1]
        self.paths = sorted(make_dataset(datadir, fns), key=sortkey)
        if size is not None:
            self.paths = np.random.choice(self.paths, size)

        self.syn_model = ReflectionSynthesis()
        self.reset(shuffle=False)

    def reset(self, shuffle=True):
        if shuffle:
            random.shuffle(self.paths)
        num_paths = len(self.paths) // 2
        self.B_paths = self.paths[0:num_paths]
        self.R_paths = self.paths[num_paths:2 * num_paths]

    def data_synthesis(self, t_img, r_img):
        if self.enable_transforms:
            t_img, r_img = paired_data_transforms(t_img, r_img)

        t_img, r_img, m_img = self.syn_model(t_img, r_img)

        B = TF.to_tensor(t_img)
        R = TF.to_tensor(r_img)
        M = TF.to_tensor(m_img)

        return B, R, M

    def __getitem__(self, index):
        index_B = index % len(self.B_paths)
        index_R = index % len(self.R_paths)

        B_path = self.B_paths[index_B]
        R_path = self.R_paths[index_R]

        t_img = Image.open(B_path).convert('RGB')
        r_img = Image.open(R_path).convert('RGB')

        B, R, M = self.data_synthesis(t_img, r_img)
        fn = os.path.basename(B_path)
        return {'input': M, 'target_t': B, 'target_r': M - B, 'fn': fn, 'real': False}

    def __len__(self):
        if self.size is not None:
            return min(max(len(self.B_paths), len(self.R_paths)), self.size)
        else:
            return max(len(self.B_paths), len(self.R_paths))


class DSRTestDataset(BaseDataset):
    def __init__(self, datadir, fns=None, size=None, enable_transforms=False, unaligned_transforms=False,
                 round_factor=1, flag=None, if_align=True):
        super(DSRTestDataset, self).__init__()
        self.size = size
        self.datadir = datadir
        self.fns = fns or os.listdir(join(datadir, 'blended'))
        self.enable_transforms = enable_transforms
        self.unaligned_transforms = unaligned_transforms
        self.round_factor = round_factor
        self.flag = flag
        self.if_align = True  # note: the if_align argument is overridden here

        if size is not None:
            self.fns = self.fns[:size]

    def align(self, x1, x2):
        h, w = x1.height, x1.width
        h, w = h // 32 * 32, w // 32 * 32  # round down to a multiple of 32
        x1 = x1.resize((w, h))
        x2 = x2.resize((w, h))
        return x1, x2

    def __getitem__(self, index):
        fn = self.fns[index]

        t_img = Image.open(join(self.datadir, 'transmission_layer', fn)).convert('RGB')
        m_img = Image.open(join(self.datadir, 'blended', fn)).convert('RGB')

        if self.if_align:
            t_img, m_img = self.align(t_img, m_img)

        if self.enable_transforms:
            t_img, m_img = paired_data_transforms(t_img, m_img, self.unaligned_transforms)

        B = TF.to_tensor(t_img)
        M = TF.to_tensor(m_img)

        dic = {'input': M, 'target_t': B, 'fn': fn, 'real': True, 'target_r': M - B}
        if self.flag is not None:
            dic.update(self.flag)
        return dic

    def __len__(self):
        if self.size is not None:
            return min(len(self.fns), self.size)
        else:
            return len(self.fns)


class SIRTestDataset(BaseDataset):
    def __init__(self, datadir, fns=None, size=None, if_align=True):
        super(SIRTestDataset, self).__init__()
        self.size = size
        self.datadir = datadir
        self.fns = fns or os.listdir(join(datadir, 'blended'))
        self.if_align = if_align

        if size is not None:
            self.fns = self.fns[:size]

    def align(self, x1, x2, x3):
        h, w = x1.height, x1.width
        h, w = h // 32 * 32, w // 32 * 32
        x1 = x1.resize((w, h))
        x2 = x2.resize((w, h))
        x3 = x3.resize((w, h))
        return x1, x2, x3

    def __getitem__(self, index):
        fn = self.fns[index]

        t_img = Image.open(join(self.datadir, 'transmission_layer', fn)).convert('RGB')
        r_img = Image.open(join(self.datadir, 'reflection_layer', fn)).convert('RGB')
        m_img = Image.open(join(self.datadir, 'blended', fn)).convert('RGB')

        if self.if_align:
            t_img, r_img, m_img = self.align(t_img, r_img, m_img)

        B = TF.to_tensor(t_img)
        R = TF.to_tensor(r_img)
        M = TF.to_tensor(m_img)

        dic = {'input': M, 'target_t': B, 'fn': fn, 'real': True, 'target_r': R, 'target_r_hat': M - B}
        return dic

    def __len__(self):
        if self.size is not None:
            return min(len(self.fns), self.size)
        else:
            return len(self.fns)


class RealDataset(BaseDataset):
    def __init__(self, datadir, fns=None, size=None):
        super(RealDataset, self).__init__()
        self.size = size
        self.datadir = datadir
        self.fns = fns or os.listdir(join(datadir))

        if size is not None:
            self.fns = self.fns[:size]

    def align(self, x):
        h, w = x.height, x.width
        h, w = h // 32 * 32, w // 32 * 32
        x = x.resize((w, h))
        return x

    def __getitem__(self, index):
        fn = self.fns[index]
        B = -1  # no ground truth is available for real inputs
        m_img = Image.open(join(self.datadir, fn)).convert('RGB')
        M = to_tensor(self.align(m_img))
        data = {'input': M, 'target_t': B, 'fn': fn}
        return data

    def __len__(self):
        if self.size is not None:
            return min(len(self.fns), self.size)
        else:
            return len(self.fns)


class FusionDataset(BaseDataset):
    def __init__(self, datasets, fusion_ratios=None):
        self.datasets = datasets
        self.size = sum([len(dataset) for dataset in datasets])
        self.fusion_ratios = fusion_ratios or [1. / len(datasets)] * len(datasets)
        print('[i] using a fusion dataset: %d %s imgs fused with ratio %s' % (
            self.size, [len(dataset) for dataset in datasets], self.fusion_ratios))

    def reset(self):
        for dataset in self.datasets:
            dataset.reset()

    def __getitem__(self, index):
        # Sample one of the sub-datasets according to fusion_ratios,
        # falling back to the last dataset if no earlier one is chosen.
        residual = 1
        for i, ratio in enumerate(self.fusion_ratios):
            if random.random() < ratio / residual or i == len(self.fusion_ratios) - 1:
                dataset = self.datasets[i]
                return dataset[index % len(dataset)]
            residual -= ratio

    def __len__(self):
        return self.size
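For orientation, a minimal sketch of how the pieces in the file above compose one synthetic training triple, in the same order as `DSRDataset.data_synthesis` (the two image paths are placeholders, not files shipped with the repo):

```python
# Hypothetical usage of the synthesis pipeline defined in data/dataset_sir.py.
from PIL import Image
from data.dataset_sir import ReflectionSynthesis, paired_data_transforms

t_img = Image.open('some_transmission.png').convert('RGB')  # placeholder path
r_img = Image.open('some_reflection.png').convert('RGB')    # placeholder path

# Both images become aligned 320x320 crops, so they can be blended pixel-wise.
t_img, r_img = paired_data_transforms(t_img, r_img)

syn = ReflectionSynthesis()
T, R, I = syn(t_img, r_img)  # float32 arrays in [0, 1]; I is the blended input
print(T.shape, R.shape, I.shape)  # (320, 320, 3) each
```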
RDNet-main/RDNet-main/data/image_folder.py
ADDED

```python
###############################################################################
# Code from
# https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py
# Modified the original code so that it also loads images from the current
# directory as well as the subdirectories
###############################################################################

import torch.utils.data as data

from PIL import Image
import os
import os.path

IMG_EXTENSIONS = [
    '.jpg', '.JPG', '.jpeg', '.JPEG',
    '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP',
]


def read_fns(filename):
    with open(filename) as f:
        fns = f.readlines()
        fns = [fn.strip() for fn in fns]
    return fns


def is_image_file(filename):
    return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)


def make_dataset(dir, fns=None):
    images = []
    assert os.path.isdir(dir), '%s is not a valid directory' % dir

    if fns is None:
        # Walk the whole tree and collect every image file.
        for root, _, fnames in sorted(os.walk(dir)):
            for fname in fnames:
                if is_image_file(fname):
                    path = os.path.join(root, fname)
                    images.append(path)
    else:
        # Only keep the explicitly listed filenames, relative to dir.
        for fname in fns:
            if is_image_file(fname):
                path = os.path.join(dir, fname)
                images.append(path)

    return images


def default_loader(path):
    return Image.open(path).convert('RGB')
```
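A quick sketch of how `make_dataset` pairs with `read_fns` (the directory below is a placeholder; the list file is the repo's own `data/real_test.txt`):

```python
# Hypothetical usage; 'datasets/real_test' is a placeholder directory.
from data.image_folder import make_dataset, read_fns

all_paths = make_dataset('datasets/real_test')            # walk the whole tree
subset = make_dataset('datasets/real_test',
                      fns=read_fns('data/real_test.txt'))  # only listed names
print(len(all_paths), len(subset))
```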
RDNet-main/RDNet-main/data/real_test.txt
ADDED

```
3.jpg
4.jpg
9.jpg
12.jpg
15.jpg
22.jpg
23.jpg
25.jpg
29.jpg
39.jpg
46.jpg
47.jpg
58.jpg
86.jpg
87.jpg
89.jpg
93.jpg
103.jpg
107.jpg
110.jpg
```
RDNet-main/RDNet-main/data/torchdata.py
ADDED

```python
import bisect
import warnings


class Dataset(object):
    """An abstract class representing a Dataset.

    All other datasets should subclass it. All subclasses should override
    ``__len__``, which provides the size of the dataset, and ``__getitem__``,
    supporting integer indexing in range from 0 to len(self) exclusive.
    """

    def __getitem__(self, index):
        raise NotImplementedError

    def __len__(self):
        raise NotImplementedError

    def __add__(self, other):
        return ConcatDataset([self, other])

    def reset(self):
        return


class ConcatDataset(Dataset):
    """
    Dataset to concatenate multiple datasets.
    Purpose: useful to assemble different existing datasets, possibly
    large-scale datasets, as the concatenation operation is done in an
    on-the-fly manner.

    Arguments:
        datasets (sequence): List of datasets to be concatenated
    """

    @staticmethod
    def cumsum(sequence):
        r, s = [], 0
        for e in sequence:
            l = len(e)
            r.append(l + s)
            s += l
        return r

    def __init__(self, datasets):
        super(ConcatDataset, self).__init__()
        assert len(datasets) > 0, 'datasets should not be an empty iterable'
        self.datasets = list(datasets)
        self.cumulative_sizes = self.cumsum(self.datasets)

    def __len__(self):
        return self.cumulative_sizes[-1]

    def __getitem__(self, idx):
        # Locate which sub-dataset idx falls into, then offset into it.
        dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
        if dataset_idx == 0:
            sample_idx = idx
        else:
            sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
        return self.datasets[dataset_idx][sample_idx]

    @property
    def cummulative_sizes(self):
        warnings.warn("cummulative_sizes attribute is renamed to "
                      "cumulative_sizes", DeprecationWarning, stacklevel=2)
        return self.cumulative_sizes
```
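Because `Dataset.__add__` returns a `ConcatDataset`, two datasets concatenate with plain `+`; a self-contained sketch (the `Range` class is a toy illustration, not part of the repo):

```python
from data.torchdata import Dataset

class Range(Dataset):
    """Toy dataset yielding 0..n-1, just to demonstrate concatenation."""
    def __init__(self, n):
        self.n = n

    def __len__(self):
        return self.n

    def __getitem__(self, i):
        return i

combined = Range(3) + Range(2)          # same as ConcatDataset([Range(3), Range(2)])
print(len(combined))                    # 5
print([combined[i] for i in range(5)])  # [0, 1, 2, 0, 1]
```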
RDNet-main/RDNet-main/data/transforms.py
ADDED

```python
from __future__ import division

import math
import random

import torch
from PIL import Image

try:
    import accimage
except ImportError:
    accimage = None
import numpy as np
import scipy.stats as st
import cv2
import collections.abc
import torchvision.transforms as transforms
import util.util as util
from scipy.signal import convolve2d


# utility
def _is_pil_image(img):
    if accimage is not None:
        return isinstance(img, (Image.Image, accimage.Image))
    else:
        return isinstance(img, Image.Image)


def _is_tensor_image(img):
    return torch.is_tensor(img) and img.ndimension() == 3


def _is_numpy_image(img):
    return isinstance(img, np.ndarray) and (img.ndim in {2, 3})


def arrshow(arr):
    Image.fromarray(arr.astype(np.uint8)).show()


def get_transform(opt):
    transform_list = []
    osizes = util.parse_args(opt.loadSize)
    fineSize = util.parse_args(opt.fineSize)
    if opt.resize_or_crop == 'resize_and_crop':
        transform_list.append(
            transforms.RandomChoice([
                transforms.Resize([osize, osize], Image.BICUBIC) for osize in osizes
            ]))
        transform_list.append(transforms.RandomCrop(fineSize))
    elif opt.resize_or_crop == 'crop':
        transform_list.append(transforms.RandomCrop(fineSize))
    elif opt.resize_or_crop == 'scale_width':
        transform_list.append(transforms.Lambda(
            lambda img: __scale_width(img, fineSize)))
    elif opt.resize_or_crop == 'scale_width_and_crop':
        transform_list.append(transforms.Lambda(
            lambda img: __scale_width(img, opt.loadSize)))
        transform_list.append(transforms.RandomCrop(opt.fineSize))

    if opt.isTrain and not opt.no_flip:
        transform_list.append(transforms.RandomHorizontalFlip())

    return transforms.Compose(transform_list)


to_norm_tensor = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        (0.5, 0.5, 0.5),
        (0.5, 0.5, 0.5)
    )
])

to_tensor = transforms.ToTensor()


def __scale_width(img, target_width):
    ow, oh = img.size
    if ow == target_width:
        return img
    w = target_width
    h = int(target_width * oh / ow)
    h = math.ceil(h / 2.) * 2  # round up to even
    return img.resize((w, h), Image.BICUBIC)


# functional
def gaussian_blur(img, kernel_size, sigma):
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    img = np.asarray(img)
    # the 3rd dimension (i.e. inter-band) would be filtered, which is unwanted for our purpose
    # new = gaussian_filter(img, sigma=sigma, truncate=truncate)
    if isinstance(kernel_size, int):
        kernel_size = (kernel_size, kernel_size)
    elif isinstance(kernel_size, collections.abc.Sequence):
        assert len(kernel_size) == 2
    new = cv2.GaussianBlur(img, kernel_size, sigma)  # apply gaussian filter band by band
    return Image.fromarray(new)


# transforms
class GaussianBlur(object):
    def __init__(self, kernel_size=11, sigma=3):
        self.kernel_size = kernel_size
        self.sigma = sigma

    def __call__(self, img):
        return gaussian_blur(img, self.kernel_size, self.sigma)


class ReflectionSythesis_0(object):
    """Reflection image data synthesis for weakly-supervised learning
    of ICCV 2017 paper *"A Generic Deep Architecture for Single Image Reflection Removal and Image Smoothing"*
    """

    def __init__(self, kernel_sizes=None, low_sigma=2, high_sigma=5, low_gamma=1.3,
                 high_gamma=1.3, low_delta=0.4, high_delta=1.8):
        self.kernel_sizes = kernel_sizes or [11]
        self.low_sigma = low_sigma
        self.high_sigma = high_sigma
        self.low_gamma = low_gamma
        self.high_gamma = high_gamma
        self.low_delta = low_delta
        self.high_delta = high_delta
        print('[i] reflection synthesis model: {}'.format({
            'kernel_sizes': kernel_sizes, 'low_sigma': low_sigma, 'high_sigma': high_sigma,
            'low_gamma': low_gamma, 'high_gamma': high_gamma}))

    def __call__(self, B, R):
        if not _is_pil_image(B):
            raise TypeError('B should be PIL Image. Got {}'.format(type(B)))
        if not _is_pil_image(R):
            raise TypeError('R should be PIL Image. Got {}'.format(type(R)))
        B_ = np.asarray(B, np.float32)
        # replace the background with either a dark uniform value or a random flat color
        if random.random() < 0.4:
            B_ = np.tile(np.random.uniform(0, 30, (1, 1, 1)), B_.shape) / 255.
        else:
            B_ = np.tile(np.random.normal(50, 50, (1, 1, 3)), (B_.shape[0], B_.shape[1], 1)).clip(0, 255) / 255.
        R_ = np.asarray(R, np.float32) / 255.

        kernel_size = np.random.choice(self.kernel_sizes)  # note: the kernel below is built with a fixed size of 11
        sigma = np.random.uniform(self.low_sigma, self.high_sigma)
        gamma = np.random.uniform(self.low_gamma, self.high_gamma)
        delta = np.random.uniform(self.low_delta, self.high_delta)
        R_blur = R_
        kernel = cv2.getGaussianKernel(11, sigma)
        kernel2d = np.dot(kernel, kernel.T)

        for i in range(3):
            R_blur[..., i] = convolve2d(R_blur[..., i], kernel2d, mode='same')

        R_blur = np.clip(R_blur - np.mean(R_blur) * gamma, 0, 1)
        R_blur = np.clip(R_blur * delta, 0, 1)
        M_ = np.clip(R_blur + B_, 0, 1)

        return B_, R_blur, M_


class ReflectionSythesis_1(object):
    """Reflection image data synthesis for weakly-supervised learning
    of ICCV 2017 paper *"A Generic Deep Architecture for Single Image Reflection Removal and Image Smoothing"*
    """

    def __init__(self, kernel_sizes=None, low_sigma=2, high_sigma=5, low_gamma=1.3, high_gamma=1.3):
        self.kernel_sizes = kernel_sizes or [11]
        self.low_sigma = low_sigma
        self.high_sigma = high_sigma
        self.low_gamma = low_gamma
        self.high_gamma = high_gamma
        print('[i] reflection synthesis model: {}'.format({
            'kernel_sizes': kernel_sizes, 'low_sigma': low_sigma, 'high_sigma': high_sigma,
            'low_gamma': low_gamma, 'high_gamma': high_gamma}))

    def __call__(self, B, R):
        if not _is_pil_image(B):
            raise TypeError('B should be PIL Image. Got {}'.format(type(B)))
        if not _is_pil_image(R):
            raise TypeError('R should be PIL Image. Got {}'.format(type(R)))

        B_ = np.asarray(B, np.float32) / 255.
        R_ = np.asarray(R, np.float32) / 255.

        kernel_size = np.random.choice(self.kernel_sizes)  # note: the kernel below is built with a fixed size of 11
        sigma = np.random.uniform(self.low_sigma, self.high_sigma)
        gamma = np.random.uniform(self.low_gamma, self.high_gamma)
        R_blur = R_
        kernel = cv2.getGaussianKernel(11, sigma)
        kernel2d = np.dot(kernel, kernel.T)

        for i in range(3):
            R_blur[..., i] = convolve2d(R_blur[..., i], kernel2d, mode='same')

        M_ = B_ + R_blur

        if np.max(M_) > 1:
            m = M_[M_ > 1]
            m = (np.mean(m) - 1) * gamma
            R_blur = np.clip(R_blur - m, 0, 1)
            M_ = np.clip(R_blur + B_, 0, 1)

        return B_, R_blur, M_


class Sobel(object):
    def __call__(self, img):
        if not _is_pil_image(img):
            raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

        gray_img = np.array(img.convert('L'))
        x = cv2.Sobel(gray_img, cv2.CV_16S, 1, 0)
        y = cv2.Sobel(gray_img, cv2.CV_16S, 0, 1)

        absX = cv2.convertScaleAbs(x)
        absY = cv2.convertScaleAbs(y)

        dst = cv2.addWeighted(absX, 0.5, absY, 0.5, 0)
        return Image.fromarray(dst)


class ReflectionSythesis_2(object):
    """Reflection image data synthesis for weakly-supervised learning
    of CVPR 2018 paper *"Single Image Reflection Separation with Perceptual Losses"*
    """

    def __init__(self, kernel_sizes=None):
        self.kernel_sizes = kernel_sizes or np.linspace(1, 5, 80)

    @staticmethod
    def gkern(kernlen=100, nsig=1):
        """Returns a 2D Gaussian kernel array."""
        interval = (2 * nsig + 1.) / (kernlen)
        x = np.linspace(-nsig - interval / 2., nsig + interval / 2., kernlen + 1)
        kern1d = np.diff(st.norm.cdf(x))
        kernel_raw = np.sqrt(np.outer(kern1d, kern1d))
        kernel = kernel_raw / kernel_raw.sum()
        kernel = kernel / kernel.max()
        return kernel

    def __call__(self, t, r):
        t = np.float32(t) / 255.
        r = np.float32(r) / 255.
        ori_t = t
        # create a vignetting mask
        g_mask = self.gkern(560, 3)
        g_mask = np.dstack((g_mask, g_mask, g_mask))
        sigma = self.kernel_sizes[np.random.randint(0, len(self.kernel_sizes))]

        # blend in linear space (gamma 2.2), blur the reflection, then re-apply gamma
        t = np.power(t, 2.2)
        r = np.power(r, 2.2)

        sz = int(2 * np.ceil(2 * sigma) + 1)

        r_blur = cv2.GaussianBlur(r, (sz, sz), sigma, sigma, 0)
        blend = r_blur + t

        att = 1.08 + np.random.random() / 10.0

        for i in range(3):
            maski = blend[:, :, i] > 1
            mean_i = max(1., np.sum(blend[:, :, i] * maski) / (maski.sum() + 1e-6))
            r_blur[:, :, i] = r_blur[:, :, i] - (mean_i - 1) * att
        r_blur[r_blur >= 1] = 1
        r_blur[r_blur <= 0] = 0

        h, w = r_blur.shape[0:2]
        neww = np.random.randint(0, 560 - w - 10)
        newh = np.random.randint(0, 560 - h - 10)
        alpha1 = g_mask[newh:newh + h, neww:neww + w, :]
        alpha2 = 1 - np.random.random() / 5.0
        r_blur_mask = np.multiply(r_blur, alpha1)
        blend = r_blur_mask + t * alpha2

        t = np.power(t, 1 / 2.2)
        r_blur_mask = np.power(r_blur_mask, 1 / 2.2)
        blend = np.power(blend, 1 / 2.2)
        blend[blend >= 1] = 1
        blend[blend <= 0] = 0

        return np.float32(ori_t), np.float32(r_blur_mask), np.float32(blend)


# Examples
if __name__ == '__main__':
    """cv2 imread"""
    # img = cv2.imread('testdata_reflection_real/19-input.png')
    # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # img2 = cv2.GaussianBlur(img, (11, 11), 3)

    """Sobel Operator"""
    # img = np.array(Image.open('datasets/VOC224/train/B/2007_000250.png').convert('L'))

    """Reflection Synthesis"""
    b = Image.open('')
    r = Image.open('')
    G = ReflectionSythesis_0()
    b_, r_blur, m = G(b, r)  # __call__ returns the triple (B_, R_blur, M_)
    arrshow(m * 255)
```
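A hedged usage sketch for `ReflectionSythesis_2`: the inputs must be smaller than roughly 550x550, since a window of the 560x560 vignetting mask is cropped around them (the image paths are placeholders):

```python
# Hypothetical usage; image paths are placeholders.
from PIL import Image
from data.transforms import ReflectionSythesis_2

syn = ReflectionSythesis_2()
t = Image.open('transmission.png').convert('RGB').resize((400, 300))
r = Image.open('reflection.png').convert('RGB').resize((400, 300))
t_out, r_blur_mask, blend = syn(t, r)  # float32 arrays in [0, 1]
```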
RDNet-main/RDNet-main/engine.py
ADDED

```python
import torch
import util.util as util
from models import make_model
import time
import os
import sys
from os.path import join
from util.visualizer import Visualizer
import tqdm
import visdom
import numpy as np
from tools import mutils


class Engine(object):
    def __init__(self, opt, eval_dataset_real, eval_dataset_solidobject, eval_dataset_postcard, eval_dataloader_wild):
        self.opt = opt
        self.writer = None
        self.visualizer = None
        self.model = None
        self.best_val_loss = 1e6
        self.eval_dataset_real = eval_dataset_real
        self.eval_dataset_solidobject = eval_dataset_solidobject
        self.eval_dataset_postcard = eval_dataset_postcard
        self.eval_dataloader_wild = eval_dataloader_wild
        self.result_dir = os.path.join(f'./experiment/{self.opt.name}/results',
                                       mutils.get_formatted_time())
        self.biggest_psnr = 0
        self.__setup()

    def __setup(self):
        self.basedir = join('experiment', self.opt.name)
        os.makedirs(self.basedir, exist_ok=True)

        opt = self.opt

        """Model"""
        self.model = make_model(self.opt.model)  # models.__dict__[self.opt.model]()
        self.model.initialize(opt)
        if True:
            print("IN")
            self.writer = util.get_summary_writer(os.path.join(self.basedir, 'logs'))
            self.visualizer = Visualizer(opt)

    def train(self, train_loader, **kwargs):
        print('\nEpoch: %d' % self.epoch)
        avg_meters = util.AverageMeters()
        opt = self.opt
        model = self.model
        epoch = self.epoch

        epoch_start_time = time.time()
        for i, data in tqdm.tqdm(enumerate(train_loader)):

            iter_start_time = time.time()
            iterations = self.iterations

            model.set_input(data, mode='train')
            model.optimize_parameters(**kwargs)

            errors = model.get_current_errors()
            avg_meters.update(errors)
            util.progress_bar(i, len(train_loader), str(avg_meters))
            util.write_loss(self.writer, 'train', avg_meters, iterations)
            if iterations % 100 == 0:
                # Periodically log the current restoration next to its input.
                imgs = []
                output_clean, output_reflection, input = model.return_output()
                # output_clean, input = model.return_output()

                output_clean = np.transpose(output_clean, (2, 0, 1)) / 255
                # output_reflection = np.transpose(output_reflection, (2, 0, 1)) / 255
                input = np.transpose(input, (2, 0, 1)) / 255
                imgs.append(output_clean)
                # imgs.append(output_reflection)
                imgs.append(input)
                util.get_visual(self.writer, iterations, imgs)
            if iterations % opt.print_freq == 0 and opt.display_id != 0:
                t = (time.time() - iter_start_time)

            self.iterations += 1

        self.epoch += 1

        if True:  # not self.opt.no_log:
            if self.epoch % opt.save_epoch_freq == 0:
                save_dir = os.path.join(self.result_dir, '%03d' % self.epoch)
                os.makedirs(save_dir, exist_ok=True)
                matrix_real = self.eval(self.eval_dataset_real, dataset_name='testdata_real20',
                                        savedir=save_dir, suffix='real20')
                matrix_solid = self.eval(self.eval_dataset_solidobject, dataset_name='testdata_solidobject',
                                         savedir=save_dir, suffix='solidobject')
                matrix_post = self.eval(self.eval_dataset_postcard, dataset_name='testdata_postcard',
                                        savedir=save_dir, suffix='postcard')
                matrix_wild = self.eval(self.eval_dataloader_wild, dataset_name='testdata_wild',
                                        savedir=save_dir, suffix='wild')
                # Weight each split's PSNR by its number of images (474 in total).
                sum_PSNR_real = matrix_real['PSNR'] * 20
                sum_PSNR_solid = matrix_solid['PSNR'] * 200
                sum_PSNR_post = matrix_post['PSNR'] * 199
                sum_PSNR_wild = matrix_wild['PSNR'] * 55
                print("sum_PSNR_real: ", matrix_real['PSNR'], "sum_PSNR_solid: ", matrix_solid['PSNR'],
                      "sum_PSNR_post: ", matrix_post['PSNR'], "sum_PSNR_wild: ", matrix_wild['PSNR'])
                sum_PSNR = float(sum_PSNR_real + sum_PSNR_solid + sum_PSNR_post + sum_PSNR_wild) / 474.0
                print('Overall PSNR:', sum_PSNR)
                if sum_PSNR > self.biggest_psnr:
                    self.biggest_psnr = sum_PSNR
                    print('saving the model at epoch %d, iters %d' % (self.epoch, self.iterations))
                    model.save()
                print('highest: ', self.biggest_psnr, ' name: ', opt.name)

            print('saving the latest model at the end of epoch %d, iters %d' %
                  (self.epoch, self.iterations))
            model.save(label='latest')

        print('Time Taken: %d sec' %
              (time.time() - epoch_start_time))

        # model.update_learning_rate()
        try:
            train_loader.reset()
        except AttributeError:
            pass

    def eval(self, val_loader, dataset_name, savedir='./tmp', loss_key=None, **kwargs):
        # print(dataset_name)
        if savedir is not None:
            os.makedirs(savedir, exist_ok=True)
            self.f = open(os.path.join(savedir, 'metrics.txt'), 'w+')
            self.f.write(dataset_name + '\n')
        avg_meters = util.AverageMeters()
        model = self.model
        opt = self.opt
        with torch.no_grad():
            for i, data in enumerate(val_loader):
                if self.opt.select is not None and data['fn'][0] not in [f'{self.opt.select}.jpg']:
                    continue
                index = model.eval(data, savedir=savedir, **kwargs)

                # print(data['fn'][0], index)
                if savedir is not None:
                    self.f.write(f"{data['fn'][0]} {index['PSNR']} {index['SSIM']}\n")
                avg_meters.update(index)
                util.progress_bar(i, len(val_loader), str(avg_meters))

        if not opt.no_log:
            util.write_loss(self.writer, join('eval', dataset_name), avg_meters, self.epoch)

        if loss_key is not None:
            val_loss = avg_meters[loss_key]
            if val_loss < self.best_val_loss:
                self.best_val_loss = val_loss
                print('saving the best model at the end of epoch %d, iters %d' %
                      (self.epoch, self.iterations))
                model.save(label='best_{}_{}'.format(loss_key, dataset_name))

        return avg_meters

    def test(self, test_loader, savedir=None, **kwargs):
        model = self.model
        opt = self.opt
        with torch.no_grad():
            for i, data in enumerate(test_loader):
                model.test(data, savedir=savedir, **kwargs)
                util.progress_bar(i, len(test_loader))

    def save_eval(self, label):
        self.model.save_eval(label)

    @property
    def iterations(self):
        return self.model.iterations

    @iterations.setter
    def iterations(self, i):
        self.model.iterations = i

    @property
    def epoch(self):
        return self.model.epoch

    @epoch.setter
    def epoch(self, e):
        self.model.epoch = e
```
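The epoch-level model-selection metric in `train` above is a size-weighted mean PSNR over the four evaluation splits (20, 200, 199 and 55 images; 474 in total). Spelled out as a standalone sketch:

```python
def weighted_psnr(real20, solid, postcard, wild):
    # Split sizes act as weights: 20 + 200 + 199 + 55 = 474 images in total.
    return (real20 * 20 + solid * 200 + postcard * 199 + wild * 55) / 474.0

# e.g. per-split PSNRs of 23, 25, 24 and 26 dB give about 24.61 dB overall.
print(weighted_psnr(23.0, 25.0, 24.0, 26.0))
```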
RDNet-main/RDNet-main/figures/Input_car.jpg
ADDED

RDNet-main/RDNet-main/figures/Input_class.png
ADDED (Git LFS)

RDNet-main/RDNet-main/figures/Input_green.png
ADDED (Git LFS)

RDNet-main/RDNet-main/figures/Ours_car.png
ADDED (Git LFS)

RDNet-main/RDNet-main/figures/Ours_class.png
ADDED (Git LFS)

RDNet-main/RDNet-main/figures/Ours_green.png
ADDED (Git LFS)

RDNet-main/RDNet-main/figures/Ours_white.png
ADDED (Git LFS)

RDNet-main/RDNet-main/figures/Title.png
ADDED

RDNet-main/RDNet-main/figures/input_white.jpg
ADDED

RDNet-main/RDNet-main/figures/net.png
ADDED (Git LFS)

RDNet-main/RDNet-main/figures/result.png
ADDED (Git LFS)

RDNet-main/RDNet-main/figures/vis.png
ADDED (Git LFS)
RDNet-main/RDNet-main/models/__init__.py
ADDED

```python
import importlib

from models.arch import *

from models.cls_model_eval_nocls_reg import ClsModel as ClsReg


def make_model(name: str):
    # Note: the name argument is currently unused; ClsReg is always returned.
    model = ClsReg()
    return model
```
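A call sketch for the factory above; the argument is accepted for interface compatibility but, as noted in the comment, currently ignored:

```python
from models import make_model

model = make_model('rdnet')  # currently always returns a ClsReg instance
```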
RDNet-main/RDNet-main/models/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (414 Bytes)

RDNet-main/RDNet-main/models/__pycache__/base_model.cpython-38.pyc
ADDED
Binary file (3.02 kB)

RDNet-main/RDNet-main/models/__pycache__/cls_model_eval_nocls_reg.cpython-38.pyc
ADDED
Binary file (17.4 kB)

RDNet-main/RDNet-main/models/__pycache__/losses.cpython-38.pyc
ADDED
Binary file (15.3 kB)

RDNet-main/RDNet-main/models/__pycache__/networks.cpython-38.pyc
ADDED
Binary file (9.34 kB)

RDNet-main/RDNet-main/models/__pycache__/vgg.cpython-38.pyc
ADDED
Binary file (2.15 kB)

RDNet-main/RDNet-main/models/__pycache__/vit_feature_extractor.cpython-38.pyc
ADDED
Binary file (6.95 kB)
RDNet-main/RDNet-main/models/arch/NAFNET.py
ADDED
@@ -0,0 +1,480 @@
# ------------------------------------------------------------------------
# Copyright (c) 2022 megvii-model. All Rights Reserved.
# ------------------------------------------------------------------------

'''
Simple Baselines for Image Restoration

@article{chen2022simple,
  title={Simple Baselines for Image Restoration},
  author={Chen, Liangyu and Chu, Xiaojie and Zhang, Xiangyu and Sun, Jian},
  journal={arXiv preprint arXiv:2204.04676},
  year={2022}
}
'''

import torch
import torch.nn as nn
import torch.nn.functional as F
# from .models.archs.arch_util import LayerNorm2d
import sys
sys.path.append('/ghome/zhuyr/Deref_RW/networks/')

class LayerNormFunction(torch.autograd.Function):

    @staticmethod
    def forward(ctx, x, weight, bias, eps):
        ctx.eps = eps
        N, C, H, W = x.size()
        mu = x.mean(1, keepdim=True)
        var = (x - mu).pow(2).mean(1, keepdim=True)
        y = (x - mu) / (var + eps).sqrt()
        ctx.save_for_backward(y, var, weight)
        y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1)
        return y

    @staticmethod
    def backward(ctx, grad_output):
        eps = ctx.eps

        N, C, H, W = grad_output.size()
        y, var, weight = ctx.saved_variables
        g = grad_output * weight.view(1, C, 1, 1)
        mean_g = g.mean(dim=1, keepdim=True)

        mean_gy = (g * y).mean(dim=1, keepdim=True)
        gx = 1. / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g)
        return gx, (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0), grad_output.sum(dim=3).sum(dim=2).sum(
            dim=0), None

class LayerNorm2d(nn.Module):

    def __init__(self, channels, eps=1e-6):
        super(LayerNorm2d, self).__init__()
        self.register_parameter('weight', nn.Parameter(torch.ones(channels)))
        self.register_parameter('bias', nn.Parameter(torch.zeros(channels)))
        self.eps = eps

    def forward(self, x):
        return LayerNormFunction.apply(x, self.weight, self.bias, self.eps)

class SimpleGate(nn.Module):
    def forward(self, x):
        x1, x2 = x.chunk(2, dim=1)
        return x1 * x2
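A quick, self-contained shape check of the two primitives above — channel-wise LayerNorm over NCHW tensors, and the gate that halves the channel count (shapes are illustrative; assumes NAFNET.py is importable as models.arch.NAFNET):

import torch
from models.arch.NAFNET import LayerNorm2d, SimpleGate

x = torch.randn(2, 8, 16, 16)
print(LayerNorm2d(8)(x).shape)  # torch.Size([2, 8, 16, 16]) - shape preserved
print(SimpleGate()(x).shape)    # torch.Size([2, 4, 16, 16]) - channels halved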
class NAFBlock(nn.Module):
    def __init__(self, c, DW_Expand=2, FFN_Expand=2, drop_out_rate=0.):
        super().__init__()
        dw_channel = c * DW_Expand
        self.conv1 = nn.Conv2d(in_channels=c, out_channels=dw_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True)
        self.conv2 = nn.Conv2d(in_channels=dw_channel, out_channels=dw_channel, kernel_size=3, padding=1, stride=1, groups=dw_channel,
                               bias=True)
        self.conv3 = nn.Conv2d(in_channels=dw_channel // 2, out_channels=c, kernel_size=1, padding=0, stride=1, groups=1, bias=True)

        # Simplified Channel Attention
        self.sca = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channels=dw_channel // 2, out_channels=dw_channel // 2, kernel_size=1, padding=0, stride=1,
                      groups=1, bias=True),
        )

        # SimpleGate
        self.sg = SimpleGate()

        ffn_channel = FFN_Expand * c
        self.conv4 = nn.Conv2d(in_channels=c, out_channels=ffn_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True)
        self.conv5 = nn.Conv2d(in_channels=ffn_channel // 2, out_channels=c, kernel_size=1, padding=0, stride=1, groups=1, bias=True)

        self.norm1 = LayerNorm2d(c)
        self.norm2 = LayerNorm2d(c)

        self.dropout1 = nn.Dropout(drop_out_rate) if drop_out_rate > 0. else nn.Identity()
        self.dropout2 = nn.Dropout(drop_out_rate) if drop_out_rate > 0. else nn.Identity()

        self.beta = nn.Parameter(torch.zeros((1, c, 1, 1)), requires_grad=True)
        self.gamma = nn.Parameter(torch.zeros((1, c, 1, 1)), requires_grad=True)

    def forward(self, inp):
        x = inp

        x = self.norm1(x)

        x = self.conv1(x)
        x = self.conv2(x)
        x = self.sg(x)
        x = x * self.sca(x)
        x = self.conv3(x)

        x = self.dropout1(x)

        y = inp + x * self.beta

        x = self.conv4(self.norm2(y))
        x = self.sg(x)
        x = self.conv5(x)

        x = self.dropout2(x)

        return y + x * self.gamma

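Because the gate halves channels only inside the block and conv3/conv5 project back to c, a NAFBlock is shape-preserving, which is what lets the encoder/decoder stages below stack an arbitrary number of them. Illustrative check (assumes the module import path):

import torch
from models.arch.NAFNET import NAFBlock

blk = NAFBlock(c=32)
print(blk(torch.randn(1, 32, 64, 64)).shape)  # torch.Size([1, 32, 64, 64])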
class NAFNet(nn.Module):

    def __init__(self, img_channel=3, width=32, middle_blk_num=1, enc_blk_nums=[1, 1, 1, 28],
                 dec_blk_nums=[1, 1, 1, 1], global_residual=False, drop_flag=False, drop_rate=0.4):
        super().__init__()

        self.intro = nn.Conv2d(in_channels=img_channel, out_channels=width, kernel_size=3, padding=1, stride=1, groups=1,
                               bias=True)
        self.ending = nn.Conv2d(in_channels=width, out_channels=3, kernel_size=3, padding=1, stride=1, groups=1,
                                bias=True)

        self.encoders = nn.ModuleList()
        self.decoders = nn.ModuleList()
        self.middle_blks = nn.ModuleList()
        self.ups = nn.ModuleList()
        self.downs = nn.ModuleList()
        self.global_residual = global_residual
        self.drop_flag = drop_flag

        if drop_flag:
            self.dropout = nn.Dropout2d(p=drop_rate)

        chan = width
        for num in enc_blk_nums:
            self.encoders.append(
                nn.Sequential(
                    *[NAFBlock(chan) for _ in range(num)]
                )
            )
            self.downs.append(
                nn.Conv2d(chan, 2 * chan, 2, 2)
            )
            chan = chan * 2

        self.middle_blks = \
            nn.Sequential(
                *[NAFBlock(chan) for _ in range(middle_blk_num)]
            )

        for num in dec_blk_nums:
            self.ups.append(
                nn.Sequential(
                    nn.Conv2d(chan, chan * 2, 1, bias=False),
                    nn.PixelShuffle(2)
                )
            )
            chan = chan // 2
            self.decoders.append(
                nn.Sequential(
                    *[NAFBlock(chan) for _ in range(num)]
                )
            )

        self.padder_size = 2 ** len(self.encoders)

    def forward(self, inp):
        B, C, H, W = inp.shape
        inp = self.check_image_size(inp)
        base_inp = inp[:, :3, :, :]
        x = self.intro(inp)

        encs = []

        for encoder, down in zip(self.encoders, self.downs):
            x = encoder(x)
            encs.append(x)
            x = down(x)

        x = self.middle_blks(x)

        for decoder, up, enc_skip in zip(self.decoders, self.ups, encs[::-1]):
            x = up(x)
            x = x + enc_skip
            x = decoder(x)

        if self.drop_flag:
            x = self.dropout(x)

        x = self.ending(x)
        if self.global_residual:
            # print(x.shape, inp.shape, base_inp.shape)
            x = x + base_inp
        return x[:, :, :H, :W]

    def check_image_size(self, x):
        _, _, h, w = x.size()
        mod_pad_h = (self.padder_size - h % self.padder_size) % self.padder_size
        mod_pad_w = (self.padder_size - w % self.padder_size) % self.padder_size
        x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h))
        return x

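check_image_size pads H and W up to the next multiple of 2**len(encoders) so the four stride-2 downsamples divide evenly; forward then crops back to the original (H, W). A self-contained numeric check of the same arithmetic (sizes are illustrative):

import torch
import torch.nn.functional as F

padder_size = 16                      # 2 ** 4 encoder stages
h, w = 250, 375
mod_pad_h = (padder_size - h % padder_size) % padder_size
mod_pad_w = (padder_size - w % padder_size) % padder_size
x = F.pad(torch.randn(1, 3, h, w), (0, mod_pad_w, 0, mod_pad_h))
print(x.shape)                        # torch.Size([1, 3, 256, 384])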
class NAFNet_wDetHead(nn.Module):

    def __init__(self, img_channel=3, width=32, middle_blk_num=1, enc_blk_nums=[1, 1, 1, 28],
                 dec_blk_nums=[1, 1, 1, 1], global_residual=False, drop_flag=False, drop_rate=0.4,
                 concat=False, merge_manner=0):
        super().__init__()

        self.intro = nn.Conv2d(in_channels=img_channel, out_channels=width, kernel_size=3, padding=1, stride=1, groups=1,
                               bias=True)
        self.ending = nn.Conv2d(in_channels=width, out_channels=3, kernel_size=3, padding=1, stride=1, groups=1,
                                bias=True)

        self.encoders = nn.ModuleList()
        self.decoders = nn.ModuleList()
        self.middle_blks = nn.ModuleList()
        self.ups = nn.ModuleList()
        self.downs = nn.ModuleList()
        self.global_residual = global_residual
        self.drop_flag = drop_flag
        self.concat = concat
        self.merge_manner = merge_manner

        if drop_flag:
            self.dropout = nn.Dropout2d(p=drop_rate)

        # --------------------------- Merge sparse & Img -------------------------------------------------------
        self.intro_Det = nn.Conv2d(in_channels=1, out_channels=width, kernel_size=3, padding=1, stride=1, groups=1,
                                   bias=True)
        self.DetEnc = nn.Sequential(*[NAFBlock(width) for _ in range(3)])
        if self.concat:
            self.Merge_conv = nn.Conv2d(in_channels=width * 2, out_channels=width, kernel_size=3, padding=1, stride=1, groups=1,
                                        bias=True)
        else:
            self.Merge_conv = nn.Conv2d(in_channels=width, out_channels=width, kernel_size=3, padding=1, stride=1,
                                        groups=1,
                                        bias=True)
        # --------------------------- Merge sparse & Img -------------------------------------------------------

        chan = width
        for num in enc_blk_nums:
            self.encoders.append(
                nn.Sequential(
                    *[NAFBlock(chan) for _ in range(num)]
                )
            )
            self.downs.append(
                nn.Conv2d(chan, 2 * chan, 2, 2)
            )
            chan = chan * 2

        self.middle_blks = \
            nn.Sequential(
                *[NAFBlock(chan) for _ in range(middle_blk_num)]
            )

        for num in dec_blk_nums:
            self.ups.append(
                nn.Sequential(
                    nn.Conv2d(chan, chan * 2, 1, bias=False),
                    nn.PixelShuffle(2)
                )
            )
            chan = chan // 2
            self.decoders.append(
                nn.Sequential(
                    *[NAFBlock(chan) for _ in range(num)]
                )
            )

        self.padder_size = 2 ** len(self.encoders)

    def forward(self, inp, spare_ref):
        B, C, H, W = inp.shape
        inp = self.check_image_size(inp)
        # NOTE: spare_ref is not padded here, so it must already match the padded size of inp.
        base_inp = inp  # [:, :3, :, :]
        x = self.intro(inp)

        fea_sparse = self.DetEnc(self.intro_Det(spare_ref))

        if self.merge_manner == 0 and self.concat:
            x = torch.cat([x, fea_sparse], dim=1)
            x = self.Merge_conv(x)
        elif self.merge_manner == 1 and not self.concat:
            x = x + fea_sparse
            x = self.Merge_conv(x)
        elif self.merge_manner == 2 and not self.concat:
            x = x + fea_sparse * x
            x = self.Merge_conv(x)
        else:
            # no merge operation is applied for this flag combination
            print('Merge Flag Error!!!(No Merge Operation) ---zyr 1031 ')

        encs = []

        for encoder, down in zip(self.encoders, self.downs):
            x = encoder(x)
            encs.append(x)
            x = down(x)

        x = self.middle_blks(x)

        for decoder, up, enc_skip in zip(self.decoders, self.ups, encs[::-1]):
            x = up(x)
            x = x + enc_skip
            x = decoder(x)

        if self.drop_flag:
            x = self.dropout(x)

        x = self.ending(x)
        if self.global_residual:
            # print(x.shape, inp.shape, base_inp.shape)
            x = x + base_inp
        return x[:, :, :H, :W]

    def check_image_size(self, x):
        _, _, h, w = x.size()
        mod_pad_h = (self.padder_size - h % self.padder_size) % self.padder_size
        mod_pad_w = (self.padder_size - w % self.padder_size) % self.padder_size
        x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h))
        return x

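The three merge manners above fuse the image features with the sparse-reflection features in different ways before they pass through Merge_conv. Self-contained stand-ins (width = 4, tensors are illustrative):

import torch

x = torch.randn(1, 4, 8, 8)           # image features from self.intro
fea_sparse = torch.randn(1, 4, 8, 8)  # sparse-reflection features from DetEnc

merged_concat = torch.cat([x, fea_sparse], dim=1)  # manner 0: (1, 8, 8, 8), needs concat=True
merged_add = x + fea_sparse                        # manner 1: plain additive fusion
merged_mod = x + fea_sparse * x                    # manner 2: multiplicative modulation
print(merged_concat.shape, merged_add.shape, merged_mod.shape)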
class NAFNet_refine(nn.Module):

    def __init__(self, img_channel=6, width=32, middle_blk_num=1, enc_blk_nums=[1, 1, 1, 28],
                 dec_blk_nums=[1, 1, 1, 1], global_residual=False):
        super().__init__()

        self.intro = nn.Conv2d(in_channels=img_channel, out_channels=width, kernel_size=3, padding=1, stride=1, groups=1,
                               bias=True)
        self.ending = nn.Conv2d(in_channels=width, out_channels=3, kernel_size=3, padding=1, stride=1, groups=1,
                                bias=True)

        self.encoders = nn.ModuleList()
        self.decoders = nn.ModuleList()
        self.middle_blks = nn.ModuleList()
        self.ups = nn.ModuleList()
        self.downs = nn.ModuleList()
        self.global_residual = global_residual

        chan = width
        for num in enc_blk_nums:
            self.encoders.append(
                nn.Sequential(
                    *[NAFBlock(chan) for _ in range(num)]
                )
            )
            self.downs.append(
                nn.Conv2d(chan, 2 * chan, 2, 2)
            )
            chan = chan * 2

        self.middle_blks = \
            nn.Sequential(
                *[NAFBlock(chan) for _ in range(middle_blk_num)]
            )

        for num in dec_blk_nums:
            self.ups.append(
                nn.Sequential(
                    nn.Conv2d(chan, chan * 2, 1, bias=False),
                    nn.PixelShuffle(2)
                )
            )
            chan = chan // 2
            self.decoders.append(
                nn.Sequential(
                    *[NAFBlock(chan) for _ in range(num)]
                )
            )

        self.padder_size = 2 ** len(self.encoders)

    def forward(self, inp, pre_pred):
        B, C, H, W = inp.shape
        inp = self.check_image_size(inp)
        pre_pred = self.check_image_size(pre_pred)

        network_in = torch.cat([inp, pre_pred], dim=1)

        x = self.intro(network_in)

        encs = []

        for encoder, down in zip(self.encoders, self.downs):
            x = encoder(x)
            encs.append(x)
            x = down(x)

        x = self.middle_blks(x)

        for decoder, up, enc_skip in zip(self.decoders, self.ups, encs[::-1]):
            x = up(x)
            x = x + enc_skip
            x = decoder(x)

        x = self.ending(x)
        if self.global_residual:
            # residual over the first three channels (the degraded input image);
            # the original wrote inp[:3, :, :, :], which indexes the batch dimension
            x = x + inp[:, :3, :, :]
        return x[:, :, :H, :W]

    def check_image_size(self, x):
        _, _, h, w = x.size()
        mod_pad_h = (self.padder_size - h % self.padder_size) % self.padder_size
        mod_pad_w = (self.padder_size - w % self.padder_size) % self.padder_size
        x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h))
        return x

def print_param_number(net):
    print('#generator parameters:', sum(param.numel() for param in net.parameters()))

if __name__ == '__main__':
    img_channel = 3
    width = 32

    # enc_blks = [2, 2, 4, 8]
    # middle_blk_num = 12
    # dec_blks = [2, 2, 2, 2]

    # enc_blks = [1, 1, 1, 28]
    # middle_blk_num = 1
    # dec_blks = [1, 1, 1, 1]

    enc_blks = [1, 1, 1, 28]
    middle_blk_num = 1
    dec_blks = [1, 1, 1, 1]

    net = NAFNet_wDetHead(img_channel=img_channel, width=width, middle_blk_num=middle_blk_num,
                          enc_blk_nums=enc_blks, dec_blk_nums=dec_blks, global_residual=True,
                          concat=True, merge_manner=2)  # .cuda()
    # NOTE: concat=True with merge_manner=2 matches none of the merge branches,
    # so this demo run prints the 'Merge Flag Error' message and skips merging.
    # print(net)
    size = 352
    input = torch.randn([1, 3, 128, 128])  # .cuda()  inp_shape = (5, 3, 128, 128)
    spare = torch.randn([1, 1, 128, 128])
    print(net(input, spare).size())
    print_param_number(net)

    # net_local = NAFNetLocal()  # .cuda()
    # print_param_number(net)
    # print(net_local(input).size())
    # inp_shape = (3, 256, 256)
    #
    # from ptflops import get_model_complexity_info
    #
    # macs, params = get_model_complexity_info(net, inp_shape, verbose=False, print_per_layer_stat=False)
    #
    # params = float(params[:-3])
    # macs = float(macs[:-4])
    #
    # print(macs, params)
RDNet-main/RDNet-main/models/arch/RDnet_.py
ADDED
@@ -0,0 +1,202 @@
import numpy as np
from models.arch.focalnet import build_focalnet
import torch
import torch.nn as nn
from models.arch.modules_sig import ConvNextBlock, Decoder, LayerNorm, NAFBlock, SimDecoder, UpSampleConvnext
from models.arch.reverse_function import ReverseFunction
from timm.models.layers import trunc_normal_

class Fusion(nn.Module):
    def __init__(self, level, channels, first_col) -> None:
        super().__init__()

        self.level = level
        self.first_col = first_col
        self.down = nn.Sequential(
            nn.Conv2d(channels[level - 1], channels[level], kernel_size=2, stride=2),
            LayerNorm(channels[level], eps=1e-6, data_format="channels_first"),
        ) if level in [1, 2, 3] else nn.Identity()
        if not first_col:
            self.up = UpSampleConvnext(1, channels[level + 1], channels[level]) if level in [0, 1, 2] else nn.Identity()

    def forward(self, *args):

        c_down, c_up = args
        channels_down = c_down.size(1)  # typo fixed (was `channels_dowm`); the value is unused
        if self.first_col:
            x_clean = self.down(c_down)
            return x_clean
        if c_up is not None:
            channels_up = c_up.size(1)
        if self.level == 3:
            x_clean = self.down(c_down)
        else:
            x_clean = self.up(c_up) + self.down(c_down)

        return x_clean
class Level(nn.Module):
    def __init__(self, level, channels, layers, kernel_size, first_col, dp_rate=0.0, block_type=ConvNextBlock) -> None:
        super().__init__()
        countlayer = sum(layers[:level])
        expansion = 4
        self.fusion = Fusion(level, channels, first_col)
        modules = [block_type(channels[level], expansion * channels[level], channels[level], kernel_size=kernel_size,
                              layer_scale_init_value=1e-6, drop_path=dp_rate[countlayer + i]) for i in
                   range(layers[level])]
        self.blocks = nn.Sequential(*modules)

    def forward(self, *args):
        x = self.fusion(*args)
        x_clean = self.blocks(x)
        return x_clean


class SubNet(nn.Module):
    def __init__(self, channels, layers, kernel_size, first_col, dp_rates, save_memory, block_type=ConvNextBlock) -> None:
        super().__init__()
        shortcut_scale_init_value = 0.5
        self.save_memory = save_memory
        self.alpha0 = nn.Parameter(shortcut_scale_init_value * torch.ones((1, channels[0], 1, 1)),
                                   requires_grad=True) if shortcut_scale_init_value > 0 else None
        self.alpha1 = nn.Parameter(shortcut_scale_init_value * torch.ones((1, channels[1], 1, 1)),
                                   requires_grad=True) if shortcut_scale_init_value > 0 else None
        self.alpha2 = nn.Parameter(shortcut_scale_init_value * torch.ones((1, channels[2], 1, 1)),
                                   requires_grad=True) if shortcut_scale_init_value > 0 else None
        self.alpha3 = nn.Parameter(shortcut_scale_init_value * torch.ones((1, channels[3], 1, 1)),
                                   requires_grad=True) if shortcut_scale_init_value > 0 else None

        self.level0 = Level(0, channels, layers, kernel_size, first_col, dp_rates, block_type=block_type)

        self.level1 = Level(1, channels, layers, kernel_size, first_col, dp_rates, block_type=block_type)

        self.level2 = Level(2, channels, layers, kernel_size, first_col, dp_rates, block_type=block_type)

        self.level3 = Level(3, channels, layers, kernel_size, first_col, dp_rates, block_type=block_type)

    def _forward_nonreverse(self, *args):
        x, c0, c1, c2, c3 = args
        c0 = self.alpha0 * c0 + self.level0(x, c1)
        c1 = self.alpha1 * c1 + self.level1(c0, c2)
        c2 = self.alpha2 * c2 + self.level2(c1, c3)
        c3 = self.alpha3 * c3 + self.level3(c2, None)
        return c0, c1, c2, c3

    def _forward_reverse(self, *args):
        x, c0, c1, c2, c3 = args
        local_funs = [self.level0, self.level1, self.level2, self.level3]
        alpha = [self.alpha0, self.alpha1, self.alpha2, self.alpha3]
        _, c0, c1, c2, c3 = ReverseFunction.apply(
            local_funs, alpha, *args)

        return c0, c1, c2, c3

    def forward(self, *args):

        self._clamp_abs(self.alpha0.data, 1e-3)
        self._clamp_abs(self.alpha1.data, 1e-3)
        self._clamp_abs(self.alpha2.data, 1e-3)
        self._clamp_abs(self.alpha3.data, 1e-3)
        if self.save_memory:
            return self._forward_reverse(*args)
        else:
            return self._forward_nonreverse(*args)

    def _clamp_abs(self, data, value):
        with torch.no_grad():
            sign = data.sign()
            data.abs_().clamp_(value)
            data *= sign
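_clamp_abs keeps each shortcut scale away from zero while preserving its sign, so the reversible update c = alpha * c + f(...) stays numerically invertible when the backward pass divides by alpha. A self-contained check of that clamp (values are illustrative):

import torch

def clamp_abs(data, value):
    with torch.no_grad():
        sign = data.sign()
        data.abs_().clamp_(value)  # clamp magnitude from below
        data *= sign               # restore the sign

a = torch.tensor([0.5, -0.0004])
clamp_abs(a, 1e-3)
print(a)  # tensor([ 0.5000, -0.0010])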
+
class StarReLU(nn.Module):
|
| 112 |
+
"""
|
| 113 |
+
StarReLU: s * relu(x) ** 2 + b
|
| 114 |
+
"""
|
| 115 |
+
def __init__(self, scale_value=1.0, bias_value=0.0,
|
| 116 |
+
scale_learnable=True, bias_learnable=True,
|
| 117 |
+
mode=None, inplace=True):
|
| 118 |
+
super().__init__()
|
| 119 |
+
self.inplace = inplace
|
| 120 |
+
self.relu = nn.ReLU(inplace=inplace)
|
| 121 |
+
self.scale = nn.Parameter(scale_value * torch.ones(1),
|
| 122 |
+
requires_grad=scale_learnable)
|
| 123 |
+
self.bias = nn.Parameter(bias_value * torch.ones(1),
|
| 124 |
+
requires_grad=bias_learnable)
|
| 125 |
+
def forward(self, x):
|
| 126 |
+
return self.scale * self.relu(x)**2 + self.bias
|
| 127 |
+
|
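A quick numeric check of the StarReLU formula with the default s=1, b=0 (inputs are illustrative):

import torch

x = torch.tensor([-2.0, 0.5, 3.0])
print(torch.relu(x) ** 2)  # tensor([0.0000, 0.2500, 9.0000])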
class FullNet_NLP(nn.Module):
    def __init__(self, channels=[32, 64, 96, 128], layers=[2, 3, 6, 3], num_subnet=5, loss_col=4, kernel_size=3, num_classes=1000,
                 drop_path=0.0, save_memory=True, inter_supv=True, head_init_scale=None, pretrained_cols=16) -> None:
        super().__init__()
        self.num_subnet = num_subnet
        self.Loss_col = (loss_col + 1)
        self.inter_supv = inter_supv
        self.channels = channels
        self.layers = layers
        self.stem_comp = nn.Sequential(
            nn.Conv2d(3, channels[0], kernel_size=5, stride=2, padding=2),
            LayerNorm(channels[0], eps=1e-6, data_format="channels_first")
        )
        self.prompt = nn.Sequential(nn.Linear(in_features=6, out_features=512),
                                    StarReLU(),
                                    nn.Linear(in_features=512, out_features=channels[0]),
                                    StarReLU(),
                                    )
        dp_rate = [x.item() for x in torch.linspace(0, drop_path, sum(layers))]
        for i in range(num_subnet):
            first_col = True if i == 0 else False
            self.add_module(f'subnet{str(i)}', SubNet(
                channels, layers, kernel_size, first_col,
                dp_rates=dp_rate, save_memory=save_memory,
                block_type=NAFBlock))

        channels.reverse()
        self.decoder_blocks = nn.ModuleList(
            [Decoder(depth=[1, 1, 1, 1], dim=channels, block_type=NAFBlock, kernel_size=3) for _ in
             range(3)])

        self.apply(self._init_weights)
        self.baseball = build_focalnet('focalnet_L_384_22k_fl4')
        self.baseball_adapter = nn.ModuleList()
        self.baseball_adapter.append(nn.Conv2d(192, 64, kernel_size=1))
        self.baseball_adapter.append(nn.Conv2d(192, 64, kernel_size=1))
        self.baseball_adapter.append(nn.Conv2d(192 * 2, 64 * 2, kernel_size=1))
        self.baseball_adapter.append(nn.Conv2d(192 * 4, 64 * 4, kernel_size=1))
        self.baseball_adapter.append(nn.Conv2d(192 * 8, 64 * 8, kernel_size=1))

    def forward(self, x_in, alpha, prompt=True):
        x_cls_out = []
        x_img_out = []
        c0, c1, c2, c3 = 0, 0, 0, 0
        interval = self.num_subnet // 4

        x_base, x_stem = self.baseball(x_in)
        c0, c1, c2, c3 = x_base
        x_stem = self.baseball_adapter[0](x_stem)
        c0, c1, c2, c3 = self.baseball_adapter[1](c0), \
                         self.baseball_adapter[2](c1), \
                         self.baseball_adapter[3](c2), \
                         self.baseball_adapter[4](c3)
        if prompt:
            prompt_alpha = self.prompt(alpha)
            prompt_alpha = prompt_alpha.unsqueeze(-1).unsqueeze(-1)
            x = prompt_alpha * x_stem
        else:
            x = x_stem
        for i in range(self.num_subnet):
            c0, c1, c2, c3 = getattr(self, f'subnet{str(i)}')(x, c0, c1, c2, c3)
            if i > (self.num_subnet - self.Loss_col):
                x_img_out.append(torch.cat([x_in, x_in], dim=-3) - self.decoder_blocks[-1](c3, c2, c1, c0))

        return x_cls_out, x_img_out

    def _init_weights(self, module):
        if isinstance(module, nn.Conv2d):
            trunc_normal_(module.weight, std=.02)
            nn.init.constant_(module.bias, 0)
        elif isinstance(module, nn.Linear):
            trunc_normal_(module.weight, std=.02)
            nn.init.constant_(module.bias, 0)
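The prompt path above reduces to feature-wise modulation: a 6-d conditioning vector is mapped to per-channel scales that multiply the stem features. A runnable stand-in (nn.ReLU substitutes for StarReLU here; the 64-channel stem and all shapes are illustrative):

import torch
import torch.nn as nn

prompt_mlp = nn.Sequential(nn.Linear(6, 512), nn.ReLU(), nn.Linear(512, 64))
alpha = torch.randn(2, 6)            # e.g. regressed degradation parameters
x_stem = torch.randn(2, 64, 96, 96)  # adapted FocalNet stem features
scale = prompt_mlp(alpha).unsqueeze(-1).unsqueeze(-1)  # (2, 64, 1, 1)
print((scale * x_stem).shape)        # torch.Size([2, 64, 96, 96])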
RDNet-main/RDNet-main/models/arch/__pycache__/RDnet_.cpython-38.pyc
ADDED
Binary file (8.23 kB)
RDNet-main/RDNet-main/models/arch/__pycache__/classifier.cpython-38.pyc
ADDED
Binary file (2.14 kB)
RDNet-main/RDNet-main/models/arch/__pycache__/focalnet.cpython-38.pyc
ADDED
Binary file (15.8 kB)
RDNet-main/RDNet-main/models/arch/__pycache__/modules_sig.cpython-38.pyc
ADDED
Binary file (11 kB)
RDNet-main/RDNet-main/models/arch/__pycache__/reverse_function.cpython-38.pyc
ADDED
Binary file (4.74 kB)
RDNet-main/RDNet-main/models/arch/classifier.py
ADDED
@@ -0,0 +1,49 @@
import torch.nn as nn
import timm
import torch
import torch.nn.functional as F

class PretrainedConvNext(nn.Module):
    def __init__(self, model_name='convnext_base', pretrained=True):
        super(PretrainedConvNext, self).__init__()
        # Load the ConvNext backbone from timm.
        # NOTE: the `pretrained` argument is not forwarded here, so timm weights are not loaded.
        self.model = timm.create_model(model_name, pretrained=False, num_classes=0)
        self.head = nn.Linear(768, 6)

    def forward(self, x):
        with torch.no_grad():
            cls_input = F.interpolate(x, size=(224, 224), mode='bilinear', align_corners=True)
        # Forward pass through the ConvNext model
        out = self.model(cls_input)
        out = self.head(out)
        # alpha, beta = out[..., :3].unsqueeze(-1).unsqueeze(-1), \
        #               out[..., 3:].unsqueeze(-1).unsqueeze(-1)

        # out = alpha * x + beta
        # print(out.shape)
        return out  # alpha, beta  # out[..., :3], out[..., 3:]

class PretrainedConvNext_e2e(nn.Module):
    def __init__(self, model_name='convnext_base', pretrained=True):
        super(PretrainedConvNext_e2e, self).__init__()
        # Load the pretrained ConvNext model from timm
        self.model = timm.create_model(model_name, pretrained=pretrained, num_classes=0)
        self.head = nn.Linear(768, 6)

    def forward(self, x):
        with torch.no_grad():
            cls_input = F.interpolate(x, size=(224, 224), mode='bilinear', align_corners=True)
        # Forward pass through the ConvNext model
        out = self.model(cls_input)
        out = self.head(out)
        alpha, beta = out[..., :3].unsqueeze(-1).unsqueeze(-1), \
                      out[..., 3:].unsqueeze(-1).unsqueeze(-1)

        out = alpha * x + beta
        # print(out.shape)
        return out

if __name__ == "__main__":
    model = PretrainedConvNext('convnext_small_in22k')
    print("Testing PretrainedConvNext model...")
    # Assuming a dummy input tensor of size (1, 3, 224, 224) similar to an image in the ImageNet dataset
    dummy_input = torch.randn(20, 3, 224, 224)
    output = model(dummy_input)  # forward returns a single (B, 6) tensor, not a pair
    print("Output shape:", output.shape)
    print("Test completed successfully.")
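The e2e variant turns the 6 regressed values into a per-channel affine correction, alpha * x + beta, over the RGB input. A self-contained illustration of that split (the numbers are made up for the demo):

import torch

out = torch.tensor([[1.1, 0.9, 1.0, 0.02, -0.01, 0.0]])  # (B, 6) head output
alpha = out[..., :3].unsqueeze(-1).unsqueeze(-1)          # (B, 3, 1, 1) gains
beta = out[..., 3:].unsqueeze(-1).unsqueeze(-1)           # (B, 3, 1, 1) offsets
x = torch.rand(1, 3, 8, 8)
print((alpha * x + beta).shape)                           # torch.Size([1, 3, 8, 8])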
RDNet-main/RDNet-main/models/arch/decode.py
ADDED
@@ -0,0 +1,36 @@
import torch.nn as nn

def make_layers(cfg, batch_norm=False):
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)

cfgs = {
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512],
}


class VGG(nn.Module):
    def __init__(self, features):
        super(VGG, self).__init__()
        self.features = features

    def forward(self, x):
        x = self.features(x)
        return x  # the original forward was missing this return statement

def _vgg(arch, cfg, batch_norm, pretrained, progress, **kwargs):
    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)
    return model

def encoder(pretrained=False, progress=True, **kwargs):
    return _vgg('vgg19', 'E', False, pretrained, progress, **kwargs)
RDNet-main/RDNet-main/models/arch/focalnet.py
ADDED
@@ -0,0 +1,589 @@
# --------------------------------------------------------
# FocalNet for Semantic Segmentation
# Copyright (c) 2022 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Jianwei Yang
# --------------------------------------------------------
import math
import time
import numpy as np
import json
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint as checkpoint
from timm.models.layers import DropPath, to_2tuple, trunc_normal_

class Mlp(nn.Module):
    """ Multilayer perceptron."""

    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x
class FocalModulation(nn.Module):
    """ Focal Modulation

    Args:
        dim (int): Number of input channels.
        proj_drop (float, optional): Dropout ratio of output. Default: 0.0
        focal_level (int): Number of focal levels
        focal_window (int): Focal window size at focal level 1
        focal_factor (int, default=2): Step to increase the focal window
        use_postln (bool, default=False): Whether use post-modulation layernorm
    """

    def __init__(self, dim, proj_drop=0., focal_level=2, focal_window=7, focal_factor=2, use_postln=False,
                 use_postln_in_modulation=False, normalize_modulator=False):

        super().__init__()
        self.dim = dim

        # specific args for focalv3
        self.focal_level = focal_level
        self.focal_window = focal_window
        self.focal_factor = focal_factor
        self.use_postln_in_modulation = use_postln_in_modulation
        self.normalize_modulator = normalize_modulator

        self.f = nn.Linear(dim, 2 * dim + (self.focal_level + 1), bias=True)
        self.h = nn.Conv2d(dim, dim, kernel_size=1, stride=1, padding=0, groups=1, bias=True)

        self.act = nn.GELU()
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)
        self.focal_layers = nn.ModuleList()

        if self.use_postln_in_modulation:
            self.ln = nn.LayerNorm(dim)

        for k in range(self.focal_level):
            kernel_size = self.focal_factor * k + self.focal_window
            self.focal_layers.append(
                nn.Sequential(
                    nn.Conv2d(dim, dim, kernel_size=kernel_size, stride=1, groups=dim,
                              padding=kernel_size // 2, bias=False),
                    nn.GELU(),
                )
            )

    def forward(self, x):
        """ Forward function.

        Args:
            x: input features with shape of (B, H, W, C)
        """
        B, nH, nW, C = x.shape
        x = self.f(x)
        x = x.permute(0, 3, 1, 2).contiguous()
        q, ctx, gates = torch.split(x, (C, C, self.focal_level + 1), 1)

        ctx_all = 0
        for l in range(self.focal_level):
            ctx = self.focal_layers[l](ctx)
            ctx_all = ctx_all + ctx * gates[:, l:l + 1]
        ctx_global = self.act(ctx.mean(2, keepdim=True).mean(3, keepdim=True))
        ctx_all = ctx_all + ctx_global * gates[:, self.focal_level:]
        if self.normalize_modulator:
            ctx_all = ctx_all / (self.focal_level + 1)

        x_out = q * self.h(ctx_all)
        x_out = x_out.permute(0, 2, 3, 1).contiguous()
        if self.use_postln_in_modulation:
            x_out = self.ln(x_out)
        x_out = self.proj(x_out)
        x_out = self.proj_drop(x_out)
        return x_out
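An illustrative shape walk-through of focal modulation: the f() projection emits a query (C channels), a context map (C channels), and focal_level + 1 gate maps, and the modulated output keeps the channels-last input shape (dim and sizes below are made up for the demo; assumes the module import path):

import torch
from models.arch.focalnet import FocalModulation

fm = FocalModulation(dim=8, focal_level=2, focal_window=7)
x = torch.randn(1, 16, 16, 8)  # channels-last (B, H, W, C)
print(fm(x).shape)             # torch.Size([1, 16, 16, 8])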
class FocalModulationBlock(nn.Module):
    """ Focal Modulation Block.

    Args:
        dim (int): Number of input channels.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
        drop (float, optional): Dropout rate. Default: 0.0
        drop_path (float, optional): Stochastic depth rate. Default: 0.0
        act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
        focal_level (int): number of focal levels
        focal_window (int): focal kernel size at level 1
    """

    def __init__(self, dim, mlp_ratio=4., drop=0., drop_path=0.,
                 act_layer=nn.GELU, norm_layer=nn.LayerNorm,
                 focal_level=2, focal_window=9,
                 use_postln=False, use_postln_in_modulation=False,
                 normalize_modulator=False,
                 use_layerscale=False,
                 layerscale_value=1e-4):
        super().__init__()
        self.dim = dim
        self.mlp_ratio = mlp_ratio
        self.focal_window = focal_window
        self.focal_level = focal_level
        self.use_postln = use_postln
        self.use_layerscale = use_layerscale

        self.norm1 = norm_layer(dim)
        self.modulation = FocalModulation(
            dim, focal_window=self.focal_window, focal_level=self.focal_level, proj_drop=drop,
            use_postln_in_modulation=use_postln_in_modulation,
            normalize_modulator=normalize_modulator,
        )

        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)

        self.H = None
        self.W = None

        self.gamma_1 = 1.0
        self.gamma_2 = 1.0
        if self.use_layerscale:
            self.gamma_1 = nn.Parameter(layerscale_value * torch.ones((dim)), requires_grad=True)
            self.gamma_2 = nn.Parameter(layerscale_value * torch.ones((dim)), requires_grad=True)

    def forward(self, x):
        """ Forward function.

        Args:
            x: Input feature, tensor size (B, H*W, C).
            H, W: Spatial resolution of the input feature.
        """
        B, L, C = x.shape
        H, W = self.H, self.W
        assert L == H * W, "input feature has wrong size"

        shortcut = x
        if not self.use_postln:
            x = self.norm1(x)
        x = x.view(B, H, W, C)

        # FM
        x = self.modulation(x).view(B, H * W, C)
        if self.use_postln:
            x = self.norm1(x)

        # FFN
        x = shortcut + self.drop_path(self.gamma_1 * x)

        if self.use_postln:
            x = x + self.drop_path(self.gamma_2 * self.norm2(self.mlp(x)))
        else:
            x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))

        return x
class BasicLayer(nn.Module):
    """ A basic focal modulation layer for one stage.

    Args:
        dim (int): Number of feature channels
        depth (int): Depths of this stage.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4.
        drop (float, optional): Dropout rate. Default: 0.0
        drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
        downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
        focal_level (int): Number of focal levels
        focal_window (int): Focal window size at focal level 1
        use_conv_embed (bool): Use overlapped convolution for patch embedding or not. Default: False
        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False
    """

    def __init__(self,
                 dim,
                 depth,
                 mlp_ratio=4.,
                 drop=0.,
                 drop_path=0.,
                 norm_layer=nn.LayerNorm,
                 downsample=None,
                 focal_window=9,
                 focal_level=2,
                 use_conv_embed=False,
                 use_postln=False,
                 use_postln_in_modulation=False,
                 normalize_modulator=False,
                 use_layerscale=False,
                 use_checkpoint=False
                 ):
        super().__init__()
        self.depth = depth
        self.use_checkpoint = use_checkpoint

        # build blocks
        self.blocks = nn.ModuleList([
            FocalModulationBlock(
                dim=dim,
                mlp_ratio=mlp_ratio,
                drop=drop,
                drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path,
                focal_window=focal_window,
                focal_level=focal_level,
                use_postln=use_postln,
                use_postln_in_modulation=use_postln_in_modulation,
                normalize_modulator=normalize_modulator,
                use_layerscale=use_layerscale,
                norm_layer=norm_layer)
            for i in range(depth)])

        # patch merging layer
        if downsample is not None:
            self.downsample = downsample(
                patch_size=2,
                in_chans=dim, embed_dim=2 * dim,
                use_conv_embed=use_conv_embed,
                norm_layer=norm_layer,
                is_stem=False
            )
        else:
            self.downsample = None

    def forward(self, x, H, W):
        """ Forward function.

        Args:
            x: Input feature, tensor size (B, H*W, C).
            H, W: Spatial resolution of the input feature.
        """

        for blk in self.blocks:
            blk.H, blk.W = H, W
            if self.use_checkpoint:
                x = checkpoint.checkpoint(blk, x)
            else:
                x = blk(x)
        if self.downsample is not None:
            x_reshaped = x.transpose(1, 2).view(x.shape[0], x.shape[-1], H, W)
            x_down = self.downsample(x_reshaped)
            x_down = x_down.flatten(2).transpose(1, 2)
            Wh, Ww = (H + 1) // 2, (W + 1) // 2
            return x, H, W, x_down, Wh, Ww
        else:
            return x, H, W, x, H, W
+
class PatchEmbed(nn.Module):
|
| 284 |
+
""" Image to Patch Embedding
|
| 285 |
+
|
| 286 |
+
Args:
|
| 287 |
+
patch_size (int): Patch token size. Default: 4.
|
| 288 |
+
in_chans (int): Number of input image channels. Default: 3.
|
| 289 |
+
embed_dim (int): Number of linear projection output channels. Default: 96.
|
| 290 |
+
norm_layer (nn.Module, optional): Normalization layer. Default: None
|
| 291 |
+
use_conv_embed (bool): Whether use overlapped convolution for patch embedding. Default: False
|
| 292 |
+
is_stem (bool): Is the stem block or not.
|
| 293 |
+
"""
|
| 294 |
+
|
| 295 |
+
def __init__(self, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None, use_conv_embed=False, is_stem=False):
|
| 296 |
+
super().__init__()
|
| 297 |
+
patch_size = to_2tuple(patch_size)
|
| 298 |
+
self.patch_size = patch_size
|
| 299 |
+
|
| 300 |
+
self.in_chans = in_chans
|
| 301 |
+
self.embed_dim = embed_dim
|
| 302 |
+
|
| 303 |
+
if use_conv_embed:
|
| 304 |
+
# if we choose to use conv embedding, then we treat the stem and non-stem differently
|
| 305 |
+
if is_stem:
|
| 306 |
+
kernel_size = 7; padding = 3; stride = 2
|
| 307 |
+
else:
|
| 308 |
+
kernel_size = 3; padding = 1; stride = 2
|
| 309 |
+
self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=kernel_size, stride=stride, padding=padding)
|
| 310 |
+
else:
|
| 311 |
+
self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
|
| 312 |
+
|
| 313 |
+
if norm_layer is not None:
|
| 314 |
+
self.norm = norm_layer(embed_dim)
|
| 315 |
+
else:
|
| 316 |
+
self.norm = None
|
| 317 |
+
|
| 318 |
+
def forward(self, x):
|
| 319 |
+
"""Forward function."""
|
| 320 |
+
_, _, H, W = x.size()
|
| 321 |
+
if W % self.patch_size[1] != 0:
|
| 322 |
+
x = F.pad(x, (0, self.patch_size[1] - W % self.patch_size[1]))
|
| 323 |
+
if H % self.patch_size[0] != 0:
|
| 324 |
+
x = F.pad(x, (0, 0, 0, self.patch_size[0] - H % self.patch_size[0]))
|
| 325 |
+
|
| 326 |
+
x = self.proj(x) # B C Wh Ww
|
| 327 |
+
if self.norm is not None:
|
| 328 |
+
Wh, Ww = x.size(2), x.size(3)
|
| 329 |
+
x = x.flatten(2).transpose(1, 2)
|
| 330 |
+
x = self.norm(x)
|
| 331 |
+
x = x.transpose(1, 2).view(-1, self.embed_dim, Wh, Ww)
|
| 332 |
+
|
| 333 |
+
return x
|
| 334 |
+
|
| 335 |
+
|
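With the default non-overlapping projection, patch embedding maps (B, 3, H, W) to (B, embed_dim, H/4, W/4), padding H and W up to multiples of the patch size first. Illustrative check (sizes are made up; assumes the module import path):

import torch
from models.arch.focalnet import PatchEmbed

pe = PatchEmbed(patch_size=4, in_chans=3, embed_dim=96)
print(pe(torch.randn(1, 3, 222, 222)).shape)  # torch.Size([1, 96, 56, 56]) after padding to 224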
class FocalNet(nn.Module):
    """ FocalNet backbone.

    Args:
        pretrain_img_size (int): Input image size for training the pretrained model,
            used in absolute postion embedding. Default 224.
        patch_size (int | tuple(int)): Patch size. Default: 4.
        in_chans (int): Number of input image channels. Default: 3.
        embed_dim (int): Number of linear projection output channels. Default: 96.
        depths (tuple[int]): Depths of each Swin Transformer stage.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4.
        drop_rate (float): Dropout rate.
        drop_path_rate (float): Stochastic depth rate. Default: 0.2.
        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
        patch_norm (bool): If True, add normalization after patch embedding. Default: True.
        out_indices (Sequence[int]): Output from which stages.
        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
            -1 means not freezing any parameters.
        focal_levels (Sequence[int]): Number of focal levels at four stages
        focal_windows (Sequence[int]): Focal window sizes at first focal level at four stages
        use_conv_embed (bool): Whether use overlapped convolution for patch embedding
        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
    """

    def __init__(self,
                 pretrain_img_size=1600,
                 patch_size=4,
                 in_chans=3,
                 embed_dim=96,
                 depths=[2, 2, 6, 2],
                 mlp_ratio=4.,
                 drop_rate=0.,
                 drop_path_rate=0.3,  # 0.3 or 0.4 works better for large+ models
                 norm_layer=nn.LayerNorm,
                 patch_norm=True,
                 out_indices=(0, 1, 2, 3),
                 frozen_stages=-1,
                 focal_levels=[3, 3, 3, 3],
                 focal_windows=[3, 3, 3, 3],
                 use_conv_embed=False,
                 use_postln=False,
                 use_postln_in_modulation=False,
                 use_layerscale=False,
                 normalize_modulator=False,
                 use_checkpoint=False,
                 ):
        super().__init__()

        self.pretrain_img_size = pretrain_img_size
        self.num_layers = len(depths)
        self.embed_dim = embed_dim
        self.patch_norm = patch_norm
        self.out_indices = out_indices
        self.frozen_stages = frozen_stages

        # split image into non-overlapping patches
        self.patch_embed = PatchEmbed(
            patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim,
            norm_layer=norm_layer if self.patch_norm else None,
            use_conv_embed=use_conv_embed, is_stem=True)

        self.pos_drop = nn.Dropout(p=drop_rate)

        # stochastic depth
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule

        # build layers
        self.layers = nn.ModuleList()
        for i_layer in range(self.num_layers):
            layer = BasicLayer(
                dim=int(embed_dim * 2 ** i_layer),
                depth=depths[i_layer],
                mlp_ratio=mlp_ratio,
                drop=drop_rate,
                drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],
                norm_layer=norm_layer,
                downsample=PatchEmbed if (i_layer < self.num_layers - 1) else None,
                focal_window=focal_windows[i_layer],
                focal_level=focal_levels[i_layer],
                use_conv_embed=use_conv_embed,
                use_postln=use_postln,
                use_postln_in_modulation=use_postln_in_modulation,
                normalize_modulator=normalize_modulator,
                use_layerscale=use_layerscale,
                use_checkpoint=use_checkpoint)
            self.layers.append(layer)

        num_features = [int(embed_dim * 2 ** i) for i in range(self.num_layers)]
        self.num_features = num_features

        # add a norm layer for each output
        for i_layer in out_indices:
            layer = norm_layer(num_features[i_layer])
            layer_name = f'norm{i_layer}'
            self.add_module(layer_name, layer)

        self._freeze_stages()

    def _freeze_stages(self):
        if self.frozen_stages >= 0:
            self.patch_embed.eval()
            for param in self.patch_embed.parameters():
                param.requires_grad = False

        if self.frozen_stages >= 2:
            self.pos_drop.eval()
            for i in range(0, self.frozen_stages - 1):
                m = self.layers[i]
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

    def init_weights(self, pretrained=None):
        """Initialize the weights in backbone.

        Args:
            pretrained (str, optional): Path to pre-trained weights.
                Defaults to None.
        """

        def _init_weights(m):
            if isinstance(m, nn.Linear):
                trunc_normal_(m.weight, std=.02)
                if isinstance(m, nn.Linear) and m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.LayerNorm):
                nn.init.constant_(m.bias, 0)
                nn.init.constant_(m.weight, 1.0)

        if isinstance(pretrained, str):
            self.apply(_init_weights)
            # NOTE: get_root_logger and load_checkpoint come from the mmseg/mmcv
            # tooling of the original FocalNet segmentation repo; they are not
            # imported in this file, so the string-path branch is unresolved here.
            logger = get_root_logger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            self.apply(_init_weights)
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        """Forward function."""
        x_emb = self.patch_embed(x)
        Wh, Ww = x_emb.size(2), x_emb.size(3)

        x = x_emb.flatten(2).transpose(1, 2)
        x = self.pos_drop(x)

        outs = []
        for i in range(self.num_layers):
            layer = self.layers[i]
            x_out, H, W, x, Wh, Ww = layer(x, Wh, Ww)
            if i in self.out_indices:
                norm_layer = getattr(self, f'norm{i}')
                x_out = norm_layer(x_out)

                out = x_out.view(-1, H, W, self.num_features[i]).permute(0, 3, 1, 2).contiguous()
                outs.append(out)
        return outs, x_emb

    def train(self, mode=True):
        """Convert the model into training mode while keep layers freezed."""
        super(FocalNet, self).train(mode)
        self._freeze_stages()
| 501 |
+
def build_focalnet(modelname, **kw):
|
| 502 |
+
assert modelname in [
|
| 503 |
+
'focalnet_L_384_22k',
|
| 504 |
+
'focalnet_L_384_22k_fl4',
|
| 505 |
+
'focalnet_XL_384_22k',
|
| 506 |
+
'focalnet_XL_384_22k_fl4',
|
| 507 |
+
'focalnet_H_224_22k',
|
| 508 |
+
'focalnet_H_224_22k_fl4',
|
| 509 |
+
]
|
| 510 |
+
|
| 511 |
+
if 'focal_levels' in kw:
|
| 512 |
+
kw['focal_levels'] = [kw['focal_levels']] * 4
|
| 513 |
+
|
| 514 |
+
if 'focal_windows' in kw:
|
| 515 |
+
kw['focal_windows'] = [kw['focal_windows']] * 4
|
| 516 |
+
|
| 517 |
+
model_para_dict = {
|
| 518 |
+
'focalnet_L_384_22k': dict(
|
| 519 |
+
embed_dim=192,
|
| 520 |
+
depths=[ 2, 2, 18, 2 ],
|
| 521 |
+
focal_levels=kw.get('focal_levels', [3, 3, 3, 3]),
|
| 522 |
+
focal_windows=kw.get('focal_windows', [5, 5, 5, 5]),
|
| 523 |
+
use_conv_embed=True,
|
| 524 |
+
use_postln=True,
|
| 525 |
+
use_postln_in_modulation=False,
|
| 526 |
+
use_layerscale=True,
|
| 527 |
+
normalize_modulator=False,
|
| 528 |
+
),
|
| 529 |
+
'focalnet_L_384_22k_fl4': dict(
|
| 530 |
+
embed_dim=192,
|
| 531 |
+
depths=[ 2, 2, 18, 2 ],
|
| 532 |
+
focal_levels=kw.get('focal_levels', [4, 4, 4, 4]),
|
| 533 |
+
focal_windows=kw.get('focal_windows', [3, 3, 3, 3]),
|
| 534 |
+
use_conv_embed=True,
|
| 535 |
+
use_postln=True,
|
| 536 |
+
use_postln_in_modulation=False,
|
| 537 |
+
use_layerscale=True,
|
| 538 |
+
normalize_modulator=True,
|
| 539 |
+
),
|
| 540 |
+
'focalnet_XL_384_22k': dict(
|
| 541 |
+
embed_dim=256,
|
| 542 |
+
depths=[ 2, 2, 18, 2 ],
|
| 543 |
+
focal_levels=kw.get('focal_levels', [3, 3, 3, 3]),
|
| 544 |
+
focal_windows=kw.get('focal_windows', [5, 5, 5, 5]),
|
| 545 |
+
use_conv_embed=True,
|
| 546 |
+
use_postln=True,
|
| 547 |
+
use_postln_in_modulation=False,
|
| 548 |
+
use_layerscale=True,
|
| 549 |
+
normalize_modulator=False,
|
| 550 |
+
),
|
| 551 |
+
'focalnet_XL_384_22k_fl4': dict(
|
| 552 |
+
embed_dim=256,
|
| 553 |
+
depths=[ 2, 2, 18, 2 ],
|
| 554 |
+
focal_levels=kw.get('focal_levels', [4, 4, 4, 4]),
|
| 555 |
+
focal_windows=kw.get('focal_windows', [3, 3, 3, 3]),
|
| 556 |
+
use_conv_embed=True,
|
| 557 |
+
use_postln=True,
|
| 558 |
+
use_postln_in_modulation=False,
|
| 559 |
+
use_layerscale=True,
|
| 560 |
+
normalize_modulator=True,
|
| 561 |
+
),
|
| 562 |
+
'focalnet_H_224_22k': dict(
|
| 563 |
+
embed_dim=352,
|
| 564 |
+
depths=[ 2, 2, 18, 2 ],
|
| 565 |
+
focal_levels=kw.get('focal_levels', [3, 3, 3, 3]),
|
| 566 |
+
focal_windows=kw.get('focal_windows', [3, 3, 3, 3]),
|
| 567 |
+
use_conv_embed=True,
|
| 568 |
+
use_postln=True,
|
| 569 |
+
use_layerscale=True,
|
| 570 |
+
use_postln_in_modulation=True,
|
| 571 |
+
normalize_modulator=False,
|
| 572 |
+
),
|
| 573 |
+
'focalnet_H_224_22k_fl4': dict(
|
| 574 |
+
embed_dim=352,
|
| 575 |
+
depths=[ 2, 2, 18, 2 ],
|
| 576 |
+
focal_levels=kw.get('focal_levels', [4, 4, 4, 4]),
|
| 577 |
+
focal_windows=kw.get('focal_windows', [3, 3, 3, 3]),
|
| 578 |
+
use_conv_embed=True,
|
| 579 |
+
use_postln=True,
|
| 580 |
+
use_postln_in_modulation=True,
|
| 581 |
+
use_layerscale=True,
|
| 582 |
+
normalize_modulator=False,
|
| 583 |
+
),
|
| 584 |
+
}
|
| 585 |
+
|
| 586 |
+
kw_cgf = model_para_dict[modelname]
|
| 587 |
+
kw_cgf.update(kw)
|
| 588 |
+
model = FocalNet(**kw_cgf)
|
| 589 |
+
return model
|
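
For orientation, a minimal usage sketch (not part of the diff): building one of the registered FocalNet variants and inspecting its multi-scale outputs. The model name, overrides, and input size are illustrative, and no pretrained weights are loaded here.

import torch

backbone = build_focalnet('focalnet_L_384_22k', focal_levels=3, focal_windows=5)
backbone.eval()
with torch.no_grad():
    feats, x_emb = backbone(torch.randn(1, 3, 384, 384))
# one (N, C, H, W) feature map per stage listed in out_indices,
# plus the patch-embedded input x_emb
for f in feats:
    print(f.shape)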
RDNet-main/RDNet-main/models/arch/modules_sig.py
ADDED
@@ -0,0 +1,304 @@
# --------------------------------------------------------
# Reversible Column Networks
# Copyright (c) 2022 Megvii Inc.
# Licensed under The Apache License 2.0 [see LICENSE for details]
# Written by Yuxuan Cai
# --------------------------------------------------------

import torch
import torch.nn as nn
import torch.nn.functional as F
from timm.models.layers import DropPath


class LayerNormFunction(torch.autograd.Function):

    @staticmethod
    def forward(ctx, x, weight, bias, eps):
        ctx.eps = eps
        N, C, H, W = x.size()
        mu = x.mean(1, keepdim=True)
        var = (x - mu).pow(2).mean(1, keepdim=True)
        y = (x - mu) / (var + eps).sqrt()
        ctx.save_for_backward(y, var, weight)
        y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1)
        return y

    @staticmethod
    def backward(ctx, grad_output):
        eps = ctx.eps

        N, C, H, W = grad_output.size()
        y, var, weight = ctx.saved_tensors
        g = grad_output * weight.view(1, C, 1, 1)
        mean_g = g.mean(dim=1, keepdim=True)

        mean_gy = (g * y).mean(dim=1, keepdim=True)
        gx = 1. / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g)
        return gx, (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0), grad_output.sum(dim=3).sum(dim=2).sum(
            dim=0), None


class LayerNorm2d(nn.Module):

    def __init__(self, channels, eps=1e-6):
        super(LayerNorm2d, self).__init__()
        self.register_parameter('weight', nn.Parameter(torch.ones(channels)))
        self.register_parameter('bias', nn.Parameter(torch.zeros(channels)))
        self.eps = eps

    def forward(self, x):
        return LayerNormFunction.apply(x, self.weight, self.bias, self.eps)


class SimpleGate(nn.Module):
    def forward(self, x):
        x1, x2 = x.chunk(2, dim=1)
        return x1 * x2


class NAFBlock(nn.Module):
    def __init__(self, dim, expand_dim, out_dim, kernel_size=3, layer_scale_init_value=1e-6, drop_path=0.):
        super().__init__()
        drop_out_rate = 0.
        dw_channel = expand_dim
        self.conv1 = nn.Conv2d(in_channels=dim, out_channels=dw_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True)
        # note: padding=1 assumes the default kernel_size of 3
        self.conv2 = nn.Conv2d(in_channels=dw_channel, out_channels=dw_channel, kernel_size=kernel_size, padding=1, stride=1, groups=dw_channel,
                               bias=True)
        self.conv3 = nn.Conv2d(in_channels=dw_channel // 2, out_channels=dim, kernel_size=1, padding=0, stride=1, groups=1, bias=True)

        # Simplified Channel Attention
        self.sca = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channels=dw_channel // 2, out_channels=dw_channel // 2, kernel_size=1, padding=0, stride=1,
                      groups=1, bias=True),
        )

        # SimpleGate
        self.sg = SimpleGate()

        ffn_channel = expand_dim
        self.conv4 = nn.Conv2d(in_channels=dim, out_channels=ffn_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True)
        self.conv5 = nn.Conv2d(in_channels=ffn_channel // 2, out_channels=out_dim, kernel_size=1, padding=0, stride=1, groups=1, bias=True)

        self.norm1 = LayerNorm2d(dim)
        self.norm2 = LayerNorm2d(dim)

        self.dropout1 = nn.Dropout(drop_out_rate) if drop_out_rate > 0. else nn.Identity()
        self.dropout2 = nn.Dropout(drop_out_rate) if drop_out_rate > 0. else nn.Identity()

        self.beta = nn.Parameter(torch.ones((1, dim, 1, 1)) * layer_scale_init_value, requires_grad=True)
        self.gamma = nn.Parameter(torch.ones((1, dim, 1, 1)) * layer_scale_init_value, requires_grad=True)

    def forward(self, inp):
        x = inp

        x = self.norm1(x)

        x = self.conv1(x)
        x = self.conv2(x)
        x = self.sg(x)
        x = x * self.sca(x)
        x = self.conv3(x)

        x = self.dropout1(x)

        y = inp + x * self.beta

        x = self.conv4(self.norm2(y))
        x = self.sg(x)
        x = self.conv5(x)

        x = self.dropout2(x)

        return y + x * self.gamma


class UpSampleConvnext(nn.Module):
    def __init__(self, ratio, inchannel, outchannel):
        super().__init__()
        self.ratio = ratio
        self.channel_reschedule = nn.Sequential(
            # LayerNorm(inchannel, eps=1e-6, data_format="channels_last"),
            nn.Linear(inchannel, outchannel),
            LayerNorm(outchannel, eps=1e-6, data_format="channels_last"))
        self.upsample = nn.Upsample(scale_factor=2 ** ratio, mode='bilinear')

    def forward(self, x):
        x = x.permute(0, 2, 3, 1)
        x = self.channel_reschedule(x)
        x = x.permute(0, 3, 1, 2)

        return self.upsample(x)


class LayerNorm(nn.Module):
    r""" LayerNorm that supports two data formats: channels_last (default) or channels_first.
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
    shape (batch_size, height, width, channels) while channels_first corresponds to inputs
    with shape (batch_size, channels, height, width).
    """
    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_first", elementwise_affine=True):
        super().__init__()
        self.elementwise_affine = elementwise_affine
        if elementwise_affine:
            self.weight = nn.Parameter(torch.ones(normalized_shape))
            self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.data_format = data_format
        if self.data_format not in ["channels_last", "channels_first"]:
            raise NotImplementedError
        self.normalized_shape = (normalized_shape, )

    def forward(self, x):
        if self.data_format == "channels_last":
            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        elif self.data_format == "channels_first":
            u = x.mean(1, keepdim=True)
            s = (x - u).pow(2).mean(1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.eps)
            if self.elementwise_affine:
                x = self.weight[:, None, None] * x + self.bias[:, None, None]
            return x


class ConvNextBlock(nn.Module):
    r""" ConvNeXt Block. There are two equivalent implementations:
    (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
    (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
    We use (2) as we find it slightly faster in PyTorch.

    Args:
        in_channel (int): Number of input channels.
        hidden_dim (int): Number of hidden channels in the pointwise expansion.
        out_channel (int): Number of output channels.
        kernel_size (int): Kernel size of the depthwise conv. Default: 3.
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
        drop_path (float): Stochastic depth rate. Default: 0.0.
    """
    def __init__(self, in_channel, hidden_dim, out_channel, kernel_size=3, layer_scale_init_value=1e-6, drop_path=0.0):
        super().__init__()
        self.dwconv = nn.Conv2d(in_channel, in_channel, kernel_size=kernel_size, padding=(kernel_size - 1) // 2, groups=in_channel)  # depthwise conv
        self.norm = nn.LayerNorm(in_channel, eps=1e-6)
        self.pwconv1 = nn.Linear(in_channel, hidden_dim)  # pointwise/1x1 convs, implemented with linear layers
        self.act = nn.GELU()
        self.pwconv2 = nn.Linear(hidden_dim, out_channel)
        self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((out_channel)),
                                  requires_grad=True) if layer_scale_init_value > 0 else None
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()

    def forward(self, x):
        input = x
        x = self.dwconv(x)
        x = x.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)
        x = self.norm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.pwconv2(x)
        if self.gamma is not None:
            x = self.gamma * x
        x = x.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)

        x = input + self.drop_path(x)
        return x


class Decoder(nn.Module):
    def __init__(self, depth=[2, 2, 2, 2], dim=[112, 72, 40, 24], block_type=None, kernel_size=3) -> None:
        super().__init__()
        self.depth = depth
        self.dim = dim
        self.block_type = block_type
        self._build_decode_layer(dim, depth, kernel_size)
        self.pixelshuffle = nn.PixelShuffle(2)
        # self.star_relu = StarReLU()
        self.projback_ = nn.Sequential(
            nn.Conv2d(
                in_channels=dim[-1],
                out_channels=2 ** 2 * 3, kernel_size=1),
            nn.PixelShuffle(2)
        )
        self.projback_2 = nn.Sequential(
            nn.Conv2d(
                in_channels=dim[-1],
                out_channels=2 ** 2 * 3, kernel_size=1),
            nn.PixelShuffle(2)
        )

    def _build_decode_layer(self, dim, depth, kernel_size):
        normal_layers = nn.ModuleList()
        upsample_layers = nn.ModuleList()
        proj_layers = nn.ModuleList()

        norm_layer = LayerNorm

        # stages 0-2: decoding path for the clean (transmission) branch
        for i in range(1, len(dim)):
            module = [self.block_type(dim[i], dim[i], dim[i], kernel_size) for _ in range(depth[i])]
            normal_layers.append(nn.Sequential(*module))
            upsample_layers.append(nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True))
            proj_layers.append(nn.Sequential(
                nn.Conv2d(dim[i - 1], dim[i], 1, 1),
                norm_layer(dim[i]),
                # StarReLU()
                nn.GELU()
            ))
        # stages 3-5: decoding path for the reflection branch
        for i in range(1, len(dim)):
            module = [self.block_type(dim[i], dim[i], dim[i], kernel_size) for _ in range(depth[i])]
            normal_layers.append(nn.Sequential(*module))
            upsample_layers.append(nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True))
            proj_layers.append(nn.Sequential(
                nn.Conv2d(dim[i - 1], dim[i], 1, 1),
                norm_layer(dim[i]),
            ))
        self.normal_layers = normal_layers
        self.upsample_layers = upsample_layers
        self.proj_layers = proj_layers

    def _forward_stage(self, stage, x):
        x = self.proj_layers[stage](x)
        x = self.upsample_layers[stage](x)
        return self.normal_layers[stage](x)

    def forward(self, c3, c2, c1, c0):
        c0_clean, c0_ref = c0, c0
        c1_clean, c1_ref = c1, c1
        c2_clean, c2_ref = c2, c2
        c3_clean, c3_ref = c3, c3
        x_clean = self._forward_stage(0, c3_clean) * c2_clean
        x_clean = self._forward_stage(1, x_clean) * c1_clean
        x_clean = self._forward_stage(2, x_clean) * c0_clean
        x_clean = self.projback_(x_clean)

        x_ref = self._forward_stage(3, c3_ref) * c2_ref
        x_ref = self._forward_stage(4, x_ref) * c1_ref
        x_ref = self._forward_stage(5, x_ref) * c0_ref
        x_ref = self.projback_2(x_ref)

        x = torch.cat((x_clean, x_ref), dim=1)
        return x


class SimDecoder(nn.Module):
    def __init__(self, in_channel, encoder_stride) -> None:
        super().__init__()
        self.projback = nn.Sequential(
            LayerNorm(in_channel),
            nn.Conv2d(
                in_channels=in_channel,
                out_channels=encoder_stride ** 2 * 3, kernel_size=1),
            nn.PixelShuffle(encoder_stride),
        )

    def forward(self, c3):
        return self.projback(c3)


class StarReLU(nn.Module):
    """
    StarReLU: s * relu(x) ** 2 + b
    """
    def __init__(self, scale_value=1.0, bias_value=0.0,
                 scale_learnable=True, bias_learnable=True,
                 mode=None, inplace=True):
        super().__init__()
        self.inplace = inplace
        self.relu = nn.ReLU(inplace=inplace)
        self.scale = nn.Parameter(scale_value * torch.ones(1),
                                  requires_grad=scale_learnable)
        self.bias = nn.Parameter(bias_value * torch.ones(1),
                                 requires_grad=bias_learnable)

    def forward(self, x):
        return self.scale * self.relu(x) ** 2 + self.bias
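
A small shape sketch (illustrative, not part of the diff): SimpleGate halves the channel count by construction, so NAFBlock needs out_dim == dim for its residual additions to line up.

import torch

gate = SimpleGate()
print(gate(torch.randn(2, 48, 8, 8)).shape)   # torch.Size([2, 24, 8, 8])

blk = NAFBlock(dim=24, expand_dim=48, out_dim=24)
print(blk(torch.randn(2, 24, 32, 32)).shape)  # torch.Size([2, 24, 32, 32])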
RDNet-main/RDNet-main/models/arch/reverse_function.py
ADDED
@@ -0,0 +1,153 @@
import torch
from typing import Any, List, Tuple


def get_gpu_states(fwd_gpu_devices) -> List[torch.Tensor]:
    fwd_gpu_states = []
    for device in fwd_gpu_devices:
        with torch.cuda.device(device):
            fwd_gpu_states.append(torch.cuda.get_rng_state())

    return fwd_gpu_states


def get_gpu_device(*args):
    fwd_gpu_devices = list(set(arg.get_device() for arg in args
                               if isinstance(arg, torch.Tensor) and arg.is_cuda))
    return fwd_gpu_devices


def set_device_states(fwd_cpu_state, devices, states) -> None:
    torch.set_rng_state(fwd_cpu_state)
    for device, state in zip(devices, states):
        with torch.cuda.device(device):
            torch.cuda.set_rng_state(state)


def detach_and_grad(inputs: Tuple[Any, ...]) -> Tuple[torch.Tensor, ...]:
    if isinstance(inputs, tuple):
        out = []
        for inp in inputs:
            if not isinstance(inp, torch.Tensor):
                out.append(inp)
                continue

            x = inp.detach()
            x.requires_grad = True
            out.append(x)
        return tuple(out)
    else:
        raise RuntimeError(
            "Only tuple of tensors is supported. Got unsupported input type: ", type(inputs).__name__)


def get_cpu_and_gpu_states(gpu_devices):
    return torch.get_rng_state(), get_gpu_states(gpu_devices)


class ReverseFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, run_functions, alpha, *args):
        l0, l1, l2, l3 = run_functions
        alpha0, alpha1, alpha2, alpha3 = alpha
        ctx.run_functions = run_functions
        ctx.alpha = alpha
        ctx.preserve_rng_state = True

        ctx.gpu_autocast_kwargs = {"enabled": torch.is_autocast_enabled(),
                                   "dtype": torch.get_autocast_gpu_dtype(),
                                   "cache_enabled": torch.is_autocast_cache_enabled()}
        ctx.cpu_autocast_kwargs = {"enabled": torch.is_autocast_cpu_enabled(),
                                   "dtype": torch.get_autocast_cpu_dtype(),
                                   "cache_enabled": torch.is_autocast_cache_enabled()}

        assert len(args) == 5
        [x, c0, c1, c2, c3] = args
        if type(c0) == int:
            ctx.first_col = True
        else:
            ctx.first_col = False
        with torch.no_grad():
            gpu_devices = get_gpu_device(*args)
            ctx.gpu_devices = gpu_devices
            ctx.cpu_states_0, ctx.gpu_states_0 = get_cpu_and_gpu_states(gpu_devices)
            c0 = l0(x, c1) + c0 * alpha0
            ctx.cpu_states_1, ctx.gpu_states_1 = get_cpu_and_gpu_states(gpu_devices)
            c1 = l1(c0, c2) + c1 * alpha1
            ctx.cpu_states_2, ctx.gpu_states_2 = get_cpu_and_gpu_states(gpu_devices)
            c2 = l2(c1, c3) + c2 * alpha2
            ctx.cpu_states_3, ctx.gpu_states_3 = get_cpu_and_gpu_states(gpu_devices)
            c3 = l3(c2, None) + c3 * alpha3
        ctx.save_for_backward(x, c0, c1, c2, c3)
        return x, c0, c1, c2, c3

    @staticmethod
    def backward(ctx, *grad_outputs):
        x, c0, c1, c2, c3 = ctx.saved_tensors
        l0, l1, l2, l3 = ctx.run_functions
        alpha0, alpha1, alpha2, alpha3 = ctx.alpha
        gx_right, g0_right, g1_right, g2_right, g3_right = grad_outputs
        (x, c0, c1, c2, c3) = detach_and_grad((x, c0, c1, c2, c3))

        with torch.enable_grad(), \
                torch.random.fork_rng(devices=ctx.gpu_devices, enabled=ctx.preserve_rng_state), \
                torch.cuda.amp.autocast(**ctx.gpu_autocast_kwargs), \
                torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):

            g3_up = g3_right
            g3_left = g3_up * alpha3  ## shortcut
            set_device_states(ctx.cpu_states_3, ctx.gpu_devices, ctx.gpu_states_3)
            oup3 = l3(c2, None)
            torch.autograd.backward(oup3, g3_up, retain_graph=True)
            with torch.no_grad():
                c3_left = (1 / alpha3) * (c3 - oup3)  ## feature reverse
            g2_up = g2_right + c2.grad
            g2_left = g2_up * alpha2  ## shortcut

            (c3_left,) = detach_and_grad((c3_left,))
            set_device_states(ctx.cpu_states_2, ctx.gpu_devices, ctx.gpu_states_2)
            oup2 = l2(c1, c3_left)
            torch.autograd.backward(oup2, g2_up, retain_graph=True)
            c3_left.requires_grad = False
            cout3 = c3_left * alpha3  ## alpha3 update
            torch.autograd.backward(cout3, g3_up)

            with torch.no_grad():
                c2_left = (1 / alpha2) * (c2 - oup2)  ## feature reverse
            g3_left = g3_left + c3_left.grad if c3_left.grad is not None else g3_left
            g1_up = g1_right + c1.grad
            g1_left = g1_up * alpha1  ## shortcut

            (c2_left,) = detach_and_grad((c2_left,))
            set_device_states(ctx.cpu_states_1, ctx.gpu_devices, ctx.gpu_states_1)
            oup1 = l1(c0, c2_left)
            torch.autograd.backward(oup1, g1_up, retain_graph=True)
            c2_left.requires_grad = False
            cout2 = c2_left * alpha2  ## alpha2 update
            torch.autograd.backward(cout2, g2_up)

            with torch.no_grad():
                c1_left = (1 / alpha1) * (c1 - oup1)  ## feature reverse
            g0_up = g0_right + c0.grad
            g0_left = g0_up * alpha0  ## shortcut
            g2_left = g2_left + c2_left.grad if c2_left.grad is not None else g2_left  ## Fusion

            (c1_left,) = detach_and_grad((c1_left,))
            set_device_states(ctx.cpu_states_0, ctx.gpu_devices, ctx.gpu_states_0)
            oup0 = l0(x, c1_left)
            torch.autograd.backward(oup0, g0_up, retain_graph=True)
            c1_left.requires_grad = False
            cout1 = c1_left * alpha1  ## alpha1 update
            torch.autograd.backward(cout1, g1_up)

            with torch.no_grad():
                c0_left = (1 / alpha0) * (c0 - oup0)  ## feature reverse
            gx_up = x.grad  ## Fusion
            g1_left = g1_left + c1_left.grad if c1_left.grad is not None else g1_left  ## Fusion
            c0_left.requires_grad = False
            cout0 = c0_left * alpha0  ## alpha0 update
            torch.autograd.backward(cout0, g0_up)

        if ctx.first_col:
            return None, None, gx_up, None, None, None, None
        else:
            return None, None, gx_up, g0_left, g1_left, g2_left, g3_left
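
The identity this Function exploits, as a toy sketch (illustrative, not part of the diff): each column computes c_new = l(x, c_other) + alpha * c_old, so the backward pass can recover c_old exactly via the "feature reverse" step instead of storing activations.

import torch

alpha = 0.5
l = lambda x, c: torch.tanh(x + c)              # stand-in for a column block
x, c_other, c_old = torch.randn(3, 4, 4).unbind(0)

c_new = l(x, c_other) + alpha * c_old           # forward update
c_rec = (c_new - l(x, c_other)) / alpha         # "feature reverse" in backward
print(torch.allclose(c_old, c_rec, atol=1e-6))  # True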
RDNet-main/RDNet-main/models/arch/vgg.py
ADDED
@@ -0,0 +1,90 @@
from collections import namedtuple

import torch
from torchvision import models


class Vgg16(torch.nn.Module):
    def __init__(self, requires_grad=False):
        super(Vgg16, self).__init__()
        vgg_pretrained_features = models.vgg16(pretrained=True).features
        self.slice1 = torch.nn.Sequential()
        self.slice2 = torch.nn.Sequential()
        self.slice3 = torch.nn.Sequential()
        self.slice4 = torch.nn.Sequential()
        for x in range(4):
            self.slice1.add_module(str(x), vgg_pretrained_features[x])
        for x in range(4, 9):
            self.slice2.add_module(str(x), vgg_pretrained_features[x])
        for x in range(9, 16):
            self.slice3.add_module(str(x), vgg_pretrained_features[x])
        for x in range(16, 23):
            self.slice4.add_module(str(x), vgg_pretrained_features[x])
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, X):
        h = self.slice1(X)
        h_relu1_2 = h
        h = self.slice2(h)
        h_relu2_2 = h
        h = self.slice3(h)
        h_relu3_3 = h
        h = self.slice4(h)
        h_relu4_3 = h
        vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3'])
        out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3)
        return out


class Vgg19(torch.nn.Module):
    def __init__(self, requires_grad=False):
        super(Vgg19, self).__init__()
        self.vgg_pretrained_features = models.vgg19(pretrained=True).features
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, X, indices=None):
        if indices is None:
            indices = [2, 7, 12, 21, 30]
        out = []
        # run the feature stack layer by layer, collecting the requested outputs
        for i in range(indices[-1]):
            X = self.vgg_pretrained_features[i](X)
            if (i + 1) in indices:
                out.append(X)

        return out


if __name__ == '__main__':
    vgg = Vgg19()
    import ipdb

    ipdb.set_trace()
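
Illustrative usage (not part of the diff; downloads torchvision's pretrained VGG-19 weights on first run): the five default feature maps have 64 + 128 + 256 + 512 + 512 = 1472 channels, which is where the hypercolumn width used elsewhere in this repo comes from.

import torch

vgg = Vgg19(requires_grad=False).eval()
with torch.no_grad():
    feats = vgg(torch.randn(1, 3, 224, 224))    # indices default to [2, 7, 12, 21, 30]
print([f.shape[1] for f in feats])              # [64, 128, 256, 512, 512]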
RDNet-main/RDNet-main/models/base_model.py
ADDED
@@ -0,0 +1,71 @@
import os
import torch
import util.util as util


class BaseModel:
    def name(self):
        return self.__class__.__name__.lower()

    def initialize(self, opt):
        self.opt = opt
        self.gpu_ids = opt.gpu_ids
        self.isTrain = opt.isTrain
        self.Tensor = torch.cuda.FloatTensor if self.gpu_ids else torch.Tensor
        last_split = opt.checkpoints_dir.split('/')[-1]
        if opt.resume and last_split != 'checkpoints' and (last_split != opt.name or opt.supp_eval):
            self.save_dir = opt.checkpoints_dir
            self.model_save_dir = os.path.join(opt.checkpoints_dir.replace(opt.checkpoints_dir.split('/')[-1], ''),
                                               opt.name)
        else:
            self.save_dir = os.path.join(opt.checkpoints_dir, opt.name)
            self.model_save_dir = os.path.join(opt.checkpoints_dir, opt.name)
        self._count = 0

    def set_input(self, input):
        self.input = input

    def forward(self, mode='train'):
        pass

    # used at test time, no backprop
    def test(self):
        pass

    def get_image_paths(self):
        pass

    def optimize_parameters(self):
        pass

    def get_current_visuals(self):
        return self.input

    def get_current_errors(self):
        return {}

    def print_optimizer_param(self):
        print(self.optimizers[-1])

    def save(self, label=None):
        epoch = self.epoch
        iterations = self.iterations

        if label is None:
            model_name = os.path.join(self.model_save_dir, self.opt.name + '_%03d_%08d.pt' % (epoch, iterations))
        else:
            model_name = os.path.join(self.model_save_dir, self.opt.name + '_' + label + '.pt')

        torch.save(self.state_dict(), model_name)

    def save_eval(self, label=None):
        model_name = os.path.join(self.model_save_dir, label + '.pt')

        torch.save(self.state_dict_eval(), model_name)

    def _init_optimizer(self, optimizers):
        self.optimizers = optimizers
        for optimizer in self.optimizers:
            util.set_opt_param(optimizer, 'initial_lr', self.opt.lr)
            util.set_opt_param(optimizer, 'weight_decay', self.opt.wd)
RDNet-main/RDNet-main/models/cls_model_eval_nocls_reg.py
ADDED
@@ -0,0 +1,517 @@
| 1 |
+
import torch
|
| 2 |
+
from torch import nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
from models.losses import DINOLoss
|
| 5 |
+
import os
|
| 6 |
+
import numpy as np
|
| 7 |
+
from collections import OrderedDict
|
| 8 |
+
from ema_pytorch import EMA
|
| 9 |
+
from models.arch.classifier import PretrainedConvNext
|
| 10 |
+
import util.util as util
|
| 11 |
+
import util.index as index
|
| 12 |
+
import models.networks as networks
|
| 13 |
+
import models.losses as losses
|
| 14 |
+
from models import arch
|
| 15 |
+
#from models.arch.dncnn import effnetv2_s
|
| 16 |
+
from .base_model import BaseModel
|
| 17 |
+
from PIL import Image
|
| 18 |
+
from os.path import join
|
| 19 |
+
#from torchviz import make_dot
|
| 20 |
+
from models.arch.RDnet_ import FullNet_NLP
|
| 21 |
+
import timm
|
| 22 |
+
|
| 23 |
+
def tensor2im(image_tensor, imtype=np.uint8):
|
| 24 |
+
image_tensor = image_tensor.detach()
|
| 25 |
+
image_numpy = image_tensor[0].cpu().float().numpy()
|
| 26 |
+
image_numpy = np.clip(image_numpy, 0, 1)
|
| 27 |
+
if image_numpy.shape[0] == 1:
|
| 28 |
+
image_numpy = np.tile(image_numpy, (3, 1, 1))
|
| 29 |
+
image_numpy = (np.transpose(image_numpy, (1, 2, 0))) * 255.0
|
| 30 |
+
# image_numpy = image_numpy.astype(imtype)
|
| 31 |
+
return image_numpy
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class EdgeMap(nn.Module):
|
| 35 |
+
def __init__(self, scale=1):
|
| 36 |
+
super(EdgeMap, self).__init__()
|
| 37 |
+
self.scale = scale
|
| 38 |
+
self.requires_grad = False
|
| 39 |
+
|
| 40 |
+
def forward(self, img):
|
| 41 |
+
img = img / self.scale
|
| 42 |
+
|
| 43 |
+
N, C, H, W = img.shape
|
| 44 |
+
gradX = torch.zeros(N, 1, H, W, dtype=img.dtype, device=img.device)
|
| 45 |
+
gradY = torch.zeros(N, 1, H, W, dtype=img.dtype, device=img.device)
|
| 46 |
+
|
| 47 |
+
gradx = (img[..., 1:, :] - img[..., :-1, :]).abs().sum(dim=1, keepdim=True)
|
| 48 |
+
grady = (img[..., 1:] - img[..., :-1]).abs().sum(dim=1, keepdim=True)
|
| 49 |
+
|
| 50 |
+
gradX[..., :-1, :] += gradx
|
| 51 |
+
gradX[..., 1:, :] += gradx
|
| 52 |
+
gradX[..., 1:-1, :] /= 2
|
| 53 |
+
|
| 54 |
+
gradY[..., :-1] += grady
|
| 55 |
+
gradY[..., 1:] += grady
|
| 56 |
+
gradY[..., 1:-1] /= 2
|
| 57 |
+
|
| 58 |
+
# edge = (gradX + gradY) / 2
|
| 59 |
+
edge = (gradX + gradY)
|
| 60 |
+
|
| 61 |
+
return edge
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class YTMTNetBase(BaseModel):
|
| 65 |
+
def _init_optimizer(self, optimizers):
|
| 66 |
+
self.optimizers = optimizers
|
| 67 |
+
for optimizer in self.optimizers:
|
| 68 |
+
util.set_opt_param(optimizer, 'initial_lr', self.opt.lr)
|
| 69 |
+
util.set_opt_param(optimizer, 'weight_decay', self.opt.wd)
|
| 70 |
+
|
| 71 |
+
def set_input(self, data, mode='train'):
|
| 72 |
+
target_t = None
|
| 73 |
+
target_r = None
|
| 74 |
+
data_name = None
|
| 75 |
+
identity = False
|
| 76 |
+
mode = mode.lower()
|
| 77 |
+
if mode == 'train':
|
| 78 |
+
input, target_t, target_r = data['input'], data['target_t'], data['target_r']
|
| 79 |
+
elif mode == 'eval':
|
| 80 |
+
input, target_t, target_r, data_name = data['input'], data['target_t'], data['target_r'], data['fn']
|
| 81 |
+
elif mode == 'test':
|
| 82 |
+
input, data_name = data['input'], data['fn']
|
| 83 |
+
else:
|
| 84 |
+
raise NotImplementedError('Mode [%s] is not implemented' % mode)
|
| 85 |
+
|
| 86 |
+
if len(self.gpu_ids) > 0: # transfer data into gpu
|
| 87 |
+
input = input.to(device=self.gpu_ids[0])
|
| 88 |
+
if target_t is not None:
|
| 89 |
+
target_t = target_t.to(device=self.gpu_ids[0])
|
| 90 |
+
if target_r is not None:
|
| 91 |
+
target_r = target_r.to(device=self.gpu_ids[0])
|
| 92 |
+
|
| 93 |
+
self.input = input
|
| 94 |
+
self.identity = identity
|
| 95 |
+
self.input_edge = self.edge_map(self.input)
|
| 96 |
+
self.target_t = target_t
|
| 97 |
+
self.target_r = target_r
|
| 98 |
+
self.data_name = data_name
|
| 99 |
+
|
| 100 |
+
self.issyn = False if 'real' in data else True
|
| 101 |
+
self.aligned = False if 'unaligned' in data else True
|
| 102 |
+
|
| 103 |
+
if target_t is not None:
|
| 104 |
+
self.target_edge = self.edge_map(self.target_t)
|
| 105 |
+
|
| 106 |
+
def eval(self, data, savedir=None, suffix=None, pieapp=None):
|
| 107 |
+
self._eval()
|
| 108 |
+
self.set_input(data, 'eval')
|
| 109 |
+
with torch.no_grad():
|
| 110 |
+
self.forward_eval()
|
| 111 |
+
|
| 112 |
+
output_i = tensor2im(self.output_j[6])
|
| 113 |
+
output_j = tensor2im(self.output_j[7])
|
| 114 |
+
target = tensor2im(self.target_t)
|
| 115 |
+
target_r = tensor2im(self.target_r)
|
| 116 |
+
|
| 117 |
+
if self.aligned:
|
| 118 |
+
res = index.quality_assess(output_i, target)
|
| 119 |
+
else:
|
| 120 |
+
res = {}
|
| 121 |
+
|
| 122 |
+
if savedir is not None:
|
| 123 |
+
if self.data_name is not None:
|
| 124 |
+
name = os.path.splitext(os.path.basename(self.data_name[0]))[0]
|
| 125 |
+
savedir = join(savedir, suffix, name)
|
| 126 |
+
os.makedirs(savedir, exist_ok=True)
|
| 127 |
+
Image.fromarray(output_i.astype(np.uint8)).save(
|
| 128 |
+
join(savedir, '{}_t.png'.format(self.opt.name)))
|
| 129 |
+
Image.fromarray(output_j.astype(np.uint8)).save(
|
| 130 |
+
join(savedir, '{}_r.png'.format(self.opt.name)))
|
| 131 |
+
Image.fromarray(target.astype(np.uint8)).save(join(savedir, 't_label.png'))
|
| 132 |
+
Image.fromarray(tensor2im(self.input).astype(np.uint8)).save(join(savedir, 'm_input.png'))
|
| 133 |
+
else:
|
| 134 |
+
if not os.path.exists(join(savedir, 'transmission_layer')):
|
| 135 |
+
os.makedirs(join(savedir, 'transmission_layer'))
|
| 136 |
+
os.makedirs(join(savedir, 'blended'))
|
| 137 |
+
Image.fromarray(target.astype(np.uint8)).save(
|
| 138 |
+
join(savedir, 'transmission_layer', str(self._count) + '.png'))
|
| 139 |
+
Image.fromarray(tensor2im(self.input).astype(np.uint8)).save(
|
| 140 |
+
join(savedir, 'blended', str(self._count) + '.png'))
|
| 141 |
+
self._count += 1
|
| 142 |
+
|
| 143 |
+
return res
|
| 144 |
+
|
| 145 |
+
def test(self, data, savedir=None):
|
| 146 |
+
# only the 1st input of the whole minibatch would be evaluated
|
| 147 |
+
self._eval()
|
| 148 |
+
self.set_input(data, 'test')
|
| 149 |
+
|
| 150 |
+
if self.data_name is not None and savedir is not None:
|
| 151 |
+
name = os.path.splitext(os.path.basename(self.data_name[0]))[0]
|
| 152 |
+
if not os.path.exists(join(savedir, name)):
|
| 153 |
+
os.makedirs(join(savedir, name))
|
| 154 |
+
|
| 155 |
+
if os.path.exists(join(savedir, name, '{}.png'.format(self.opt.name))):
|
| 156 |
+
return
|
| 157 |
+
|
| 158 |
+
with torch.no_grad():
|
| 159 |
+
output_i, output_j = self.forward()
|
| 160 |
+
output_i = tensor2im(output_i)
|
| 161 |
+
output_j = tensor2im(output_j)
|
| 162 |
+
if self.data_name is not None and savedir is not None:
|
| 163 |
+
Image.fromarray(output_i.astype(np.uint8)).save(join(savedir, name, '{}_l.png'.format(self.opt.name)))
|
| 164 |
+
Image.fromarray(output_j.astype(np.uint8)).save(join(savedir, name, '{}_r.png'.format(self.opt.name)))
|
| 165 |
+
Image.fromarray(tensor2im(self.input).astype(np.uint8)).save(join(savedir, name, 'm_input.png'))
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
class ClsModel(YTMTNetBase):
|
| 169 |
+
def name(self):
|
| 170 |
+
return 'ytmtnet'
|
| 171 |
+
|
| 172 |
+
def __init__(self):
|
| 173 |
+
self.epoch = 0
|
| 174 |
+
self.iterations = 0
|
| 175 |
+
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
| 176 |
+
self.net_c = None
|
| 177 |
+
|
| 178 |
+
def print_network(self):
|
| 179 |
+
print('--------------------- Model ---------------------')
|
| 180 |
+
print('##################### NetG #####################')
|
| 181 |
+
networks.print_network(self.net_i)
|
| 182 |
+
if self.isTrain and self.opt.lambda_gan > 0:
|
| 183 |
+
print('##################### NetD #####################')
|
| 184 |
+
networks.print_network(self.netD)
|
| 185 |
+
|
| 186 |
+
def _eval(self):
|
| 187 |
+
self.net_i.eval()
|
| 188 |
+
self.net_c.eval()
|
| 189 |
+
|
| 190 |
+
def _train(self):
|
| 191 |
+
self.net_i.train()
|
| 192 |
+
self.net_c.eval()
|
| 193 |
+
def initialize(self, opt):
|
| 194 |
+
self.opt=opt
|
| 195 |
+
BaseModel.initialize(self, opt)
|
| 196 |
+
|
| 197 |
+
in_channels = 3
|
| 198 |
+
self.vgg = None
|
| 199 |
+
|
| 200 |
+
if opt.hyper:
|
| 201 |
+
self.vgg = losses.Vgg19(requires_grad=False).to(self.device)
|
| 202 |
+
in_channels += 1472
|
| 203 |
+
channels = [64, 128, 256, 512]
|
| 204 |
+
layers = [2, 2, 4, 2]
|
| 205 |
+
num_subnet = opt.num_subnet
|
| 206 |
+
self.net_c = PretrainedConvNext("convnext_small_in22k").cuda()
|
| 207 |
+
|
| 208 |
+
self.net_c.load_state_dict(torch.load('pretrained/cls_model.pth')['icnn'])
|
| 209 |
+
|
| 210 |
+
self.net_i = FullNet_NLP(channels, layers, num_subnet, opt.loss_col,num_classes=1000, drop_path=0,save_memory=True, inter_supv=True, head_init_scale=None, kernel_size=3).to(self.device)
|
| 211 |
+
|
| 212 |
+
self.edge_map = EdgeMap(scale=1).to(self.device)
|
| 213 |
+
|
| 214 |
+
if self.isTrain:
|
| 215 |
+
self.loss_dic = losses.init_loss(opt, self.Tensor)
|
| 216 |
+
vggloss = losses.ContentLoss()
|
| 217 |
+
vggloss.initialize(losses.VGGLoss(self.vgg))
|
| 218 |
+
self.loss_dic['t_vgg'] = vggloss
|
| 219 |
+
|
| 220 |
+
cxloss = losses.ContentLoss()
|
| 221 |
+
if opt.unaligned_loss == 'vgg':
|
| 222 |
+
cxloss.initialize(losses.VGGLoss(self.vgg, weights=[0.1], indices=[opt.vgg_layer]))
|
| 223 |
+
elif opt.unaligned_loss == 'ctx':
|
| 224 |
+
cxloss.initialize(losses.CXLoss(self.vgg, weights=[0.1, 0.1, 0.1], indices=[8, 13, 22]))
|
| 225 |
+
elif opt.unaligned_loss == 'mse':
|
| 226 |
+
cxloss.initialize(nn.MSELoss())
|
| 227 |
+
elif opt.unaligned_loss == 'ctx_vgg':
|
| 228 |
+
cxloss.initialize(losses.CXLoss(self.vgg, weights=[0.1, 0.1, 0.1, 0.1], indices=[8, 13, 22, 31],
|
| 229 |
+
criterions=[losses.CX_loss] * 3 + [nn.L1Loss()]))
|
| 230 |
+
else:
|
| 231 |
+
raise NotImplementedError
|
| 232 |
+
self.scaler=torch.cuda.amp.GradScaler()
|
| 233 |
+
with torch.autocast(device_type='cuda',dtype=torch.float16):
|
| 234 |
+
self.dinoloss=DINOLoss()
|
| 235 |
+
self.loss_dic['t_cx'] = cxloss
|
| 236 |
+
|
| 237 |
+
self.optimizer_G = torch.optim.Adam(self.net_i.parameters(),
|
| 238 |
+
lr=opt.lr, betas=(0.9, 0.999), weight_decay=opt.wd)
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
self._init_optimizer([self.optimizer_G])
|
| 242 |
+
|
| 243 |
+
if opt.resume:
|
| 244 |
+
self.load(self, opt.resume_epoch)
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
def backward_D(self):
|
| 248 |
+
loss_D=[]
|
| 249 |
+
weight=self.opt.weight_loss
|
| 250 |
+
for p in self.netD.parameters():
|
| 251 |
+
p.requires_grad = True
|
| 252 |
+
for i in range(4):
|
| 253 |
+
loss_D_1, pred_fake_1, pred_real_1 = self.loss_dic['gan'].get_loss(
|
| 254 |
+
self.netD, self.input, self.output_j[2*i], self.target_t)
|
| 255 |
+
loss_D.append(loss_D_1*weight)
|
| 256 |
+
weight+=self.opt.weight_loss
|
| 257 |
+
loss_sum=sum(loss_D)
|
| 258 |
+
|
| 259 |
+
self.loss_D, self.pred_fake, self.pred_real = (loss_sum, pred_fake_1, pred_real_1)
|
| 260 |
+
|
| 261 |
+
(self.loss_D * self.opt.lambda_gan).backward(retain_graph=True)
|
| 262 |
+
|
| 263 |
+
def get_loss(self, out_l, out_r):
|
| 264 |
+
loss_G_GAN_sum=[]
|
| 265 |
+
loss_icnn_pixel_sum=[]
|
| 266 |
+
loss_rcnn_pixel_sum=[]
|
| 267 |
+
loss_icnn_vgg_sum=[]
|
| 268 |
+
weight=self.opt.weight_loss
|
| 269 |
+
for i in range(self.opt.loss_col):
|
| 270 |
+
out_r_clean=out_r[2*i]
|
| 271 |
+
out_r_reflection=out_r[2*i+1]
|
| 272 |
+
if i != self.opt.loss_col -1:
|
| 273 |
+
loss_G_GAN = 0
|
| 274 |
+
loss_icnn_pixel = self.loss_dic['t_pixel'].get_loss(out_r_clean, self.target_t)
|
| 275 |
+
loss_rcnn_pixel = self.loss_dic['r_pixel'].get_loss(out_r_reflection, self.target_r) * 1.5 * self.opt.r_pixel_weight
|
| 276 |
+
loss_icnn_vgg = self.loss_dic['t_vgg'].get_loss(out_r_clean, self.target_t) * self.opt.lambda_vgg
|
| 277 |
+
else:
|
| 278 |
+
if self.opt.lambda_gan>0:
|
| 279 |
+
|
| 280 |
+
loss_G_GAN=0
|
| 281 |
+
else:
|
| 282 |
+
loss_G_GAN=0
|
| 283 |
+
loss_icnn_pixel = self.loss_dic['t_pixel'].get_loss(out_r_clean, self.target_t)
|
| 284 |
+
loss_rcnn_pixel = self.loss_dic['r_pixel'].get_loss(out_r_reflection, self.target_r) * 1.5 * self.opt.r_pixel_weight
|
| 285 |
+
loss_icnn_vgg = self.loss_dic['t_vgg'].get_loss(out_r_clean, self.target_t) * self.opt.lambda_vgg
|
| 286 |
+
|
| 287 |
+
loss_G_GAN_sum.append(loss_G_GAN*weight)
|
| 288 |
+
loss_icnn_pixel_sum.append(loss_icnn_pixel*weight)
|
| 289 |
+
loss_rcnn_pixel_sum.append(loss_rcnn_pixel*weight)
|
| 290 |
+
loss_icnn_vgg_sum.append(loss_icnn_vgg*weight)
|
| 291 |
+
weight=weight+self.opt.weight_loss
|
| 292 |
+
return sum(loss_G_GAN_sum), sum(loss_icnn_pixel_sum), sum(loss_rcnn_pixel_sum), sum(loss_icnn_vgg_sum)
|
| 293 |
+
|
| 294 |
+
def backward_G(self):
|
| 295 |
+
|
| 296 |
+
self.loss_G_GAN,self.loss_icnn_pixel, self.loss_rcnn_pixel, \
|
| 297 |
+
self.loss_icnn_vgg = self.get_loss(self.output_i, self.output_j)
|
| 298 |
+
|
| 299 |
+
self.loss_exclu = self.exclusion_loss(self.output_i, self.output_j, 3)
|
| 300 |
+
|
| 301 |
+
self.loss_recons = self.loss_dic['recons'](self.output_i, self.output_j, self.input) * 0.2
|
| 302 |
+
|
| 303 |
+
self.loss_G = self.loss_G_GAN +self.loss_icnn_pixel + self.loss_rcnn_pixel + \
|
| 304 |
+
self.loss_icnn_vgg
|
| 305 |
+
self.scaler.scale(self.loss_G).backward()
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def hyper_column(self, input_img):
|
| 310 |
+
hypercolumn = self.vgg(input_img)
|
| 311 |
+
_, C, H, W = input_img.shape
|
| 312 |
+
hypercolumn = [F.interpolate(feature.detach(), size=(H, W), mode='bilinear', align_corners=False) for
|
| 313 |
+
feature in hypercolumn]
|
| 314 |
+
input_i = [input_img]
|
| 315 |
+
input_i.extend(hypercolumn)
|
| 316 |
+
input_i = torch.cat(input_i, dim=1)
|
| 317 |
+
return input_i
|
| 318 |
+
|
| 319 |
+
def forward(self):
|
| 320 |
+
# without edge
|
| 321 |
+
|
| 322 |
+
self.output_j=[]
|
| 323 |
+
input_i = self.input
|
| 324 |
+
if self.vgg is not None:
|
| 325 |
+
input_i = self.hyper_column(input_i)
|
| 326 |
+
with torch.no_grad():
|
| 327 |
+
ipt = self.net_c(input_i)
|
| 328 |
+
output_i, output_j = self.net_i(input_i,ipt,prompt=True)
|
| 329 |
+
self.output_i = output_i
|
| 330 |
+
for i in range(self.opt.loss_col):
|
| 331 |
+
out_reflection, out_clean = output_j[i][:, :3, ...], output_j[i][:, 3:, ...]
|
| 332 |
+
self.output_j.append(out_clean)
|
| 333 |
+
self.output_j.append(out_reflection)
|
| 334 |
+
return self.output_i, self.output_j
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
@torch.no_grad()
|
| 338 |
+
def forward_eval(self):
|
| 339 |
+
|
| 340 |
+
self.output_j=[]
|
| 341 |
+
input_i = self.input
|
| 342 |
+
if self.vgg is not None:
|
| 343 |
+
input_i = self.hyper_column(input_i)
|
| 344 |
+
ipt = self.net_c(input_i)
|
| 345 |
+
|
| 346 |
+
output_i, output_j = self.net_i(input_i,ipt,prompt=True)
|
| 347 |
+
self.output_i = output_i #alpha * output_i + beta
|
| 348 |
+
for i in range(self.opt.loss_col):
|
| 349 |
+
out_reflection, out_clean = output_j[i][:, :3, ...], output_j[i][:, 3:, ...]
|
| 350 |
+
self.output_j.append(out_clean)
|
| 351 |
+
self.output_j.append(out_reflection)
|
| 352 |
+
return self.output_i, self.output_j
|
| 353 |
+
|
| 354 |
+
def optimize_parameters(self):
|
| 355 |
+
self._train()
|
| 356 |
+
self.forward()
|
| 357 |
+
self.optimizer_G.zero_grad()
|
| 358 |
+
self.backward_G()
|
| 359 |
+
self.optimizer_G.step()
|
| 360 |
+
|
| 361 |
+
def return_output(self):
|
| 362 |
+
output_clean = self.output_j[1]
|
| 363 |
+
output_reflection = self.output_j[0]
|
| 364 |
+
output_clean = tensor2im(output_clean).astype(np.uint8)
|
| 365 |
+
output_reflection = tensor2im(output_reflection).astype(np.uint8)
|
| 366 |
+
input=tensor2im(self.input)
|
| 367 |
+
return output_clean,output_reflection,input
|

    def exclusion_loss(self, img_T, img_R, level=3, eps=1e-6):
        loss_gra = []
        weight = 0.25
        for i in range(4):
            grad_x_loss = []
            grad_y_loss = []
            img_T = self.output_j[2 * i]
            img_R = self.output_j[2 * i + 1]
            for l in range(level):
                grad_x_T, grad_y_T = self.compute_grad(img_T)
                grad_x_R, grad_y_R = self.compute_grad(img_R)

                alphax = (2.0 * torch.mean(torch.abs(grad_x_T))) / (torch.mean(torch.abs(grad_x_R)) + eps)
                alphay = (2.0 * torch.mean(torch.abs(grad_y_T))) / (torch.mean(torch.abs(grad_y_R)) + eps)

                # 2 * sigmoid(x) - 1 == tanh(x / 2): a tanh squashing of the gradients
                gradx1_s = (torch.sigmoid(grad_x_T) * 2) - 1
                grady1_s = (torch.sigmoid(grad_y_T) * 2) - 1
                gradx2_s = (torch.sigmoid(grad_x_R * alphax) * 2) - 1
                grady2_s = (torch.sigmoid(grad_y_R * alphay) * 2) - 1

                grad_x_loss.append((torch.mean(torch.mul(gradx1_s.pow(2), gradx2_s.pow(2))) + eps) ** 0.25)
                grad_y_loss.append((torch.mean(torch.mul(grady1_s.pow(2), grady2_s.pow(2))) + eps) ** 0.25)

                img_T = F.interpolate(img_T, scale_factor=0.5, mode='bilinear')
                img_R = F.interpolate(img_R, scale_factor=0.5, mode='bilinear')
            loss_gradxy = torch.sum(sum(grad_x_loss) / 3) + torch.sum(sum(grad_y_loss) / 3)
            loss_gra.append(loss_gradxy * weight)
            weight += 0.25

        return sum(loss_gra) / 2
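    # Math behind exclusion_loss (explanatory comment, not in the original
    # upload). Per pyramid level, with T the transmission and R the reflection
    # estimate, this is the gradient-exclusion penalty of Zhang et al.,
    # "Single Image Reflection Separation with Perceptual Losses" (CVPR 2018):
    #
    #   Psi(T, R) = tanh(grad T / 2)^2 * tanh(alpha * grad R / 2)^2
    #   term      = (mean(Psi) + eps)^(1/4),
    #   alpha     = 2 * mean|grad T| / mean|grad R|
    #
    # pushing the two layers not to share strong edges at the same pixel.
    # The i-th column pair is weighted 0.25 * (i + 1), so later (more refined)
    # predictions are penalized more heavily.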

    def contain_loss(self, img_T, img_R, img_I, eps=1e-6):
        pix_num = np.prod(img_I.shape)
        predict_tx, predict_ty = self.compute_grad(img_T)
        predict_rx, predict_ry = self.compute_grad(img_R)
        input_x, input_y = self.compute_grad(img_I)

        out = torch.norm(predict_tx / (input_x + eps), 2) ** 2 + \
              torch.norm(predict_ty / (input_y + eps), 2) ** 2 + \
              torch.norm(predict_rx / (input_x + eps), 2) ** 2 + \
              torch.norm(predict_ry / (input_y + eps), 2) ** 2

        return out / pix_num
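    # contain_loss penalizes gradients of the predicted layers relative to the
    # input's gradients: with I the blended input, it is
    #
    #   (||grad T / grad I||_2^2 + ||grad R / grad I||_2^2) / #pixels
    #
    # with eps keeping the division finite, discouraging either layer from
    # placing edges where the input itself is flat. (Explanatory comment, not
    # in the original upload.)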

    def compute_grad(self, img):
        gradx = img[:, :, 1:, :] - img[:, :, :-1, :]
        grady = img[:, :, :, 1:] - img[:, :, :, :-1]
        return gradx, grady
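    # compute_grad returns forward differences: for an (N, C, H, W) tensor the
    # outputs have shapes (N, C, H-1, W) and (N, C, H, W-1) respectively,
    # which is why the losses above only pair x-terms with x-terms and
    # y-terms with y-terms. (Explanatory comment, not in the original upload.)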

    def load(self, model, resume_epoch=None):
        icnn_path = model.opt.icnn_path
        state_dict = torch.load(icnn_path)
        model.net_i.load_state_dict(state_dict['icnn'])
        return state_dict
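    # Checkpoint round-trip sketch (assumption: keys mirror state_dict()
    # below; only net_i is restored by load() as written). A full resume
    # would also restore the optimizer and counters, e.g.:
    #
    #   ckpt = torch.load(opt.icnn_path, map_location='cpu')
    #   model.net_i.load_state_dict(ckpt['icnn'])
    #   model.optimizer_G.load_state_dict(ckpt['opt_g'])
    #   model.epoch, model.iterations = ckpt['epoch'], ckpt['iterations']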

    def state_dict(self):
        state_dict = {
            'icnn': self.net_i.state_dict(),
            'opt_g': self.optimizer_G.state_dict(),
            # 'ema': self.ema.state_dict(),
            'epoch': self.epoch, 'iterations': self.iterations
        }

        if self.opt.lambda_gan > 0:
            state_dict.update({
                'opt_d': self.optimizer_D.state_dict(),
                'netD': self.netD.state_dict(),
            })

        return state_dict


class AvgPool2d(nn.Module):
    def __init__(self, kernel_size=None, base_size=None, auto_pad=True, fast_imp=False, train_size=None):
        super().__init__()
        self.kernel_size = kernel_size
        self.base_size = base_size
        self.auto_pad = auto_pad

        # only used for fast implementation
        self.fast_imp = fast_imp
        self.rs = [5, 4, 3, 2, 1]
        self.max_r1 = self.rs[0]
        self.max_r2 = self.rs[0]
        self.train_size = train_size

    def extra_repr(self) -> str:
        return 'kernel_size={}, base_size={}, stride={}, fast_imp={}'.format(
            self.kernel_size, self.base_size, self.kernel_size, self.fast_imp
        )

    def forward(self, x):
        if self.kernel_size is None and self.base_size:
            train_size = self.train_size
            if isinstance(self.base_size, int):
                self.base_size = (self.base_size, self.base_size)
            self.kernel_size = list(self.base_size)
            self.kernel_size[0] = x.shape[2] * self.base_size[0] // train_size[-2]
            self.kernel_size[1] = x.shape[3] * self.base_size[1] // train_size[-1]

            # only used for fast implementation
            self.max_r1 = max(1, self.rs[0] * x.shape[2] // train_size[-2])
            self.max_r2 = max(1, self.rs[0] * x.shape[3] // train_size[-1])

        if self.kernel_size[0] >= x.size(-2) and self.kernel_size[1] >= x.size(-1):
            return F.adaptive_avg_pool2d(x, 1)

        if self.fast_imp:  # non-equivalent implementation but faster
            h, w = x.shape[2:]
            if self.kernel_size[0] >= h and self.kernel_size[1] >= w:
                out = F.adaptive_avg_pool2d(x, 1)
            else:
                r1 = [r for r in self.rs if h % r == 0][0]
                r2 = [r for r in self.rs if w % r == 0][0]
                # reduction constraint
                r1 = min(self.max_r1, r1)
                r2 = min(self.max_r2, r2)
                s = x[:, :, ::r1, ::r2].cumsum(dim=-1).cumsum(dim=-2)
                n, c, h, w = s.shape
                k1, k2 = min(h - 1, self.kernel_size[0] // r1), min(w - 1, self.kernel_size[1] // r2)
                out = (s[:, :, :-k1, :-k2] - s[:, :, :-k1, k2:] - s[:, :, k1:, :-k2] + s[:, :, k1:, k2:]) / (k1 * k2)
                out = torch.nn.functional.interpolate(out, scale_factor=(r1, r2))
        else:
            n, c, h, w = x.shape
            s = x.cumsum(dim=-1).cumsum_(dim=-2)
            s = torch.nn.functional.pad(s, (1, 0, 1, 0))  # pad 0 for convenience
            k1, k2 = min(h, self.kernel_size[0]), min(w, self.kernel_size[1])
            s1, s2, s3, s4 = s[:, :, :-k1, :-k2], s[:, :, :-k1, k2:], s[:, :, k1:, :-k2], s[:, :, k1:, k2:]
            out = s4 + s1 - s2 - s3
            out = out / (k1 * k2)

        if self.auto_pad:
            n, c, h, w = x.shape
            _h, _w = out.shape[2:]
            pad2d = ((w - _w) // 2, (w - _w + 1) // 2, (h - _h) // 2, (h - _h + 1) // 2)
            out = torch.nn.functional.pad(out, pad2d, mode='replicate')

        return out
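    # How the else-branch works (explanatory comment, not in the original
    # upload): s is a 2-D prefix sum (summed-area table), zero-padded by one
    # row/column, so the sum over any k1 x k2 window is recovered from four
    # corner lookups:
    #
    #   sum(window at (i, j)) = s[i+k1, j+k2] - s[i, j+k2] - s[i+k1, j] + s[i, j]
    #
    # turning sliding-window average pooling into O(1) work per output pixel.
    # This class and replace_layers below follow the test-time local converter
    # (TLC) idea used in NAFNet-style restoration models: a network trained on
    # train_size crops keeps a comparable pooling window on larger test images.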


def replace_layers(model, base_size, train_size, fast_imp, **kwargs):
    for n, m in model.named_children():
        if len(list(m.children())) > 0:
            # compound module, go inside it
            replace_layers(m, base_size, train_size, fast_imp, **kwargs)

        if isinstance(m, nn.AdaptiveAvgPool2d):
            pool = AvgPool2d(base_size=base_size, fast_imp=fast_imp, train_size=train_size)
            assert m.output_size == 1
            setattr(model, n, pool)
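# Usage sketch (assumption: not shown in this diff; mirrors how NAFNet applies
# its TLC wrapper). Swap every global nn.AdaptiveAvgPool2d(1) for the
# size-aware AvgPool2d before full-resolution inference; the sizes below are
# illustrative, not taken from this repo:
#
#   net = model.net_i                                   # restoration network
#   replace_layers(net, base_size=(384, 384),
#                  train_size=(1, 3, 224, 224), fast_imp=False)
#   with torch.no_grad():
#       out = net(high_res_input, ipt, prompt=True)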