Upload 50 files
Browse files- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/misc.xml +7 -0
- .idea/modules.xml +8 -0
- .idea/upload.iml +12 -0
- .idea/workspace.xml +42 -0
- model_LARRES.py +229 -0
- model_convlstm.py +186 -0
- modules.py +66 -0
- test2015.h5 +3 -0
- test2020.h5 +3 -0
- train2015.h5 +3 -0
- train2020.h5 +3 -0
- train_simvp2.py +85 -0
- utilpack/__init__.py +32 -0
- utilpack/__pycache__/__init__.cpython-312.pyc +0 -0
- utilpack/__pycache__/convlstm_modules.cpython-312.pyc +0 -0
- utilpack/__pycache__/e3dlstm_modules.cpython-312.pyc +0 -0
- utilpack/__pycache__/mau_modules.cpython-312.pyc +0 -0
- utilpack/__pycache__/mim_modules.cpython-312.pyc +0 -0
- utilpack/__pycache__/mmvp_modules.cpython-312.pyc +0 -0
- utilpack/__pycache__/phydnet_modules.cpython-312.pyc +0 -0
- utilpack/__pycache__/predrnn_modules.cpython-312.pyc +0 -0
- utilpack/__pycache__/predrnnpp_modules.cpython-312.pyc +0 -0
- utilpack/__pycache__/predrnnv2_modules.cpython-312.pyc +0 -0
- utilpack/__pycache__/simvp_modules.cpython-312.pyc +0 -0
- utilpack/__pycache__/swinlstm_modules.cpython-312.pyc +0 -0
- utilpack/convlstm_modules.py +58 -0
- utilpack/e3dlstm_modules.py +119 -0
- utilpack/layers/__init__.py +10 -0
- utilpack/layers/__pycache__/__init__.cpython-312.pyc +0 -0
- utilpack/layers/__pycache__/hornet.cpython-312.pyc +0 -0
- utilpack/layers/__pycache__/moganet.cpython-312.pyc +0 -0
- utilpack/layers/__pycache__/poolformer.cpython-312.pyc +0 -0
- utilpack/layers/__pycache__/uniformer.cpython-312.pyc +0 -0
- utilpack/layers/__pycache__/van.cpython-312.pyc +0 -0
- utilpack/layers/hornet.py +112 -0
- utilpack/layers/moganet.py +140 -0
- utilpack/layers/poolformer.py +97 -0
- utilpack/layers/uniformer.py +156 -0
- utilpack/layers/van.py +119 -0
- utilpack/mau_modules.py +66 -0
- utilpack/mim_modules.py +211 -0
- utilpack/mmvp_modules.py +349 -0
- utilpack/phydnet_modules.py +463 -0
- utilpack/predrnn_modules.py +79 -0
- utilpack/predrnnpp_modules.py +169 -0
- utilpack/predrnnv2_modules.py +82 -0
- utilpack/simvp_modules.py +586 -0
- utilpack/swinlstm_modules.py +317 -0
- utilpack/wast_modules.py +577 -0
.idea/inspectionProfiles/profiles_settings.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<settings>
|
| 3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
| 4 |
+
<version value="1.0" />
|
| 5 |
+
</settings>
|
| 6 |
+
</component>
|
.idea/misc.xml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="Black">
|
| 4 |
+
<option name="sdkName" value="Python 3.12" />
|
| 5 |
+
</component>
|
| 6 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12" project-jdk-type="Python SDK" />
|
| 7 |
+
</project>
|
.idea/modules.xml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ProjectModuleManager">
|
| 4 |
+
<modules>
|
| 5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/upload.iml" filepath="$PROJECT_DIR$/.idea/upload.iml" />
|
| 6 |
+
</modules>
|
| 7 |
+
</component>
|
| 8 |
+
</project>
|
.idea/upload.iml
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<module type="PYTHON_MODULE" version="4">
|
| 3 |
+
<component name="NewModuleRootManager">
|
| 4 |
+
<content url="file://$MODULE_DIR$" />
|
| 5 |
+
<orderEntry type="inheritedJdk" />
|
| 6 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
| 7 |
+
</component>
|
| 8 |
+
<component name="PyDocumentationSettings">
|
| 9 |
+
<option name="format" value="PLAIN" />
|
| 10 |
+
<option name="myDocStringFormat" value="Plain" />
|
| 11 |
+
</component>
|
| 12 |
+
</module>
|
.idea/workspace.xml
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ChangeListManager">
|
| 4 |
+
<list default="true" id="9591cce3-c276-4022-8bb6-62a293d16241" name="更改" comment="" />
|
| 5 |
+
<option name="SHOW_DIALOG" value="false" />
|
| 6 |
+
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
| 7 |
+
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
| 8 |
+
<option name="LAST_RESOLUTION" value="IGNORE" />
|
| 9 |
+
</component>
|
| 10 |
+
<component name="ProjectColorInfo"><![CDATA[{
|
| 11 |
+
"associatedIndex": 3
|
| 12 |
+
}]]></component>
|
| 13 |
+
<component name="ProjectId" id="2ssyDJvvBdJ2oeAvJVoyJMXumP7" />
|
| 14 |
+
<component name="ProjectViewState">
|
| 15 |
+
<option name="hideEmptyMiddlePackages" value="true" />
|
| 16 |
+
<option name="showLibraryContents" value="true" />
|
| 17 |
+
</component>
|
| 18 |
+
<component name="PropertiesComponent"><![CDATA[{
|
| 19 |
+
"keyToString": {
|
| 20 |
+
"RunOnceActivity.ShowReadmeOnStart": "true",
|
| 21 |
+
"last_opened_file_path": "C:/Users/Administrator/Desktop/upload"
|
| 22 |
+
}
|
| 23 |
+
}]]></component>
|
| 24 |
+
<component name="SharedIndexes">
|
| 25 |
+
<attachedChunks>
|
| 26 |
+
<set>
|
| 27 |
+
<option value="bundled-python-sdk-98f27166c754-ba05f1cad1b1-com.jetbrains.pycharm.community.sharedIndexes.bundled-PC-242.21829.153" />
|
| 28 |
+
</set>
|
| 29 |
+
</attachedChunks>
|
| 30 |
+
</component>
|
| 31 |
+
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="应用程序级" UseSingleDictionary="true" transferred="true" />
|
| 32 |
+
<component name="TaskManager">
|
| 33 |
+
<task active="true" id="Default" summary="默认任务">
|
| 34 |
+
<changelist id="9591cce3-c276-4022-8bb6-62a293d16241" name="更改" comment="" />
|
| 35 |
+
<created>1739258456359</created>
|
| 36 |
+
<option name="number" value="Default" />
|
| 37 |
+
<option name="presentableId" value="Default" />
|
| 38 |
+
<updated>1739258456359</updated>
|
| 39 |
+
</task>
|
| 40 |
+
<servers />
|
| 41 |
+
</component>
|
| 42 |
+
</project>
|
model_LARRES.py
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch import nn
|
| 3 |
+
from modules import ConvSC, Inception
|
| 4 |
+
|
| 5 |
+
from utilpack import (ConvNeXtSubBlock, ConvMixerSubBlock, GASubBlock, gInception_ST,
|
| 6 |
+
HorNetSubBlock, MLPMixerSubBlock, MogaSubBlock, PoolFormerSubBlock,
|
| 7 |
+
SwinSubBlock, UniformerSubBlock, VANSubBlock, ViTSubBlock, TAUSubBlock)
|
| 8 |
+
|
| 9 |
+
def stride_generator(N, reverse=False):
    """Return the first N strides of the alternating pattern [1, 2, 1, 2, ...].

    Used to build the encoder (forward order) and decoder (reversed order)
    stride schedules of the SimVP-style models in this file.
    """
    pattern = [1, 2] * 10  # long enough for any practical depth (N <= 20)
    strides = pattern[:N]
    return strides[::-1] if reverse else strides
|
| 13 |
+
|
| 14 |
+
class Encoder(nn.Module):
    """Spatial encoder: a stack of ConvSC layers with strides from stride_generator.

    Returns both the final latent map and the output of the first layer,
    which the Decoder consumes as a skip connection.
    """

    def __init__(self, C_in, C_hid, N_S):
        super(Encoder, self).__init__()
        strides = stride_generator(N_S)
        layers = [ConvSC(C_in, C_hid, stride=strides[0])]
        layers.extend(ConvSC(C_hid, C_hid, stride=s) for s in strides[1:])
        self.enc = nn.Sequential(*layers)

    def forward(self, x):  # x: (B*T, C_in, H, W)
        enc1 = self.enc[0](x)  # kept as the skip connection
        latent = enc1
        for stage in self.enc[1:]:
            latent = stage(latent)
        return latent, enc1
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class Decoder(nn.Module):
    """Spatial decoder mirroring Encoder with transposed ConvSC layers.

    The final layer receives the concatenation of the current feature map
    and the encoder's first-layer skip (hence 2*C_hid input channels); a
    1x1 convolution then reads out C_out channels.
    """

    def __init__(self, C_hid, C_out, N_S):
        super(Decoder, self).__init__()
        strides = stride_generator(N_S, reverse=True)
        layers = [ConvSC(C_hid, C_hid, stride=s, transpose=True) for s in strides[:-1]]
        layers.append(ConvSC(2 * C_hid, C_hid, stride=strides[-1], transpose=True))
        self.dec = nn.Sequential(*layers)
        self.readout = nn.Conv2d(C_hid, C_out, 1)

    def forward(self, hid, enc1=None):
        for stage in self.dec[:-1]:
            hid = stage(hid)
        # fuse the encoder skip just before the final up-convolution
        out = self.dec[-1](torch.cat([hid, enc1], dim=1))
        return self.readout(out)
|
| 47 |
+
|
| 48 |
+
class Mid_Xnet(nn.Module):
    """Temporal translator: a UNet-like stack of Inception blocks.

    The encoder half collects intermediate feature maps as skips; the
    decoder half consumes them via channel concatenation. Operates on
    (B, T*C, H, W) after folding time into the channel dimension.
    """

    def __init__(self, channel_in, channel_hid, N_T, incep_ker=[3, 5, 7, 11], groups=8):
        super(Mid_Xnet, self).__init__()
        self.N_T = N_T

        enc_layers = [Inception(channel_in, channel_hid // 2, channel_hid,
                                incep_ker=incep_ker, groups=groups)]
        enc_layers += [Inception(channel_hid, channel_hid // 2, channel_hid,
                                 incep_ker=incep_ker, groups=groups)
                       for _ in range(N_T - 1)]

        # decoder: every block after the first sees 2*channel_hid channels
        # because of the skip concatenation; the last projects back to
        # channel_in.
        dec_layers = [Inception(channel_hid, channel_hid // 2, channel_hid,
                                incep_ker=incep_ker, groups=groups)]
        dec_layers += [Inception(2 * channel_hid, channel_hid // 2, channel_hid,
                                 incep_ker=incep_ker, groups=groups)
                       for _ in range(N_T - 2)]
        dec_layers.append(Inception(2 * channel_hid, channel_hid // 2, channel_in,
                                    incep_ker=incep_ker, groups=groups))

        self.enc = nn.Sequential(*enc_layers)
        self.dec = nn.Sequential(*dec_layers)

    def forward(self, x):
        B, T, C, H, W = x.shape
        z = x.reshape(B, T * C, H, W)

        # encoder pass; remember intermediate maps for the skip connections
        skips = []
        for i in range(self.N_T):
            z = self.enc[i](z)
            if i < self.N_T - 1:
                skips.append(z)

        # decoder pass; skips are consumed in reverse order
        z = self.dec[0](z)
        for i in range(1, self.N_T):
            z = self.dec[i](torch.cat([z, skips[-i]], dim=1))

        return z.reshape(B, T, C, H, W)
|
| 85 |
+
|
| 86 |
+
class MetaBlock(nn.Module):
    """The hidden Translator of MetaFormer for SimVP.

    Selects one MetaFormer-style sub-block by name and, when the channel
    counts differ, projects the result with a 1x1 convolution.

    Args:
        in_channels: input feature channels.
        out_channels: output feature channels.
        input_resolution: (H, W) of the feature map; required by the
            'mlp'/'mlpmixer' and 'swin' variants.
        model_type: sub-block family name (case-insensitive); None
            selects 'gsta'.
        mlp_ratio: MLP expansion ratio inside the sub-block.
        drop: dropout rate.
        drop_path: stochastic-depth rate.
        layer_i: index of this block in the stack ('swin' and 'uniformer'
            vary behaviour per layer).

    Raises:
        ValueError: if ``model_type`` is not a recognised sub-block name.
    """

    def __init__(self, in_channels, out_channels, input_resolution=None, model_type=None,
                 mlp_ratio=8., drop=0.0, drop_path=0.0, layer_i=0):
        super(MetaBlock, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        model_type = model_type.lower() if model_type is not None else 'gsta'

        if model_type == 'gsta':
            self.block = GASubBlock(
                in_channels, kernel_size=21, mlp_ratio=mlp_ratio,
                drop=drop, drop_path=drop_path, act_layer=nn.GELU)
        elif model_type == 'convmixer':
            self.block = ConvMixerSubBlock(in_channels, kernel_size=11, activation=nn.GELU)
        elif model_type == 'convnext':
            self.block = ConvNeXtSubBlock(
                in_channels, mlp_ratio=mlp_ratio, drop=drop, drop_path=drop_path)
        elif model_type == 'hornet':
            self.block = HorNetSubBlock(in_channels, mlp_ratio=mlp_ratio, drop_path=drop_path)
        elif model_type in ['mlp', 'mlpmixer']:
            self.block = MLPMixerSubBlock(
                in_channels, input_resolution, mlp_ratio=mlp_ratio, drop=drop, drop_path=drop_path)
        elif model_type in ['moga', 'moganet']:
            self.block = MogaSubBlock(
                in_channels, mlp_ratio=mlp_ratio, drop_rate=drop, drop_path_rate=drop_path)
        elif model_type == 'poolformer':
            self.block = PoolFormerSubBlock(
                in_channels, mlp_ratio=mlp_ratio, drop=drop, drop_path=drop_path)
        elif model_type == 'swin':
            self.block = SwinSubBlock(
                in_channels, input_resolution, layer_i=layer_i, mlp_ratio=mlp_ratio,
                drop=drop, drop_path=drop_path)
        elif model_type == 'uniformer':
            block_type = 'MHSA' if in_channels == out_channels and layer_i > 0 else 'Conv'
            self.block = UniformerSubBlock(
                in_channels, mlp_ratio=mlp_ratio, drop=drop,
                drop_path=drop_path, block_type=block_type)
        elif model_type == 'van':
            self.block = VANSubBlock(
                in_channels, mlp_ratio=mlp_ratio, drop=drop, drop_path=drop_path, act_layer=nn.GELU)
        elif model_type == 'vit':
            self.block = ViTSubBlock(
                in_channels, mlp_ratio=mlp_ratio, drop=drop, drop_path=drop_path)
        else:
            # BUG FIX: was `assert False and "Invalid model_type in SimVP"`.
            # `False and str` evaluates to bare False (the message was never
            # shown), and asserts disappear entirely under `python -O`.
            raise ValueError(f"Invalid model_type '{model_type}' in SimVP")

        if in_channels != out_channels:
            self.reduction = nn.Conv2d(
                in_channels, out_channels, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        z = self.block(x)
        # project only when a channel change was requested at construction
        return z if self.in_channels == self.out_channels else self.reduction(z)
|
| 141 |
+
|
| 142 |
+
class MidMetaNet(nn.Module):
    """The hidden Translator of MetaFormer for SimVP.

    A straight stack of N2 MetaBlocks operating on (B, T*C, H, W): the
    first projects channel_in -> channel_hid, the middle blocks keep
    channel_hid, and the last projects back to channel_in.
    """

    def __init__(self, channel_in, channel_hid, N2,
                 input_resolution=None, model_type=None,
                 mlp_ratio=4., drop=0.0, drop_path=0.1):
        super(MidMetaNet, self).__init__()
        assert N2 >= 2 and mlp_ratio > 1
        self.N2 = N2
        # linearly increasing stochastic-depth decay rule
        dpr = [rate.item() for rate in torch.linspace(1e-2, drop_path, self.N2)]

        blocks = [MetaBlock(
            channel_in, channel_hid, input_resolution, model_type,
            mlp_ratio, drop, drop_path=dpr[0], layer_i=0)]
        for i in range(1, N2 - 1):
            blocks.append(MetaBlock(
                channel_hid, channel_hid, input_resolution, model_type,
                mlp_ratio, drop, drop_path=dpr[i], layer_i=i))
        blocks.append(MetaBlock(
            channel_hid, channel_in, input_resolution, model_type,
            mlp_ratio, drop, drop_path=drop_path, layer_i=N2 - 1))
        self.enc = nn.Sequential(*blocks)

    def forward(self, x):
        B, T, C, H, W = x.shape
        z = x.reshape(B, T * C, H, W)
        for i in range(self.N2):
            z = self.enc[i](z)
        return z.reshape(B, T, C, H, W)
|
| 179 |
+
|
| 180 |
+
class SimVP(nn.Module):
    """SimVP video predictor: spatial Encoder -> MetaFormer translator -> Decoder.

    Input/output geometry is hard-coded to (B, 36, 1, 72, 72); the
    translator works on encoded latents with time folded into channels.
    """

    def __init__(self, hid_S=32, hid_T=256, N_S=2, N_T=8, incep_ker=[3, 5, 7, 11], groups=4):
        super(SimVP, self).__init__()
        T, C, H, W = 36, 1, 72, 72  # hard-coded dataset geometry
        self.enc = Encoder(C, hid_S, N_S)
        self.hid = MidMetaNet(T * hid_S, hid_T, N_T,
                              input_resolution=(H, W), model_type="vit",
                              mlp_ratio=8, drop=0.0, drop_path=0.1)
        self.dec = Decoder(hid_S, C, N_S)

    def forward(self, x_raw):
        B, T, C, H, W = x_raw.shape
        frames = x_raw.view(B * T, C, H, W)  # fold time into the batch axis

        embed, skip = self.enc(frames)
        _, C_, H_, W_ = embed.shape

        latents = embed.view(B, T, C_, H_, W_)
        hid = self.hid(latents).reshape(B * T, C_, H_, W_)

        out = self.dec(hid, skip)
        return out.reshape(B, T, C, H, W)
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
class larres(nn.Module):
    """LARRES predictor: spatial Encoder -> Inception-UNet translator -> Decoder.

    Same skeleton as SimVP but with the Mid_Xnet (Inception) temporal
    translator. Input/output geometry is hard-coded to (B, 36, 1, 72, 72).
    """

    def __init__(self, hid_S=32, hid_T=256, N_S=2, N_T=8, incep_ker=[3, 5, 7, 11], groups=4):
        super(larres, self).__init__()
        T, C, H, W = 36, 1, 72, 72  # hard-coded dataset geometry
        self.enc = Encoder(C, hid_S, N_S)
        self.hid = Mid_Xnet(T * hid_S, hid_T, N_T, incep_ker, groups)
        self.dec = Decoder(hid_S, C, N_S)

    def forward(self, x_raw):
        B, T, C, H, W = x_raw.shape
        frames = x_raw.view(B * T, C, H, W)  # fold time into the batch axis

        embed, skip = self.enc(frames)
        _, C_, H_, W_ = embed.shape

        latents = embed.view(B, T, C_, H_, W_)
        hid = self.hid(latents).reshape(B * T, C_, H_, W_)

        out = self.dec(hid, skip)
        return out.reshape(B, T, C, H, W)
|
model_convlstm.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn.functional as F
|
| 3 |
+
from torch import nn, Tensor
|
| 4 |
+
import numpy as np
|
| 5 |
+
import h5py
|
| 6 |
+
from torch.utils.data import DataLoader, Dataset
|
| 7 |
+
from torch.utils.data import Subset
|
| 8 |
+
from sklearn.model_selection import train_test_split
|
| 9 |
+
|
| 10 |
+
#Obtained from: https://holmdk.github.io/2020/04/02/video_prediction.html
|
| 11 |
+
class ConvLSTMCell(nn.Module):
    """A single convolutional LSTM cell.

    Adapted from: https://holmdk.github.io/2020/04/02/video_prediction.html
    """

    def __init__(self, input_dim, hidden_dim, kernel_size, bias):
        """
        Parameters
        ----------
        input_dim : int
            Number of channels of the input tensor.
        hidden_dim : int
            Number of channels of the hidden state.
        kernel_size : (int, int)
            Size of the convolutional kernel.
        bias : bool
            Whether or not to add the bias.
        """
        super().__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.kernel_size = kernel_size
        # 'same' padding keeps the spatial size unchanged
        self.padding = kernel_size[0] // 2, kernel_size[1] // 2
        self.bias = bias

        # one convolution produces all four gate pre-activations at once
        self.conv = nn.Conv2d(
            in_channels=self.input_dim + self.hidden_dim,
            out_channels=4 * self.hidden_dim,
            kernel_size=self.kernel_size,
            padding=self.padding,
            bias=self.bias,
        )

    def forward(self, input_tensor, cur_state):
        h_cur, c_cur = cur_state

        stacked = torch.cat([input_tensor, h_cur], dim=1)  # channel-wise concat
        gates = self.conv(stacked)
        gate_i, gate_f, gate_o, gate_g = torch.chunk(gates, 4, dim=1)

        i = torch.sigmoid(gate_i)
        f = torch.sigmoid(gate_f)
        o = torch.sigmoid(gate_o)
        g = torch.tanh(gate_g)

        c_next = f * c_cur + i * g
        h_next = o * torch.tanh(c_next)
        return h_next, c_next

    def init_hidden(self, batch_size, image_size):
        """Return zero-initialised (h, c) on the same device as the weights."""
        height, width = image_size
        dev = self.conv.weight.device
        shape = (batch_size, self.hidden_dim, height, width)
        return torch.zeros(*shape, device=dev), torch.zeros(*shape, device=dev)
|
| 64 |
+
|
| 65 |
+
def process_highdim_array(arr):
    """Reshape the last two spatial dims of a map array from (71, 73) to (72, 72).

    The last longitude column is dropped, (..., 71, 73) -> (..., 71, 72),
    then one zero-filled latitude row is appended, -> (..., 72, 72).

    BUG FIX: the original hard-coded a 4-pair pad spec, so it crashed on the
    5-D (1, 60, 1, 71, 73) input its own docstring advertised. The pad spec
    is now built from ``arr.ndim`` and works for any number of leading dims.

    Args:
        arr (ndarray): array whose last two dimensions are (71, 73).

    Returns:
        ndarray: same leading dims, last two dims reshaped to (72, 72).

    Raises:
        ValueError: if the last two dimensions are not (71, 73).
    """
    if arr.shape[-2:] != (71, 73):
        raise ValueError("输入数组的最后两个维度必须是 (71, 73)")

    # drop the last column of the final axis: (..., 71, 72)
    arr_trimmed = arr[..., :-1]

    # pad one zero row on the second-to-last axis only: (..., 72, 72)
    pad_spec = [(0, 0)] * (arr_trimmed.ndim - 2) + [(0, 1), (0, 0)]
    return np.pad(arr_trimmed, pad_spec, mode='constant', constant_values=0)
|
| 87 |
+
|
| 88 |
+
class ionexDataset(Dataset):
    """Sliding-window dataset over a time series of ionosphere maps.

    Each sample is a pair (x, y): x covers ``nstepsin`` consecutive steps
    and y the following ``nstepsout`` steps; windows start every ``stride``
    steps. Both halves pass through ``process_highdim_array`` to reshape
    the spatial dims from (71, 73) to (72, 72).
    """

    def __init__(self, npy_data, nstepsin=36, nstepsout=12, stride=12):
        self.data = npy_data.astype(np.float32)
        self.nstepsin = nstepsin
        self.nstepsout = nstepsout
        self.stride = stride
        # start indices chosen so every window fits entirely inside the data
        self.idx = np.arange(0, len(self.data) - nstepsout - nstepsin + 1, stride)

    def __getitem__(self, index):
        i = self.idx[index]
        end_ix = i + self.nstepsin
        # self.idx construction guarantees the window fits; raise instead of
        # the original `return None, None`, which the default DataLoader
        # collate could not have handled anyway.
        if end_ix + self.nstepsout > len(self.data):
            raise IndexError(f"window starting at {i} runs past the data")
        seq_x = self.data[i:end_ix]
        seq_y = self.data[end_ix:end_ix + self.nstepsout]
        return process_highdim_array(seq_x), process_highdim_array(seq_y)

    def __len__(self):
        return len(self.idx)

    def split_train_val(self, val_split=0.25):
        """Randomly split sample indices into train/val Subsets."""
        train_idx, val_idx = train_test_split(list(range(len(self))), test_size=val_split)
        return Subset(self, train_idx), Subset(self, val_idx)
|
| 113 |
+
|
| 114 |
+
# ---- experiment configuration --------------------------------------------
nstepsin = 36    # input steps per sample
nstepsout = 12   # predicted steps per sample
stride = 12      # offset between consecutive window starts
max_epochs = 200
# batch_size=2

# BUG FIX: the original opened 'train2015.h5', rebound `f` to 'test2015.h5'
# before closing, and only called f.close() once — leaking the first handle.
# Context managers close both files deterministically.
with h5py.File('train2015.h5', 'r') as f:
    # NOTE(review): dataset key '2020' read from train2015.h5 — looks like a
    # copy/paste from the 2020 configuration below; confirm the key.
    train_npy = np.array(f['2020']) / 10
with h5py.File('test2015.h5', 'r') as f:
    test_npy = np.array(f['2015']) / 10

# --- alternative dataset configurations kept for reference ----------------
# f = h5py.File('train2015.h5', 'r')
# train_npy=np.array(f['2020'])/10
# f1=h5py.File('c1pg2015.h5', 'r')
# f = h5py.File('test2015.h5', 'r')
# test_npy=np.array(f['2015'])/10-np.array(f1['2015'])/10

# f = h5py.File('train2020.h5', 'r')
# train_npy=np.array(f['2020'])/10
# f = h5py.File('test2020.h5', 'r')
# test_npy=np.array(f['2020'])/10

# f = h5py.File('train2020.h5', 'r')
# train_npy=np.array(f['2020'])/10
# f1=h5py.File('c1pg2020.h5', 'r')
# f = h5py.File('test2020.h5', 'r')
# test_npy=np.array(f['2020'])/10-np.array(f1['2020'])/10

print("Training data:", train_npy.shape)
print("Testing data:", test_npy.shape)
|
| 145 |
+
|
| 146 |
+
class EncoderDecoderConvLSTM(nn.Module):
    """Three-layer ConvLSTM encoder + three-layer ConvLSTM decoder.

    The encoder consumes the input sequence step by step; the decoder is
    initialised from the encoder's final states and unrolled for
    ``nstepsout`` steps, each mapped to a 1-channel frame by a 1x1 conv.
    """

    def __init__(self, nf, in_chan, out_chan, nstepsout=12):
        super().__init__()
        self.nstepsout = nstepsout
        self.encoder_1_convlstm = ConvLSTMCell(input_dim=in_chan, hidden_dim=nf, kernel_size=(3, 3), bias=True)
        self.encoder_2_convlstm = ConvLSTMCell(input_dim=nf, hidden_dim=nf, kernel_size=(3, 3), bias=True)
        self.encoder_3_convlstm = ConvLSTMCell(input_dim=nf, hidden_dim=nf, kernel_size=(3, 3), bias=True)
        self.decoder_1_convlstm = ConvLSTMCell(input_dim=nf, hidden_dim=nf, kernel_size=(3, 3), bias=True)
        self.decoder_2_convlstm = ConvLSTMCell(input_dim=nf, hidden_dim=nf, kernel_size=(3, 3), bias=True)
        self.decoder_3_convlstm = ConvLSTMCell(input_dim=nf, hidden_dim=nf, kernel_size=(3, 3), bias=True)
        # NOTE(review): out_chan is accepted but unused; the readout conv is
        # hard-coded to 1 output channel — confirm intent.
        self.conv2d = nn.Conv2d(in_channels=nf, out_channels=1, kernel_size=(1, 1))

    def forward(self, x, future_seq=0, hidden_state=None):
        b, seq_len, _, h, w = x.size()

        # encoder: zero-initialised states, one step per input frame
        # (h3/c3 previously came from decoder_3's init_hidden; same dims,
        # but encoder_3 is the consistent owner of that state)
        h1, c1 = self.encoder_1_convlstm.init_hidden(batch_size=b, image_size=(h, w))
        h2, c2 = self.encoder_2_convlstm.init_hidden(batch_size=b, image_size=(h, w))
        h3, c3 = self.encoder_3_convlstm.init_hidden(batch_size=b, image_size=(h, w))

        for t in range(seq_len):
            h1, c1 = self.encoder_1_convlstm(input_tensor=x[:, t, :, :], cur_state=[h1, c1])
            h2, c2 = self.encoder_2_convlstm(input_tensor=h1, cur_state=[h2, c2])
            h3, c3 = self.encoder_3_convlstm(input_tensor=h2, cur_state=[h3, c3])

        # decoder states start from the encoder's final states
        h4, c4 = h1, c1
        h5, c5 = h2, c2
        h6, c6 = h3, c3

        outputs = []
        for t in range(self.nstepsout):
            # note that h3 is held fixed during prediction and fed to every step
            h4, c4 = self.decoder_1_convlstm(input_tensor=h3, cur_state=[h4, c4])
            h5, c5 = self.decoder_2_convlstm(input_tensor=h4, cur_state=[h5, c5])
            h6, c6 = self.decoder_3_convlstm(input_tensor=h5, cur_state=[h6, c6])
            # BUG FIX: the original read out h4, silently discarding decoder
            # layers 2 and 3 (their outputs were computed and never used);
            # the deepest decoder state h6 is the intended readout.
            outputs.append(self.conv2d(h6))
        return torch.stack(outputs, 1)
|
modules.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from torch import nn
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class BasicConv2d(nn.Module):
    """2D (optionally transposed) convolution with optional GroupNorm + LeakyReLU."""

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, transpose=False, act_norm=False):
        super(BasicConv2d, self).__init__()
        self.act_norm = act_norm
        if transpose:
            # output_padding = stride // 2 recovers the exact doubled size
            # for a stride-2 upsampling layer
            self.conv = nn.ConvTranspose2d(
                in_channels, out_channels, kernel_size=kernel_size,
                stride=stride, padding=padding, output_padding=stride // 2)
        else:
            self.conv = nn.Conv2d(
                in_channels, out_channels, kernel_size=kernel_size,
                stride=stride, padding=padding)
        self.norm = nn.GroupNorm(2, out_channels)
        self.act = nn.LeakyReLU(0.2, inplace=True)

    def forward(self, x):
        out = self.conv(x)
        return self.act(self.norm(out)) if self.act_norm else out
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class ConvSC(nn.Module):
    """Single 3x3 BasicConv2d stage; transposition is disabled when stride == 1."""

    def __init__(self, C_in, C_out, stride, transpose=False, act_norm=True):
        super(ConvSC, self).__init__()
        # a stride-1 layer never upsamples, so force a plain convolution
        use_transpose = transpose and stride != 1
        self.conv = BasicConv2d(C_in, C_out, kernel_size=3, stride=stride,
                                padding=1, transpose=use_transpose, act_norm=act_norm)

    def forward(self, x):
        return self.conv(x)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class GroupConv2d(nn.Module):
    """Grouped 2D convolution with optional GroupNorm + LeakyReLU.

    Falls back to groups=1 when in_channels is not divisible by groups.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, groups, act_norm=False):
        super(GroupConv2d, self).__init__()
        self.act_norm = act_norm
        if in_channels % groups != 0:
            groups = 1  # grouped conv requires divisibility; degrade gracefully
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size,
                              stride=stride, padding=padding, groups=groups)
        self.norm = nn.GroupNorm(groups, out_channels)
        self.activate = nn.LeakyReLU(0.2, inplace=True)

    def forward(self, x):
        out = self.conv(x)
        if self.act_norm:
            out = self.activate(self.norm(out))
        return out
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class Inception(nn.Module):
    """Inception-style block: a 1x1 bottleneck followed by parallel grouped
    convolutions of different kernel sizes whose outputs are summed."""

    def __init__(self, C_in, C_hid, C_out, incep_ker=[3, 5, 7, 11], groups=8):
        super(Inception, self).__init__()
        self.conv1 = nn.Conv2d(C_in, C_hid, kernel_size=1, stride=1, padding=0)
        branches = [GroupConv2d(C_hid, C_out, kernel_size=k, stride=1,
                                padding=k // 2, groups=groups, act_norm=True)
                    for k in incep_ker]
        self.layers = nn.Sequential(*branches)

    def forward(self, x):
        x = self.conv1(x)
        # sum the parallel branch outputs (same as the original accumulator)
        return sum(branch(x) for branch in self.layers)
|
test2015.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8f012ef4a1fc40d1c993cea1eff972ea56cbda86fd3a433431ea71d82259e09
|
| 3 |
+
size 181614368
|
test2020.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0a1c3c6d19a81a998ab4381bf189ba0ac7b8c6378008ad7b3d1465ffa20edd1
|
| 3 |
+
size 182111936
|
train2015.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f17b0b8430647d2cc21a1bc2af719e3a2370bed8d93ac70626fcc08fcc2e546c
|
| 3 |
+
size 545336576
|
train2020.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8f39605870ce1fc5930277d5d225a29b3aaaff8fc53c4a00c9c6149740d91ebf
|
| 3 |
+
size 544839008
|
train_simvp2.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import torch
import torch.nn.functional as F
from torch import nn, Tensor
import numpy as np
import h5py
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import Subset
from sklearn.model_selection import train_test_split
import torch.optim as optim

from model_convlstm import ionexDataset, train_npy, nstepsin, nstepsout, stride, EncoderDecoderConvLSTM, max_epochs
from model_LARRES import larres

# Build the IONEX sliding-window dataset and split off 20% for validation.
ionexData = ionexDataset(train_npy, nstepsin=nstepsin, nstepsout=nstepsout, stride=stride)
train_data, val_data = ionexData.split_train_val(val_split=0.2)

train_loader = DataLoader(train_data, batch_size=16, num_workers=0)
val_loader = DataLoader(val_data, batch_size=16, num_workers=0)

# Sanity-check one batch so shape problems surface before training starts.
for X, y in train_loader:
    print(f"Shape of X: {X.shape} {X.dtype} [N, C, H, W]")
    print(f"Shape of Y: {y.shape} {y.dtype}")
    break
print(f"Training samples: {len(train_loader.dataset)}")
print(f"Validation samples: {len(val_loader.dataset)}")

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = larres().to(device)
# model.load_state_dict(torch.load("best_model.pth"))
optimizer = optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.L1Loss()  # MAE on the predicted residual

best_val_loss = float('inf')

for epoch in range(max_epochs):
    # ---- training ----
    model.train()
    train_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        # The model predicts the residual w.r.t. the last 12 input frames
        # (channels 24:36 of the input), so supervise on that difference.
        target_last = target - data[:, 24:36, :, :, :]
        # Only the first 71 rows are scored — presumably the valid latitude
        # band of the TEC map; TODO confirm against the dataset definition.
        loss = criterion(output[:, :12, :, :71, :], target_last[:, :12, :, :71, :])
        # Accumulate the detached scalar: summing the loss *tensor* would keep
        # every batch's autograd graph alive and leak (GPU) memory.
        train_loss += loss.item()
        loss.backward()
        optimizer.step()

    # Report the mean batch loss, matching the validation metric below.
    train_loss /= max(len(train_loader), 1)
    print(f'Epoch {epoch + 1}/{max_epochs}, Train Loss: {train_loss:.4f}')

    # ---- validation ----
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            target_last = target - data[:, 24:36, :, :, :]
            loss = criterion(output[:, :12, :, :71, :], target_last[:, :12, :, :71, :])
            val_loss += loss.item()

    val_loss /= max(len(val_loader), 1)
    print(f'Epoch {epoch + 1}/{max_epochs}, Val Loss: {val_loss:.4f}')

    # Keep only the checkpoint with the best validation loss.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_model.pth')
        print('Best model saved!')

print('Training completed.')
|
utilpack/__init__.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) CAIRI AI Lab. All rights reserved
|
| 2 |
+
|
| 3 |
+
from .convlstm_modules import ConvLSTMCell
|
| 4 |
+
from .e3dlstm_modules import Eidetic3DLSTMCell, tf_Conv3d
|
| 5 |
+
from .mim_modules import MIMBlock, MIMN
|
| 6 |
+
from .mau_modules import MAUCell
|
| 7 |
+
from .phydnet_modules import PhyCell, PhyD_ConvLSTM, PhyD_EncoderRNN, K2M
|
| 8 |
+
from .predrnn_modules import SpatioTemporalLSTMCell
|
| 9 |
+
from .predrnnpp_modules import CausalLSTMCell, GHU
|
| 10 |
+
from .predrnnv2_modules import SpatioTemporalLSTMCellv2
|
| 11 |
+
from .simvp_modules import (BasicConv2d, ConvSC, GroupConv2d,
|
| 12 |
+
ConvNeXtSubBlock, ConvMixerSubBlock, GASubBlock, gInception_ST,
|
| 13 |
+
HorNetSubBlock, MLPMixerSubBlock, MogaSubBlock, PoolFormerSubBlock,
|
| 14 |
+
SwinSubBlock, UniformerSubBlock, VANSubBlock, ViTSubBlock, TAUSubBlock)
|
| 15 |
+
from .mmvp_modules import (ResBlock, RRDB, ResidualDenseBlock_4C, Up, Conv3D, ConvLayer,
|
| 16 |
+
MatrixPredictor3DConv, SimpleMatrixPredictor3DConv_direct, PredictModel)
|
| 17 |
+
from .swinlstm_modules import UpSample, DownSample, STconvert
|
| 18 |
+
|
| 19 |
+
__all__ = [
|
| 20 |
+
'ConvLSTMCell', 'CausalLSTMCell', 'GHU', 'SpatioTemporalLSTMCell', 'SpatioTemporalLSTMCellv2',
|
| 21 |
+
'MIMBlock', 'MIMN', 'Eidetic3DLSTMCell', 'tf_Conv3d',
|
| 22 |
+
'PhyCell', 'PhyD_ConvLSTM', 'PhyD_EncoderRNN', 'K2M', 'MAUCell',
|
| 23 |
+
'BasicConv2d', 'ConvSC', 'GroupConv2d',
|
| 24 |
+
'ConvNeXtSubBlock', 'ConvMixerSubBlock', 'GASubBlock', 'gInception_ST',
|
| 25 |
+
'HorNetSubBlock', 'MLPMixerSubBlock', 'MogaSubBlock', 'PoolFormerSubBlock',
|
| 26 |
+
'SwinSubBlock', 'UniformerSubBlock', 'VANSubBlock', 'ViTSubBlock', 'TAUSubBlock',
|
| 27 |
+
'ResBlock', 'RRDB', 'ResidualDenseBlock_4C', 'Up', 'Conv3D', 'ConvLayer',
|
| 28 |
+
'MatrixPredictor3DConv', 'SimpleMatrixPredictor3DConv_direct', 'PredictModel',
|
| 29 |
+
'UpSample', 'DownSample', 'STconvert'
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
]
|
utilpack/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (1.73 kB). View file
|
|
|
utilpack/__pycache__/convlstm_modules.cpython-312.pyc
ADDED
|
Binary file (3.37 kB). View file
|
|
|
utilpack/__pycache__/e3dlstm_modules.cpython-312.pyc
ADDED
|
Binary file (6.96 kB). View file
|
|
|
utilpack/__pycache__/mau_modules.cpython-312.pyc
ADDED
|
Binary file (4.09 kB). View file
|
|
|
utilpack/__pycache__/mim_modules.cpython-312.pyc
ADDED
|
Binary file (10.8 kB). View file
|
|
|
utilpack/__pycache__/mmvp_modules.cpython-312.pyc
ADDED
|
Binary file (25.7 kB). View file
|
|
|
utilpack/__pycache__/phydnet_modules.cpython-312.pyc
ADDED
|
Binary file (27.2 kB). View file
|
|
|
utilpack/__pycache__/predrnn_modules.cpython-312.pyc
ADDED
|
Binary file (4.65 kB). View file
|
|
|
utilpack/__pycache__/predrnnpp_modules.cpython-312.pyc
ADDED
|
Binary file (9.27 kB). View file
|
|
|
utilpack/__pycache__/predrnnv2_modules.cpython-312.pyc
ADDED
|
Binary file (4.7 kB). View file
|
|
|
utilpack/__pycache__/simvp_modules.cpython-312.pyc
ADDED
|
Binary file (37.5 kB). View file
|
|
|
utilpack/__pycache__/swinlstm_modules.cpython-312.pyc
ADDED
|
Binary file (16.6 kB). View file
|
|
|
utilpack/convlstm_modules.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class ConvLSTMCell(nn.Module):
    """Single ConvLSTM cell: convolutional input/hidden projections feeding the
    standard i/f/g/o LSTM gate equations.

    Args:
        in_channel: channels of the input frame x_t.
        num_hidden: channels of the hidden/cell state.
        height, width: spatial size (needed only for the LayerNorm shape).
        filter_size: square conv kernel size.
        stride: conv stride.
        layer_norm: if True, a LayerNorm over (C, H, W) follows each conv.
    """

    def __init__(self, in_channel, num_hidden, height, width, filter_size, stride, layer_norm):
        super(ConvLSTMCell, self).__init__()

        self.num_hidden = num_hidden
        self.padding = filter_size // 2
        # NOTE(review): stored but never applied in forward() — the forget gate
        # gets no bias offset here; confirm whether that is intended.
        self._forget_bias = 1.0

        def _conv_block(in_ch, out_ch):
            # Conv2d optionally followed by LayerNorm; kept inside nn.Sequential
            # in both cases so state_dict keys match either configuration.
            ops = [nn.Conv2d(in_ch, out_ch, kernel_size=filter_size,
                             stride=stride, padding=self.padding, bias=False)]
            if layer_norm:
                ops.append(nn.LayerNorm([out_ch, height, width]))
            return nn.Sequential(*ops)

        self.conv_x = _conv_block(in_channel, num_hidden * 4)   # input -> 4 gates
        self.conv_h = _conv_block(num_hidden, num_hidden * 4)   # hidden -> 4 gates
        self.conv_o = _conv_block(num_hidden * 2, num_hidden)   # defined but unused in forward()
        self.conv_last = nn.Conv2d(num_hidden * 2, num_hidden, kernel_size=1,
                                   stride=1, padding=0, bias=False)  # unused in forward()

    def forward(self, x_t, h_t, c_t):
        """One recurrence step; returns the new hidden and cell states."""
        gates_x = self.conv_x(x_t)
        gates_h = self.conv_h(h_t)
        i_x, f_x, g_x, o_x = torch.split(gates_x, self.num_hidden, dim=1)
        i_h, f_h, g_h, o_h = torch.split(gates_h, self.num_hidden, dim=1)

        in_gate = torch.sigmoid(i_x + i_h)
        forget_gate = torch.sigmoid(f_x + f_h)
        cell_update = torch.tanh(g_x + g_h)

        c_new = forget_gate * c_t + in_gate * cell_update
        out_gate = torch.sigmoid(o_x + o_h)
        h_new = out_gate * torch.tanh(c_new)
        return h_new, c_new
|
utilpack/e3dlstm_modules.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class tf_Conv3d(nn.Module):
    """Conv3d whose output is resized back to the input's (D, H, W), mimicking
    TensorFlow-style 'same' spatial behavior regardless of kernel/stride."""

    def __init__(self, in_channels, out_channels, *vargs, **kwargs):
        super(tf_Conv3d, self).__init__()
        # All extra conv arguments (kernel_size, stride, padding, ...) pass through.
        self.conv3d = nn.Conv3d(in_channels, out_channels, *vargs, **kwargs)

    def forward(self, input):
        out = self.conv3d(input)
        # Nearest-neighbour resize restores the input's last three dims.
        return F.interpolate(out, size=input.shape[-3:], mode="nearest")
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class Eidetic3DLSTMCell(nn.Module):
    """Eidetic 3D LSTM cell (E3D-LSTM).

    Combines a recall-gate attention over past cell states ("eidetic" memory)
    with a global spatiotemporal memory shared across layers. All convolutions
    are 3D (window_length x height x width) via tf_Conv3d.

    Args:
        in_channel: channels of the input tensor x_t.
        num_hidden: channels of the hidden/cell/global-memory states.
        window_length: temporal depth of the 3D state tensors.
        height, width: spatial size (used for LayerNorm shapes).
        filter_size: 3-tuple (t, h, w) conv kernel size.
        stride: conv stride.
        layer_norm: if True, each conv is followed by a LayerNorm.
    """

    def __init__(self, in_channel, num_hidden, window_length,
                 height, width, filter_size, stride, layer_norm):
        super(Eidetic3DLSTMCell, self).__init__()

        # Normalizes the recall-augmented cell state before the input update.
        self._norm_c_t = nn.LayerNorm([num_hidden, window_length, height, width])
        self.num_hidden = num_hidden
        # No temporal padding; 'same' padding spatially (odd kernels assumed).
        self.padding = (0, filter_size[1] // 2, filter_size[2] // 2)
        # Added to the global-memory forget gate pre-activation (encourages remembering).
        self._forget_bias = 1.0
        if layer_norm:
            # conv_x produces 7 gate maps: i, g, r, o plus the three
            # global-memory gates (temp_i, temp_g, temp_f).
            self.conv_x = nn.Sequential(
                tf_Conv3d(in_channel, num_hidden * 7, kernel_size=filter_size,
                          stride=stride, padding=self.padding, bias=False),
                nn.LayerNorm([num_hidden * 7, window_length, height, width])
            )
            # conv_h produces the 4 hidden-state gate maps: i, g, r, o.
            self.conv_h = nn.Sequential(
                tf_Conv3d(num_hidden, num_hidden * 4, kernel_size=filter_size,
                          stride=stride, padding=self.padding, bias=False),
                nn.LayerNorm([num_hidden * 4, window_length, height, width])
            )
            # conv_gm produces the 4 global-memory maps: i_m, f_m, g_m, m_m.
            self.conv_gm = nn.Sequential(
                tf_Conv3d(num_hidden, num_hidden * 4, kernel_size=filter_size,
                          stride=stride, padding=self.padding, bias=False),
                nn.LayerNorm([num_hidden * 4, window_length, height, width])
            )
            # Output-gate contributions from the new cell / new global memory.
            self.conv_new_cell = nn.Sequential(
                tf_Conv3d(num_hidden, num_hidden, kernel_size=filter_size,
                          stride=stride, padding=self.padding, bias=False),
                nn.LayerNorm([num_hidden, window_length, height, width])
            )
            self.conv_new_gm = nn.Sequential(
                tf_Conv3d(num_hidden, num_hidden, kernel_size=filter_size,
                          stride=stride, padding=self.padding, bias=False),
                nn.LayerNorm([num_hidden, window_length, height, width])
            )
        else:
            # Same convolutions without LayerNorm.
            self.conv_x = nn.Sequential(
                tf_Conv3d(in_channel, num_hidden * 7, kernel_size=filter_size,
                          stride=stride, padding=self.padding, bias=False),
            )
            self.conv_h = nn.Sequential(
                tf_Conv3d(num_hidden, num_hidden * 4, kernel_size=filter_size,
                          stride=stride, padding=self.padding, bias=False),
            )
            self.conv_gm = nn.Sequential(
                tf_Conv3d(num_hidden, num_hidden * 4, kernel_size=filter_size,
                          stride=stride, padding=self.padding, bias=False),
            )
            self.conv_new_cell = nn.Sequential(
                tf_Conv3d(num_hidden, num_hidden, kernel_size=filter_size,
                          stride=stride, padding=self.padding, bias=False),
            )
            self.conv_new_gm = nn.Sequential(
                tf_Conv3d(num_hidden, num_hidden, kernel_size=filter_size,
                          stride=stride, padding=self.padding, bias=False),
            )
        # 1x1x1 fusion of (new_cell, new_global_memory) -> hidden channels.
        self.conv_last = tf_Conv3d(num_hidden * 2, num_hidden, kernel_size=1,
                                   stride=1, padding=0, bias=False)

    def _attn(self, in_query, in_keys, in_values):
        """Channel-wise dot-product attention of the recall gate (query) over
        the stacked historical cell states (keys == values).

        All tensors are flattened to (batch, positions, channels); softmax is
        over the key positions.
        """
        batch, num_channels, _, width, height = in_query.shape
        query = in_query.reshape(batch, -1, num_channels)
        keys = in_keys.reshape(batch, -1, num_channels)
        values = in_values.reshape(batch, -1, num_channels)
        attn = torch.einsum('bxc,byc->bxy', query, keys)
        attn = torch.softmax(attn, dim=2)
        attn = torch.einsum("bxy,byc->bxc", attn, values)
        # Reshape back; -1 recovers the temporal dim of the query.
        return attn.reshape(batch, num_channels, -1, width, height)

    def forward(self, x_t, h_t, c_t, global_memory, eidetic_cell):
        """One recurrence step.

        Args:
            x_t: input tensor.
            h_t, c_t: previous hidden and cell states.
            global_memory: shared spatiotemporal memory.
            eidetic_cell: concatenation of historical cell states attended by
                the recall gate.

        Returns:
            (output hidden state, new cell state, global memory).
        """
        h_concat = self.conv_h(h_t)
        i_h, g_h, r_h, o_h = torch.split(h_concat, self.num_hidden, dim=1)

        x_concat = self.conv_x(x_t)
        i_x, g_x, r_x, o_x, temp_i_x, temp_g_x, temp_f_x = \
            torch.split(x_concat, self.num_hidden, dim=1)

        i_t = torch.sigmoid(i_x + i_h)
        r_t = torch.sigmoid(r_x + r_h)  # recall gate: queries the eidetic memory
        g_t = torch.tanh(g_x + g_h)

        # Recall: attend over historical cell states, add to the current cell,
        # then normalize before applying the input update.
        new_cell = c_t + self._attn(r_t, eidetic_cell, eidetic_cell)
        new_cell = self._norm_c_t(new_cell) + i_t * g_t

        # Global-memory update uses its own i/f/g gates plus a candidate m_m.
        new_global_memory = self.conv_gm(global_memory)
        i_m, f_m, g_m, m_m = torch.split(new_global_memory, self.num_hidden, dim=1)

        temp_i_t = torch.sigmoid(temp_i_x + i_m)
        temp_f_t = torch.sigmoid(temp_f_x + f_m + self._forget_bias)
        temp_g_t = torch.tanh(temp_g_x + g_m)
        new_global_memory = temp_f_t * torch.tanh(m_m) + temp_i_t * temp_g_t

        o_c = self.conv_new_cell(new_cell)
        o_m = self.conv_new_gm(new_global_memory)

        # NOTE(review): pre-activation is tanh here (sigmoid is applied at use
        # below) — unusual for an output gate; confirm against the reference.
        output_gate = torch.tanh(o_x + o_h + o_c + o_m)

        memory = torch.cat((new_cell, new_global_memory), 1)
        memory = self.conv_last(memory)

        output = torch.tanh(memory) * torch.sigmoid(output_gate)

        # NOTE(review): returns the *incoming* global_memory, not
        # new_global_memory — the updated global memory is discarded by the
        # caller; verify this is intended.
        return output, new_cell, global_memory
|
utilpack/layers/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .hornet import HorBlock
|
| 2 |
+
from .moganet import ChannelAggregationFFN, MultiOrderGatedAggregation, MultiOrderDWConv
|
| 3 |
+
from .poolformer import PoolFormerBlock
|
| 4 |
+
from .uniformer import CBlock, SABlock
|
| 5 |
+
from .van import DWConv, MixMlp, VANBlock
|
| 6 |
+
|
| 7 |
+
__all__ = [
|
| 8 |
+
'HorBlock', 'ChannelAggregationFFN', 'MultiOrderGatedAggregation', 'MultiOrderDWConv',
|
| 9 |
+
'PoolFormerBlock', 'CBlock', 'SABlock', 'DWConv', 'MixMlp', 'VANBlock',
|
| 10 |
+
]
|
utilpack/layers/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (582 Bytes). View file
|
|
|
utilpack/layers/__pycache__/hornet.cpython-312.pyc
ADDED
|
Binary file (7.78 kB). View file
|
|
|
utilpack/layers/__pycache__/moganet.cpython-312.pyc
ADDED
|
Binary file (8.2 kB). View file
|
|
|
utilpack/layers/__pycache__/poolformer.cpython-312.pyc
ADDED
|
Binary file (6 kB). View file
|
|
|
utilpack/layers/__pycache__/uniformer.cpython-312.pyc
ADDED
|
Binary file (11.4 kB). View file
|
|
|
utilpack/layers/__pycache__/van.cpython-312.pyc
ADDED
|
Binary file (8.25 kB). View file
|
|
|
utilpack/layers/hornet.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# refer to the code from HorNet, Thanks!
|
| 2 |
+
# https://github.com/raoyongming/HorNet
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
import torch.nn as nn
|
| 6 |
+
import torch.nn.functional as F
|
| 7 |
+
from timm.layers import DropPath
|
| 8 |
+
import torch.fft
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def get_dwconv(dim, kernel, bias):
    """Return a depthwise (groups=dim) 2D conv; padding keeps spatial size for odd kernels."""
    return nn.Conv2d(
        dim, dim,
        kernel_size=kernel,
        padding=(kernel - 1) // 2,
        bias=bias,
        groups=dim,
    )
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class gnconv(nn.Module):
    """Recursive gated convolution (g^n conv) from HorNet.

    The input is projected to 2*dim channels, split into a base part and a
    stack of progressively wider parts (channel widths double each order),
    and the orders are multiplied together through pointwise convs.
    Requires dim to be divisible by 2**(order-1).
    """

    def __init__(self, dim, order=5, gflayer=None, h=14, w=8, s=1.0):
        super().__init__()
        self.order = order
        # Widths [dim/2^(order-1), ..., dim/2, dim]; their sum plus dims[0]
        # equals 2*dim, matching proj_in's output.
        self.dims = [dim // 2 ** i for i in range(order)]
        self.dims.reverse()
        self.proj_in = nn.Conv2d(dim, 2 * dim, 1)

        total = sum(self.dims)
        if gflayer is None:
            self.dwconv = get_dwconv(total, 7, True)
        else:
            # Optional global-filter layer replacing the depthwise conv.
            self.dwconv = gflayer(total, h=h, w=w)

        self.proj_out = nn.Conv2d(dim, dim, 1)

        # Pointwise convs lifting each order's width to the next.
        self.pws = nn.ModuleList(
            [nn.Conv2d(self.dims[i], self.dims[i + 1], 1) for i in range(order - 1)]
        )

        self.scale = s
        print('[gnconv]', order, 'order with dims=', self.dims, 'scale=%.4f'%self.scale)

    def forward(self, x, mask=None, dummy=False):
        # Split projection into the first-order operand and the gate stack.
        pwa, abc = torch.split(self.proj_in(x), (self.dims[0], sum(self.dims)), dim=1)

        gates = torch.split(self.dwconv(abc) * self.scale, self.dims, dim=1)

        # Recursive gating: multiply, widen, multiply, ...
        out = pwa * gates[0]
        for idx, pw in enumerate(self.pws):
            out = pw(out) * gates[idx + 1]

        return self.proj_out(out)
|
| 52 |
+
|
| 53 |
+
class LayerNorm(nn.Module):
    r"""LayerNorm over the channel dimension for either data layout.

    channels_last expects (batch, height, width, channels) and uses
    F.layer_norm; channels_first expects (batch, channels, height, width)
    and normalizes manually over dim 1.
    """

    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
        super().__init__()
        if data_format not in ("channels_last", "channels_first"):
            raise NotImplementedError
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.data_format = data_format
        self.normalized_shape = (normalized_shape, )

    def forward(self, x):
        if self.data_format == "channels_last":
            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        # channels_first: standardize across channels, then affine per channel.
        mean = x.mean(1, keepdim=True)
        var = (x - mean).pow(2).mean(1, keepdim=True)
        normed = (x - mean) / torch.sqrt(var + self.eps)
        return self.weight[:, None, None] * normed + self.bias[:, None, None]
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
class HorBlock(nn.Module):
    """HorNet block: gnconv token mixing followed by a pointwise MLP,
    each branch residual with LayerScale (gamma1/gamma2) and DropPath."""

    def __init__(self, dim, order=4, mlp_ratio=4, drop_path=0., init_value=1e-6, gnconv=gnconv):
        super().__init__()

        hidden = int(mlp_ratio * dim)
        self.norm1 = LayerNorm(dim, eps=1e-6, data_format='channels_first')
        self.gnconv = gnconv(dim, order)  # depthwise conv
        self.norm2 = LayerNorm(dim, eps=1e-6)
        # pointwise/1x1 convs, implemented with linear layers
        self.pwconv1 = nn.Linear(dim, hidden)
        self.act = nn.GELU()
        self.pwconv2 = nn.Linear(hidden, dim)
        self.gamma1 = nn.Parameter(init_value * torch.ones(dim), requires_grad=True)
        self.gamma2 = nn.Parameter(init_value * torch.ones((dim)), requires_grad=True)
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()

    def forward(self, x):
        B, C, H, W = x.shape
        # Token-mixing branch with per-channel LayerScale.
        x = x + self.drop_path(self.gamma1.view(C, 1, 1) * self.gnconv(self.norm1(x)))

        residual = x
        y = x.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)
        y = self.pwconv2(self.act(self.pwconv1(self.norm2(y))))
        if self.gamma2 is not None:
            y = self.gamma2 * y
        y = y.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)

        return residual + self.drop_path(y)
|
utilpack/layers/moganet.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# refer to the code from MogaNet, Thanks!
|
| 2 |
+
# https://github.com/Westlake-AI/MogaNet/blob/main/models/moganet.py
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
import torch.nn as nn
|
| 6 |
+
import torch.nn.functional as F
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class ChannelAggregationFFN(nn.Module):
    """MogaNet FFN with channel aggregation: 1x1 expand -> depthwise conv ->
    activation -> channel-decompose re-weighting -> 1x1 project."""

    def __init__(self, embed_dims, mlp_hidden_dims, kernel_size=3, act_layer=nn.GELU, ffn_drop=0.):
        super(ChannelAggregationFFN, self).__init__()
        self.embed_dims = embed_dims
        self.mlp_hidden_dims = mlp_hidden_dims

        self.fc1 = nn.Conv2d(
            in_channels=embed_dims, out_channels=self.mlp_hidden_dims, kernel_size=1)
        self.dwconv = nn.Conv2d(
            in_channels=self.mlp_hidden_dims, out_channels=self.mlp_hidden_dims,
            kernel_size=kernel_size, padding=kernel_size // 2,
            bias=True, groups=self.mlp_hidden_dims)
        self.act = act_layer()
        self.fc2 = nn.Conv2d(
            in_channels=mlp_hidden_dims, out_channels=embed_dims, kernel_size=1)
        self.drop = nn.Dropout(ffn_drop)

        # Aggregation branch: collapse hidden channels to one map, then add a
        # learnable-scale residual against it.
        self.decompose = nn.Conv2d(
            in_channels=self.mlp_hidden_dims, out_channels=1, kernel_size=1)
        self.sigma = nn.Parameter(
            1e-5 * torch.ones((1, mlp_hidden_dims, 1, 1)), requires_grad=True)
        self.decompose_act = act_layer()

    def feat_decompose(self, x):
        # x + sigma * (x - act(decompose(x))): re-weights each channel against
        # the aggregated single-channel map.
        return x + self.sigma * (x - self.decompose_act(self.decompose(x)))

    def forward(self, x):
        # Expansion + depthwise mixing.
        x = self.drop(self.act(self.dwconv(self.fc1(x))))
        # Channel aggregation, then projection back to embed_dims.
        x = self.feat_decompose(x)
        return self.drop(self.fc2(x))
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class MultiOrderDWConv(nn.Module):
    """MogaNet multi-order depthwise conv: a base DW conv over all channels,
    two further dilated DW branches over channel slices, fused by a 1x1 conv.

    embed_dims must be divisible by sum(channel_split).
    """

    def __init__(self, embed_dims, dw_dilation=[1, 2, 3], channel_split=[1, 3, 4]):
        super(MultiOrderDWConv, self).__init__()
        assert len(dw_dilation) == len(channel_split) == 3
        assert 1 <= min(dw_dilation) and max(dw_dilation) <= 3
        assert embed_dims % sum(channel_split) == 0

        self.split_ratio = [i / sum(channel_split) for i in channel_split]
        self.embed_dims_1 = int(self.split_ratio[1] * embed_dims)
        self.embed_dims_2 = int(self.split_ratio[2] * embed_dims)
        # Remainder goes to the untouched base slice.
        self.embed_dims_0 = embed_dims - self.embed_dims_1 - self.embed_dims_2
        self.embed_dims = embed_dims

        def _dw(channels, kernel, dilation):
            # 'same'-size depthwise conv for the given kernel/dilation.
            return nn.Conv2d(
                in_channels=channels, out_channels=channels, kernel_size=kernel,
                padding=(1 + (kernel - 1) * dilation) // 2,
                groups=channels, stride=1, dilation=dilation,
            )

        self.DW_conv0 = _dw(embed_dims, 5, dw_dilation[0])         # basic DW conv
        self.DW_conv1 = _dw(self.embed_dims_1, 5, dw_dilation[1])  # DW conv 1
        self.DW_conv2 = _dw(self.embed_dims_2, 7, dw_dilation[2])  # DW conv 2
        # Channel-mixing 1x1 conv.
        self.PW_conv = nn.Conv2d(
            in_channels=embed_dims, out_channels=embed_dims, kernel_size=1)

    def forward(self, x):
        base = self.DW_conv0(x)
        mid = self.DW_conv1(
            base[:, self.embed_dims_0: self.embed_dims_0 + self.embed_dims_1, ...])
        high = self.DW_conv2(
            base[:, self.embed_dims - self.embed_dims_2:, ...])
        fused = torch.cat([base[:, :self.embed_dims_0, ...], mid, high], dim=1)
        return self.PW_conv(fused)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
class MultiOrderGatedAggregation(nn.Module):
    """MogaNet spatial block: feature decomposition, then SiLU-gated fusion of
    a 1x1 gate branch with a multi-order DW-conv value branch."""

    def __init__(self, embed_dims, attn_dw_dilation=[1, 2, 3], attn_channel_split=[1, 3, 4], attn_shortcut=True):
        super(MultiOrderGatedAggregation, self).__init__()
        self.embed_dims = embed_dims
        self.attn_shortcut = attn_shortcut
        self.proj_1 = nn.Conv2d(
            in_channels=embed_dims, out_channels=embed_dims, kernel_size=1)
        self.gate = nn.Conv2d(
            in_channels=embed_dims, out_channels=embed_dims, kernel_size=1)
        self.value = MultiOrderDWConv(
            embed_dims=embed_dims, dw_dilation=attn_dw_dilation, channel_split=attn_channel_split)
        self.proj_2 = nn.Conv2d(
            in_channels=embed_dims, out_channels=embed_dims, kernel_size=1)

        self.act_value = nn.SiLU()  # activation after decomposition
        self.act_gate = nn.SiLU()   # applied to both gate and value at fusion
        # Learnable scale for the global-mean decomposition residual.
        self.sigma = nn.Parameter(1e-5 * torch.ones((1, embed_dims, 1, 1)), requires_grad=True)

    def feat_decompose(self, x):
        x = self.proj_1(x)
        # Global context: [B, C, H, W] -> [B, C, 1, 1].
        pooled = F.adaptive_avg_pool2d(x, output_size=1)
        return self.act_value(x + self.sigma * (x - pooled))

    def forward(self, x):
        shortcut = x.clone() if self.attn_shortcut else None
        # 1x1 projection + decomposition.
        x = self.feat_decompose(x)
        # Gated aggregation of the two branches.
        x = self.proj_2(self.act_gate(self.gate(x)) * self.act_gate(self.value(x)))
        if self.attn_shortcut:
            x = x + shortcut
        return x
utilpack/layers/poolformer.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# refer to the code from PoolFormer, Thanks!
|
| 2 |
+
# https://github.com/sail-sg/poolformer/blob/main/models/poolformer.py
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
import torch.nn as nn
|
| 6 |
+
from timm.layers import DropPath, trunc_normal_
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class GroupNorm(nn.GroupNorm):
    """
    nn.GroupNorm pinned to a single group — i.e. normalization over the whole
    (C, H, W) volume of a [B, C, H, W] tensor.
    """

    def __init__(self, num_channels, **kwargs):
        # Group count is fixed at 1; everything else passes through.
        super().__init__(1, num_channels, **kwargs)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class Pooling(nn.Module):
    """
    PoolFormer token mixer: average pooling minus identity.
    Yields zero on spatially-constant inputs, so it acts as a pure
    neighborhood-difference operator.
    """

    def __init__(self, pool_size=3):
        super().__init__()
        # count_include_pad=False keeps border averages unbiased.
        self.pool = nn.AvgPool2d(
            pool_size, stride=1, padding=pool_size // 2, count_include_pad=False)

    def forward(self, x):
        return self.pool(x) - x
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class Mlp(nn.Module):
    """
    MLP implemented with 1x1 convolutions.
    Input: tensor with shape [B, C, H, W].
    """

    def __init__(self, in_features, hidden_features=None,
                 out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        hidden_features = hidden_features or in_features
        out_features = out_features or in_features
        self.fc1 = nn.Conv2d(in_features, hidden_features, 1)
        self.act = act_layer()
        self.fc2 = nn.Conv2d(hidden_features, out_features, 1)
        self.drop = nn.Dropout(drop)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        # Truncated-normal conv weights, zero biases.
        if isinstance(m, nn.Conv2d):
            trunc_normal_(m.weight, std=.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
class PoolFormerBlock(nn.Module):
    """One PoolFormer block: pooling token mixer + channel MLP.

    Both sub-blocks are pre-normalized, scaled by learnable per-channel
    LayerScale factors (https://arxiv.org/abs/2103.17239) and wrapped in
    stochastic-depth residual branches (https://arxiv.org/abs/1603.09382).

    Args:
        dim: embedding dimension.
        pool_size: pooling window of the token mixer.
        mlp_ratio: hidden/input ratio of the MLP.
        drop: dropout rate inside the MLP.
        drop_path: stochastic-depth rate.
        init_value: initial LayerScale value.
        act_layer: activation module class.
        norm_layer: normalization module class.
    """

    def __init__(self, dim, pool_size=3, mlp_ratio=4., drop=0., drop_path=0.,
                 init_value=1e-5, act_layer=nn.GELU, norm_layer=GroupNorm):
        super().__init__()

        self.norm1 = norm_layer(dim)
        self.token_mixer = Pooling(pool_size=pool_size)
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim,
                       act_layer=act_layer, drop=drop)
        # Stochastic depth and LayerScale help when stacking many blocks.
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.layer_scale_1 = nn.Parameter(init_value * torch.ones((dim)), requires_grad=True)
        self.layer_scale_2 = nn.Parameter(init_value * torch.ones((dim)), requires_grad=True)

    def forward(self, x):
        # Broadcast the per-channel scales over the spatial dimensions.
        scale1 = self.layer_scale_1.unsqueeze(-1).unsqueeze(-1)
        x = x + self.drop_path(scale1 * self.token_mixer(self.norm1(x)))
        scale2 = self.layer_scale_2.unsqueeze(-1).unsqueeze(-1)
        x = x + self.drop_path(scale2 * self.mlp(self.norm2(x)))
        return x
|
utilpack/layers/uniformer.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# refer to the code from Uniformer, Thanks!
|
| 2 |
+
# https://github.com/Sense-X/UniFormer/blob/main/image_classification/models/uniformer.py
|
| 3 |
+
|
| 4 |
+
import math
|
| 5 |
+
import torch
|
| 6 |
+
import torch.nn as nn
|
| 7 |
+
from timm.layers import DropPath, trunc_normal_
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class Mlp(nn.Module):
    """Token-wise feed-forward network (two linear layers + activation).

    Args:
        in_features: input feature size.
        hidden_features: hidden size (defaults to in_features).
        out_features: output size (defaults to in_features).
        act_layer: activation module class.
        drop: dropout applied after each projection.
    """

    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class CMlp(nn.Module):
    """Convolutional MLP: the Linear layers of `Mlp` replaced by 1x1 convs.

    Works on [B, C, H, W] tensors and mixes channels only.

    Args:
        in_features: input channel count.
        hidden_features: hidden channel count (defaults to in_features).
        out_features: output channel count (defaults to in_features).
        act_layer: activation module class.
        drop: dropout applied after each projection.
    """

    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Conv2d(in_features, hidden_features, 1)
        self.act = act_layer()
        self.fc2 = nn.Conv2d(hidden_features, out_features, 1)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class Attention(nn.Module):
    """Standard multi-head self-attention over a token sequence.

    Args:
        dim: embedding size.
        num_heads: number of attention heads (dim must be divisible).
        qkv_bias: add bias to the fused QKV projection.
        qk_scale: override for the 1/sqrt(head_dim) attention scale.
        attn_drop: dropout on the attention matrix.
        proj_drop: dropout on the output projection.
    """

    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        # qk_scale allows loading weights trained with a different scale.
        self.scale = qk_scale or head_dim ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        head_dim = C // self.num_heads
        # Fused projection, then split into per-head Q, K, V.
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, head_dim).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]

        scores = (q @ k.transpose(-2, -1)) * self.scale
        weights = self.attn_drop(scores.softmax(dim=-1))

        out = (weights @ v).transpose(1, 2).reshape(B, N, C)
        return self.proj_drop(self.proj(out))
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
class CBlock(nn.Module):
    """UniFormer convolutional block (local token mixing).

    Uses a depth-wise conv positional encoding, a 1x1 -> 5x5 depth-wise
    -> 1x1 "attention" stack, and a convolutional MLP; each branch is
    residual with optional stochastic depth.

    Note: num_heads/qkv_bias/qk_scale/attn_drop/norm_layer are accepted
    for interface parity with SABlock but are unused here.
    """

    def __init__(self, dim, num_heads=4, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
                 drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
        super().__init__()
        self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim)
        self.norm1 = nn.BatchNorm2d(dim)
        self.conv1 = nn.Conv2d(dim, dim, 1)
        self.conv2 = nn.Conv2d(dim, dim, 1)
        # Depth-wise 5x5 conv plays the role of local "attention".
        self.attn = nn.Conv2d(dim, dim, 5, padding=2, groups=dim)
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = nn.BatchNorm2d(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = CMlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, (nn.LayerNorm, nn.GroupNorm, nn.BatchNorm2d)):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            # Kaiming-style fan-out init, corrected for grouped convs.
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels // m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    @torch.jit.ignore
    def no_weight_decay(self):
        return {}

    def forward(self, x):
        x = x + self.pos_embed(x)
        mixed = self.conv2(self.attn(self.conv1(self.norm1(x))))
        x = x + self.drop_path(mixed)
        x = x + self.drop_path(self.mlp(self.norm2(x)))
        return x
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
class SABlock(nn.Module):
    """UniFormer self-attention block (global token mixing).

    Adds a depth-wise conv positional encoding on the [B, C, H, W]
    feature map, then flattens to tokens for multi-head attention and an
    MLP, both scaled by learnable LayerScale factors (gamma_1/gamma_2)
    and wrapped in residual stochastic-depth branches.
    """

    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
                 drop_path=0., init_value=1e-6, act_layer=nn.GELU, norm_layer=nn.LayerNorm):
        super().__init__()
        self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim)
        self.norm1 = norm_layer(dim)
        self.attn = Attention(
            dim,
            num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
            attn_drop=attn_drop, proj_drop=drop)
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
        # LayerScale: tiny initial weights stabilize deep stacks.
        self.gamma_1 = nn.Parameter(init_value * torch.ones((dim)), requires_grad=True)
        self.gamma_2 = nn.Parameter(init_value * torch.ones((dim)), requires_grad=True)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, (nn.LayerNorm, nn.GroupNorm, nn.BatchNorm2d)):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    @torch.jit.ignore
    def no_weight_decay(self):
        # LayerScale parameters are excluded from weight decay.
        return {'gamma_1', 'gamma_2'}

    def forward(self, x):
        x = x + self.pos_embed(x)
        B, C, H, W = x.shape
        # [B, C, H, W] -> [B, H*W, C] token sequence.
        tokens = x.flatten(2).transpose(1, 2)
        tokens = tokens + self.drop_path(self.gamma_1 * self.attn(self.norm1(tokens)))
        tokens = tokens + self.drop_path(self.gamma_2 * self.mlp(self.norm2(tokens)))
        return tokens.transpose(1, 2).reshape(B, C, H, W)
|
utilpack/layers/van.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# refer to the code from VAN, Thanks!
|
| 2 |
+
# https://github.com/Visual-Attention-Network/VAN-Classification
|
| 3 |
+
|
| 4 |
+
import math
|
| 5 |
+
import torch
|
| 6 |
+
import torch.nn as nn
|
| 7 |
+
|
| 8 |
+
from timm.layers import DropPath, trunc_normal_
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class DWConv(nn.Module):
    """3x3 depth-wise convolution: one independent filter per channel."""

    def __init__(self, dim=768):
        super(DWConv, self).__init__()
        # groups=dim makes the convolution depth-wise.
        self.dwconv = nn.Conv2d(dim, dim, 3, 1, 1, bias=True, groups=dim)

    def forward(self, x):
        return self.dwconv(x)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class MixMlp(nn.Module):
    """Convolutional feed-forward network with a depth-wise 3x3 in between.

    1x1 conv expand -> depth-wise 3x3 (spatial mixing) -> activation ->
    1x1 conv reduce, with dropout after each projection.

    Args:
        in_features: input channel count.
        hidden_features: hidden channel count (defaults to in_features).
        out_features: output channel count (defaults to in_features).
        act_layer: activation module class.
        drop: dropout probability.
    """

    def __init__(self,
                 in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Conv2d(in_features, hidden_features, 1)
        self.dwconv = DWConv(hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Conv2d(hidden_features, out_features, 1)
        self.drop = nn.Dropout(drop)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            # Kaiming fan-out, corrected for grouped (depth-wise) convs.
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels // m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x):
        hidden = self.act(self.dwconv(self.fc1(x)))
        hidden = self.drop(hidden)
        return self.drop(self.fc2(hidden))
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class LKA(nn.Module):
    """Large-Kernel Attention from VAN.

    Decomposes a large receptive field into a 5x5 depth-wise conv, a
    7x7 dilated (rate 3) depth-wise conv, and a 1x1 conv, then gates the
    input with the resulting attention map.
    """

    def __init__(self, dim):
        super().__init__()
        self.conv0 = nn.Conv2d(dim, dim, 5, padding=2, groups=dim)
        self.conv_spatial = nn.Conv2d(
            dim, dim, 7, stride=1, padding=9, groups=dim, dilation=3)
        self.conv1 = nn.Conv2d(dim, dim, 1)

    def forward(self, x):
        identity = x.clone()
        weight = self.conv1(self.conv_spatial(self.conv0(x)))
        # Element-wise gating rather than softmax attention.
        return identity * weight
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class Attention(nn.Module):
    """VAN attention module: 1x1 -> GELU -> LKA gating -> 1x1.

    Args:
        d_model: channel dimension.
        attn_shortcut: if True, add the input back as a residual.
    """

    def __init__(self, d_model, attn_shortcut=True):
        super().__init__()

        self.proj_1 = nn.Conv2d(d_model, d_model, 1)
        self.activation = nn.GELU()
        self.spatial_gating_unit = LKA(d_model)
        self.proj_2 = nn.Conv2d(d_model, d_model, 1)
        self.attn_shortcut = attn_shortcut

    def forward(self, x):
        residual = x.clone() if self.attn_shortcut else None
        x = self.spatial_gating_unit(self.activation(self.proj_1(x)))
        x = self.proj_2(x)
        if self.attn_shortcut:
            x = x + residual
        return x
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
class VANBlock(nn.Module):
    """One VAN block: LKA attention + convolutional MLP.

    Each branch is BatchNorm pre-normalized, scaled by a learnable
    per-channel LayerScale factor, and added back through a
    stochastic-depth residual connection.
    """

    def __init__(self, dim, mlp_ratio=4., drop=0., drop_path=0., init_value=1e-2, act_layer=nn.GELU, attn_shortcut=True):
        super().__init__()
        self.norm1 = nn.BatchNorm2d(dim)
        self.attn = Attention(dim, attn_shortcut=attn_shortcut)
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()

        self.norm2 = nn.BatchNorm2d(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = MixMlp(
            in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)

        self.layer_scale_1 = nn.Parameter(init_value * torch.ones((dim)), requires_grad=True)
        self.layer_scale_2 = nn.Parameter(init_value * torch.ones((dim)), requires_grad=True)

    def forward(self, x):
        # Broadcast LayerScale over spatial dims before the residual add.
        scale1 = self.layer_scale_1.unsqueeze(-1).unsqueeze(-1)
        x = x + self.drop_path(scale1 * self.attn(self.norm1(x)))
        scale2 = self.layer_scale_2.unsqueeze(-1).unsqueeze(-1)
        x = x + self.drop_path(scale2 * self.mlp(self.norm2(x)))
        return x
|
utilpack/mau_modules.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import math
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class MAUCell(nn.Module):
    """Motion-Aware Unit cell (MAU, NeurIPS 2021).

    Fuses the current temporal state with an attention-weighted history
    of temporal states, then exchanges information between the temporal
    (T) and spatial (S) streams through learned gates.

    Args:
        in_channel: channels of the temporal input T_t (and of each
            entry of ``t_att``).
        num_hidden: hidden channels of the spatial stream.
        height, width: spatial size (needed for the LayerNorm shapes).
        filter_size: square convolution kernel size.
        stride: convolution stride.
        tau: length of the attention window over past states.
        cell_mode: 'residual' adds S_t back to the new spatial state,
            'normal' does not.

    Raises:
        AssertionError: if ``cell_mode`` is not one of the supported modes.
    """

    def __init__(self, in_channel, num_hidden, height, width, filter_size, stride, tau, cell_mode):
        super(MAUCell, self).__init__()

        self.num_hidden = num_hidden
        self.padding = filter_size // 2
        self.cell_mode = cell_mode
        # Normalizer for the attention logits: total state dimensionality.
        self.d = num_hidden * height * width
        self.tau = tau
        self.states = ['residual', 'normal']
        # FIX: idiomatic membership test and an informative message
        # (previously a bare `raise AssertionError` with no context).
        if self.cell_mode not in self.states:
            raise AssertionError(
                f"cell_mode must be one of {self.states}, got {cell_mode!r}")
        self.conv_t = nn.Sequential(
            nn.Conv2d(in_channel, 3 * num_hidden, kernel_size=filter_size,
                      stride=stride, padding=self.padding),
            nn.LayerNorm([3 * num_hidden, height, width])
        )
        self.conv_t_next = nn.Sequential(
            nn.Conv2d(in_channel, num_hidden, kernel_size=filter_size,
                      stride=stride, padding=self.padding),
            nn.LayerNorm([num_hidden, height, width])
        )
        self.conv_s = nn.Sequential(
            nn.Conv2d(num_hidden, 3 * num_hidden, kernel_size=filter_size,
                      stride=stride, padding=self.padding),
            nn.LayerNorm([3 * num_hidden, height, width])
        )
        self.conv_s_next = nn.Sequential(
            nn.Conv2d(num_hidden, num_hidden, kernel_size=filter_size,
                      stride=stride, padding=self.padding),
            nn.LayerNorm([num_hidden, height, width])
        )
        # Softmax over the tau (history) dimension.
        self.softmax = nn.Softmax(dim=0)

    def forward(self, T_t, S_t, t_att, s_att):
        """One recurrence step.

        Args:
            T_t: current temporal state, [B, in_channel, H, W].
            S_t: current spatial state, [B, num_hidden, H, W].
            t_att: stacked past temporal states, [tau, B, C, H, W].
            s_att: stacked past spatial states, [tau, B, num_hidden, H, W].

        Returns:
            (T_new, S_new): updated temporal and spatial states.
        """
        s_next = self.conv_s_next(S_t)
        t_next = self.conv_t_next(T_t)
        # Scaled dot-product attention score for each of the tau past states.
        weights_list = []
        for i in range(self.tau):
            weights_list.append((s_att[i] * s_next).sum(dim=(1, 2, 3)) / math.sqrt(self.d))
        weights_list = torch.stack(weights_list, dim=0)
        weights_list = torch.reshape(weights_list, (*weights_list.shape, 1, 1, 1))
        weights_list = self.softmax(weights_list)
        # Attention-weighted trend over the temporal history.
        T_trend = t_att * weights_list
        T_trend = T_trend.sum(dim=0)
        # Gate between the current state and its historical trend.
        t_att_gate = torch.sigmoid(t_next)
        T_fusion = T_t * t_att_gate + (1 - t_att_gate) * T_trend
        T_concat = self.conv_t(T_fusion)
        S_concat = self.conv_s(S_t)
        t_g, t_t, t_s = torch.split(T_concat, self.num_hidden, dim=1)
        s_g, s_t, s_s = torch.split(S_concat, self.num_hidden, dim=1)
        # Cross-stream gated mixing of temporal and spatial candidates.
        T_gate = torch.sigmoid(t_g)
        S_gate = torch.sigmoid(s_g)
        T_new = T_gate * t_t + (1 - T_gate) * s_t
        S_new = S_gate * s_s + (1 - S_gate) * t_s

        if self.cell_mode == 'residual':
            S_new = S_new + S_t
        return T_new, S_new
|
utilpack/mim_modules.py
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class MIMBlock(nn.Module):
    """Memory-In-Memory block (MIM, Wang et al., CVPR 2019).

    A spatiotemporal LSTM whose forget-gate path is replaced by an inner
    MIMS recurrence driven by the difference of consecutive hidden
    states, which models non-stationary dynamics.

    Args:
        in_channel: channels of the incoming hidden state ``h``.
        num_hidden: hidden channel count of this block.
        height, width: spatial size (for LayerNorm shapes and the
            per-position gate weights).
        filter_size: square convolution kernel size.
        stride: convolution stride.
        layer_norm: append LayerNorm to each gate convolution if True.
    """

    def __init__(self, in_channel, num_hidden, height, width, filter_size, stride, layer_norm):
        super(MIMBlock, self).__init__()

        # Cell state of the inner MIMS recurrence; carried across calls.
        self.convlstm_c = None
        self.num_hidden = num_hidden
        self.padding = filter_size // 2
        self._forget_bias = 1.0

        # Per-position weights for the MIMS input/forget gates (two
        # stacked copies) and its output gate.
        self.ct_weight = nn.Parameter(torch.zeros(num_hidden * 2, height, width))
        self.oc_weight = nn.Parameter(torch.zeros(num_hidden, height, width))

        def gate_conv(cin, cout):
            # One gate convolution, optionally followed by LayerNorm.
            layers = [nn.Conv2d(cin, cout, kernel_size=filter_size,
                                stride=stride, padding=self.padding, bias=False)]
            if layer_norm:
                layers.append(nn.LayerNorm([cout, height, width]))
            return nn.Sequential(*layers)

        # Creation order matches the original implementation so that
        # state_dict keys and seeded initialization are unchanged.
        self.conv_t_cc = gate_conv(in_channel, num_hidden * 3)
        self.conv_s_cc = gate_conv(num_hidden, num_hidden * 4)
        self.conv_x_cc = gate_conv(num_hidden, num_hidden * 4)
        self.conv_h_concat = gate_conv(num_hidden, num_hidden * 4)
        self.conv_x_concat = gate_conv(num_hidden, num_hidden * 4)
        self.conv_last = nn.Conv2d(num_hidden * 2, num_hidden, kernel_size=1,
                                   stride=1, padding=0, bias=False)

    def _init_state(self, inputs):
        """Zero-initialized state shaped like ``inputs``."""
        return torch.zeros_like(inputs)

    def MIMS(self, x, h_t, c_t):
        """One step of the inner non-stationarity LSTM.

        Args:
            x: difference input (may be None; then only h/c contribute).
            h_t: previous hidden state (may be None -> zeros like ``x``).
            c_t: previous cell state (may be None -> zeros like ``x``).

        Returns:
            (h_new, c_new): updated hidden and cell states.
        """
        if h_t is None:
            h_t = self._init_state(x)
        if c_t is None:
            c_t = self._init_state(x)

        h_concat = self.conv_h_concat(h_t)
        i_h, g_h, f_h, o_h = torch.split(h_concat, self.num_hidden, dim=1)

        # Hadamard (peephole) contribution of c_t to input/forget gates.
        ct_activation = torch.mul(c_t.repeat(1, 2, 1, 1), self.ct_weight)
        i_c, f_c = torch.split(ct_activation, self.num_hidden, dim=1)

        i_ = i_h + i_c
        f_ = f_h + f_c
        g_ = g_h
        o_ = o_h

        # FIX: explicit identity check; `x != None` relied on
        # Tensor.__ne__ falling back to identity comparison.
        if x is not None:
            x_concat = self.conv_x_concat(x)
            i_x, g_x, f_x, o_x = torch.split(x_concat, self.num_hidden, dim=1)

            i_ = i_ + i_x
            f_ = f_ + f_x
            g_ = g_ + g_x
            o_ = o_ + o_x

        i_ = torch.sigmoid(i_)
        # Positive forget bias eases gradient flow early in training.
        f_ = torch.sigmoid(f_ + self._forget_bias)
        c_new = f_ * c_t + i_ * torch.tanh(g_)

        o_c = torch.mul(c_new, self.oc_weight)
        h_new = torch.sigmoid(o_ + o_c) * torch.tanh(c_new)

        return h_new, c_new

    def forward(self, x, diff_h, h, c, m):
        """One MIM step.

        Args:
            x: input features, [B, num_hidden, H, W].
            diff_h: difference of consecutive lower-layer hidden states
                (may be None -> zeros).
            h: previous hidden state (None -> zeros like ``x``).
            c: previous cell state (None -> zeros like ``x``).
            m: previous spatiotemporal memory (None -> zeros like ``x``).

        Returns:
            (new_h, new_c, new_m)
        """
        h = self._init_state(x) if h is None else h
        c = self._init_state(x) if c is None else c
        m = self._init_state(x) if m is None else m
        diff_h = self._init_state(x) if diff_h is None else diff_h

        t_cc = self.conv_t_cc(h)
        s_cc = self.conv_s_cc(m)
        x_cc = self.conv_x_cc(x)

        i_s, g_s, f_s, o_s = torch.split(s_cc, self.num_hidden, dim=1)
        i_t, g_t, o_t = torch.split(t_cc, self.num_hidden, dim=1)
        i_x, g_x, f_x, o_x = torch.split(x_cc, self.num_hidden, dim=1)

        i = torch.sigmoid(i_x + i_t)
        i_ = torch.sigmoid(i_x + i_s)
        g = torch.tanh(g_x + g_t)
        g_ = torch.tanh(g_x + g_s)
        f_ = torch.sigmoid(f_x + f_s + self._forget_bias)
        o = torch.sigmoid(o_x + o_t + o_s)
        new_m = f_ * m + i_ * g_

        # Inner MIMS recurrence replaces the temporal forget path; its
        # own cell state is detached to truncate gradients through time.
        c, self.convlstm_c = self.MIMS(diff_h, c, self.convlstm_c \
            if self.convlstm_c is None else self.convlstm_c.detach())

        new_c = c + i * g
        cell = torch.cat((new_c, new_m), 1)
        new_h = o * torch.tanh(self.conv_last(cell))

        return new_h, new_c, new_m
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
class MIMN(nn.Module):
    """Non-stationarity module of MIM (stand-alone MIMS cell).

    An LSTM-style cell with per-position peephole weights on the
    input/forget gates and a per-position output-gate modulation.

    Args:
        in_channel: channels of both ``x`` and ``h_t``.
        num_hidden: hidden channel count.
        height, width: spatial size (for LayerNorm shapes and weights).
        filter_size: square convolution kernel size.
        stride: convolution stride.
        layer_norm: append LayerNorm to each gate convolution if True.
    """

    def __init__(self, in_channel, num_hidden, height, width, filter_size, stride, layer_norm):
        super(MIMN, self).__init__()

        self.num_hidden = num_hidden
        self.padding = filter_size // 2
        self._forget_bias = 1.0

        # Per-position peephole weights (input/forget) and output gate.
        self.ct_weight = nn.Parameter(torch.zeros(num_hidden * 2, height, width))
        self.oc_weight = nn.Parameter(torch.zeros(num_hidden, height, width))

        def gate_conv(cin, cout):
            # One gate convolution, optionally followed by LayerNorm.
            layers = [nn.Conv2d(cin, cout, kernel_size=filter_size,
                                stride=stride, padding=self.padding, bias=False)]
            if layer_norm:
                layers.append(nn.LayerNorm([cout, height, width]))
            return nn.Sequential(*layers)

        # Creation order matches the original implementation so that
        # state_dict keys and seeded initialization are unchanged.
        self.conv_h_concat = gate_conv(in_channel, num_hidden * 4)
        self.conv_x_concat = gate_conv(in_channel, num_hidden * 4)
        self.conv_last = nn.Conv2d(num_hidden * 2, num_hidden, kernel_size=1,
                                   stride=1, padding=0, bias=False)

    def _init_state(self, inputs):
        """Zero-initialized state shaped like ``inputs``."""
        return torch.zeros_like(inputs)

    def forward(self, x, h_t, c_t):
        """One cell step.

        Args:
            x: input (may be None; then only h/c contribute).
            h_t: previous hidden state (None -> zeros like ``x``).
            c_t: previous cell state (None -> zeros like ``x``).

        Returns:
            (h_new, c_new)
        """
        if h_t is None:
            h_t = self._init_state(x)
        if c_t is None:
            c_t = self._init_state(x)

        h_concat = self.conv_h_concat(h_t)
        i_h, g_h, f_h, o_h = torch.split(h_concat, self.num_hidden, dim=1)

        # Hadamard (peephole) contribution of c_t to input/forget gates.
        ct_activation = torch.mul(c_t.repeat(1, 2, 1, 1), self.ct_weight)
        i_c, f_c = torch.split(ct_activation, self.num_hidden, dim=1)

        i_ = i_h + i_c
        f_ = f_h + f_c
        g_ = g_h
        o_ = o_h

        # FIX: explicit identity check; `x != None` relied on
        # Tensor.__ne__ falling back to identity comparison.
        if x is not None:
            x_concat = self.conv_x_concat(x)
            i_x, g_x, f_x, o_x = torch.split(x_concat, self.num_hidden, dim=1)

            i_ = i_ + i_x
            f_ = f_ + f_x
            g_ = g_ + g_x
            o_ = o_ + o_x

        i_ = torch.sigmoid(i_)
        f_ = torch.sigmoid(f_ + self._forget_bias)
        c_new = f_ * c_t + i_ * torch.tanh(g_)

        o_c = torch.mul(c_new, self.oc_weight)

        h_new = torch.sigmoid(o_ + o_c) * torch.tanh(c_new)

        return h_new, c_new
|
utilpack/mmvp_modules.py
ADDED
|
@@ -0,0 +1,349 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
class ResidualDenseBlock_4C(nn.Module):
    """Residual dense block with four 3x3 convolutions.

    Each conv sees the concatenation of the block input and all previous
    intermediate features; the final output is residually scaled by 0.2
    and added back to the input.

    Args:
        nf: number of input/output feature channels.
        gc: growth channels, i.e. intermediate channel count.
        bias: add bias terms to the convolutions.
    """

    def __init__(self, nf=64, gc=32, bias=True):
        super(ResidualDenseBlock_4C, self).__init__()

        self.conv1 = nn.Conv2d(nf, gc, 3, 1, 1, bias=bias)
        self.conv2 = nn.Conv2d(nf + gc, gc, 3, 1, 1, bias=bias)
        self.conv3 = nn.Conv2d(nf + 2 * gc, gc, 3, 1, 1, bias=bias)
        self.conv4 = nn.Conv2d(nf + 3 * gc, nf, 3, 1, 1, bias=bias)
        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)

    def forward(self, x):
        # Densely accumulate features: each conv consumes all earlier ones.
        feats = [x]
        for conv in (self.conv1, self.conv2, self.conv3):
            feats.append(self.lrelu(conv(torch.cat(feats, 1))))
        out = self.conv4(torch.cat(feats, 1))
        # Residual scaling stabilizes training of deep RRDB stacks.
        return out * 0.2 + x
|
| 25 |
+
|
| 26 |
+
class RRDB(nn.Module):
    """Residual in Residual Dense Block.

    Three ResidualDenseBlock_4C blocks in sequence, with the final result
    scaled by 0.2 and added back to the block input. The inner growth
    channel count is half of ``nf``.
    """

    def __init__(self, nf):
        super(RRDB, self).__init__()
        growth = nf // 2
        self.RDB1 = ResidualDenseBlock_4C(nf, growth)
        self.RDB2 = ResidualDenseBlock_4C(nf, growth)
        self.RDB3 = ResidualDenseBlock_4C(nf, growth)

    def forward(self, x):
        h = self.RDB3(self.RDB2(self.RDB1(x)))
        # Outer residual scaling, mirroring the inner dense blocks.
        return h * 0.2 + x
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class Up(nn.Module):
    """Upscaling then double conv."""

    def __init__(self, in_channels, out_channels, bilinear=True, skip=True, scale=2, bn=True, motion=False):
        super().__init__()
        factor = scale
        if bilinear:
            # Bilinear path: parameter-free upsampling, then a conv to
            # reduce the number of channels.
            self.up = nn.Upsample(scale_factor=factor, mode='bilinear', align_corners=True)
            if skip:
                self.conv = ConvLayer(in_channels, out_channels, bn=bn)
            else:
                self.conv = ConvLayer(in_channels, out_channels)
        else:
            # Learned upsampling via transposed convolution.
            self.up = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=factor, stride=factor)
            if skip:
                # Skip connection doubles the channels fed to the conv.
                self.conv = ConvLayer(out_channels * 2, out_channels, bn=bn, motion=motion)
            else:
                self.conv = ConvLayer(out_channels, out_channels, bn=bn, motion=motion)

    def forward(self, x1, x2=None):
        x1 = self.up(x1)
        if x2 is None:
            return self.conv(x1)
        # input is CHW: pad x1 so it matches x2's spatial size before concat.
        dy = x2.size()[2] - x1.size()[2]
        dx = x2.size()[3] - x1.size()[3]
        x1 = F.pad(x1, [dx // 2, dx - dx // 2,
                        dy // 2, dy - dy // 2])
        # if you have padding issues, see
        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
        return self.conv(torch.cat([x2, x1], dim=1))
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
class ResBlock(nn.Module):
    """Two-conv residual block with optional down/up-sampling shortcut.

    - ``downsample``: stride-2 conv path with a 1x1 stride-2 conv shortcut
      (plus an extra MaxPool when ``factor == 4``).
    - ``upsample``: transposed-conv path with an Upsample+1x1-conv shortcut
      (BatchNorm appended to the shortcut when ``motion`` is set).
    - otherwise: plain stride-1 convs with an identity shortcut.
    """

    def __init__(self, in_channels, out_channels, downsample=False,
                 upsample=False, skip=False, factor=2, motion=False):
        super().__init__()
        self.upsample = upsample
        self.maxpool = None
        if downsample:
            self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1)
            self.shortcut = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=2)
            if factor == 4:
                # Extra pooling so the total spatial reduction is 4x.
                self.maxpool = nn.MaxPool2d(2)
        elif upsample:
            self.conv1 = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=factor, stride=factor)
            shortcut_layers = [
                nn.Upsample(scale_factor=factor, mode='bilinear', align_corners=True),
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1),
            ]
            if motion:
                # Motion variant also normalizes the shortcut branch.
                shortcut_layers.append(nn.BatchNorm2d(out_channels))
            self.shortcut = nn.Sequential(*shortcut_layers)
        else:
            self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
            self.shortcut = nn.Sequential()
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)

    def forward(self, input):
        identity = self.shortcut(input)
        out = F.relu(self.conv1(input))
        out = F.relu(self.conv2(out))
        out = out + identity
        if self.maxpool is not None:
            out = self.maxpool(out)
        return F.leaky_relu(out)
|
| 126 |
+
|
| 127 |
+
class ConvLayer(nn.Module):
    """Single 3x3 convolution followed by ReLU (BatchNorm only in motion mode).

    Note: despite the ``bn`` and ``mid_channels`` arguments, only ``motion``
    changes the layers built here — ``motion=True`` adds BatchNorm and
    ignores ``dilation``; otherwise a (possibly dilated) conv + ReLU is used.
    The extra arguments are accepted for interface compatibility.
    """

    def __init__(self, in_channels, out_channels, mid_channels=None, bn=True, motion=False, dilation=1):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        if motion:
            self.conv = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            )
        else:
            self.conv = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=dilation, bias=False, dilation=dilation),
                nn.ReLU(inplace=True),
            )

    def forward(self, x):
        return self.conv(x)
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
class Conv3D(nn.Module):
    """Conv3d + BatchNorm3d + leaky ReLU over a (batch, seq, c, h, w) tensor.

    The module internally permutes to channels-first 3D layout
    (batch, c, seq, h, w) for the convolution and permutes back afterwards.
    """

    def __init__(self, in_channel, out_channel, kernel_size, stride, padding):
        super(Conv3D, self).__init__()
        self.conv3d = nn.Conv3d(in_channel, out_channel, kernel_size=kernel_size, stride=stride, padding=padding)
        self.bn3d = nn.BatchNorm3d(out_channel)

    def forward(self, x):
        # (batch, seq, c, h, w) -> (batch, c, seq, h, w) as Conv3d expects.
        y = x.permute(0, 2, 1, 3, 4).contiguous()
        y = F.leaky_relu(self.bn3d(self.conv3d(y)))
        # Restore (batch, seq, c, h, w).
        return y.permute(0, 2, 1, 3, 4).contiguous()
|
| 161 |
+
|
| 162 |
+
class MatrixPredictor3DConv(nn.Module):
    """U-Net-style predictor over a temporal sequence of hidden feature maps.

    Two spatial downsampling convs are interleaved with two temporal 3D
    convolutions; the deep features are then upsampled and fused with a
    temporally max-pooled skip branch.

    Input:  (B, T, hidden_len, H, W)
    Output: a (·, hidden_len, H, W) map.
        NOTE(review): the leading dimension equals B only if conv3d_2
        (temporal padding 0) reduces the sequence to length 1, i.e. T == 3
        after the first temporal conv — confirm against the caller.
    """

    def __init__(self, hidden_len=64):
        super(MatrixPredictor3DConv, self).__init__()
        self.unet_base = hidden_len #64
        self.hidden_len = hidden_len #64
        # Two stride-1 "stem" convs at full resolution.
        self.conv_pre_1 = nn.Conv2d(hidden_len,hidden_len, kernel_size=3, stride=1, padding=1)
        self.conv_pre_2 = nn.Conv2d(hidden_len, hidden_len, kernel_size=3, stride=1, padding=1)

        # Temporal convs: conv3d_1 preserves the sequence length (time
        # padding 1); conv3d_2 shrinks it by 2 (time padding 0).
        self.conv3d_1 = Conv3D(self.unet_base, self.unet_base, kernel_size=(3, 3, 3), stride=1, padding=(1, 1, 1))
        self.conv3d_2 = Conv3D(self.unet_base*2, self.unet_base*2, kernel_size=(3 , 3, 3), stride=1, padding=(0, 1, 1))

        # Spatial down path (stride 2 each).
        self.conv1_1 = nn.Conv2d(hidden_len, self.unet_base, kernel_size=3, stride=2, padding=1)
        self.conv2_1 = nn.Conv2d(self.unet_base, self.unet_base * 2, kernel_size=3, stride=2, padding=1)

        # Up path: conv3_1 fuses upsampled deep features (2*base channels)
        # with the skip branch (base channels) -> 3*base input channels.
        self.conv3_1 = nn.Conv2d(self.unet_base * 3, self.unet_base, kernel_size=3, stride=1, padding=1)
        self.conv4_1 = nn.Conv2d(self.unet_base, self.hidden_len, kernel_size=3, stride=1, padding=1)

        self.bn_pre_1 = nn.BatchNorm2d(hidden_len)
        self.bn_pre_2 = nn.BatchNorm2d(hidden_len)
        self.bn1_1 = nn.BatchNorm2d(self.unet_base)
        self.bn2_1 = nn.BatchNorm2d(self.unet_base * 2)
        self.bn3_1 = nn.BatchNorm2d(self.unet_base)
        self.bn4_1 = nn.BatchNorm2d(self.hidden_len)


    def forward(self,x):
        # x [B,T,C,32,32]
        # out: [B,C,32,32]
        batch, seq, z, h, w = x.size()
        # Fold time into the batch axis for the 2D convs.
        x = x.reshape(-1, x.size(-3), x.size(-2), x.size(-1))
        x = F.leaky_relu(self.bn_pre_1(self.conv_pre_1(x)))
        x = F.leaky_relu(self.bn_pre_2(self.conv_pre_2(x)))
        x_1 = F.leaky_relu(self.bn1_1(self.conv1_1(x)))

        x_1 = x_1.view(batch, -1, x_1.size(1), x_1.size(2), x_1.size(3)).contiguous() # (batch, seq, c, h, w)
        x_1 = self.conv3d_1(x_1) # (batch, seq, c, h, w), 1st temporal conv
        x_1 = x_1.view(-1, x_1.size(2), x_1.size(3), x_1.size(4)).contiguous() # (batch * seq, c, h, w)
        x_2 = F.leaky_relu(self.bn2_1(self.conv2_1(x_1))) # (batch * seq, c, h // 2, w // 2)
        x_2 = x_2.view(batch, -1, x_2.size(1), x_2.size(2), x_2.size(3)).contiguous() # (batch, seq, c, h, w)
        x_2 = self.conv3d_2(x_2) # (batch, seq=1, c, h // 2, w // 2), 2nd temporal conv
        x_2 = x_2.view(-1, x_2.size(2), x_2.size(3), x_2.size(4)).contiguous() # (batch * seq, c, h//2, w//2), seq = 1

        # Collapse the temporal axis of the skip branch by max pooling.
        x_1 = x_1.view(batch, -1, x_1.size(1), x_1.size(2), x_1.size(3)) # (batch, seq, c, h, w)
        x_1 = x_1.permute(0, 2, 1, 3, 4).contiguous() # (batch, c, seq, h, w)
        x_1 = F.adaptive_max_pool3d(x_1, (1, None, None)) # (batch, c, 1, h, w)
        x_1 = x_1.permute(0, 2, 1, 3, 4).contiguous() # (batch, 1, c, h, w)
        x_1 = x_1.view(-1, x_1.size(2), x_1.size(3), x_1.size(4)).contiguous() # (batch*1, c, h, w)
        x_3 = F.leaky_relu(self.bn3_1(self.conv3_1(torch.cat((F.interpolate(x_2, scale_factor=(2, 2)), x_1), dim=1))))
        # NOTE(review): the next reshape result is immediately overwritten by
        # the following assignment and appears to be dead code — confirm
        # before removing.
        x = x.view(batch, -1, x.size(1), x.size(2), x.size(3)) # (batch, seq, 1, h, w)
        x = F.leaky_relu(self.bn4_1(self.conv4_1(F.interpolate(x_3, scale_factor=(2, 2)))))
        return x
|
| 213 |
+
|
| 214 |
+
class SimpleMatrixPredictor3DConv_direct(nn.Module):
    """Direct multi-step variant of MatrixPredictor3DConv.

    Instead of predicting autoregressively, a 1x1 convolution over the
    stacked (seq * channel) axis translates ``input_len`` input frames into
    ``aft_seq_length`` future frames in one shot.

    Input:  (B, T_in, hidden_len, H, W)
    Output: (B * aft_seq_length, hidden_len, H, W) — time folded into batch.
    """

    def __init__(self, T, hidden_len=64, image_pred=False, aft_seq_length=10):
        super(SimpleMatrixPredictor3DConv_direct, self).__init__()
        self.unet_base = hidden_len #64
        self.hidden_len = hidden_len #64
        self.conv_pre_1 = nn.Conv2d(hidden_len,hidden_len, kernel_size=3, stride=1, padding=1)
        self.conv_pre_2 = nn.Conv2d(hidden_len, hidden_len, kernel_size=3, stride=1, padding=1)
        self.fut_len = aft_seq_length

        self.conv3d_1 = Conv3D(self.unet_base, self.unet_base, kernel_size=(3, 3, 3), stride=1, padding=(1, 1, 1))

        # Temporal mixing of the deep features: a 3D conv when predicting
        # several future steps, a plain 2D conv when predicting one.
        if self.fut_len > 1 :
            self.temporal_layer = Conv3D(self.unet_base*2, self.unet_base*2, kernel_size=(3, 3, 3), stride=1, padding=(1, 1, 1))
        else:
            self.temporal_layer = nn.Sequential(
                nn.Conv2d(self.unet_base *2, self.unet_base * 2, kernel_size=3, stride=1, padding=1),
                nn.LeakyReLU())

        # 1x1 conv mapping the stacked input frames to stacked future frames.
        # NOTE(review): when image_pred is False the predictor expects T-1
        # frames (difference features, presumably) — confirm with the caller.
        input_len = T if image_pred else T - 1
        self.conv_translate = nn.Sequential(
            nn.Conv2d(self.unet_base * input_len , self.unet_base * self.fut_len, kernel_size=1, stride=1, padding=0),
            nn.LeakyReLU())

        # Spatial down path (stride 2 each).
        self.conv1_1 = nn.Conv2d(hidden_len, self.unet_base, kernel_size=3, stride=2, padding=1)
        self.conv2_1 = nn.Conv2d(self.unet_base, self.unet_base * 2, kernel_size=3, stride=2, padding=1)

        # Up path: fuse deep features with the skip branch, then project back.
        self.conv3_1 = nn.Conv2d(self.unet_base * 3, self.unet_base, kernel_size=3, stride=1, padding=1)
        self.conv4_1 = nn.Conv2d(self.unet_base, self.hidden_len, kernel_size=3, stride=1, padding=1)

        self.bn_pre_1 = nn.BatchNorm2d(hidden_len)
        self.bn_pre_2 = nn.BatchNorm2d(hidden_len)
        self.bn1_1 = nn.BatchNorm2d(self.unet_base)
        self.bn2_1 = nn.BatchNorm2d(self.unet_base * 2)
        self.bn3_1 = nn.BatchNorm2d(self.unet_base)
        self.bn4_1 = nn.BatchNorm2d(self.hidden_len)
        self.bn_translate = nn.BatchNorm2d(self.unet_base * self.fut_len)


    def forward(self,x):
        # x [B,T,C,32,32]
        # out: [B,C,32,32]
        batch, seq, z, h, w = x.size()
        # Fold time into the batch axis for the 2D convs.
        x = x.reshape(-1, x.size(-3), x.size(-2), x.size(-1))
        x = F.leaky_relu(self.bn_pre_1(self.conv_pre_1(x)))
        x = F.leaky_relu(self.bn_pre_2(self.conv_pre_2(x)))
        x_1 = F.leaky_relu(self.bn1_1(self.conv1_1(x)))

        x_1 = x_1.view(batch, -1, x_1.size(1), x_1.size(2), x_1.size(3)).contiguous() # (batch, seq, c, h, w)

        x_1 = self.conv3d_1(x_1) # (batch, seq, c, h, w), 1st temporal conv
        # Rebinds seq/c/h/w to the downsampled sizes from here on.
        batch, seq, c, h, w = x_1.shape
        # Translate the input sequence length to the future sequence length
        # via the 1x1 conv over the stacked (seq*c) channel axis.
        x_tmp = x_1.reshape(batch,-1,h,w)
        x_tmp = self.bn_translate(self.conv_translate(x_tmp))
        x_1 = x_tmp.reshape(batch,self.fut_len,c,h,w)
        x_1 = x_1.view(-1, x_1.size(2), x_1.size(3), x_1.size(4)).contiguous() # (batch * seq, c, h, w)
        x_2 = F.leaky_relu(self.bn2_1(self.conv2_1(x_1))) # (batch * seq, c, h // 2, w // 2)
        if self.fut_len > 1:
            x_2 = x_2.view(batch, -1, x_2.size(1), x_2.size(2), x_2.size(3)).contiguous() # (batch, seq, c, h, w)
            x_2 = self.temporal_layer(x_2) # (batch, seq=10, c, h // 2, w // 2)

            x_2 = x_2.view(-1, x_2.size(2), x_2.size(3), x_2.size(4)).contiguous() # (batch * seq, c, h//2, w//2), seq = 1
        else:
            x_2 = self.temporal_layer(x_2) # (batch * seq,c, h // 2, w // 2)

        x_1 = x_1.view(batch, -1, x_1.size(1), x_1.size(2), x_1.size(3)) # (batch, seq, c, h, w)

        x_1 = x_1.reshape(-1, x_1.size(2), x_1.size(3), x_1.size(4))


        # Fuse deep features (resized to x_1's spatial size) with the skip.
        x_3 = F.leaky_relu(self.bn3_1(self.conv3_1(torch.cat((F.interpolate(x_2, size=x_1.shape[2:]), x_1), dim=1))))
        x = x.view(batch, -1, x.size(1), x.size(2), x.size(3)) # (batch, seq, 1, h, w)
        # Upsample to the original spatial size taken from x's trailing dims.
        x = F.leaky_relu(self.bn4_1(self.conv4_1(F.interpolate(x_3, size = x.shape[3:]))))

        return x
|
| 288 |
+
|
| 289 |
+
class PredictModel(nn.Module):
    """Predicts future matching matrices from a sequence of past ones.

    Each row of a (hw x window_size) matrix is reinterpreted as a 2D map of
    size (mx_h, mx_w); a shared sequence predictor is run per spatial
    position (hw folded into the batch axis).
    """

    def __init__(self, T, hidden_len=32, aft_seq_length=10, mx_h=32, mx_w=32, use_direct_predictor=True):
        super(PredictModel, self).__init__()
        self.mx_h = mx_h
        self.mx_w = mx_w
        self.hidden_len = hidden_len
        self.fut_len = aft_seq_length
        # Lift the single-channel matrix maps into hidden_len channels.
        self.conv1 = nn.Conv2d( 1, hidden_len, kernel_size=3, padding=1, bias=False)
        # Fuses the prediction with a residual embedding from a coarser scale.
        self.fuse_conv = nn.Conv2d(hidden_len*2, hidden_len, kernel_size=3, padding=1, bias=False)
        if use_direct_predictor:
            self.predictor = SimpleMatrixPredictor3DConv_direct(T=T, hidden_len=hidden_len, aft_seq_length=aft_seq_length)
        else:
            self.predictor = MatrixPredictor3DConv(hidden_len)
        self.out_conv = nn.Conv2d(hidden_len, 1, kernel_size=3, padding=1, bias=False)
        self.softmax = nn.Softmax(dim=-1)
        self.sigmoid = nn.Sigmoid()

    def res_interpolate(self,in_tensor,template_tensor):
        '''
        Resize in_tensor's trailing two dims to match template_tensor's.

        in_tensor: batch,c,h'w',H'W'
        tempolate_tensor: batch,c,hw,HW
        out_tensor: batch,c,hw,HW
        '''
        out_tensor = F.interpolate(in_tensor,template_tensor.shape[-2:]) # (BThw,target_h,target_w)

        return out_tensor

    def forward(self,matrix_seq, softmax=False, res=None):
        # matrix_seq: (B, T, hw, window_size) matching matrices.
        B,T,hw,window_size = matrix_seq.size()

        # Reinterpret every matrix row as an (mx_h, mx_w) 2D map.
        matrix_seq = matrix_seq.reshape(-1,hw,self.mx_h,self.mx_w) # (BT,hw,hw)
        matrix_seq = matrix_seq.reshape(B*T*hw,self.mx_h,self.mx_w).unsqueeze(1) # (BThw,1,h,w)

        x = self.conv1(matrix_seq)
        x = x.reshape(B,T,hw,-1,self.mx_h,self.mx_w)
        # Run the predictor once per spatial position (hw folded into batch).
        x = x.permute(0,2,1,3,4,5).reshape(B*hw,T,-1,self.mx_h,self.mx_w)
        emb = self.predictor(x)

        emb = emb.reshape(B*hw*self.fut_len,-1,self.mx_h,self.mx_w)
        # Returned so the caller can pass it as `res` to a finer scale.
        res_emb = emb.clone()
        if res is not None:
            # Fuse with the residual embedding from the coarser (hw//4) scale,
            # resized to this scale's layout.
            template = emb.clone().reshape(B,hw,emb.shape[1],-1).permute(0,2,1,3)
            in_tensor = res.clone().reshape(B,hw//4,emb.shape[1],-1).permute(0,2,1,3)

            res_tensor = self.res_interpolate(in_tensor,template).permute(0,2,1,3).reshape(emb.shape)

            emb = self.fuse_conv(torch.cat([emb,res_tensor],dim=1))

        out = self.out_conv(emb) #(Bhwt,16,h//4,w//4)

        # Back to matrix layout: (B, fut_len, hw, window_size).
        out = out.reshape(B,hw,-1,self.mx_h,self.mx_w)
        out = out.permute(0,2,1,3,4)
        out = out.reshape(B,-1,hw,window_size)

        if softmax:
            # Normalize each predicted matrix over its flattened hw*window axis.
            out = out.view(B,out.shape[1],-1)
            out = self.softmax(out)
            out = out.reshape(B,-1,hw,window_size)

        return out,res_emb
|
utilpack/phydnet_modules.py
ADDED
|
@@ -0,0 +1,463 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
from numpy import *
|
| 4 |
+
from numpy.linalg import *
|
| 5 |
+
from scipy.special import factorial
|
| 6 |
+
from functools import reduce
|
| 7 |
+
|
| 8 |
+
__all__ = ['M2K','K2M']
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class PhyCell_Cell(nn.Module):
    """Single PhyCell step: physical prediction plus gated correction.

    Computes ``h_tilde = h + F(h)`` (physical prediction by the small conv
    network ``F``) and then ``next_h = h_tilde + K * (x - h_tilde)`` where
    the gate ``K`` is a sigmoid of a 3x3 convolution over ``[x, h]``.

    Args:
        input_dim: channels of the input/hidden maps.
        F_hidden_dim: intermediate channels of the F operator
            (must be divisible by 7 for the GroupNorm below).
        kernel_size: (kh, kw) of F's first convolution.
        bias: whether the gate convolution has a bias term.
    """

    def __init__(self, input_dim, F_hidden_dim, kernel_size, bias=1):
        super(PhyCell_Cell, self).__init__()
        self.input_dim = input_dim
        self.F_hidden_dim = F_hidden_dim
        self.kernel_size = kernel_size
        self.padding = kernel_size[0] // 2, kernel_size[1] // 2
        self.bias = bias

        # F approximates the physical dynamics operator.
        self.F = nn.Sequential()
        self.F.add_module('conv1', nn.Conv2d(in_channels=input_dim, out_channels=F_hidden_dim,
                                             kernel_size=self.kernel_size, stride=(1,1), padding=self.padding))
        self.F.add_module('bn1',nn.GroupNorm(7 ,F_hidden_dim))
        self.F.add_module('conv2', nn.Conv2d(in_channels=F_hidden_dim, out_channels=input_dim,
                                             kernel_size=(1,1), stride=(1,1), padding=(0,0)))

        # Kalman-like gate computed from the concatenated input and hidden.
        self.convgate = nn.Conv2d(in_channels=self.input_dim + self.input_dim,
                                  out_channels=self.input_dim,
                                  kernel_size=(3,3),
                                  padding=(1,1), bias=self.bias)

    def forward(self, x, hidden):  # x: [batch, input_dim, height, width]
        gate = torch.sigmoid(self.convgate(torch.cat([x, hidden], dim=1)))
        hidden_tilde = hidden + self.F(hidden)            # prediction
        # Correction (Hadamard product with the gate).
        return hidden_tilde + gate * (x - hidden_tilde)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class PhyCell(nn.Module):
    """Stack of PhyCell_Cell layers with hidden states held on the module.

    Hidden states live in ``self.H`` (a plain Python list, NOT registered
    buffers) and are re-created whenever ``forward`` is called with
    ``first_timestep=True``, so the module is stateful across the timesteps
    of one sequence.
    """

    def __init__(self, input_shape, input_dim, F_hidden_dims, n_layers, kernel_size, device):
        super(PhyCell, self).__init__()
        self.input_shape = input_shape      # (H, W) of the hidden maps
        self.input_dim = input_dim          # channels per hidden map
        self.F_hidden_dims = F_hidden_dims  # per-layer hidden dim of the F operator
        self.n_layers = n_layers
        self.kernel_size = kernel_size
        self.H = []                         # per-layer hidden states (filled by initHidden)
        self.device = device

        cell_list = []
        for i in range(0, self.n_layers):
            cell_list.append(PhyCell_Cell(input_dim=input_dim,
                                          F_hidden_dim=self.F_hidden_dims[i],
                                          kernel_size=self.kernel_size))
        self.cell_list = nn.ModuleList(cell_list)


    def forward(self, input_, first_timestep=False): # input_ [batch_size, 1, channels, width, height]
        batch_size = input_.data.size()[0]
        if (first_timestep):
            self.initHidden(batch_size) # init Hidden at each forward start
        for j, cell in enumerate(self.cell_list):
            # Keep stored hidden states on the same device as the input.
            self.H[j] = self.H[j].to(input_.device)
            if j==0: # bottom layer consumes the external input
                self.H[j] = cell(input_, self.H[j])
            else:    # upper layers consume the layer below
                self.H[j] = cell(self.H[j-1],self.H[j])
        # Returns the same list twice: (hidden states, outputs).
        return self.H, self.H

    def initHidden(self, batch_size):
        """Create zero hidden states for every layer on ``self.device``."""
        self.H = []
        for i in range(self.n_layers):
            self.H.append(torch.zeros(
                batch_size, self.input_dim, self.input_shape[0], self.input_shape[1]).to(self.device))

    def setHidden(self, H):
        """Replace the hidden states (e.g. when switching to decoding)."""
        self.H = H
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
class PhyD_ConvLSTM_Cell(nn.Module):
    """ConvLSTM cell processing a single timestep.

    Args:
        input_shape: (height, width) of the input tensor.
        input_dim: number of channels of the input tensor.
        hidden_dim: number of channels of the hidden state.
        kernel_size: (kh, kw) of the gate convolution.
        bias: whether the gate convolution has a bias term.
    """

    def __init__(self, input_shape, input_dim, hidden_dim, kernel_size, bias=1):
        super(PhyD_ConvLSTM_Cell, self).__init__()
        self.height, self.width = input_shape
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.kernel_size = kernel_size
        self.padding = kernel_size[0] // 2, kernel_size[1] // 2
        self.bias = bias
        # One convolution produces all four gate pre-activations at once.
        self.conv = nn.Conv2d(in_channels=self.input_dim + self.hidden_dim,
                              out_channels=4 * self.hidden_dim,
                              kernel_size=self.kernel_size,
                              padding=self.padding, bias=self.bias)

    def forward(self, x, hidden):  # x: [batch, input_dim, height, width]
        h_cur, c_cur = hidden

        gates = self.conv(torch.cat([x, h_cur], dim=1))
        cc_i, cc_f, cc_o, cc_g = torch.split(gates, self.hidden_dim, dim=1)
        in_gate = torch.sigmoid(cc_i)
        forget_gate = torch.sigmoid(cc_f)
        out_gate = torch.sigmoid(cc_o)
        cell_input = torch.tanh(cc_g)

        c_next = forget_gate * c_cur + in_gate * cell_input
        h_next = out_gate * torch.tanh(c_next)
        return h_next, c_next
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
class PhyD_ConvLSTM(nn.Module):
    """Stack of PhyD_ConvLSTM_Cell layers with module-held (H, C) states.

    Like PhyCell, the per-layer hidden and cell states are stored in the
    plain lists ``self.H`` / ``self.C`` and re-created when ``forward`` is
    called with ``first_timestep=True``.
    """

    def __init__(self, input_shape, input_dim, hidden_dims, n_layers, kernel_size, device):
        super(PhyD_ConvLSTM, self).__init__()
        self.input_shape = input_shape  # (H, W) of the feature maps
        self.input_dim = input_dim
        self.hidden_dims = hidden_dims  # per-layer hidden channel counts
        self.n_layers = n_layers
        self.kernel_size = kernel_size
        self.H, self.C = [], []
        self.device = device

        cell_list = []
        for i in range(0, self.n_layers):
            cur_input_dim = self.input_dim if i == 0 else self.hidden_dims[i-1]
            # NOTE(review): debug print fires on every construction.
            print('layer ', i, 'input dim ', cur_input_dim, ' hidden dim ', self.hidden_dims[i])
            cell_list.append(PhyD_ConvLSTM_Cell(input_shape=self.input_shape,
                                                input_dim=cur_input_dim,
                                                hidden_dim=self.hidden_dims[i],
                                                kernel_size=self.kernel_size))
        self.cell_list = nn.ModuleList(cell_list)

    def forward(self, input_, first_timestep=False): # input_ [batch_size, 1, channels, width, height]
        batch_size = input_.data.size()[0]
        if (first_timestep):
            self.initHidden(batch_size) # init Hidden at each forward start
        for j, cell in enumerate(self.cell_list):
            # Keep stored states on the same device as the input.
            self.H[j] = self.H[j].to(input_.device)
            self.C[j] = self.C[j].to(input_.device)
            if j==0: # bottom layer consumes the external input
                self.H[j], self.C[j] = cell(input_, (self.H[j],self.C[j]))
            else:    # upper layers consume the layer below
                self.H[j], self.C[j] = cell(self.H[j-1],(self.H[j],self.C[j]))
        return (self.H,self.C) , self.H # (hidden, output)

    def initHidden(self,batch_size):
        """Create zero hidden and cell states for all layers on ``self.device``."""
        self.H, self.C = [],[]
        for i in range(self.n_layers):
            self.H.append(torch.zeros(
                batch_size, self.hidden_dims[i], self.input_shape[0], self.input_shape[1]).to(self.device))
            self.C.append(torch.zeros(
                batch_size, self.hidden_dims[i], self.input_shape[0], self.input_shape[1]).to(self.device))

    def setHidden(self, hidden):
        """Replace both hidden and cell state lists."""
        H,C = hidden
        self.H, self.C = H,C
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
class dcgan_conv(nn.Module):
    """Conv2d(3x3) + GroupNorm(16 groups) + LeakyReLU(0.2) block.

    ``nout`` must be divisible by 16 for the GroupNorm.
    """

    def __init__(self, nin, nout, stride):
        super(dcgan_conv, self).__init__()
        layers = [
            nn.Conv2d(in_channels=nin, out_channels=nout, kernel_size=(3,3),
                      stride=stride, padding=1),
            nn.GroupNorm(16, nout),
            nn.LeakyReLU(0.2, inplace=True),
        ]
        self.main = nn.Sequential(*layers)

    def forward(self, input):
        return self.main(input)
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
class dcgan_upconv(nn.Module):
    """ConvTranspose2d(3x3) + GroupNorm(16 groups) + LeakyReLU(0.2) block.

    With stride 2, output_padding=1 makes the spatial size exactly double;
    with stride 1 the size is unchanged. ``nout`` must be divisible by 16.
    """

    def __init__(self, nin, nout, stride):
        super(dcgan_upconv, self).__init__()
        output_padding = 1 if stride == 2 else 0
        self.main = nn.Sequential(
            nn.ConvTranspose2d(in_channels=nin, out_channels=nout, kernel_size=(3,3),
                               stride=stride, padding=1, output_padding=output_padding),
            nn.GroupNorm(16, nout),
            nn.LeakyReLU(0.2, inplace=True),
        )

    def forward(self, input):
        return self.main(input)
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
class encoder_E(nn.Module):
    """General encoder: (nc, 64, 64) -> (2*nf, 16, 16) for patch_size=4.

    Downsamples by 2 in c1 and by ``patch_size // 2`` in c3, so the total
    spatial reduction equals ``patch_size``.
    """

    def __init__(self, nc=1, nf=32, patch_size=4):
        super(encoder_E, self).__init__()
        assert patch_size in [2, 4]
        stride_2 = patch_size // 2
        # input is (nc) x 64 x 64
        self.c1 = dcgan_conv(nc, nf, stride=2)             # (nf) x 32 x 32
        self.c2 = dcgan_conv(nf, nf, stride=1)             # (nf) x 32 x 32
        self.c3 = dcgan_conv(nf, 2*nf, stride=stride_2)    # (2*nf) x 16 x 16

    def forward(self, input):
        return self.c3(self.c2(self.c1(input)))
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
class decoder_D(nn.Module):
    """General decoder: (2*nf, 16, 16) -> (nc, 64, 64) for patch_size=4.

    Mirrors encoder_E: upsamples by 2 in upc1 and by ``patch_size // 2`` in
    the final transposed conv, which has no norm/activation so the caller
    can apply its own output nonlinearity.
    """

    def __init__(self, nc=1, nf=32, patch_size=4):
        super(decoder_D, self).__init__()
        assert patch_size in [2, 4]
        stride_2 = patch_size // 2
        output_padding = 1 if stride_2 == 2 else 0
        self.upc1 = dcgan_upconv(2*nf, nf, stride=2)   # (nf) x 32 x 32
        self.upc2 = dcgan_upconv(nf, nf, stride=1)     # (nf) x 32 x 32
        self.upc3 = nn.ConvTranspose2d(in_channels=nf, out_channels=nc, kernel_size=(3,3),
                                       stride=stride_2, padding=1,
                                       output_padding=output_padding)  # (nc) x 64 x 64

    def forward(self, input):
        return self.upc3(self.upc2(self.upc1(input)))
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
class encoder_specific(nn.Module):
    """Branch-specific encoder: two stride-1 conv blocks, resolution kept."""

    def __init__(self, nc=64, nf=64):
        super(encoder_specific, self).__init__()
        self.c1 = dcgan_conv(nc, nf, stride=1)  # (nf) x 16 x 16
        self.c2 = dcgan_conv(nf, nf, stride=1)  # (nf) x 16 x 16

    def forward(self, input):
        return self.c2(self.c1(input))
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
class decoder_specific(nn.Module):
    """Branch-specific decoder: two stride-1 upconv blocks, resolution kept."""

    def __init__(self, nc=64, nf=64):
        super(decoder_specific, self).__init__()
        self.upc1 = dcgan_upconv(nf, nf, stride=1)  # (nf) x 16 x 16
        self.upc2 = dcgan_upconv(nf, nc, stride=1)  # (nc) x 16 x 16

    def forward(self, input):
        return self.upc2(self.upc1(input))
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
class PhyD_EncoderRNN(torch.nn.Module):
    """PhyDNet encoder-decoder wrapper combining a PhyCell and a ConvLSTM.

    A shared general encoder/decoder surrounds two branch-specific pairs:
    a "physical" branch fed to ``phycell`` and a "residual" branch fed to
    ``convcell``. The two decoded branches are summed before the final
    decoding back to image space.
    """

    def __init__(self, phycell, convcell, in_channel=1, patch_size=4):
        super(PhyD_EncoderRNN, self).__init__()
        self.encoder_E = encoder_E(nc=in_channel, patch_size=patch_size) # general encoder 64x64x1 -> 32x32x32
        self.encoder_Ep = encoder_specific() # specific image encoder 32x32x32 -> 16x16x64
        self.encoder_Er = encoder_specific()
        self.decoder_Dp = decoder_specific() # specific image decoder 16x16x64 -> 32x32x32
        self.decoder_Dr = decoder_specific()
        self.decoder_D = decoder_D(nc=in_channel, patch_size=patch_size) # general decoder 32x32x32 -> 64x64x1

        self.phycell = phycell
        self.convcell = convcell

    def forward(self, input, first_timestep=False, decoding=False):
        input = self.encoder_E(input) # general encoder 64x64x1 -> 32x32x32

        if decoding: # input=None in decoding phase
            # NOTE(review): the physical branch then receives None — the
            # recurrent cells are presumably expected to run on their stored
            # state only; confirm against PhyCell's handling.
            input_phys = None
        else:
            input_phys = self.encoder_Ep(input)
        input_conv = self.encoder_Er(input)

        hidden1, output1 = self.phycell(input_phys, first_timestep)
        hidden2, output2 = self.convcell(input_conv, first_timestep)

        # Decode the top-layer output of each recurrent branch.
        decoded_Dp = self.decoder_Dp(output1[-1])
        decoded_Dr = self.decoder_Dr(output2[-1])

        out_phys = torch.sigmoid(self.decoder_D(decoded_Dp)) # partial reconstructions for vizualization
        out_conv = torch.sigmoid(self.decoder_D(decoded_Dr))

        # Branch fusion is a plain sum, then the shared general decoder.
        concat = decoded_Dp + decoded_Dr
        output_image = torch.sigmoid( self.decoder_D(concat ))
        # NOTE(review): out_phys is returned twice (positions 1 and 4) —
        # confirm callers rely on this exact tuple shape before changing it.
        return out_phys, hidden1, output_image, out_phys, out_conv
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
def _apply_axis_left_dot(x, mats):
    """Contract each non-batch axis of ``x`` with the matching matrix in ``mats``.

    ``x`` is treated as a batch (leading axis) of ``len(mats)``-dimensional
    tensors; axis ``i`` is multiplied on the left by ``mats[i]``.  The result
    has the same shape as the input.
    """
    assert x.dim() == len(mats)+1
    sizex = x.size()
    k = x.dim()-1
    # Each tensordot contracts the current last axis and moves the matrix's
    # free axis to the front, so after k steps the axes are rotated; the
    # permute below rotates them back before restoring the original shape.
    for i in range(k):
        x = tensordot(mats[k-i-1], x, dim=[1,k])
    x = x.permute([k,]+list(range(k))).contiguous()
    x = x.view(sizex)
    return x
|
| 321 |
+
|
| 322 |
+
def _apply_axis_right_dot(x, mats):
    """Contract each non-batch axis of ``x`` with the matching matrix in
    ``mats``, multiplying on the right.

    Counterpart of ``_apply_axis_left_dot``; the result keeps the input shape.
    """
    assert x.dim() == len(mats)+1
    sizex = x.size()
    k = x.dim()-1
    # Move the batch axis to the back so the axes to contract come first.
    x = x.permute(list(range(1,k+1))+[0,])
    # Contract the leading axis with mats[i]; tensordot appends the matrix's
    # free axis at the end, so after k steps the batch axis is in front again.
    for i in range(k):
        x = tensordot(x, mats[i], dim=[0,0])
    x = x.contiguous()
    x = x.view(sizex)
    return x
|
| 332 |
+
|
| 333 |
+
class _MK(nn.Module):
    """Base module holding per-axis moment/kernel conversion matrices.

    For every kernel side length ``l`` in ``shape`` it builds the matrix ``M``
    with rows ``((arange(l)-(l-1)//2)**i)/i!`` and its inverse, and registers
    both as non-trainable buffers.  Subclasses (M2K / K2M) apply these
    matrices along each kernel axis.
    """
    def __init__(self, shape):
        super(_MK, self).__init__()
        self._size = torch.Size(shape)  # kernel shape handled by this module
        self._dim = len(shape)          # number of kernel axes
        M = []
        invM = []
        assert len(shape) > 0
        j = 0
        for l in shape:
            # NOTE(review): zeros/arange/factorial/inv/newaxis are assumed to
            # come from numpy/scipy imports at the top of this file — confirm.
            M.append(zeros((l,l)))
            for i in range(l):
                M[-1][i] = ((arange(l)-(l-1)//2)**i)/factorial(i)
            invM.append(inv(M[-1]))
            # Buffers follow the module across devices but receive no gradients.
            self.register_buffer('_M'+str(j), torch.from_numpy(M[-1]))
            self.register_buffer('_invM'+str(j), torch.from_numpy(invM[-1]))
            j += 1

    @property
    def M(self):
        # Per-axis moment matrices, in axis order.
        return list(self._buffers['_M'+str(j)] for j in range(self.dim()))
    @property
    def invM(self):
        # Per-axis inverse moment matrices, in axis order.
        return list(self._buffers['_invM'+str(j)] for j in range(self.dim()))

    def size(self):
        # Kernel shape handled by this module.
        return self._size
    def dim(self):
        # Number of kernel axes.
        return self._dim
    def _packdim(self, x):
        """Flatten all leading axes of ``x`` into a single batch axis."""
        assert x.dim() >= self.dim()
        if x.dim() == self.dim():
            x = x[newaxis,:]  # prepend a singleton batch axis
        x = x.contiguous()
        x = x.view([-1,]+list(x.size()[-self.dim():]))
        return x

    def forward(self):
        # The base class performs no computation; subclasses override forward.
        pass
|
| 372 |
+
|
| 373 |
+
|
| 374 |
+
class M2K(_MK):
    """
    convert moment matrix to convolution kernel
    Arguments:
        shape (tuple of int): kernel shape
    Usage:
        m2k = M2K([5,5])
        m = torch.randn(5,5,dtype=torch.float64)
        k = m2k(m)
    """

    def __init__(self, shape):
        super(M2K, self).__init__(shape)

    def forward(self, m):
        """
        m (Tensor): torch.size=[...,*self.shape]
        """
        original_shape = m.size()
        packed = self._packdim(m)
        kernel = _apply_axis_left_dot(packed, self.invM)
        return kernel.view(original_shape)
|
| 395 |
+
|
| 396 |
+
|
| 397 |
+
class K2M(_MK):
    """
    convert convolution kernel to moment matrix
    Arguments:
        shape (tuple of int): kernel shape
    Usage:
        k2m = K2M([5,5])
        k = torch.randn(5,5,dtype=torch.float64)
        m = k2m(k)
    """

    def __init__(self, shape):
        super(K2M, self).__init__(shape)

    def forward(self, k):
        """
        k (Tensor): torch.size=[...,*self.shape]
        """
        original_shape = k.size()
        packed = self._packdim(k)
        moments = _apply_axis_left_dot(packed, self.M)
        return moments.view(original_shape)
|
| 418 |
+
|
| 419 |
+
|
| 420 |
+
def tensordot(a, b, dim):
    """
    tensordot in PyTorch, see numpy.tensordot?

    ``dim`` is either an int (contract the last ``dim`` axes of ``a`` with the
    first ``dim`` axes of ``b``) or a pair ``(adims, bdims)`` of axis lists.
    The contraction is realised as one 2-D matmul after reshaping.
    """
    prod = lambda sizes: reduce(lambda u, v: u * v, sizes, 1)
    if isinstance(dim, int):
        a = a.contiguous()
        b = b.contiguous()
        sizea, sizeb = a.size(), b.size()
        sizea0, sizea1 = sizea[:-dim], sizea[-dim:]
        sizeb0, sizeb1 = sizeb[:dim], sizeb[dim:]
        N = prod(sizea1)
        assert prod(sizeb0) == N
    else:
        adims, bdims = dim[0], dim[1]
        if isinstance(adims, int):
            adims = [adims]
        if isinstance(bdims, int):
            bdims = [bdims]
        # Move the contracted axes of `a` to the back and those of `b` to
        # the front, keeping the remaining axes in their original order.
        adims_rest = sorted(set(range(a.dim())).difference(set(adims)))
        bdims_rest = sorted(set(range(b.dim())).difference(set(bdims)))
        a = a.permute(*(adims_rest + adims)).contiguous()
        b = b.permute(*(bdims + bdims_rest)).contiguous()

        sizea, sizeb = a.size(), b.size()
        sizea0, sizea1 = sizea[:-len(adims)], sizea[-len(adims):]
        sizeb0, sizeb1 = sizeb[:len(bdims)], sizeb[len(bdims):]
        N = prod(sizea1)
        assert prod(sizeb0) == N
    # Collapse to a matrix product, then restore the free axes.
    c = a.view([-1, N]) @ b.view([N, -1])
    return c.view(sizea0 + sizeb1)
|
utilpack/predrnn_modules.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class SpatioTemporalLSTMCell(nn.Module):
    """Spatiotemporal LSTM (ST-LSTM) cell used by PredRNN.

    Keeps a temporal cell state ``c`` and a spatiotemporal memory ``m``.
    All gates are produced by convolutions, optionally followed by LayerNorm.
    The duplicated layer_norm/no-layer_norm branches of the original were
    collapsed into one builder; state_dict keys are unchanged (each gate conv
    stays at index 0 of its nn.Sequential).
    """

    def __init__(self, in_channel, num_hidden, height, width, filter_size, stride, layer_norm):
        """
        Args:
            in_channel: channels of the input ``x_t``.
            num_hidden: channels of the hidden/cell/memory states.
            height, width: spatial size (needed for the LayerNorm shapes).
            filter_size: conv kernel size; padding keeps the spatial size.
            stride: conv stride.
            layer_norm: apply LayerNorm after each gate convolution.
        """
        super(SpatioTemporalLSTMCell, self).__init__()

        self.num_hidden = num_hidden
        self.padding = filter_size // 2
        self._forget_bias = 1.0

        def _gate_conv(c_in, c_out):
            # Conv (+ optional LayerNorm) wrapped in nn.Sequential so the
            # parameter names match the original implementation.
            layers = [nn.Conv2d(c_in, c_out, kernel_size=filter_size,
                                stride=stride, padding=self.padding, bias=False)]
            if layer_norm:
                layers.append(nn.LayerNorm([c_out, height, width]))
            return nn.Sequential(*layers)

        self.conv_x = _gate_conv(in_channel, num_hidden * 7)  # 7 gate maps from x_t
        self.conv_h = _gate_conv(num_hidden, num_hidden * 4)  # 4 gate maps from h_t
        self.conv_m = _gate_conv(num_hidden, num_hidden * 3)  # 3 gate maps from m_t
        self.conv_o = _gate_conv(num_hidden * 2, num_hidden)  # output gate from [c, m]
        self.conv_last = nn.Conv2d(num_hidden * 2, num_hidden, kernel_size=1,
                                   stride=1, padding=0, bias=False)

    def forward(self, x_t, h_t, c_t, m_t):
        """One cell step; returns ``(h_new, c_new, m_new)``."""
        x_concat = self.conv_x(x_t)
        h_concat = self.conv_h(h_t)
        m_concat = self.conv_m(m_t)
        i_x, f_x, g_x, i_x_prime, f_x_prime, g_x_prime, o_x = \
            torch.split(x_concat, self.num_hidden, dim=1)
        i_h, f_h, g_h, o_h = torch.split(h_concat, self.num_hidden, dim=1)
        i_m, f_m, g_m = torch.split(m_concat, self.num_hidden, dim=1)

        # Temporal (standard LSTM) path updating c.
        i_t = torch.sigmoid(i_x + i_h)
        f_t = torch.sigmoid(f_x + f_h + self._forget_bias)  # forget bias eases training
        g_t = torch.tanh(g_x + g_h)

        c_new = f_t * c_t + i_t * g_t

        # Spatiotemporal path updating m (primed gates).
        i_t_prime = torch.sigmoid(i_x_prime + i_m)
        f_t_prime = torch.sigmoid(f_x_prime + f_m + self._forget_bias)
        g_t_prime = torch.tanh(g_x_prime + g_m)

        m_new = f_t_prime * m_t + i_t_prime * g_t_prime

        # Output gate reads both memories; 1x1 conv fuses them into h.
        mem = torch.cat((c_new, m_new), 1)
        o_t = torch.sigmoid(o_x + o_h + self.conv_o(mem))
        h_new = o_t * torch.tanh(self.conv_last(mem))

        return h_new, c_new, m_new
|
utilpack/predrnnpp_modules.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class CausalLSTMCell(nn.Module):
    """Causal LSTM cell from PredRNN++.

    The cell cascades the temporal memory ``c`` into the spatial memory ``m``
    (via ``conv_c2m``), making the state transition deeper than in ST-LSTM.
    The duplicated layer_norm/no-layer_norm construction of the original was
    collapsed into one builder; state_dict keys are unchanged.
    """

    def __init__(self, in_channel, num_hidden, height, width, filter_size, stride, layer_norm):
        """
        Args:
            in_channel: channels of the input ``x_t``.
            num_hidden: channels of the hidden/cell/memory states.
            height, width: spatial size (needed for the LayerNorm shapes).
            filter_size: conv kernel size; padding keeps the spatial size.
            stride: conv stride.
            layer_norm: apply LayerNorm after each gate convolution.
        """
        super(CausalLSTMCell, self).__init__()

        self.num_hidden = num_hidden
        self.padding = filter_size // 2
        self._forget_bias = 1.0

        def _gate_conv(c_in, c_out):
            # Conv (+ optional LayerNorm) wrapped in nn.Sequential so the
            # parameter names match the original implementation.
            layers = [nn.Conv2d(c_in, c_out, kernel_size=filter_size,
                                stride=stride, padding=self.padding, bias=False)]
            if layer_norm:
                layers.append(nn.LayerNorm([c_out, height, width]))
            return nn.Sequential(*layers)

        self.conv_x = _gate_conv(in_channel, num_hidden * 7)    # 7 gate maps from x_t
        self.conv_h = _gate_conv(num_hidden, num_hidden * 4)    # 4 gate maps from h_t
        self.conv_c = _gate_conv(num_hidden, num_hidden * 3)    # 3 gate maps from c_t
        self.conv_m = _gate_conv(num_hidden, num_hidden * 3)    # 3 gate maps from m_t
        # NOTE: conv_o is defined (as in the original) but not used by
        # forward(); kept for checkpoint compatibility.
        self.conv_o = _gate_conv(num_hidden * 2, num_hidden)
        self.conv_c2m = _gate_conv(num_hidden, num_hidden * 4)  # cascade c -> m gates
        self.conv_om = _gate_conv(num_hidden, num_hidden)       # output contribution of m
        self.conv_last = nn.Conv2d(num_hidden * 2, num_hidden, kernel_size=1,
                                   stride=1, padding=0, bias=False)

    def forward(self, x_t, h_t, c_t, m_t):
        """One cell step; returns ``(h_new, c_new, m_new)``."""
        x_concat = self.conv_x(x_t)
        h_concat = self.conv_h(h_t)
        c_concat = self.conv_c(c_t)
        m_concat = self.conv_m(m_t)
        i_x, f_x, g_x, i_x_prime, f_x_prime, g_x_prime, o_x = \
            torch.split(x_concat, self.num_hidden, dim=1)
        i_h, f_h, g_h, o_h = torch.split(h_concat, self.num_hidden, dim=1)
        i_m, f_m, m_m = torch.split(m_concat, self.num_hidden, dim=1)
        i_c, f_c, g_c = torch.split(c_concat, self.num_hidden, dim=1)

        # First stage: update the temporal memory c (gates also read c_t).
        i_t = torch.sigmoid(i_x + i_h + i_c)
        f_t = torch.sigmoid(f_x + f_h + f_c + self._forget_bias)
        g_t = torch.tanh(g_x + g_h + g_c)

        c_new = f_t * c_t + i_t * g_t

        # Second stage: the fresh c_new drives the gates of the spatial
        # memory m (the "causal" cascade). This rebinds i_c/f_c/g_c.
        c2m = self.conv_c2m(c_new)
        i_c, g_c, f_c, o_c = torch.split(c2m, self.num_hidden, dim=1)

        i_t_prime = torch.sigmoid(i_x_prime + i_m + i_c)
        f_t_prime = torch.sigmoid(f_x_prime + f_m + f_c + self._forget_bias)
        g_t_prime = torch.tanh(g_x_prime + g_c)

        m_new = f_t_prime * torch.tanh(m_m) + i_t_prime * g_t_prime
        o_m = self.conv_om(m_new)

        # Output gate uses tanh here (as in the original), not sigmoid.
        o_t = torch.tanh(o_x + o_h + o_c + o_m)
        mem = torch.cat((c_new, m_new), 1)
        h_new = o_t * torch.tanh(self.conv_last(mem))

        return h_new, c_new, m_new
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
class GHU(nn.Module):
    """Gradient Highway Unit (GHU) used alongside Causal LSTM in PredRNN++.

    Computes ``z_new = u * p + (1 - u) * z`` where ``p`` (transform) and ``u``
    (switch gate) come from convolutions over the input ``x`` and the highway
    state ``z``.
    """

    def __init__(self, in_channel, num_hidden, height, width, filter_size,
                 stride, layer_norm, initializer=0.001):
        """
        Args:
            in_channel: channels of ``x`` (and of ``z``; both branches use the
                same conv input width, so callers pass in_channel == num_hidden).
            num_hidden: channels of the highway state.
            height, width: spatial size (needed for the LayerNorm shape).
            filter_size: conv kernel size; padding keeps the spatial size.
            stride: conv stride.
            layer_norm: apply LayerNorm after each convolution.
            initializer: half-width of the uniform conv-weight init;
                -1 keeps PyTorch's default initialization.
        """
        super(GHU, self).__init__()

        self.filter_size = filter_size
        self.padding = filter_size // 2
        self.num_hidden = num_hidden
        self.layer_norm = layer_norm

        def _branch():
            layers = [nn.Conv2d(in_channel, num_hidden * 2, kernel_size=filter_size,
                                stride=stride, padding=self.padding, bias=False)]
            if layer_norm:
                # BUGFIX: normalize over the actual conv output, which has
                # num_hidden * 2 channels. The previous shape
                # [num_hidden, height, width] raised at runtime.
                layers.append(nn.LayerNorm([num_hidden * 2, height, width]))
            return nn.Sequential(*layers)

        self.z_concat = _branch()
        self.x_concat = _branch()

        if initializer != -1:
            self.initializer = initializer
            self.apply(self._init_weights)

    def _init_weights(self, m):
        # Uniform init in [-initializer, initializer] for all conv weights.
        if isinstance(m, (nn.Conv2d)):
            nn.init.uniform_(m.weight, -self.initializer, self.initializer)

    def _init_state(self, inputs):
        # The highway state starts as zeros shaped like the input.
        return torch.zeros_like(inputs)

    def forward(self, x, z):
        """One GHU step; returns the updated highway state."""
        if z is None:
            z = self._init_state(x)
        z_concat = self.z_concat(z)
        x_concat = self.x_concat(x)

        gates = x_concat + z_concat
        p, u = torch.split(gates, self.num_hidden, dim=1)
        p = torch.tanh(p)      # candidate transform
        u = torch.sigmoid(u)   # switch gate: blend transform vs. carry-through
        z_new = u * p + (1 - u) * z
        return z_new
|
utilpack/predrnnv2_modules.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class SpatioTemporalLSTMCellv2(nn.Module):
    """ST-LSTM cell variant for PredRNNv2.

    Identical gating to the v1 cell, but additionally returns the memory
    increments ``delta_c`` and ``delta_m`` (used by PredRNNv2's decoupling
    loss). The duplicated layer_norm/no-layer_norm construction of the
    original was collapsed into one builder; state_dict keys are unchanged.
    """

    def __init__(self, in_channel, num_hidden, height, width, filter_size, stride, layer_norm):
        """
        Args:
            in_channel: channels of the input ``x_t``.
            num_hidden: channels of the hidden/cell/memory states.
            height, width: spatial size (needed for the LayerNorm shapes).
            filter_size: conv kernel size; padding keeps the spatial size.
            stride: conv stride.
            layer_norm: apply LayerNorm after each gate convolution.
        """
        super(SpatioTemporalLSTMCellv2, self).__init__()

        self.num_hidden = num_hidden
        self.padding = filter_size // 2
        self._forget_bias = 1.0

        def _gate_conv(c_in, c_out):
            # Conv (+ optional LayerNorm) wrapped in nn.Sequential so the
            # parameter names match the original implementation.
            layers = [nn.Conv2d(c_in, c_out, kernel_size=filter_size,
                                stride=stride, padding=self.padding, bias=False)]
            if layer_norm:
                layers.append(nn.LayerNorm([c_out, height, width]))
            return nn.Sequential(*layers)

        self.conv_x = _gate_conv(in_channel, num_hidden * 7)  # 7 gate maps from x_t
        self.conv_h = _gate_conv(num_hidden, num_hidden * 4)  # 4 gate maps from h_t
        self.conv_m = _gate_conv(num_hidden, num_hidden * 3)  # 3 gate maps from m_t
        self.conv_o = _gate_conv(num_hidden * 2, num_hidden)  # output gate from [c, m]
        self.conv_last = nn.Conv2d(num_hidden * 2, num_hidden, kernel_size=1,
                                   stride=1, padding=0, bias=False)

    def forward(self, x_t, h_t, c_t, m_t):
        """One cell step; returns ``(h_new, c_new, m_new, delta_c, delta_m)``."""
        x_concat = self.conv_x(x_t)
        h_concat = self.conv_h(h_t)
        m_concat = self.conv_m(m_t)
        i_x, f_x, g_x, i_x_prime, f_x_prime, g_x_prime, o_x = \
            torch.split(x_concat, self.num_hidden, dim=1)
        i_h, f_h, g_h, o_h = torch.split(h_concat, self.num_hidden, dim=1)
        i_m, f_m, g_m = torch.split(m_concat, self.num_hidden, dim=1)

        # Temporal path: the increment delta_c is exposed for the
        # memory-decoupling regularizer.
        i_t = torch.sigmoid(i_x + i_h)
        f_t = torch.sigmoid(f_x + f_h + self._forget_bias)
        g_t = torch.tanh(g_x + g_h)

        delta_c = i_t * g_t
        c_new = f_t * c_t + delta_c

        # Spatiotemporal path: same structure with primed gates.
        i_t_prime = torch.sigmoid(i_x_prime + i_m)
        f_t_prime = torch.sigmoid(f_x_prime + f_m + self._forget_bias)
        g_t_prime = torch.tanh(g_x_prime + g_m)

        delta_m = i_t_prime * g_t_prime
        m_new = f_t_prime * m_t + delta_m

        mem = torch.cat((c_new, m_new), 1)
        o_t = torch.sigmoid(o_x + o_h + self.conv_o(mem))
        h_new = o_t * torch.tanh(self.conv_last(mem))

        return h_new, c_new, m_new, delta_c, delta_m
|
utilpack/simvp_modules.py
ADDED
|
@@ -0,0 +1,586 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
|
| 5 |
+
from timm.layers import DropPath, trunc_normal_
|
| 6 |
+
from timm.models.convnext import ConvNeXtBlock
|
| 7 |
+
from timm.models.mlp_mixer import MixerBlock
|
| 8 |
+
from timm.models.swin_transformer import SwinTransformerBlock, window_partition, window_reverse
|
| 9 |
+
from timm.models.vision_transformer import Block as ViTBlock
|
| 10 |
+
|
| 11 |
+
from .layers import (HorBlock, ChannelAggregationFFN, MultiOrderGatedAggregation,
|
| 12 |
+
PoolFormerBlock, CBlock, SABlock, MixMlp, VANBlock)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class BasicConv2d(nn.Module):
    """Conv2d building block for SimVP.

    Either a plain convolution or (when ``upsampling``) a conv producing
    4x channels followed by PixelShuffle(2). GroupNorm + SiLU are applied
    only when ``act_norm`` is True.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 stride=1,
                 padding=0,
                 dilation=1,
                 upsampling=False,
                 act_norm=False,
                 act_inplace=True):
        super(BasicConv2d, self).__init__()
        self.act_norm = act_norm

        if upsampling is True:
            # Sub-pixel upsampling: 4x channels, then PixelShuffle doubles H/W.
            conv = nn.Sequential(*[
                nn.Conv2d(in_channels, out_channels*4, kernel_size=kernel_size,
                          stride=1, padding=padding, dilation=dilation),
                nn.PixelShuffle(2)
            ])
        else:
            conv = nn.Conv2d(
                in_channels, out_channels, kernel_size=kernel_size,
                stride=stride, padding=padding, dilation=dilation)
        self.conv = conv

        # Norm/activation are always constructed but only used when act_norm.
        self.norm = nn.GroupNorm(2, out_channels)
        self.act = nn.SiLU(inplace=act_inplace)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        # Truncated-normal conv weights, zero bias.
        if isinstance(m, (nn.Conv2d)):
            trunc_normal_(m.weight, std=.02)
            nn.init.constant_(m.bias, 0)

    def forward(self, x):
        out = self.conv(x)
        return self.act(self.norm(out)) if self.act_norm else out
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class ConvSC(nn.Module):
    """Thin wrapper choosing stride/padding for BasicConv2d in SimVP.

    ``downsampling`` selects stride 2; ``upsampling`` is forwarded to
    BasicConv2d's PixelShuffle path.
    """

    def __init__(self,
                 C_in,
                 C_out,
                 kernel_size=3,
                 downsampling=False,
                 upsampling=False,
                 act_norm=True,
                 act_inplace=True):
        super(ConvSC, self).__init__()

        stride = 2 if downsampling is True else 1
        # Padding that preserves (stride 1) or exactly halves (stride 2) H/W.
        padding = (kernel_size - stride + 1) // 2

        self.conv = BasicConv2d(C_in, C_out, kernel_size=kernel_size, stride=stride,
                                upsampling=upsampling, padding=padding,
                                act_norm=act_norm, act_inplace=act_inplace)

    def forward(self, x):
        return self.conv(x)
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
class GroupConv2d(nn.Module):
    """Grouped Conv2d with optional GroupNorm + LeakyReLU(0.2) for SimVP."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 stride=1,
                 padding=0,
                 groups=1,
                 act_norm=False,
                 act_inplace=True):
        super(GroupConv2d, self).__init__()
        self.act_norm = act_norm
        # Fall back to a dense conv when channels are not divisible by groups.
        if in_channels % groups != 0:
            groups = 1
        self.conv = nn.Conv2d(
            in_channels, out_channels, kernel_size=kernel_size,
            stride=stride, padding=padding, groups=groups)
        self.norm = nn.GroupNorm(groups, out_channels)
        self.activate = nn.LeakyReLU(0.2, inplace=act_inplace)

    def forward(self, x):
        out = self.conv(x)
        return self.activate(self.norm(out)) if self.act_norm else out
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
class gInception_ST(nn.Module):
    """A IncepU block for SimVP"""

    def __init__(self, C_in, C_hid, C_out, incep_ker=[3, 5, 7, 11], groups=8):
        super(gInception_ST, self).__init__()
        # 1x1 bottleneck shared by all inception branches.
        self.conv1 = nn.Conv2d(C_in, C_hid, kernel_size=1, stride=1, padding=0)

        self.layers = nn.Sequential(*[
            GroupConv2d(C_hid, C_out, kernel_size=ker, stride=1,
                        padding=ker // 2, groups=groups, act_norm=True)
            for ker in incep_ker
        ])

    def forward(self, x):
        hidden = self.conv1(x)
        # Sum the multi-kernel branch outputs (all share the same shape).
        return sum(branch(hidden) for branch in self.layers)
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
class AttentionModule(nn.Module):
    """Large Kernel Attention for SimVP.

    Decomposes a large spatial kernel into a small depth-wise conv, a dilated
    depth-wise conv, and a 1x1 conv that produces value/gate pairs.
    Fix: removed the unused ``u = x.clone()`` from forward (dead allocation).
    """

    def __init__(self, dim, kernel_size, dilation=3):
        """
        Args:
            dim: number of channels (input and output).
            kernel_size: effective large-kernel size to approximate.
            dilation: dilation used by the decomposed spatial conv.
        """
        super().__init__()
        d_k = 2 * dilation - 1            # small depth-wise kernel
        d_p = (d_k - 1) // 2
        dd_k = kernel_size // dilation + ((kernel_size // dilation) % 2 - 1)  # odd dilated kernel
        dd_p = (dilation * (dd_k - 1) // 2)

        self.conv0 = nn.Conv2d(dim, dim, d_k, padding=d_p, groups=dim)
        self.conv_spatial = nn.Conv2d(
            dim, dim, dd_k, stride=1, padding=dd_p, groups=dim, dilation=dilation)
        self.conv1 = nn.Conv2d(dim, 2*dim, 1)

    def forward(self, x):
        """Return gated attention features with the same shape as ``x``."""
        attn = self.conv0(x)            # depth-wise conv
        attn = self.conv_spatial(attn)  # depth-wise dilation convolution

        f_g = self.conv1(attn)          # value/gate pairs along channels
        split_dim = f_g.shape[1] // 2
        f_x, g_x = torch.split(f_g, split_dim, dim=1)
        # Sigmoid gate modulates the value branch.
        return torch.sigmoid(g_x) * f_x
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
class SpatialAttention(nn.Module):
    """A Spatial Attention block for SimVP"""

    def __init__(self, d_model, kernel_size=21, attn_shortcut=True):
        super().__init__()

        self.proj_1 = nn.Conv2d(d_model, d_model, 1)  # 1x1 input projection
        self.activation = nn.GELU()
        self.spatial_gating_unit = AttentionModule(d_model, kernel_size)
        self.proj_2 = nn.Conv2d(d_model, d_model, 1)  # 1x1 output projection
        self.attn_shortcut = attn_shortcut

    def forward(self, x):
        residual = x.clone() if self.attn_shortcut else None
        out = self.proj_1(x)
        out = self.activation(out)
        out = self.spatial_gating_unit(out)
        out = self.proj_2(out)
        if residual is not None:
            out = out + residual
        return out
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
class GASubBlock(nn.Module):
    """A GABlock (gSTA) for SimVP.

    Pre-norm transformer-style block: BatchNorm + SpatialAttention, then
    BatchNorm + MixMlp, each with a learnable per-channel layer scale and
    stochastic depth (DropPath) on the residual branches.
    """

    def __init__(self, dim, kernel_size=21, mlp_ratio=4.,
                 drop=0., drop_path=0.1, init_value=1e-2, act_layer=nn.GELU):
        """
        Args:
            dim: channel dimension.
            kernel_size: large-kernel size passed to SpatialAttention.
            mlp_ratio: hidden expansion of the MixMlp.
            drop: dropout rate inside the MLP.
            drop_path: stochastic-depth rate (0 disables DropPath).
            init_value: initial value of the layer-scale parameters.
            act_layer: activation class used by the MLP.
        """
        super().__init__()
        self.norm1 = nn.BatchNorm2d(dim)
        self.attn = SpatialAttention(dim, kernel_size)
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()

        self.norm2 = nn.BatchNorm2d(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = MixMlp(
            in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)

        # Per-channel residual scaling (layer scale), learned from init_value.
        self.layer_scale_1 = nn.Parameter(init_value * torch.ones((dim)), requires_grad=True)
        self.layer_scale_2 = nn.Parameter(init_value * torch.ones((dim)), requires_grad=True)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        # Standard init: trunc-normal linear, unit LayerNorm, fan-out conv.
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    @torch.jit.ignore
    def no_weight_decay(self):
        # Exclude layer-scale parameters from weight decay.
        return {'layer_scale_1', 'layer_scale_2'}

    def forward(self, x):
        # Residual attention branch, scaled per channel.
        x = x + self.drop_path(
            self.layer_scale_1.unsqueeze(-1).unsqueeze(-1) * self.attn(self.norm1(x)))
        # Residual MLP branch, scaled per channel.
        x = x + self.drop_path(
            self.layer_scale_2.unsqueeze(-1).unsqueeze(-1) * self.mlp(self.norm2(x)))
        return x
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
class ConvMixerSubBlock(nn.Module):
|
| 229 |
+
"""A block of ConvMixer."""
|
| 230 |
+
|
| 231 |
+
def __init__(self, dim, kernel_size=9, activation=nn.GELU):
|
| 232 |
+
super().__init__()
|
| 233 |
+
# spatial mixing
|
| 234 |
+
self.conv_dw = nn.Conv2d(dim, dim, kernel_size, groups=dim, padding="same")
|
| 235 |
+
self.act_1 = activation()
|
| 236 |
+
self.norm_1 = nn.BatchNorm2d(dim)
|
| 237 |
+
# channel mixing
|
| 238 |
+
self.conv_pw = nn.Conv2d(dim, dim, kernel_size=1)
|
| 239 |
+
self.act_2 = activation()
|
| 240 |
+
self.norm_2 = nn.BatchNorm2d(dim)
|
| 241 |
+
|
| 242 |
+
self.apply(self._init_weights)
|
| 243 |
+
|
| 244 |
+
def _init_weights(self, m):
|
| 245 |
+
if isinstance(m, nn.BatchNorm2d):
|
| 246 |
+
nn.init.constant_(m.bias, 0)
|
| 247 |
+
nn.init.constant_(m.weight, 1.0)
|
| 248 |
+
elif isinstance(m, nn.Conv2d):
|
| 249 |
+
fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
|
| 250 |
+
fan_out //= m.groups
|
| 251 |
+
m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
|
| 252 |
+
if m.bias is not None:
|
| 253 |
+
m.bias.data.zero_()
|
| 254 |
+
|
| 255 |
+
@torch.jit.ignore
|
| 256 |
+
def no_weight_decay(self):
|
| 257 |
+
return dict()
|
| 258 |
+
|
| 259 |
+
def forward(self, x):
|
| 260 |
+
x = x + self.norm_1(self.act_1(self.conv_dw(x)))
|
| 261 |
+
x = self.norm_2(self.act_2(self.conv_pw(x)))
|
| 262 |
+
return x
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
class ConvNeXtSubBlock(ConvNeXtBlock):
    """A block of ConvNeXt."""

    def __init__(self, dim, mlp_ratio=4., drop=0., drop_path=0.1):
        super().__init__(dim, mlp_ratio=mlp_ratio,
                         drop_path=drop_path, ls_init_value=1e-6, conv_mlp=True)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, (nn.LayerNorm, nn.GroupNorm, nn.BatchNorm2d)):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            # He-style init scaled by fan-out per group
            receptive = m.kernel_size[0] * m.kernel_size[1]
            fan_out = (receptive * m.out_channels) // m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    @torch.jit.ignore
    def no_weight_decay(self):
        # layer-scale parameter is excluded from weight decay
        return {'gamma'}

    def forward(self, x):
        # dw conv -> norm -> MLP, scaled by gamma, as a single residual branch
        scale = self.gamma.reshape(1, -1, 1, 1)
        return x + self.drop_path(scale * self.mlp(self.norm(self.conv_dw(x))))
class HorNetSubBlock(HorBlock):
    """A block of HorNet."""

    def __init__(self, dim, mlp_ratio=4., drop_path=0.1, init_value=1e-6):
        super().__init__(dim, mlp_ratio=mlp_ratio, drop_path=drop_path, init_value=init_value)
        self.apply(self._init_weights)

    @torch.jit.ignore
    def no_weight_decay(self):
        # layer-scale parameters are excluded from weight decay
        return {'gamma1', 'gamma2'}

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            # He-style init scaled by fan-out per group
            receptive = m.kernel_size[0] * m.kernel_size[1]
            fan_out = (receptive * m.out_channels) // m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()
class MLPMixerSubBlock(MixerBlock):
    """A block of MLP-Mixer."""

    def __init__(self, dim, input_resolution=None, mlp_ratio=4., drop=0., drop_path=0.1):
        # the mixer operates on a flattened H*W token sequence
        seq_len = input_resolution[0] * input_resolution[1]
        super().__init__(dim, seq_len=seq_len,
                         mlp_ratio=(0.5, mlp_ratio), drop_path=drop_path, drop=drop)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, (nn.LayerNorm, nn.GroupNorm, nn.BatchNorm2d)):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    @torch.jit.ignore
    def no_weight_decay(self):
        return dict()

    def forward(self, x):
        # (B, C, H, W) -> (B, H*W, C) token layout expected by MixerBlock
        B, C, H, W = x.shape
        tokens = x.flatten(2).transpose(1, 2)
        tokens = tokens + self.drop_path(
            self.mlp_tokens(self.norm1(tokens).transpose(1, 2)).transpose(1, 2))
        tokens = tokens + self.drop_path(self.mlp_channels(self.norm2(tokens)))
        # back to (B, C, H, W)
        return tokens.reshape(B, H, W, C).permute(0, 3, 1, 2)
class MogaSubBlock(nn.Module):
    """A block of MogaNet."""

    def __init__(self, embed_dims, mlp_ratio=4., drop_rate=0., drop_path_rate=0., init_value=1e-5,
                 attn_dw_dilation=[1, 2, 3], attn_channel_split=[1, 3, 4]):
        super(MogaSubBlock, self).__init__()
        self.out_channels = embed_dims
        # spatial aggregation branch
        self.norm1 = nn.BatchNorm2d(embed_dims)
        self.attn = MultiOrderGatedAggregation(
            embed_dims, attn_dw_dilation=attn_dw_dilation, attn_channel_split=attn_channel_split)
        self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
        # channel aggregation branch
        self.norm2 = nn.BatchNorm2d(embed_dims)
        self.mlp = ChannelAggregationFFN(
            embed_dims=embed_dims, mlp_hidden_dims=int(embed_dims * mlp_ratio), ffn_drop=drop_rate)
        # learnable per-channel residual scaling (layer scale)
        self.layer_scale_1 = nn.Parameter(init_value * torch.ones((1, embed_dims, 1, 1)), requires_grad=True)
        self.layer_scale_2 = nn.Parameter(init_value * torch.ones((1, embed_dims, 1, 1)), requires_grad=True)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, (nn.LayerNorm, nn.GroupNorm, nn.BatchNorm2d)):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            # He-style init scaled by fan-out per group
            receptive = m.kernel_size[0] * m.kernel_size[1]
            fan_out = (receptive * m.out_channels) // m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    @torch.jit.ignore
    def no_weight_decay(self):
        return {'layer_scale_1', 'layer_scale_2', 'sigma'}

    def forward(self, x):
        x = x + self.drop_path(self.layer_scale_1 * self.attn(self.norm1(x)))
        return x + self.drop_path(self.layer_scale_2 * self.mlp(self.norm2(x)))
class PoolFormerSubBlock(PoolFormerBlock):
    """A block of PoolFormer."""

    def __init__(self, dim, mlp_ratio=4., drop=0., drop_path=0.1):
        super().__init__(dim, pool_size=3, mlp_ratio=mlp_ratio, drop_path=drop_path,
                         drop=drop, init_value=1e-5)
        self.apply(self._init_weights)

    @torch.jit.ignore
    def no_weight_decay(self):
        # layer-scale parameters are excluded from weight decay
        return {'layer_scale_1', 'layer_scale_2'}

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, (nn.LayerNorm, nn.GroupNorm, nn.BatchNorm2d)):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
class SwinSubBlock(SwinTransformerBlock):
    """A block of Swin Transformer."""

    def __init__(self, dim, input_resolution=None, layer_i=0, mlp_ratio=4., drop=0., drop_path=0.1):
        # pick a window size compatible with the input resolution, capped at 8
        window_size = 7 if input_resolution[0] % 7 == 0 else max(4, input_resolution[0] // 16)
        window_size = min(8, window_size)
        # alternate W-MSA (even layers) / SW-MSA (odd layers)
        shift_size = 0 if (layer_i % 2 == 0) else window_size // 2
        super().__init__(dim, input_resolution, num_heads=8, window_size=window_size,
                         shift_size=shift_size, mlp_ratio=mlp_ratio,
                         drop_path=drop_path, attn_drop=drop, proj_drop=drop, qkv_bias=True)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        # truncated-normal for linear layers, unit-affine for norm layers
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, (nn.LayerNorm, nn.GroupNorm, nn.BatchNorm2d)):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    @torch.jit.ignore
    def no_weight_decay(self):
        return {}

    def forward(self, x):
        # adapt (B, C, H, W) feature maps to the layout the timm Swin block expects
        B, C, H, W = x.shape
        x = x.flatten(2).transpose(1, 2)
        # NOTE(review): norm1 is applied here and the pre-norm parent forward
        # presumably applies it again — looks like double normalization; confirm intended
        x = self.norm1(x)
        x = x.view(B, H, W, C)
        x = super().forward(x)

        # back to (B, C, H, W)
        return x.reshape(B, H, W, C).permute(0, 3, 1, 2)
def UniformerSubBlock(embed_dims, mlp_ratio=4., drop=0., drop_path=0.,
                      init_value=1e-6, block_type='Conv'):
    """Build a block of Uniformer.

    Returns a convolutional block ('Conv') or a self-attention block ('MHSA').
    """
    assert block_type in ['Conv', 'MHSA']
    if block_type == 'MHSA':
        return SABlock(dim=embed_dims, num_heads=8, mlp_ratio=mlp_ratio, qkv_bias=True,
                       drop=drop, drop_path=drop_path, init_value=init_value)
    return CBlock(dim=embed_dims, mlp_ratio=mlp_ratio, drop=drop, drop_path=drop_path)
class VANSubBlock(VANBlock):
    """A block of VAN."""

    def __init__(self, dim, mlp_ratio=4., drop=0., drop_path=0., init_value=1e-2, act_layer=nn.GELU):
        super().__init__(dim=dim, mlp_ratio=mlp_ratio, drop=drop, drop_path=drop_path,
                         init_value=init_value, act_layer=act_layer)
        self.apply(self._init_weights)

    @torch.jit.ignore
    def no_weight_decay(self):
        # layer-scale parameters are excluded from weight decay
        return {'layer_scale_1', 'layer_scale_2'}

    def _init_weights(self, m):
        if isinstance(m, (nn.LayerNorm, nn.GroupNorm, nn.BatchNorm2d)):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            # He-style init scaled by fan-out per group
            receptive = m.kernel_size[0] * m.kernel_size[1]
            fan_out = (receptive * m.out_channels) // m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()
class ViTSubBlock(ViTBlock):
    """A block of Vision Transformer."""

    def __init__(self, dim, mlp_ratio=4., drop=0., drop_path=0.1):
        super().__init__(dim=dim, num_heads=8, mlp_ratio=mlp_ratio, qkv_bias=True,
                         attn_drop=drop, proj_drop=0, drop_path=drop_path, act_layer=nn.GELU, norm_layer=nn.LayerNorm)
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, (nn.LayerNorm, nn.GroupNorm, nn.BatchNorm2d)):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    @torch.jit.ignore
    def no_weight_decay(self):
        return {}

    def forward(self, x):
        # (B, C, H, W) -> token sequence -> attention + MLP -> (B, C, H, W)
        B, C, H, W = x.shape
        tokens = x.flatten(2).transpose(1, 2)
        tokens = tokens + self.drop_path(self.attn(self.norm1(tokens)))
        tokens = tokens + self.drop_path(self.mlp(self.norm2(tokens)))
        return tokens.reshape(B, H, W, C).permute(0, 3, 1, 2)
class TemporalAttention(nn.Module):
    """A Temporal Attention block for Temporal Attention Unit"""

    def __init__(self, d_model, kernel_size=21, attn_shortcut=True):
        super().__init__()
        self.proj_1 = nn.Conv2d(d_model, d_model, 1)  # point-wise projection in
        self.activation = nn.GELU()
        self.spatial_gating_unit = TemporalAttentionModule(d_model, kernel_size)
        self.proj_2 = nn.Conv2d(d_model, d_model, 1)  # point-wise projection out
        self.attn_shortcut = attn_shortcut

    def forward(self, x):
        # proj_1 -> GELU -> temporal gating -> proj_2, with optional residual.
        shortcut = x.clone() if self.attn_shortcut else None
        out = self.proj_1(x)
        out = self.activation(out)
        out = self.spatial_gating_unit(out)
        out = self.proj_2(out)
        if self.attn_shortcut:
            out = out + shortcut
        return out
class TemporalAttentionModule(nn.Module):
|
| 543 |
+
"""Large Kernel Attention for SimVP"""
|
| 544 |
+
|
| 545 |
+
def __init__(self, dim, kernel_size, dilation=3, reduction=16):
|
| 546 |
+
super().__init__()
|
| 547 |
+
d_k = 2 * dilation - 1
|
| 548 |
+
d_p = (d_k - 1) // 2
|
| 549 |
+
dd_k = kernel_size // dilation + ((kernel_size // dilation) % 2 - 1)
|
| 550 |
+
dd_p = (dilation * (dd_k - 1) // 2)
|
| 551 |
+
|
| 552 |
+
self.conv0 = nn.Conv2d(dim, dim, d_k, padding=d_p, groups=dim)
|
| 553 |
+
self.conv_spatial = nn.Conv2d(
|
| 554 |
+
dim, dim, dd_k, stride=1, padding=dd_p, groups=dim, dilation=dilation)
|
| 555 |
+
self.conv1 = nn.Conv2d(dim, dim, 1)
|
| 556 |
+
|
| 557 |
+
self.reduction = max(dim // reduction, 4)
|
| 558 |
+
self.avg_pool = nn.AdaptiveAvgPool2d(1)
|
| 559 |
+
self.fc = nn.Sequential(
|
| 560 |
+
nn.Linear(dim, dim // self.reduction, bias=False), # reduction
|
| 561 |
+
nn.ReLU(True),
|
| 562 |
+
nn.Linear(dim // self.reduction, dim, bias=False), # expansion
|
| 563 |
+
nn.Sigmoid()
|
| 564 |
+
)
|
| 565 |
+
|
| 566 |
+
def forward(self, x):
|
| 567 |
+
u = x.clone()
|
| 568 |
+
attn = self.conv0(x) # depth-wise conv
|
| 569 |
+
attn = self.conv_spatial(attn) # depth-wise dilation convolution
|
| 570 |
+
f_x = self.conv1(attn) # 1x1 conv
|
| 571 |
+
# append a se operation
|
| 572 |
+
b, c, _, _ = x.size()
|
| 573 |
+
se_atten = self.avg_pool(x).view(b, c)
|
| 574 |
+
se_atten = self.fc(se_atten).view(b, c, 1, 1)
|
| 575 |
+
return se_atten * f_x * u
|
| 576 |
+
|
| 577 |
+
|
| 578 |
+
class TAUSubBlock(GASubBlock):
    """A TAUBlock (tau) for Temporal Attention Unit"""

    def __init__(self, dim, kernel_size=21, mlp_ratio=4.,
                 drop=0., drop_path=0.1, init_value=1e-2, act_layer=nn.GELU):
        super().__init__(dim=dim, kernel_size=kernel_size, mlp_ratio=mlp_ratio,
                         drop=drop, drop_path=drop_path, init_value=init_value, act_layer=act_layer)
        # swap GASubBlock's spatial attention for TAU's temporal attention
        self.attn = TemporalAttention(dim, kernel_size)
utilpack/swinlstm_modules.py
ADDED
|
@@ -0,0 +1,317 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
from timm.models.swin_transformer import SwinTransformerBlock, window_reverse, PatchEmbed, PatchMerging, window_partition
|
| 4 |
+
from timm.layers import to_2tuple
|
| 5 |
+
|
| 6 |
+
class SwinLSTMCell(nn.Module):
    def __init__(self, dim, input_resolution, num_heads, window_size, depth,
                 mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0.,
                 drop_path=0., norm_layer=nn.LayerNorm, flag=None):
        """
        Args:
            flag: 0 UpSample 1 DownSample 2 STconvert
        """
        super(SwinLSTMCell, self).__init__()
        # a stack of Swin Transformer blocks shared across time steps
        self.STBs = nn.ModuleList(
            [STB(i, dim=dim, input_resolution=input_resolution, depth=depth,
                 num_heads=num_heads, window_size=window_size, mlp_ratio=mlp_ratio,
                 qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop, attn_drop=attn_drop,
                 drop_path=drop_path, norm_layer=norm_layer, flag=flag)
             for i in range(depth)])

    def forward(self, xt, hidden_states):
        """
        Args:
            xt: input for t period
            hidden_states: [hx, cx] hidden_states for t-1 period
        """
        # lazily initialise the hidden/cell states on the first step
        if hidden_states is None:
            B, L, C = xt.shape
            hx = torch.zeros(B, L, C).to(xt.device)
            cx = torch.zeros(B, L, C).to(xt.device)
        else:
            hx, cx = hidden_states

        # first block fuses xt with the previous hidden state; subsequent
        # blocks alternate between re-injecting xt (even) and running plain (odd)
        outputs = []
        for index, layer in enumerate(self.STBs):
            if index == 0:
                outputs.append(layer(xt, hx))
                continue
            second_arg = xt if index % 2 == 0 else None
            outputs.append(layer(outputs[-1], second_arg))

        # LSTM-style gating on the final block output
        o_t = outputs[-1]
        Ft = torch.sigmoid(o_t)
        cell = torch.tanh(o_t)

        Ct = Ft * (cx + cell)
        Ht = Ft * torch.tanh(Ct)

        return Ht, (Ht, Ct)
class STB(SwinTransformerBlock):
    # Swin Transformer Block extended with a hidden-state fusion input (hx).

    def __init__(self, index, dim, input_resolution, depth, num_heads, window_size,
                 mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0.,
                 drop_path=0., norm_layer=nn.LayerNorm, flag=None):
        # select this block's stochastic-depth rate:
        # flag 0 (UpSample): per-layer list indexed in reverse; flag 1 (DownSample):
        # per-layer list indexed forward; flag 2 (STconvert): a single scalar
        if flag == 0:
            drop_path = drop_path[depth - index - 1]
        elif flag == 1:
            drop_path = drop_path[index]
        elif flag == 2:
            drop_path = drop_path
        super(STB, self).__init__(dim=dim, input_resolution=input_resolution,
                                  num_heads=num_heads, window_size=window_size,
                                  shift_size=0 if (index % 2 == 0) else window_size // 2,
                                  mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
                                  drop=drop, attn_drop=attn_drop,
                                  drop_path=drop_path,
                                  norm_layer=norm_layer)
        # fuses [x ; hx] (2*dim channels) back down to dim
        self.red = nn.Linear(2 * dim, dim)

    def forward(self, x, hx=None):
        H, W = self.input_resolution
        B, L, C = x.shape
        assert L == H * W, "input feature has wrong size"

        shortcut = x
        x = self.norm1(x)
        # when a hidden state is provided, concatenate and project back to C
        if hx is not None:
            hx = self.norm1(hx)
            x = torch.cat((x, hx), -1)
            x = self.red(x)
        x = x.view(B, H, W, C)

        # cyclic shift
        if self.shift_size > 0:
            shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))
        else:
            shifted_x = x

        # partition windows
        x_windows = window_partition(shifted_x, self.window_size)  # num_win*B, window_size, window_size, C
        x_windows = x_windows.view(-1, self.window_size * self.window_size, C)  # num_win*B, window_size*window_size, C

        # W-MSA/SW-MSA
        attn_windows = self.attn(x_windows, mask=self.attn_mask)  # num_win*B, window_size*window_size, C

        # merge windows
        attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C)
        shifted_x = window_reverse(attn_windows, self.window_size, H, W)  # B H' W' C

        # reverse cyclic shift
        if self.shift_size > 0:
            x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2))
        else:
            x = shifted_x
        x = x.view(B, H * W, C)

        # FFN
        x = shortcut + self.drop_path(x)
        x = x + self.drop_path(self.mlp(self.norm2(x)))

        return x
class PatchInflated(nn.Module):
    r""" Tensor to Patch Inflating

    Args:
        in_chans (int): Number of input image channels.
        embed_dim (int): Number of linear projection output channels.
        input_resolution (tuple[int]): Input resulotion.
    """

    def __init__(self, in_chans, embed_dim, input_resolution, stride=2, padding=1, output_padding=1):
        super(PatchInflated, self).__init__()

        self.input_resolution = input_resolution
        # transposed conv maps token embeddings back to pixel space,
        # doubling the spatial resolution with the default stride of 2
        self.Conv = nn.ConvTranspose2d(in_channels=embed_dim, out_channels=in_chans, kernel_size=(3, 3),
                                       stride=to_2tuple(stride), padding=to_2tuple(padding),
                                       output_padding=to_2tuple(output_padding))

    def forward(self, x):
        H, W = self.input_resolution
        B, L, C = x.shape
        assert L == H * W, "input feature has wrong size"
        assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even."

        # (B, H*W, C) -> (B, C, H, W) for the transposed convolution
        return self.Conv(x.view(B, H, W, C).permute(0, 3, 1, 2))
class PatchExpanding(nn.Module):
    r""" Patch Expanding Layer.

    Args:
        input_resolution (tuple[int]): Resolution of input feature.
        dim (int): Number of input channels.
        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
    """

    def __init__(self, input_resolution, dim, dim_scale=2, norm_layer=nn.LayerNorm):
        super(PatchExpanding, self).__init__()
        self.input_resolution = input_resolution
        self.dim = dim
        # double the channels first so the 2x2 pixel-shuffle below leaves
        # dim // dim_scale channels per output position
        if dim_scale == 2:
            self.expand = nn.Linear(dim, 2 * dim, bias=False)
        else:
            self.expand = nn.Identity()
        self.norm = norm_layer(dim // dim_scale)

    def forward(self, x):
        H, W = self.input_resolution
        x = self.expand(x)
        B, L, C = x.shape
        assert L == H * W, "input feature has wrong size"

        # rearrange (B, H*W, C) -> (B, 2H * 2W, C // 4): each token spreads
        # its channels over a 2x2 spatial neighbourhood
        x = x.view(B, H, W, 2, 2, C // 4)
        x = x.permute(0, 1, 3, 2, 4, 5).reshape(B, H * 2, W * 2, C // 4)
        return self.norm(x.view(B, -1, C // 4))
class UpSample(nn.Module):
    """Decoder half of SwinLSTM: stacked SwinLSTMCell layers, each followed by
    a PatchExpanding upsample, finishing with PatchInflated back to image space."""

    def __init__(self, img_size, patch_size, in_chans, embed_dim, depths_upsample, num_heads, window_size, mlp_ratio=4.,
                 qkv_bias=True, qk_scale=None, drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1,
                 norm_layer=nn.LayerNorm, flag=0):
        super(UpSample, self).__init__()

        self.img_size = img_size
        self.num_layers = len(depths_upsample)
        self.embed_dim = embed_dim
        self.mlp_ratio = mlp_ratio
        # patch_embed is constructed here to obtain grid_size (patch resolution)
        self.patch_embed = PatchEmbed(img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim, norm_layer=nn.LayerNorm)
        patches_resolution = self.patch_embed.grid_size
        self.Unembed = PatchInflated(in_chans=in_chans, embed_dim=embed_dim, input_resolution=patches_resolution)

        # stochastic-depth rates, increasing linearly across all blocks
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths_upsample))]

        self.layers = nn.ModuleList()
        self.upsample = nn.ModuleList()

        for i_layer in range(self.num_layers):
            # decoder layer i works at 1/2^(num_layers - i) of the patch resolution
            resolution1 = (patches_resolution[0] // (2 ** (self.num_layers - i_layer)))
            resolution2 = (patches_resolution[1] // (2 ** (self.num_layers - i_layer)))

            dimension = int(embed_dim * 2 ** (self.num_layers - i_layer))
            upsample = PatchExpanding(input_resolution=(resolution1, resolution2), dim=dimension)

            # depths/num_heads are indexed in reverse: the deepest encoder stage
            # corresponds to the first decoder layer
            layer = SwinLSTMCell(dim=dimension, input_resolution=(resolution1, resolution2),
                                 depth=depths_upsample[(self.num_layers - 1 - i_layer)],
                                 num_heads=num_heads[(self.num_layers - 1 - i_layer)],
                                 window_size=window_size,
                                 mlp_ratio=self.mlp_ratio,
                                 qkv_bias=qkv_bias, qk_scale=qk_scale,
                                 drop=drop_rate, attn_drop=attn_drop_rate,
                                 drop_path=dpr[sum(depths_upsample[:(self.num_layers - 1 - i_layer)]):
                                               sum(depths_upsample[:(self.num_layers - 1 - i_layer) + 1])],
                                 norm_layer=norm_layer, flag=flag)

            self.layers.append(layer)
            self.upsample.append(upsample)

    def forward(self, x, y):
        # y: per-layer hidden states (hx, cx) carried over from the previous timestep
        hidden_states_up = []

        for index, layer in enumerate(self.layers):
            x, hidden_state = layer(x, y[index])
            x = self.upsample[index](x)
            hidden_states_up.append(hidden_state)

        # project tokens back to image space; sigmoid bounds pixel values to (0, 1)
        x = torch.sigmoid(self.Unembed(x))

        return hidden_states_up, x
class DownSample(nn.Module):
    """Encoder half of SwinLSTM: patch embedding, then stacked SwinLSTMCell
    layers each followed by a PatchMerging downsample."""

    def __init__(self, img_size, patch_size, in_chans, embed_dim, depths_downsample, num_heads, window_size,
                 mlp_ratio=4., qkv_bias=True, qk_scale=None, drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1,
                 norm_layer=nn.LayerNorm, flag=1):
        super(DownSample, self).__init__()

        self.num_layers = len(depths_downsample)
        self.embed_dim = embed_dim
        self.mlp_ratio = mlp_ratio
        self.patch_embed = PatchEmbed(img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim, norm_layer=nn.LayerNorm)
        patches_resolution = self.patch_embed.grid_size

        # stochastic-depth rates, increasing linearly across all blocks
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths_downsample))]

        self.layers = nn.ModuleList()
        self.downsample = nn.ModuleList()

        for i_layer in range(self.num_layers):
            # encoder layer i works at 1/2^i of the patch resolution; channels
            # double with each downsampling stage
            downsample = PatchMerging(input_resolution=(patches_resolution[0] // (2 ** i_layer),
                                                        patches_resolution[1] // (2 ** i_layer)),
                                      dim=int(embed_dim * 2 ** i_layer))

            layer = SwinLSTMCell(dim=int(embed_dim * 2 ** i_layer),
                                 input_resolution=(patches_resolution[0] // (2 ** i_layer),
                                                   patches_resolution[1] // (2 ** i_layer)),
                                 depth=depths_downsample[i_layer],
                                 num_heads=num_heads[i_layer],
                                 window_size=window_size,
                                 mlp_ratio=self.mlp_ratio,
                                 qkv_bias=qkv_bias, qk_scale=qk_scale,
                                 drop=drop_rate, attn_drop=attn_drop_rate,
                                 drop_path=dpr[sum(depths_downsample[:i_layer]):sum(depths_downsample[:i_layer + 1])],
                                 norm_layer=norm_layer, flag=flag)

            self.layers.append(layer)
            self.downsample.append(downsample)

    def forward(self, x, y):
        # y: per-layer hidden states (hx, cx) from the previous timestep

        x = self.patch_embed(x)

        hidden_states_down = []

        for index, layer in enumerate(self.layers):
            x, hidden_state = layer(x, y[index])
            x = self.downsample[index](x)
            hidden_states_down.append(hidden_state)

        return hidden_states_down, x
class STconvert(nn.Module):
    """Single-scale SwinLSTM: embed patches, run one SwinLSTMCell, then
    inflate tokens back to an image with a sigmoid output."""

    def __init__(self, img_size, patch_size, in_chans, embed_dim, depths, num_heads,
                 window_size, mlp_ratio=4., qkv_bias=True, qk_scale=None, drop_rate=0.,
                 attn_drop_rate=0., drop_path_rate=0.1, norm_layer=nn.LayerNorm, flag=2):
        super(STconvert, self).__init__()

        self.embed_dim = embed_dim
        self.mlp_ratio = mlp_ratio
        self.patch_embed = PatchEmbed(img_size=img_size, patch_size=patch_size,
                                      in_chans=in_chans, embed_dim=embed_dim,
                                      norm_layer=norm_layer)
        grid = self.patch_embed.grid_size

        self.patch_inflated = PatchInflated(in_chans=in_chans, embed_dim=embed_dim,
                                            input_resolution=grid)

        self.layer = SwinLSTMCell(dim=embed_dim,
                                  input_resolution=(grid[0], grid[1]),
                                  depth=depths, num_heads=num_heads,
                                  window_size=window_size, mlp_ratio=mlp_ratio,
                                  qkv_bias=qkv_bias, qk_scale=qk_scale,
                                  drop=drop_rate, attn_drop=attn_drop_rate,
                                  drop_path=drop_path_rate, norm_layer=norm_layer,
                                  flag=flag)

    def forward(self, x, h=None):
        tokens = self.patch_embed(x)
        tokens, hidden_state = self.layer(tokens, h)
        out = torch.sigmoid(self.patch_inflated(tokens))
        return out, hidden_state
utilpack/wast_modules.py
ADDED
|
@@ -0,0 +1,577 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch, pywt
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
from einops import rearrange
|
| 4 |
+
from functools import partial
|
| 5 |
+
from itertools import accumulate
|
| 6 |
+
from timm.layers import DropPath, activations
|
| 7 |
+
from timm.models._efficientnet_blocks import SqueezeExcite, InvertedResidual
|
| 8 |
+
|
| 9 |
+
# version adaptation for PyTorch > 1.7.1
|
| 10 |
+
IS_HIGH_VERSION = tuple(map(int, torch.__version__.split('+')[0].split('.'))) > (1, 7, 1)
|
| 11 |
+
if IS_HIGH_VERSION:
|
| 12 |
+
import torch.fft
|
| 13 |
+
|
| 14 |
+
class HighFocalFrequencyLoss(nn.Module):
    """Focal frequency loss restricted to high-frequency spectrum regions.

    Inputs are first decomposed ``level`` times with a 2D wavelet transform
    (keeping the LL band each time), then compared in the Fourier domain with
    a dynamically computed focal weight. Coefficients inside a corner radius
    of ``tau * max(H, W)`` (the DC/low-frequency corners of the unshifted
    spectrum) are masked out.

    NOTE(review): the module hard-codes ``.cuda()`` for the DWT and the
    frequency mask, so it cannot run CPU-only as written — confirm intended.

    Example:
        fake = torch.randn(4, 3, 128, 64)
        real = torch.randn(4, 3, 128, 64)
        hffl = HighFocalFrequencyLoss()

        loss = hffl(fake, real)
        print(loss)
    """

    def __init__(self, loss_weight=0.001, level=1, tau=0.1, alpha=1.0, patch_factor=1, ave_spectrum=False, log_matrix=True, batch_matrix=False):
        super(HighFocalFrequencyLoss, self).__init__()
        self.loss_weight = loss_weight    # scalar multiplier on the final loss
        self.alpha = alpha                # exponent sharpening the focal weight
        self.patch_factor = patch_factor  # split each image into patch_factor^2 crops
        self.ave_spectrum = ave_spectrum  # average spectra over the minibatch first
        self.log_matrix = log_matrix      # log-compress the online weight matrix
        self.batch_matrix = batch_matrix  # normalize weights with batch statistics
        self.level = level                # number of DWT levels applied to inputs
        self.tau = tau                    # relative radius of suppressed low-freq corners
        self.DWT = WaveletTransform2D().cuda()

    def tensor2freq(self, x):
        """Crop ``x`` into patches and return their 2D DFT as (..., 2) re/im pairs."""
        patch_factor = self.patch_factor
        _, _, h, w = x.shape
        assert h % patch_factor == 0 and w % patch_factor == 0, (
            'Patch factor should be divisible by image height and width')
        patch_list = []
        patch_h = h // patch_factor
        patch_w = w // patch_factor
        for i in range(patch_factor):
            for j in range(patch_factor):
                patch_list.append(x[:, :, i * patch_h:(i + 1) * patch_h, j * patch_w:(j + 1) * patch_w])

        # stack to patch tensor: (N, patch_factor^2, C, patch_h, patch_w)
        y = torch.stack(patch_list, 1)

        # perform 2D DFT (real-to-complex, orthonormalization)
        if IS_HIGH_VERSION:
            freq = torch.fft.fft2(y, norm='ortho')
            freq = torch.stack([freq.real, freq.imag], -1)
        else:
            freq = torch.rfft(y, 2, onesided=False, normalized=True)
        return freq

    def build_freq_mask(self, shape):
        """Return an (H, W) mask that zeros the four low-frequency corners.

        In an unshifted FFT layout the lowest frequencies sit at the four
        corners of the spectrum, hence the four circular exclusion zones.
        """
        H, W = shape[-2:]
        radius = self.tau * max(H, W)
        Y, X = torch.meshgrid(torch.arange(H), torch.arange(W))

        mask = torch.ones_like(X, dtype=torch.float32).cuda()

        centers = [(0, 0), (0, W - 1), (H - 1, 0), (H - 1, W - 1)]

        for center in centers:
            distance = torch.sqrt((X - center[1]) ** 2 + (Y - center[0]) ** 2)
            mask[distance <= radius] = 0
        return mask

    def loss_formulation(self, recon_freq, real_freq, matrix=None):
        """Weighted, masked squared spectral distance between the two spectra."""
        # spectrum weight matrix
        if matrix is not None:
            # if the matrix is predefined
            weight_matrix = matrix.detach()
        else:
            # if the matrix is calculated online: continuous, dynamic, based on current Euclidean distance
            matrix_tmp = (recon_freq - real_freq) ** 2
            matrix_tmp = torch.sqrt(matrix_tmp[..., 0] + matrix_tmp[..., 1]) ** self.alpha

            # whether to adjust the spectrum weight matrix by logarithm
            if self.log_matrix:
                matrix_tmp = torch.log(matrix_tmp + 1.0)

            # whether to calculate the spectrum weight matrix using batch-based statistics
            if self.batch_matrix:
                matrix_tmp = matrix_tmp / matrix_tmp.max()
            else:
                matrix_tmp = matrix_tmp / matrix_tmp.max(-1).values.max(-1).values[:, :, :, None, None]

            matrix_tmp[torch.isnan(matrix_tmp)] = 0.0
            matrix_tmp = torch.clamp(matrix_tmp, min=0.0, max=1.0)
            weight_matrix = matrix_tmp.clone().detach()

        assert weight_matrix.min().item() >= 0 and weight_matrix.max().item() <= 1, (
            'The values of spectrum weight matrix should be in the range [0, 1], '
            'but got Min: %.10f Max: %.10f' % (weight_matrix.min().item(), weight_matrix.max().item()))

        # frequency distance using (squared) Euclidean distance
        tmp = (recon_freq - real_freq) ** 2
        freq_distance = tmp[..., 0] + tmp[..., 1]

        # dynamic spectrum weighting (Hadamard product), low frequencies masked out
        mask = self.build_freq_mask(weight_matrix.shape)
        loss = weight_matrix * freq_distance * mask
        return torch.mean(loss)

    def frequency_loss(self, pred, target, matrix=None):
        """Forward function to calculate focal frequency loss.

        Args:
            pred (torch.Tensor): of shape (N, C, H, W). Predicted tensor.
            target (torch.Tensor): of shape (N, C, H, W). Target tensor.
            matrix (torch.Tensor, optional): Element-wise spectrum weight matrix.
                Default: None (If set to None: calculated online, dynamic).
        """
        pred_freq = self.tensor2freq(pred)
        target_freq = self.tensor2freq(target)

        # whether to use minibatch average spectrum
        if self.ave_spectrum:
            pred_freq = torch.mean(pred_freq, 0, keepdim=True)
            target_freq = torch.mean(target_freq, 0, keepdim=True)

        return self.loss_formulation(pred_freq, target_freq, matrix)

    def forward(self, pred, target, matrix=None, **kwargs):
        """Accumulate the frequency loss over ``self.level`` DWT levels.

        Pass ``reshape=True`` to fold a (B, T, C, H, W) video batch into
        (B*T, C, H, W) first.
        """
        # FIX: previously kwargs["reshape"] was read unconditionally, raising
        # KeyError whenever the caller omitted reshape= (the class docstring
        # example hffl(fake, real) crashed). Default to no reshape.
        reshape = kwargs.get("reshape", False)
        pred = rearrange(pred, 'b t c h w -> (b t) c h w') if reshape is True else pred
        target = rearrange(target, 'b t c h w -> (b t) c h w') if reshape is True else target

        loss = 0
        for _level in range(self.level):
            # keep only the LL band at each level and compare its spectrum
            pred, _, _, _ = self.DWT(pred)
            target, _, _, _ = self.DWT(target)
            loss += self.frequency_loss(pred, target, matrix)
        return loss * self.loss_weight
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
class WaveletTransform2D(nn.Module):
    """Single-level two-dimensional (inverse) discrete wavelet transform.

    Example:
        loss = nn.MSELoss()
        data = torch.rand(1, 3, 128, 256)
        DWT = WaveletTransform2D()
        IDWT = WaveletTransform2D(inverse=True)

        LL, LH, HL, HH = DWT(data)
        recdata = IDWT([LL, LH, HL, HH])
        print(loss(data, recdata))
    """

    def __init__(self, inverse=False, wavelet="haar", mode="constant"):
        super(WaveletTransform2D, self).__init__()
        self.mode = mode
        self.inverse = inverse
        wavelet = pywt.Wavelet(wavelet)

        if isinstance(wavelet, tuple):
            dec_lo, dec_hi, rec_lo, rec_hi = wavelet
        else:
            dec_lo, dec_hi, rec_lo, rec_hi = wavelet.filter_bank

        if inverse is False:
            # decomposition filters are applied time-reversed (convolution
            # vs. correlation convention)
            lo = torch.tensor(dec_lo).flip(-1).unsqueeze(0)
            hi = torch.tensor(dec_hi).flip(-1).unsqueeze(0)
        else:
            lo = torch.tensor(rec_lo).unsqueeze(0)
            hi = torch.tensor(rec_hi).unsqueeze(0)
        self.build_filters(lo, hi)

    def build_filters(self, lo, hi):
        """Build the four separable 2D kernels from the 1D filter pair."""
        self.dim_size = lo.shape[-1]
        band_kernels = [
            self.outer(lo, lo),  # LL
            self.outer(hi, lo),  # LH
            self.outer(lo, hi),  # HL
            self.outer(hi, hi),  # HH
        ]
        filters = torch.stack(band_kernels, dim=0).unsqueeze(1)
        self.register_buffer('filters', filters)  # [4, 1, height, width]

    def outer(self, a: torch.Tensor, b: torch.Tensor):
        """Torch implementation of numpy's outer for 1d vectors."""
        col = torch.unsqueeze(torch.reshape(a, [-1]), dim=-1)
        row = torch.unsqueeze(torch.reshape(b, [-1]), dim=0)
        return col * row

    def get_pad(self, data_len: int, filter_len: int):
        """Left/right pad amounts; the right pad absorbs odd lengths."""
        half = (2 * filter_len - 3) // 2
        padr, padl = half, half
        # pad to even signal length
        if data_len % 2 != 0:
            padr += 1
        return padr, padl

    def adaptive_pad(self, data):
        """Pad H and W so that stride-2 filtering tiles the signal exactly."""
        padb, padt = self.get_pad(data.shape[-2], self.dim_size)
        padr, padl = self.get_pad(data.shape[-1], self.dim_size)
        return torch.nn.functional.pad(data, [padl, padr, padt, padb], mode=self.mode)

    def forward(self, data):
        if self.inverse is False:
            # analysis: (B, C, H, W) -> [LL, LH, HL, HH], each half-resolution
            _, channels, _, _ = data.shape
            padded = self.adaptive_pad(data)
            return [
                torch.nn.functional.conv2d(
                    padded, kernel.repeat(channels, 1, 1, 1), stride=2, groups=channels)
                for kernel in self.filters
            ]
        else:
            # synthesis: interleave the four bands per channel, then a single
            # grouped transposed convolution reconstructs the signal
            batch, channels, height, width = data[0].shape
            interleaved = torch.stack(data, dim=2).reshape(batch, -1, height, width)
            return torch.nn.functional.conv_transpose2d(
                interleaved, self.filters.repeat(channels, 1, 1, 1), stride=2, groups=channels)
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
class WaveletTransform3D(nn.Module):
    """Single-level three-dimensional (inverse) discrete wavelet transform.

    Example:
        loss = nn.MSELoss()
        data = torch.rand(1, 3, 10, 128, 256)
        DWT = WaveletTransform3D()
        IDWT = WaveletTransform3D(inverse=True)

        LLL, LLH, LHL, LHH, HLL, HLH, HHL, HHH = DWT(data)
        recdata = IDWT([LLL, LLH, LHL, LHH, HLL, HLH, HHL, HHH])
        print(loss(data, recdata))

        LLL, LLH, LHL, LHH, HLL, HLH, HHL, HHH = DWT_3D(data)
        recdata = IDWT_3D(LLL, LLH, LHL, LHH, HLL, HLH, HHL, HHH)
        print(loss(data, recdata))
    """

    def __init__(self, inverse=False, wavelet="haar", mode="constant"):
        super(WaveletTransform3D, self).__init__()
        self.mode = mode
        self.inverse = inverse
        wavelet = pywt.Wavelet(wavelet)

        if isinstance(wavelet, tuple):
            dec_lo, dec_hi, rec_lo, rec_hi = wavelet
        else:
            dec_lo, dec_hi, rec_lo, rec_hi = wavelet.filter_bank

        if inverse is False:
            # decomposition filters are applied time-reversed
            lo = torch.tensor(dec_lo).flip(-1).unsqueeze(0)
            hi = torch.tensor(dec_hi).flip(-1).unsqueeze(0)
        else:
            lo = torch.tensor(rec_lo).unsqueeze(0)
            hi = torch.tensor(rec_hi).unsqueeze(0)
        self.build_filters(lo, hi)

    def build_filters(self, lo, hi):
        """Build the eight separable 3D kernels from the 1D filter pair."""
        self.dim_size = lo.shape[-1]
        size = [self.dim_size] * 3
        # band order is fixed: LLL, LLH, LHL, LHH, HLL, HLH, HHL, HHH
        band_kernels = [
            self.outer(lo, self.outer(lo, lo)).reshape(size),
            self.outer(lo, self.outer(lo, hi)).reshape(size),
            self.outer(lo, self.outer(hi, lo)).reshape(size),
            self.outer(lo, self.outer(hi, hi)).reshape(size),
            self.outer(hi, self.outer(lo, lo)).reshape(size),
            self.outer(hi, self.outer(lo, hi)).reshape(size),
            self.outer(hi, self.outer(hi, lo)).reshape(size),
            self.outer(hi, self.outer(hi, hi)).reshape(size),
        ]
        filters = torch.stack(band_kernels, dim=0).unsqueeze(1)
        self.register_buffer('filters', filters)  # [8, 1, length, height, width]

    def outer(self, a: torch.Tensor, b: torch.Tensor):
        """Torch implementation of numpy's outer for 1d vectors."""
        col = torch.unsqueeze(torch.reshape(a, [-1]), dim=-1)
        row = torch.unsqueeze(torch.reshape(b, [-1]), dim=0)
        return col * row

    def get_pad(self, data_len: int, filter_len: int):
        """Left/right pad amounts; the right pad absorbs odd lengths."""
        half = (2 * filter_len - 3) // 2
        padr, padl = half, half
        # pad to even signal length
        if data_len % 2 != 0:
            padr += 1
        return padr, padl

    def adaptive_pad(self, data):
        """Pad T, H and W so that stride-2 filtering tiles the volume exactly."""
        pad_back, pad_front = self.get_pad(data.shape[-3], self.dim_size)
        pad_bottom, pad_top = self.get_pad(data.shape[-2], self.dim_size)
        pad_right, pad_left = self.get_pad(data.shape[-1], self.dim_size)
        return torch.nn.functional.pad(
            data, [pad_left, pad_right, pad_top, pad_bottom, pad_front, pad_back], mode=self.mode)

    def forward(self, data):
        if self.inverse is False:
            # analysis: (B, C, T, H, W) -> eight half-resolution bands
            _, channels, _, _, _ = data.shape
            padded = self.adaptive_pad(data)
            return [
                torch.nn.functional.conv3d(
                    padded, kernel.repeat(channels, 1, 1, 1, 1), stride=2, groups=channels)
                for kernel in self.filters
            ]
        else:
            # synthesis: interleave the eight bands per channel, then a single
            # grouped transposed convolution reconstructs the volume
            batch, channels, frames, height, width = data[0].shape
            interleaved = torch.stack(data, dim=2).reshape(batch, -1, frames, height, width)
            return torch.nn.functional.conv_transpose3d(
                interleaved, self.filters.repeat(channels, 1, 1, 1, 1), stride=2, groups=channels)
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
class FrequencyAttention(nn.Module):
    """Coordinate-attention-style gate: pools along H and W separately,
    mixes the two profiles through a shared bottleneck, and rescales the
    input with per-row and per-column sigmoid gates.
    """

    def __init__(self, in_dim, out_dim, reduction=32):
        super(FrequencyAttention, self).__init__()
        # directional pooling: one keeps H, the other keeps W
        self.avgpool_h = nn.AdaptiveAvgPool2d((None, 1))
        self.avgpool_w = nn.AdaptiveAvgPool2d((1, None))

        hidden_dim = max(8, in_dim // reduction)  # bottleneck width, floor of 8

        self.conv1 = nn.Conv2d(in_dim, hidden_dim, kernel_size=1, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(hidden_dim)
        self.act = activations.HardSwish(inplace=True)

        self.conv_h = nn.Conv2d(hidden_dim, out_dim, kernel_size=1, stride=1, padding=0)
        self.conv_w = nn.Conv2d(hidden_dim, out_dim, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        _, _, h, w = x.size()
        profile_h = self.avgpool_h(x)                      # (b, c, h, 1)
        profile_w = self.avgpool_w(x).permute(0, 1, 3, 2)  # (b, c, w, 1)

        # process both axis profiles jointly through the shared bottleneck
        joint = torch.cat([profile_h, profile_w], dim=2)   # (b, c, h+w, 1)
        joint = self.act(self.bn1(self.conv1(joint)))

        feat_h, feat_w = torch.split(joint, [h, w], dim=2)
        feat_w = feat_w.permute(0, 1, 3, 2)

        gate_h = self.conv_h(feat_h).sigmoid()
        gate_w = self.conv_w(feat_w).sigmoid()

        # rescale the identity path with both directional gates
        return x * gate_w * gate_h
|
| 349 |
+
|
| 350 |
+
|
| 351 |
+
class TF_AwareBlock(nn.Module):
    """Time-frequency aware block: a spatial mixing branch (two orthogonal
    large-kernel depthwise convs for low frequencies + one small-kernel conv
    for high frequencies, each gated by a FrequencyAttention) followed by an
    inverted-residual temporal mixer. Both branches are residual with
    layer-scale and stochastic depth.
    """

    def __init__(self, dim, mlp_ratio=4., drop=0., ls_init_value=1e-2, drop_path=0.1, large_kernel=51, small_kernel=5):
        super().__init__()

        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm1 = nn.BatchNorm2d(dim)
        self.norm2 = nn.BatchNorm2d(dim)

        # orthogonal large-kernel depthwise convs capture low-frequency structure
        self.lk1 = nn.Sequential(
            nn.Conv2d(dim, dim, kernel_size=(large_kernel, 5), groups=dim, padding="same"),
            nn.BatchNorm2d(dim)
        )

        self.lk2 = nn.Sequential(
            nn.Conv2d(dim, dim, kernel_size=(5, large_kernel), groups=dim, padding="same"),
            nn.BatchNorm2d(dim)
        )

        # small-kernel depthwise conv captures high-frequency detail
        self.sk = nn.Sequential(
            nn.Conv2d(dim, dim, kernel_size=(small_kernel, small_kernel), groups=dim, padding="same"),
            nn.BatchNorm2d(dim)
        )

        self.low_frequency_attn = FrequencyAttention(in_dim=dim, out_dim=dim, reduction=4)
        self.high_frequency_attn = FrequencyAttention(in_dim=dim, out_dim=dim, reduction=4)

        self.temporal_mixer = InvertedResidual(in_chs=dim, out_chs=dim, dw_kernel_size=7, exp_ratio=mlp_ratio,
                                               se_layer=partial(SqueezeExcite, rd_ratio=0.25), noskip=True)

        # one learnable per-channel scale per residual branch
        self.layer_scale_1 = nn.Parameter(ls_init_value * torch.ones((dim)), requires_grad=True)
        self.layer_scale_2 = nn.Parameter(ls_init_value * torch.ones((dim)), requires_grad=True)

    @torch.jit.ignore
    def no_weight_decay(self):
        return {'layer_scale_1', 'layer_scale_2'}

    def forward(self, x):
        attn = self.norm1(x)
        x = x + self.drop_path(self.layer_scale_1.unsqueeze(-1).unsqueeze(-1) * (self.low_frequency_attn(self.lk1(attn) + self.lk2(attn)) + self.high_frequency_attn(self.sk(attn))))
        # FIX: the temporal branch previously reused layer_scale_1; layer_scale_2
        # was defined (and exempted from weight decay) but never applied.
        x = x + self.drop_path(self.layer_scale_2.unsqueeze(-1).unsqueeze(-1) * self.temporal_mixer(self.norm2(x)))
        return x
|
| 393 |
+
|
| 394 |
+
|
| 395 |
+
class TF_AwareBlocks(nn.Module):
    """A sequential stack of TF_AwareBlock modules over (b, c, t, h, w) input.

    Three operating modes, selected at construction:
      * use_hid=True:   fuse an external skip via a 3x3 conv before the stack;
                        forward returns only the mixed tensor.
      * default:        run the stack and also hand back the flattened features
                        as a skip for a later stage; forward returns (x, skip).
      * use_bottleneck: bottleneck mode; when it equals "decompose" the input
                        is additionally split by a 3D DWT, only the LLL band is
                        processed, and the result is re-synthesized.
    """

    def __init__(self, dim, num_blocks, drop_path, use_bottleneck=None, use_hid=False, mlp_ratio=4., drop=0., ls_init_value=1e-2, large_kernel=51, small_kernel=5):
        super().__init__()
        assert len(drop_path) == num_blocks, "drop_path list doesn't match num_blocks"
        self.use_hid = use_hid
        self.use_bottleneck = use_bottleneck

        self.blocks = nn.Sequential(*[
            TF_AwareBlock(dim, mlp_ratio, drop, ls_init_value, drop_path[i], large_kernel, small_kernel)
            for i in range(num_blocks)
        ])
        self.concat_block = nn.Conv2d(dim * 2, dim, 3, 1, 1) if use_hid == True else None

        wavelet_bottleneck = use_bottleneck == "decompose"
        self.DWT = WaveletTransform3D(inverse=False) if wavelet_bottleneck else None
        self.IDWT = WaveletTransform3D(inverse=True) if wavelet_bottleneck else None

    def forward(self, x, skip=None):  # b, c ,t, h, w
        if self.use_bottleneck is not None:
            decompose = self.use_bottleneck == "decompose"
            coeffs = self.DWT(x) if decompose else [x, None, None, None, None, None, None, None]
            lll = coeffs[0]
            _, _, frames, _, _ = lll.shape
            lll = rearrange(lll, 'b c t h w -> b (c t) h w')
            lll = self.blocks(lll)
            lll = rearrange(lll, 'b (c t) h w -> b c t h w', t=frames)
            # re-synthesize with the untouched high-frequency bands
            return self.IDWT([lll] + coeffs[1:]) if decompose else lll

        _, _, frames, _, _ = x.shape
        x = rearrange(x, 'b c t h w -> b (c t) h w')
        if self.concat_block is not None:
            # fuse the external skip, then mix
            x = self.concat_block(torch.cat([x, skip], dim=1))
            x = self.blocks(x)
            return rearrange(x, 'b (c t) h w -> b c t h w', t=frames)

        # plain mode: expose the flattened features as a skip for later reuse
        skip = self.blocks(x)
        x = rearrange(skip, 'b (c t) h w -> b c t h w', t=frames)
        return x, skip
|
| 434 |
+
|
| 435 |
+
|
| 436 |
+
|
| 437 |
+
class Wavelet_3D_Embedding(nn.Module):
    """Wavelet-based downsampling embedding with a raw-frame side branch.

    The main branch convolves the features, decomposes them with a 3D DWT,
    keeps the spatially-low bands (LLL/LLH concatenated along time) and
    projects them to ``out_dim``. The side branch decomposes the raw frames
    and projects all eight of their bands. The spatially-high bands of the
    main branch are returned as a skip for later reconstruction.
    """

    def __init__(self, in_dim, out_dim, emb_dim=None):
        super().__init__()
        emb_dim = in_dim if emb_dim == None else emb_dim

        self.conv_0 = nn.Sequential(
            nn.Conv3d(in_dim, in_dim, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1),),
            nn.BatchNorm3d(in_dim),
            nn.GELU(),
        )
        self.conv_1 = nn.Sequential(
            nn.Conv3d(in_dim, out_dim, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1),),
            nn.BatchNorm3d(out_dim),
            nn.GELU(),
        )

        # projects the 4x-channel stack of raw-frame wavelet bands
        self.conv_emb = nn.Conv3d(emb_dim * 4, out_dim, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1),)

        self.DWT = WaveletTransform3D(inverse=False)

    def forward(self, x, x_emb=None):
        # embedding branch: decompose raw frames, group temporally-low vs
        # temporally-high bands along channels, then stack along time
        LLL, LLH, LHL, LHH, HLL, HLH, HHL, HHH = self.DWT(x_emb)
        temporal_lo = torch.cat([LLL, LHL, HLL, HHL], dim=1)
        temporal_hi = torch.cat([LLH, LHH, HLH, HHH], dim=1)
        emb = self.conv_emb(torch.cat([temporal_lo, temporal_hi], dim=2))

        # downsampling branch: convolve then decompose the feature volume
        feats = self.conv_0(x)
        LLL, LLH, LHL, LHH, HLL, HLH, HHL, HHH = self.DWT(feats)
        spatio_lo_coeffs = torch.cat([LLL, LLH], dim=2)
        spatio_hi_coeffs = torch.cat([LHL, LHH, HLL, HLH, HHL, HHH], dim=1)
        feats = self.conv_1(spatio_lo_coeffs)

        return (feats + emb), spatio_hi_coeffs
|
| 466 |
+
|
| 467 |
+
|
| 468 |
+
class Wavelet_3D_Reconstruction(nn.Module):
    """Inverse of Wavelet_3D_Embedding: upsamples by wavelet synthesis.

    The low-frequency path is projected and split back into LLL/LLH along
    time; the skip carries the six spatially-high bands, refined by a grouped
    conv (one group per band) before the inverse 3D DWT fuses all eight.
    """

    def __init__(self, in_dim, out_dim, hi_dim):
        super().__init__()
        self.conv_0 = nn.Sequential(
            nn.Conv3d(in_dim, out_dim, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1),),
            nn.BatchNorm3d(out_dim),
            nn.GELU(),
        )

        # groups=6 keeps the six high-frequency bands independent
        self.conv_hi = nn.Sequential(
            nn.Conv3d(int(hi_dim * 6), int(out_dim * 6), kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), groups=6),
            nn.BatchNorm3d(out_dim * 6),
            nn.GELU(),
        )

        self.IDWT = WaveletTransform3D(inverse=True)

    def forward(self, x, skip_hi=None):
        lo_bands = torch.chunk(self.conv_0(x), chunks=2, dim=2)
        hi_bands = torch.chunk(self.conv_hi(skip_hi), chunks=6, dim=1)
        LLL, LLH = lo_bands
        LHL, LHH, HLL, HLH, HHL, HHH = hi_bands
        return self.IDWT([LLL, LLH, LHL, LHH, HLL, HLH, HHL, HHH])
|
| 486 |
+
|
| 487 |
+
|
| 488 |
+
class WaST_level1(nn.Module):
    """One-level WaST video prediction network.

    Pipeline: conv stem -> translator1 (keeps a skip) -> wavelet embedding
    (keeps high-frequency skip) -> bottleneck translator -> wavelet
    reconstruction -> translator2 (consumes translator1's skip) -> conv head.

    Args:
        in_shape: (frames, channels, H, W) of one input clip.
        encoder_dim: base channel width of the stem.
        block_list: number of TF_AwareBlock per translator stage.
        drop_path_rate: max stochastic-depth rate (linear schedule).
        mlp_ratio: expansion ratio inside each block's temporal mixer.
    """

    def __init__(self, in_shape, encoder_dim, block_list=[2, 2, 2], drop_path_rate=0.1, mlp_ratio=4., **kwargs):
        super().__init__()
        frame, in_dim, H, W = in_shape
        self.block_list = block_list
        dp_lists = self._drop_path_schedule(drop_path_rate)

        self.conv_in = nn.Sequential(
            nn.Conv3d(
                in_dim,
                encoder_dim,
                kernel_size=(3, 3, 3),
                stride=(1, 1, 1),
                padding=(1, 1, 1),
            ),
            nn.BatchNorm3d(encoder_dim),
            nn.GELU()
        )
        self.translator1 = TF_AwareBlocks(dim=encoder_dim * frame, num_blocks=block_list[0], drop_path=dp_lists[0], mlp_ratio=mlp_ratio, large_kernel=51, small_kernel=5)

        self.wavelet_embed1 = Wavelet_3D_Embedding(in_dim=encoder_dim, out_dim=encoder_dim * 2, emb_dim=in_dim)  # wavelet_recon2: hi_dim = in_dim

        self.bottleneck_translator = TF_AwareBlocks(dim=encoder_dim * 2 * frame, num_blocks=block_list[1], drop_path=dp_lists[1], use_bottleneck=True, mlp_ratio=mlp_ratio, large_kernel=21, small_kernel=5)

        self.wavelet_recon1 = Wavelet_3D_Reconstruction(in_dim=encoder_dim * 2, out_dim=encoder_dim, hi_dim=encoder_dim)
        self.translator2 = TF_AwareBlocks(dim=encoder_dim * frame, num_blocks=block_list[2], drop_path=dp_lists[2], use_hid=True, mlp_ratio=mlp_ratio, large_kernel=51, small_kernel=5)

        self.conv_out = nn.Sequential(
            nn.BatchNorm3d(encoder_dim),
            nn.GELU(),
            nn.Conv3d(
                encoder_dim,
                in_dim,
                kernel_size=(3, 3, 3),
                stride=(1, 1, 1),
                padding=(1, 1, 1))
        )

    def _drop_path_schedule(self, drop_path_rate):
        # Linear drop-path ramp over all blocks, sliced per translator stage.
        flat = [x.item() for x in torch.linspace(0, drop_path_rate, sum(self.block_list))]
        ends = list(accumulate(self.block_list))
        return [flat[start:end] for start, end in zip([0] + ends, ends)]

    def update_drop_path(self, drop_path_rate):
        """Re-assign per-block drop-path probabilities in place (e.g. for a
        drop-path warm-up schedule during training)."""
        dp_lists = self._drop_path_schedule(drop_path_rate)
        stacks = [self.translator1.blocks, self.bottleneck_translator.blocks, self.translator2.blocks]
        for stack, stage_probs in zip(stacks, dp_lists):
            for block, prob in zip(stack, stage_probs):
                block.drop_path.drop_prob = prob

    def forward(self, x):
        x = rearrange(x, 'b t c h w -> b c t h w')

        raw_frames = x
        x = self.conv_in(x)

        x, translator_skip = self.translator1(x)
        x, hi_freq_skip = self.wavelet_embed1(x, x_emb=raw_frames)

        x = self.bottleneck_translator(x)

        x = self.wavelet_recon1(x, hi_freq_skip)
        x = self.translator2(x, translator_skip)

        x = self.conv_out(x)

        return rearrange(x, 'b c t h w -> b t c h w')
|
| 555 |
+
|
| 556 |
+
|
| 557 |
+
|
| 558 |
+
|
| 559 |
+
if __name__ == "__main__":
    # Smoke test + FLOP count for a small WaST_level1 configuration.
    from fvcore.nn import FlopCountAnalysis, flop_count_table
    # import os
    # os.environ["CUDA_VISIBLE_DEVICES"] = "3"

    net = WaST_level1(in_shape=(4, 2, 32, 32), encoder_dim=20, block_list=[2, 8, 2]).cuda()
    print(net)

    sample = torch.rand(1, 4, 2, 32, 32).cuda()
    prediction = net(sample)
    print(f"input shape is {sample.shape}, output shape is {prediction.shape}...")

    flops = FlopCountAnalysis(net, sample)
    print(flop_count_table(flops))
|
| 572 |
+
|
| 573 |
+
|
| 574 |
+
|
| 575 |
+
|
| 576 |
+
|
| 577 |
+
|